/*
 * Author: Dann Corbit
 * Date: 09:09:14 10/10/05
 */
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#include <ctype.h>
/* Fallback separators used when the caller supplies none: ordinary white space. */
static const char default_delimiters[] = " \n\t\r\f";

/*
 * tokenize() - a strtok()-like tokenizer that keeps its state in a
 * caller-supplied pointer instead of a hidden static variable.
 *
 * Tokens are the substrings of 'string' separated by runs of characters
 * taken from 'delimiter_list'.
 *
 *   string          Buffer to scan on the first call.  Pass NULL on later
 *                   calls to continue from the position saved in
 *                   '*placeholder'.
 *   delimiter_list  Characters that separate tokens; may change from call
 *                   to call.  NULL or an empty string selects
 *                   'default_delimiters'.
 *   placeholder     Caller-owned cursor.  It is read only when 'string' is
 *                   NULL and is written whenever a scan takes place.
 *
 * Returns a pointer to the next token, or NULL when no token remains
 * (including the case where both 'string' and '*placeholder' are NULL).
 *
 * The buffer is modified in place: the delimiter that ends each token is
 * overwritten with a null character.
 */
char *tokenize(char *string, const char *delimiter_list, char **placeholder)
{
    const char *separators = delimiter_list;
    char *cursor = string;
    char *token_end;

    if (separators == NULL || *separators == '\0') {
        separators = default_delimiters;
    }

    /* A NULL 'string' means "resume where the previous call stopped". */
    if (cursor == NULL) {
        cursor = *placeholder;
    }
    if (cursor == NULL) {
        return NULL;
    }

    /* Step over any leading separators. */
    cursor += strspn(cursor, separators);
    if (*cursor == '\0') {
        /* Nothing but separators were left; park the cursor on the terminator. */
        *placeholder = cursor;
        return NULL;
    }

    /* strcspn() gives the length of the run that contains no separator. */
    token_end = cursor + strcspn(cursor, separators);
    if (*token_end == '\0') {
        /* The token runs to the end of the buffer. */
        *placeholder = token_end;
    } else {
        /* Cut the token off and resume just past the separator next time. */
        *token_end = '\0';
        *placeholder = token_end + 1;
    }
    return cursor;
}
#ifdef UNIT_TEST
/*
 * Sample inputs for the tokenizer test.  tokenize() writes null characters
 * into the buffer it scans, so every test run needs its own writable copy:
 * test_string4..7 repeat the text of test_string0..3.
 *
 * Each literal is split with adjacent-string concatenation so that no
 * string constant spans a physical line break.
 */
char test_string0[] = "This is a test. This is only a test. If it "
                      "were an actual emergency, you would be dead.";
char test_string1[] = "This is a also a test. This is only a test. "
                      "If it were an actual emergency, you would be dead. 12345";
char test_string2[] = "The quick brown fox jumped over the lazy dog's "
                      "back 1234567890 times.";
char test_string3[] = " \t\r\n\fThe quick brown fox jumped over the "
                      "lazy dog's back 1234567890 times.";
char test_string4[] = "This is a test. This is only a test. If it "
                      "were an actual emergency, you would be dead.";
char test_string5[] = "This is a also a test. This is only a test. "
                      "If it were an actual emergency, you would be dead. 12345";
char test_string6[] = "The quick brown fox jumped over the lazy dog's "
                      "back 1234567890 times.";
char test_string7[] = " \t\r\n\fThe quick brown fox jumped over the "
                      "lazy dog's back 1234567890 times.";
#include <stdio.h>
/* Delimiter list filled in by init_whitespace(); always null-terminated. */
char whitespace[UCHAR_MAX + 1];

/*
 * Fill 'whitespace' with every character code in 1..UCHAR_MAX for which
 * isspace() or ispunct() is true, so that the test treats both white space
 * and punctuation marks as token separators.
 *
 * The list is rebuilt from scratch and explicitly terminated on every
 * call, so calling this function more than once is safe.
 */
void init_whitespace(void)
{
    int i;
    int index = 0;

    /*
     * Code 0 is skipped because it is the string terminator and can never
     * be a usable delimiter.  The upper bound is inclusive so that
     * UCHAR_MAX itself is examined; at most UCHAR_MAX entries are stored,
     * which leaves room for the terminator.
     */
    for (i = 1; i <= UCHAR_MAX; i++) {
        if (isspace(i) || ispunct(i)) {
            whitespace[index++] = (char) i;
        }
    }
    whitespace[index] = '\0';
}
/*
 * Split 'test_string' into tokens with tokenize() and print each token on
 * its own line with puts().
 *
 *   test_string  Writable buffer to tokenize; it is handed to tokenize(),
 *                which modifies it in place.
 *   white        Delimiter list forwarded unchanged to tokenize() on every
 *                call.
 */
void spin_test(char *test_string, char *white)
{
    char *cursor = NULL;
    char *word;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (word = tokenize(test_string, white, &cursor);
         word != NULL;
         word = tokenize(NULL, white, &cursor)) {
        puts(word);
    }
}
/*
 * Test driver.  Runs spin_test() over test_string0..3 using the
 * white-space-plus-punctuation list built by init_whitespace(), then over
 * test_string4..7 with a NULL delimiter list.  Always returns 0.
 */
int main(void)
{
    char *const punctuation_inputs[] = {
        test_string0, test_string1, test_string2, test_string3
    };
    char *const default_inputs[] = {
        test_string4, test_string5, test_string6, test_string7
    };
    size_t i;

    init_whitespace();

    puts("Whitespace is whitespace+punctuation");
    for (i = 0; i < sizeof punctuation_inputs / sizeof punctuation_inputs[0]; i++) {
        spin_test(punctuation_inputs[i], whitespace);
    }

    puts("Whitespace is simple whitespace");
    for (i = 0; i < sizeof default_inputs / sizeof default_inputs[0]; i++) {
        spin_test(default_inputs[i], NULL);
    }

    return 0;
}
#endif
/*
 * This page took 0 seconds to execute
 * Last modified: Thu, 15 Apr 21 08:11:13 -0700
 * Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.
 */