Author: Gerd Isenberg
Date: 10:57:05 10/10/05
Go up one level in this thread
On October 10, 2005 at 12:09:14, Dann Corbit wrote:
>#include <string.h>
>#include <limits.h>
>#include <stdlib.h>
>#include <ctype.h>
>
>/* The default delimiters are chosen as some ordinary white space characters: */
>static const char default_delimiters[] = {' ', '\n', '\t', '\r', '\f', 0};
>
>/*
> * tokenize() works like a reentrant strtok(): it splits 'string' into tokens,
> * where a token is a maximal run of characters that are not in
> * 'delimiter_list'. Delimiters in front of a token are skipped.
> *
> * Parameters:
> *   string         - buffer to scan on the first call; pass NULL on later calls
> *                    to continue from the position saved in '*placeholder'.
> *   delimiter_list - null-terminated set of separator characters; it may
> *                    change from call to call. NULL or an empty string selects
> *                    'default_delimiters' (see above).
> *   placeholder    - caller-owned resume pointer. It is written on every call
> *                    that gets past the NULL checks and is dereferenced without
> *                    a NULL check, so it must point to a valid pointer.
> *
> * Returns a pointer to the next token (it points into the caller's buffer), or
> * NULL when no token remains or when both 'string' and '*placeholder' are NULL.
> *
> * tokenize() modifies the memory pointed to by 'string': the delimiter that
> * ends each token is overwritten with a null character.
> */
>char *tokenize(char *string, const char *delimiter_list, char
>**placeholder)
>{
> if (delimiter_list == NULL)
> delimiter_list = default_delimiters;
>
> if (delimiter_list[0] == 0) /* an empty list also selects the defaults */
> delimiter_list = default_delimiters;
>
> if (string == NULL) /* continuation call: resume where the last call stopped */
> string = *placeholder;
>
> if (string == NULL) /* nothing saved either: there is no input to scan */
> return NULL;
>/*
> * Skip leading delimiters: strspn() computes the length of the initial segment
> * of 'string' that consists entirely of characters from 'delimiter_list'.
> */
> string += strspn(string, delimiter_list);
> if (!string[0]) {
> *placeholder = string; /* park on the terminator so later calls return NULL too */
> return NULL;
> } else {
> char *token;
> token = string;
>/*
> * Find the end of the token: strpbrk() locates the first character of 'token'
> * that also occurs in 'delimiter_list', and yields NULL if there is none.
> */
> string = strpbrk(token, delimiter_list);
> if (string == NULL)
> *placeholder = token + strlen(token); /* last token: resume at the terminator */
> else {
> *string++ = 0; /* end the token in place, then step past the delimiter */
> *placeholder = string;
> }
> return token;
> }
>}
>
>#ifdef UNIT_TEST /* Self-test driver, compiled only when UNIT_TEST is defined.
> * main() below tokenizes test_string0..3 with the whitespace+punctuation
> * delimiter set and test_string4..7 with the default delimiters. The second
> * group repeats the text of the first, presumably because tokenize()
> * overwrites the buffer it scans.
> * NOTE(review): the string literals below are each split across two lines,
> * apparently by the forum's line wrapping - rejoin each one onto a single
> * line before compiling. */
>char test_string0[] = "This is a test. This is only a test. If it
>were an actual emergency, you would be dead.";
>char test_string1[] = "This is a also a test. This is only a test.
>If it were an actual emergency, you would be dead. 12345";
>char test_string2[] = "The quick brown fox jumped over the lazy dog's
>back 1234567890 times.";
>char test_string3[] = " \t\r\n\fThe quick brown fox jumped over the
>lazy dog's back 1234567890 times.";
>char test_string4[] = "This is a test. This is only a test. If it
>were an actual emergency, you would be dead.";
>char test_string5[] = "This is a also a test. This is only a test.
>If it were an actual emergency, you would be dead. 12345";
>char test_string6[] = "The quick brown fox jumped over the lazy dog's
>back 1234567890 times.";
>char test_string7[] = " \t\r\n\fThe quick brown fox jumped over the
>lazy dog's back 1234567890 times.";
>
>#include <stdio.h>
>
>char whitespace[UCHAR_MAX + 1]; /* delimiter set filled in by init_whitespace() */
>
>/*
> * init_whitespace() fills 'whitespace' with every character code from 0 up to
> * UCHAR_MAX - 1 for which isspace() or ispunct() is true, so that the test
> * treats whitespace and punctuation marks alike as token separators.
> *
> * No terminating null character is written here; the function relies on
> * 'whitespace' being a file-scope array, which starts out zero-filled.
> * NOTE(review): the loop condition is 'i < UCHAR_MAX', so the value UCHAR_MAX
> * itself is never examined - confirm whether '<=' was intended.
> */
>void init_whitespace()
>{
> int i;
> int index = 0; /* next free slot in 'whitespace' */
> for (i = 0; i < UCHAR_MAX; i++) {
> if (isspace(i)) {
> whitespace[index++] = (char) i;
> }
> if (ispunct(i)) {
> whitespace[index++] = (char) i;
> }
> }
>}
>
>void spin_test(char *test_string, char *white)
>{ /*
>   * Splits 'test_string' in place with tokenize(), using 'white' as the
>   * delimiter list, and prints each token on its own line with puts().
>   */
> char *p = NULL; /* tokenize() keeps its resume position here between calls */
> char *token;
> token = tokenize(test_string, white, &p); /* first call hands over the buffer */
> if (token)
> puts(token);
>
> while (token) {
> token = tokenize(NULL, white, &p); /* NULL: continue from the position in p */
> if (token) puts(token);
> }
>
>}
>int main(void)
>{ /*
>   * Test driver: prints the tokens of test_string0..3 using the generated
>   * whitespace+punctuation delimiter set, then those of test_string4..7 using
>   * tokenize()'s default delimiters. Always returns 0.
>   */
> init_whitespace(); /* must run before 'whitespace' is used as a delimiter list */
> puts("Whitespace is whitespace+punctuation");
> spin_test(test_string0, whitespace);
> spin_test(test_string1, whitespace);
> spin_test(test_string2, whitespace);
> spin_test(test_string3, whitespace);
> puts("Whitespace is simple whitespace");
> spin_test(test_string4, NULL); /* NULL makes tokenize() use default_delimiters */
> spin_test(test_string5, NULL);
> spin_test(test_string6, NULL);
> spin_test(test_string7, NULL);
> return 0;
>}
>#endif
Hi Dann,
Thanks for sharing. I was not aware of the strspn and strpbrk functions.
Some minor nitpicking on the spin_test routine ;-)
Gerd
/*
 * spin_test() splits 'test_string' in place with tokenize(), using 'white' as
 * the delimiter list, and prints every token on its own line with puts().
 * The buffer is modified, because tokenize() writes null characters into it.
 */
void spin_test(char *test_string, char *white)
{
    char *resume = NULL;        /* tokenize() stores its continuation point here */
    char *input = test_string;  /* only the first call receives the buffer */
    char *word;

    for (;;) {
        word = tokenize(input, white, &resume);
        if (word == NULL) {
            break;
        }
        puts(word);
        input = NULL;           /* later calls continue from 'resume' */
    }
}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.