Author: Dann Corbit
Date: 12:31:45 10/10/05
Go up one level in this thread
On October 10, 2005 at 13:57:05, Gerd Isenberg wrote:
>On October 10, 2005 at 12:09:14, Dann Corbit wrote:
>
>>#include <string.h>
>>#include <limits.h>
>>#include <stdlib.h>
>>#include <ctype.h>
>>
/* Default delimiters: a handful of ordinary white-space characters. */
static const char default_delimiters[] = {' ', '\n', '\t', '\r', '\f', 0};

/*
 * tokenize() is similar to a reentrant version of strtok(): it splits
 * 'string' into tokens, where tokens are substrings separated by any of the
 * characters in 'delimiter_list'.
 *
 * Parameters:
 *   string          Buffer to scan on the first call; NULL on later calls to
 *                   continue from the position saved in '*placeholder'.
 *   delimiter_list  NUL-terminated set of separator characters.  It may
 *                   change from call to call.  NULL or an empty string
 *                   selects 'default_delimiters'.
 *   placeholder     Caller-owned slot in which the scan position is kept
 *                   between calls.  All state lives here; the function has
 *                   no static data of its own.
 *
 * Returns a pointer to the next token, or NULL when no token is left, when
 * there is no buffer to scan, or when 'placeholder' is NULL.
 *
 * The buffer is modified: the delimiter that ends each token is overwritten
 * with a NUL character, so 'string' must not point to read-only memory.
 */
char *tokenize(char *string, const char *delimiter_list, char **placeholder)
{
    char *token;
    char *end;

    /* Without a slot for the scan position nothing can be saved or resumed. */
    if (placeholder == NULL) {
        return NULL;
    }

    if (delimiter_list == NULL || delimiter_list[0] == '\0') {
        delimiter_list = default_delimiters;
    }

    if (string == NULL) {
        string = *placeholder;
    }
    if (string == NULL) {
        return NULL;
    }

    /*
     * strspn() gives the length of the leading run made up only of
     * delimiter characters; stepping over it lands on the token start.
     */
    token = string + strspn(string, delimiter_list);
    if (*token == '\0') {
        /* Only delimiters (or nothing) remained: park on the terminator. */
        *placeholder = token;
        return NULL;
    }

    /* strpbrk() finds the first delimiter after the token, if there is one. */
    end = strpbrk(token, delimiter_list);
    if (end == NULL) {
        /* Last token: the next call resumes at the terminator. */
        *placeholder = token + strlen(token);
    } else {
        *end = '\0';
        *placeholder = end + 1;
    }
    return token;
}
>>
>>#ifdef UNIT_TEST
/*
 * Sample inputs for the unit test.  Strings 4-7 are copies of strings 0-3:
 * tokenizing writes NUL characters into the buffer, so each pass over a
 * sentence needs its own fresh copy.  Long literals are split with
 * adjacent-literal concatenation so that no literal spans a line break.
 */
char test_string0[] =
    "This is a test. This is only a test. If it "
    "were an actual emergency, you would be dead.";
char test_string1[] =
    "This is a also a test. This is only a test. "
    "If it were an actual emergency, you would be dead. 12345";
char test_string2[] =
    "The quick brown fox jumped over the lazy dog's "
    "back 1234567890 times.";
char test_string3[] =
    " \t\r\n\fThe quick brown fox jumped over the "
    "lazy dog's back 1234567890 times.";
char test_string4[] =
    "This is a test. This is only a test. If it "
    "were an actual emergency, you would be dead.";
char test_string5[] =
    "This is a also a test. This is only a test. "
    "If it were an actual emergency, you would be dead. 12345";
char test_string6[] =
    "The quick brown fox jumped over the lazy dog's "
    "back 1234567890 times.";
char test_string7[] =
    " \t\r\n\fThe quick brown fox jumped over the "
    "lazy dog's back 1234567890 times.";
>>
>>#include <stdio.h>
>>
/*
 * Delimiter list filled in by init_whitespace().  One slot per possible
 * non-NUL character value plus one for the terminating NUL.
 */
char whitespace[UCHAR_MAX + 1];

/*
 * Build the delimiter list used by the test: every character that
 * isspace() or ispunct() accepts becomes a token separator.
 *
 * The list is rebuilt from scratch and explicitly NUL-terminated on every
 * call, so calling this more than once is safe.
 */
void init_whitespace(void)
{
    int c;
    int count = 0;

    /*
     * Start at 1: a NUL can never be a delimiter because it terminates the
     * list.  The bound is inclusive so that UCHAR_MAX itself is examined.
     */
    for (c = 1; c <= UCHAR_MAX; c++) {
        /* One combined test stores each character at most once. */
        if (isspace(c) || ispunct(c)) {
            whitespace[count++] = (char) c;
        }
    }
    whitespace[count] = '\0';
}
>>
/*
 * Print every token of 'test_string', one per line, using 'white' as the
 * delimiter list (NULL selects tokenize()'s default list).
 * The buffer is modified by tokenize().
 */
void spin_test(char *test_string, char *white)
{
    char *state = NULL;
    char *word = tokenize(test_string, white, &state);

    /* No token at all: nothing to print. */
    if (word == NULL) {
        return;
    }
    puts(word);

    /* Keep pulling tokens from the saved position until none is left. */
    for (;;) {
        word = tokenize(NULL, white, &state);
        if (word == NULL) {
            break;
        }
        puts(word);
    }
}
>>int main(void)
>>{
>> init_whitespace();
>> puts("Whitespace is whitespace+punctuation");
>> spin_test(test_string0, whitespace);
>> spin_test(test_string1, whitespace);
>> spin_test(test_string2, whitespace);
>> spin_test(test_string3, whitespace);
>> puts("Whitespace is simple whitespace");
>> spin_test(test_string4, NULL);
>> spin_test(test_string5, NULL);
>> spin_test(test_string6, NULL);
>> spin_test(test_string7, NULL);
>> return 0;
>>}
>>#endif
>
>
>Hi Dann,
>thanks for sharing. I was not aware of the strspn and strpbrk functions.
>Some minor nitpicking on the spin_test routine ;-)
>
>Gerd
>
/*
 * Print every token of 'test_string', one per line, using 'white' as the
 * delimiter list.  The buffer is modified by tokenize().
 */
void spin_test(char *test_string, char *white)
{
    char *state = NULL;
    char *word;

    /* First call passes the buffer; later calls pass NULL to continue. */
    for (word = tokenize(test_string, white, &state);
         word != NULL;
         word = tokenize(NULL, white, &state)) {
        puts(word);
    }
}
That's a lot better looking than my quick hack.
http://cap.connx.com/chess-engines/new-approach/str.c
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.