Author: Gerd Isenberg
Date: 12:42:46 10/10/05
Go up one level in this thread
On October 10, 2005 at 15:35:19, Robert Hyatt wrote:
>On October 10, 2005 at 13:57:05, Gerd Isenberg wrote:
>
>>On October 10, 2005 at 12:09:14, Dann Corbit wrote:
>>
>>>#include <string.h>
>>>#include <limits.h>
>>>#include <stdlib.h>
>>>#include <ctype.h>
>>>
>>>/* The default delimiters are chosen as some ordinary white space characters: */
>>>static const char default_delimiters[] = {' ', '\n', '\t', '\r', '\f', 0};
>>>
>>>/*
>>> * The tokenize() function is similar to a reentrant version of strtok().
>>> * It parses tokens from 'string', where tokens are substrings separated by
>>> * characters from 'delimiter_list'.
>>> * To get the first token from 'string', tokenize() is called with 'string' as
>>> * its first parameter.
>>> * Remaining tokens from 'string' are obtained by calling tokenize() with NULL
>>> * for the first parameter.
>>> * The string of delimiters, identified by 'delimiter_list', can change from
>>> * call to call.
>>> * If the string of delimiters is NULL or empty, then the standard list
>>> * 'default_delimiters' (see above) is used.
>>> * tokenize() modifies the memory pointed to by 'string', because it writes null
>>> * characters into the buffer.
>>> */
>>>char *tokenize(char *string, const char *delimiter_list, char
>>>**placeholder)
>>>{
>>> if (delimiter_list == NULL)
>>> delimiter_list = default_delimiters;
>>>
>>> if (delimiter_list[0] == 0)
>>> delimiter_list = default_delimiters;
>>>
>>> if (string == NULL)
>>> string = *placeholder;
>>>
>>> if (string == NULL)
>>> return NULL;
>>>/*
>>> * The strspn() function computes the length of the initial segment of the
>>> * first string that consists entirely of characters contained in the second
>>> * string.
>>> */
>>> string += strspn(string, delimiter_list);
>>> if (!string[0]) {
>>> *placeholder = string;
>>> return NULL;
>>> } else {
>>> char *token;
>>> token = string;
>>>/*
>>> * The strpbrk() function finds the first occurrence, in the first string,
>>> * of any character contained in the second string.
>>> */
>>> string = strpbrk(token, delimiter_list);
>>> if (string == NULL)
>>> *placeholder = token + strlen(token);
>>> else {
>>> *string++ = 0;
>>> *placeholder = string;
>>> }
>>> return token;
>>> }
>>>}
>>>
>>>#ifdef UNIT_TEST
>>>char test_string0[] = "This is a test. This is only a test. If it
>>>were an actual emergency, you would be dead.";
>>>char test_string1[] = "This is a also a test. This is only a test.
>>>If it were an actual emergency, you would be dead. 12345";
>>>char test_string2[] = "The quick brown fox jumped over the lazy dog's
>>>back 1234567890 times.";
>>>char test_string3[] = " \t\r\n\fThe quick brown fox jumped over the
>>>lazy dog's back 1234567890 times.";
>>>char test_string4[] = "This is a test. This is only a test. If it
>>>were an actual emergency, you would be dead.";
>>>char test_string5[] = "This is a also a test. This is only a test.
>>>If it were an actual emergency, you would be dead. 12345";
>>>char test_string6[] = "The quick brown fox jumped over the lazy dog's
>>>back 1234567890 times.";
>>>char test_string7[] = " \t\r\n\fThe quick brown fox jumped over the
>>>lazy dog's back 1234567890 times.";
>>>
>>>#include <stdio.h>
>>>
>>>char whitespace[UCHAR_MAX + 1];
>>>
>>>/* This test will create token separators as any whitespace or any punctuation
>>>marks: */
>>>void init_whitespace()
>>>{
>>> int i;
>>> int index = 0;
>>> for (i = 0; i < UCHAR_MAX; i++) {
>>> if (isspace(i)) {
>>> whitespace[index++] = (char) i;
>>> }
>>> if (ispunct(i)) {
>>> whitespace[index++] = (char) i;
>>> }
>>> }
>>>}
>>>
>>>void spin_test(char *test_string, char *white)
>>>{
>>> char *p = NULL;
>>> char *token;
>>> token = tokenize(test_string, white, &p);
>>> if (token)
>>> puts(token);
>>>
>>> while (token) {
>>> token = tokenize(NULL, white, &p);
>>> if (token) puts(token);
>>> }
>>>
>>>}
>>>int main(void)
>>>{
>>> init_whitespace();
>>> puts("Whitespace is whitespace+punctuation");
>>> spin_test(test_string0, whitespace);
>>> spin_test(test_string1, whitespace);
>>> spin_test(test_string2, whitespace);
>>> spin_test(test_string3, whitespace);
>>> puts("Whitespace is simple whitespace");
>>> spin_test(test_string4, NULL);
>>> spin_test(test_string5, NULL);
>>> spin_test(test_string6, NULL);
>>> spin_test(test_string7, NULL);
>>> return 0;
>>>}
>>>#endif
>>
>>
>>Hi Dann,
>>thanks for sharing. I was not aware of the strspn and strpbrk functions.
>>Some minor nitpicking on the spin_test routine ;-)
>>
>>Gerd
>>
>>void spin_test(char *test_string, char *white)
>>{
>> char *p = NULL;
>> char *token;
>> token = tokenize(test_string, white, &p);
>> while (token) {
>> puts(token);
>> token = tokenize(NULL, white, &p);
>> }
>>}
>
>
>"span" and "break" come from the old "Snobol" text-processing language.
>
>Personally I use strtok() to parse things myself, since it is so easy...
Yes, Dann's routine is reentrant and therefore thread-safe.
strtok() is not thread-safe, but strtok_r() is.
No idea about the portability of the latter.
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.