Every word should go into an array of strings. I am not allowed to use
strtok.
An interesting problem that can be solved with a compact algorithm.
It handles runs of spaces and the punctuation marks listed in the delimiters string passed to is_delimiter().
The most difficult part of the problem is handling the corner cases properly: a word may be longer than WORD_LEN characters, or the number of words may exceed the capacity of the array.
Both cases are handled: overlong words are truncated, parsing stops at the capacity of the array, and both conditions are reported in the returned status.
(BTW, do not use gets; see: Why is the gets function so dangerous that it should not be used?)
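A minimal sketch of the usual replacement: read into a fixed-size buffer with fgets and strip the trailing newline. The same pattern is used in main() further below; the buffer size 256 here is just an example.

#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[256];                         // fixed-size buffer; fgets never writes past its end
    if (fgets(line, sizeof line, stdin) != NULL) {
        line[strcspn(line, "\n")] = '\0';   // drop the trailing newline, if any
        printf("read: %s\n", line);
    }
    return 0;
}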
Edit: the fully tested find_tokens function is presented below.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define WORD_LEN    3       // max word length (3 for testing; e.g. 100 in practice)
#define NR_OF_WORDS 3       // max number of words (3 for testing; e.g. 100 in practice)
#define INPUT_SIZE 10000
int is_delimiter(const char *delimiters, char c)    // check whether c is a delimiter
{
    char *p = strchr(delimiters, c);    // non-NULL means c is in the delimiter set
    if (p) return 1;                    // delimiter
    else return 0;                      // not a delimiter
}
int skip(int *i, char *str, int skip_delimiters, const char *delimiters)
{
    while (1) {
        if (skip_delimiters) {
            if ((str[(*i)+1] == '\0') || (!is_delimiter(delimiters, str[(*i)+1])))
                break;                      // break on a non-delimiter or '\0'
            else (*i)++;                    // advance to the next character
        }
        else {                              // skip excess characters in the token
            if (is_delimiter(delimiters, str[(*i)])) {
                if ((str[(*i)+1] == '\0') || !is_delimiter(delimiters, str[(*i)+1]))
                    break;                  // break on a non-delimiter or '\0'
                else (*i)++;                // skip delimiters
            }
            else (*i)++;                    // skip non-delimiters
        }
    }
    if (str[(*i)+1] == '\0') return 0;      // nothing left to parse
    else return 1;                          // more characters follow
}
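// find_tokens(): split str into at most max_tokens tokens of at most token_len
// characters each, using the characters in delimiters as separators; the number
// of tokens found is stored in *nr_of_tokens. The return value is a status word:
// bit 0x01 is set if a token was truncated, bit 0x02 if there were more tokens
// than the array can hold.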
int find_tokens(int max_tokens, int token_len, char *str, char array[][token_len+1],
                const char *delimiters, int *nr_of_tokens)
{
    int i = 0;
    int j = 0;
    int l = 0;
    int status = 0;                         // all OK
    int skip_leading_delimiters = 1;
    int token = 0;
    int more;
    *nr_of_tokens = 0;

    for (i = 0; str[i] != '\0'; i++) {      // loop until the end of the string
        if (skip_leading_delimiters) {      // skip leading delimiters
            if (is_delimiter(delimiters, str[i])) continue;
            skip_leading_delimiters = 0;
        }
        if (!is_delimiter(delimiters, str[i]) && (j < token_len)) {
            array[l][j] = str[i];           // put the character in the array
            j++;
            array[l][j] = 0;
            token = 1;
        }
        else {
            array[l][j] = '\0';             // terminate the token
            if (j < token_len) {
                more = skip(&i, str, 1, delimiters);    // skip delimiters
            }
            else {
                more = skip(&i, str, 0, delimiters);    // skip the excess characters of the token
                status = status | 0x01;                 // token has been truncated
            }
            j = 0;
            if (token) {
                if (more) l++;
            }
            if (l >= max_tokens) {
                status = status | 0x02;     // more tokens than expected
                break;
            }
        }
    }
    if (l >= max_tokens)
        *nr_of_tokens = max_tokens;
    else {
        if (l <= 0 && token)
            *nr_of_tokens = 1;
        else {
            if (token)
                *nr_of_tokens = l + 1;
            else
                *nr_of_tokens = l;
        }
    }
    return status;
}
int main(void)
{
    char input[INPUT_SIZE+1];               // the sentence
    char array[NR_OF_WORDS][WORD_LEN+1];    // the words; remember room for the null terminator!
    int number_of_words;
    const char *delimiters = " .,;:\t";     // word delimiters
    char *p;

    printf("Insert sentence: ");            // read the sentence
    if (fgets(input, sizeof input, stdin) == NULL) return 1;
    if ((p = strchr(input, '\n')) != NULL) *p = '\0';   // remove the trailing '\n'

    int ret = find_tokens(NR_OF_WORDS, WORD_LEN, input, array, delimiters, &number_of_words);
    printf("tokens= %d ret= %d\n", number_of_words, ret);
    for (int i = 0; i < number_of_words; i++)
        printf("%d: %s\n", i, array[i]);
    printf("End\n");
    return 0;
}
Test:
Insert sentence: ..........1234567,,,,,,abcdefgh....123::::::::::::
tokens= 3 ret= 1
0: 123
1: abc
2: 123
End
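The ret value is a bit mask: ret= 1 above means bit 0x01 is set, i.e. at least one word was truncated to WORD_LEN characters. A caller could decode it along these lines (report_status is only an illustrative name, not part of the program above):

#include <stdio.h>

// Illustrative helper: decode the status bits returned by find_tokens().
void report_status(int status)
{
    if (status & 0x01) printf("warning: at least one word was truncated\n");
    if (status & 0x02) printf("warning: more words than the array can hold\n");
    if (status == 0)   printf("all words parsed completely\n");
}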