/* words.c - Enumerating the words in a file.
	We want to be able, given a file, to read in sequence all the 
	words in it. The reason is that we will want in time to 
	list all such words and find the number of their occurrences.
	We will treat capital and lower case characters as if they were
	just capital letters. We will treat characters that are not
	letters as if they were spaces. Then a word for us will be a 
	maximal sequence of letters.
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>

// Maximum number of characters kept in the line buffer, not counting
// the terminating '\0'
#define MAXLINE 256

// State of a word generator: the input stream, the text line currently
// being scanned, and the scanning position within that line
typedef struct Wordgen {
    // The file whence we obtain text lines
    FILE *source;

    // The line last read; one extra byte leaves room for the '\0'
    char buffer[MAXLINE + 1];

    // The length of the line held in buffer
    int len;

    // The position in buffer where we look next
    int cursor;

    // 1 if cursor is inside a word, 0 otherwise
    int inside;

    // If inside==1, position of the first char of the current word
    int first;
} wordgen;

// Initialize the wordgen structure p. filename specifies the file from which
// we will read lines. Returns 0 in case of success, -1 in case of failure
// (p or filename is NULL, or the file cannot be opened for reading).
// Whenever p is not NULL, every field of *p is set before returning, so that
// even after a failure the structure holds defined values (source == NULL,
// empty buffer, all positions 0).
int initWordGen(const char *filename, wordgen *p) {
    if (p == NULL)
        return -1;

    // Start from an empty state: no stream, no buffered text, not in a word
    p->source = NULL;
    p->buffer[0] = '\0';
    p->len = 0;
    p->cursor = 0;
    p->first = 0;
    p->inside = 0;

    if (filename == NULL)
        return -1;

    p->source = fopen(filename, "r");
    return (p->source == NULL) ? -1 : 0;
}

// Returns 1 if we can get an additional word from p, 0 otherwise.
// When it returns 1, p->inside is also 1 and p->first is the position in
// p->buffer of the first letter of that word. Calling it again before the
// word is consumed returns 1 again without advancing.
// Text is read in pieces of at most sizeof(p->buffer)-1 characters, so a
// word that straddles the end of such a piece is seen as two words.
int hasNext(wordgen *p) {
    if (p->inside == 1)
        return 1;
    for (;;) {
        // Skip everything that is not a letter in what is left of the buffer
        while (p->cursor < p->len) {
            // The <ctype.h> functions need a value representable as
            // unsigned char; a plain char may be negative
            unsigned char c = (unsigned char) p->buffer[p->cursor];
            if (isalpha(c)) {
                p->inside = 1;
                p->first = p->cursor;
                return 1;
            }
            p->cursor++;
        }
        // We have run out of data in the buffer. Read a new line, and keep
        // reading while the text just read has length 0
        while (p->cursor >= p->len) {
            // A NULL stream is treated like end of input
            if (p->source == NULL
                || fgets(p->buffer, (int) sizeof p->buffer, p->source) == NULL) {
                return 0;
            }
            p->len = (int) strlen(p->buffer);
            p->inside = 0;
            p->cursor = 0;
        }
    }
}

// Assuming it is called after hasNext returned true, returns the next word,
// converted to upper case. The word is terminated in place inside p->buffer
// and the returned pointer refers to that buffer, so the text is overwritten
// when a later hasNext call reads a new line; copy it if it must be kept.
// Returns NULL, without touching the buffer, if no word is pending
// (p->inside != 1), i.e. if hasNext was not called or did not return 1.
char * next(wordgen *p) {
    if (p->inside != 1)
        return NULL;

    while (p->cursor < p->len) {
        // The <ctype.h> functions need a value representable as
        // unsigned char; a plain char may be negative
        unsigned char c = (unsigned char) p->buffer[p->cursor];
        if (!isalpha(c))
            break;
        p->buffer[p->cursor] = (char) toupper(c);
        p->cursor++;
    }
    p->inside = 0;
    // Overwrite the character that ended the word (or the existing '\0' at
    // position len) with the terminator, then step past it
    p->buffer[p->cursor] = '\0';
    p->cursor++;
    return &(p->buffer[p->first]);
}

// It ends the use of p: closes the file p reads from, if one is open.
// Does nothing when p is NULL or p->source is NULL. After the call
// p->source is NULL, so calling it a second time is harmless.
void endWordGen(wordgen *p) {
    if (p == NULL || p->source == NULL)
        return;
    fclose(p->source);
    p->source = NULL;
}

// Prints on standard output, one per line and in upper case, all the words
// of the file named by the single command line argument.
// Returns 0 on success, 1 if the arguments are wrong or the file cannot be
// opened; in those cases a message is written to standard error.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        // argv[0] is not guaranteed to exist when argc is 0
        fprintf(stderr, "usage: %s filename\n", argc > 0 ? argv[0] : "words");
        return 1;
    }
    wordgen theSource;
    if (initWordGen(argv[1], &theSource) != 0) {
        fprintf(stderr, "Unable to initialize our source\n");
        return 1;
    }
    while (hasNext(&theSource)) {
        char *word = next(&theSource);
        printf("%s\n", word);
    }
    endWordGen(&theSource);
    return 0;
}

