#include #include #define MAX_LINE_LEN 1024 #define HASH_TABLE_SIZE 100007 #define SUFFIX_LEN 5 #define MIN_OCCURENCES 500 struct HashNode { char *suffix; int listSize; struct HashNode *next; }; unsigned int hash( const char *str ) { unsigned int result = 0; int i; for( i = 0; str[ i ] != '\0'; i++ ) result = result * 31 + str[ i ]; return result % HASH_TABLE_SIZE; } void processWord( char *buffer, struct HashNode *ht[ ] ) { int listNumber; struct HashNode *hp; struct HashNode *hentry; int wordLen = strlen( buffer ); char *rep; if( wordLen < SUFFIX_LEN ) return; /* point to SUFFIX_LEN characters before end */ /* don't strdup yet */ rep = buffer + strlen( buffer ) - SUFFIX_LEN; hentry = ht[ listNumber = hash( rep ) ]; /* Search to see if already in map */ for( hp = hentry; hp != NULL; hp = hp->next ) if( strcmp( hp->suffix, rep ) == 0 ) { hp->listSize++; return; } /* No match; this a new suffix */ /* Will need strdup now */ hp = (struct HashNode *) malloc( sizeof( struct HashNode ) ); hp->suffix = strdup( rep ); hp->listSize = 1; hp->next = hentry; ht[ listNumber ] = hp; } void printEntry( struct HashNode *hp ) { printf( "%5s ", hp->suffix ); printf( "occurs %4d times\n", hp->listSize ); } int main( ) { FILE *fp; char buffer[ MAX_LINE_LEN ]; struct HashNode *ht[ HASH_TABLE_SIZE ]; struct HashNode *hp; int i; if( ( fp = fopen( "dict.txt", "r" ) ) == NULL ) { fprintf( stderr, "Error opening dictionary\n" ); return 1; } for( i = 0; i < HASH_TABLE_SIZE; i++ ) ht[ i ] = NULL; while( fscanf( fp, "%s", buffer ) == 1 ) processWord( buffer, ht ); /* Output sufficiently common suffixes */ for( i = 0; i < HASH_TABLE_SIZE; i++ ) for( hp = ht[ i ]; hp != NULL; hp = hp->next ) if( hp->listSize >= MIN_OCCURENCES ) printEntry( hp ); return 0; }