#include "wordStat.h"
/*
 * Records one occurrence of `word` in the hashmap.
 *
 * Parameters:
 *   wordHash - bucket array of HASH_MAP_SIZE list heads (NULL for an empty
 *              bucket).
 *   word     - NUL-terminated string to record; it is copied, never retained.
 *
 * Layout maintained by this function:
 *   - Each bucket is a list linked through nextWord; every node on that list
 *     is the first spelling seen of a word that differs from its neighbours
 *     even when compared with equalsIgnoreCase.
 *   - Further spellings of the same word (e.g. "Word" after "word") hang off
 *     that first node through nextVar.
 *   - occurrences and numVariations are only maintained on the first node;
 *     variation nodes keep both fields at 0.
 *
 * Returns:
 *    1  if the word is brand new (no spelling of it was stored before),
 *   -1  if some spelling of the word was already stored,
 *    0  if nothing could be recorded: the word does not fit in the node's
 *       MAX_LENGTH buffer, or memory allocation failed. The map is left
 *       unchanged in that case.
 */
int put(wordNode *wordHash[], char *word){
    size_t len = strlen(word);
    size_t i;
    unsigned long sumOfChars = 0;
    int hashcode;
    wordNode *curr;
    wordNode *prev = NULL;
    wordNode *newWord;

    /* The key lives in a fixed-size array inside the node; refuse anything
       that would overflow it instead of writing past the end. */
    if (len >= MAX_LENGTH) {
        return 0;
    }

    /* Generate hashcode: sum of the bytes, taken as unsigned so that bytes
       >= 0x80 cannot produce a negative bucket index. */
    for (i = 0; i < len; i++) {
        sumOfChars += (unsigned char)word[i];
    }
    hashcode = (int)(sumOfChars % HASH_MAP_SIZE);

    /* Walk the bucket looking for a node that matches ignoring case.
       prev trails behind so a new word can be appended at the tail. */
    for (curr = wordHash[hashcode]; curr != NULL; curr = curr->nextWord) {
        if (equalsIgnoreCase(word, curr->word) > 0) {
            break;
        }
        prev = curr;
    }

    if (curr != NULL) {
        wordNode *currVar;

        /* Is this exact spelling already stored (head node included)? */
        for (currVar = curr; currVar != NULL; currVar = currVar->nextVar) {
            if (strcmp(currVar->word, word) == 0) {
                curr->occurrences++;
                return -1;
            }
        }

        /* New spelling of a known word: allocate before touching any
           counters so a failed allocation leaves the map untouched. */
        newWord = calloc(1, sizeof *newWord);
        if (newWord == NULL) {
            return 0;
        }
        memcpy(newWord->word, word, len + 1);
        newWord->hashcode = hashcode;

        /* Insert directly after the head of the variation list. */
        newWord->nextVar = curr->nextVar;
        curr->nextVar = newWord;
        curr->numVariations++;
        curr->occurrences++;
        return -1;
    }

    /* No spelling of this word exists yet: create the head node. */
    newWord = calloc(1, sizeof *newWord);
    if (newWord == NULL) {
        return 0;
    }
    memcpy(newWord->word, word, len + 1);
    newWord->occurrences = 1;
    newWord->numVariations = 1;
    newWord->hashcode = hashcode;

    if (prev == NULL) {
        /* Bucket was empty. */
        wordHash[hashcode] = newWord;
    } else {
        prev->nextWord = newWord;
    }
    return 1;
}
#include "wordStat.h"
/*
 * Prompts for a file path, splits the file into words (maximal runs of
 * characters accepted by isLetter or isDigit), stores every word in the
 * hashmap via put(), then prints the raw hashmap followed by the sorted
 * listing produced by printLexico().
 *
 * Words longer than MAX_LENGTH - 1 characters are truncated to that length.
 * Returns 0 on success; exits with status -1 if the file cannot be opened
 * and returns EXIT_FAILURE if the sort array cannot be allocated.
 */
int main (int argc, char * const argv[]) {
    /* data file from which the input will be taken */
    FILE *dataFile;
    /* number of different words (ignoring case) in the input file;
       must start at 0 because it is only ever incremented */
    int numWords = 0;
    /* current input character; int (not char) so EOF stays distinguishable
       from every valid byte value */
    int c;
    /* path typed by the user */
    char file[100] = {"\0"};
    /* buffer in which the current word is assembled */
    char word[MAX_LENGTH];
    /* Hashmap array to which the words will be hashed (size is prime).
       static: the array is far too large to put on the stack safely, and
       static storage starts out as all NULL pointers. */
    static wordNode *wordHash[HASH_MAP_SIZE];
    /* number of characters currently stored in word */
    int charCount = 0;
    /* non-zero while a word is in progress */
    int wordFlag = 0;
    /* one entry per different word, handed to the sort/print helpers */
    char **wordsToSort;

    (void)argc;
    (void)argv;

    printf("Enter the file path in its entirity: ");
    /* Field width keeps the input inside file[]; if nothing is read, file
       stays empty and the fopen below reports the failure. */
    scanf("%99s", file);
    printf("%s\n", file);
    if((dataFile = fopen(file, "r")) == NULL){
        printf("Could not open file.\n");
        exit(-1);
    }

    /* parse the file */
    while ((c = fgetc(dataFile)) != EOF) {
        if (isLetter((char)c) > 0 || isDigit((char)c) > 0) {
            wordFlag = 1;
            /* Append while there is room for the character and the
               terminator; excess characters of an over-long word are
               dropped. */
            if (charCount < MAX_LENGTH - 1) {
                word[charCount++] = (char)c;
                word[charCount] = '\0';
            }
        }
        else {
            /* A separator ends the word in progress, if any. */
            if (wordFlag > 0 && put(wordHash, word) == 1) {
                numWords++;
            }
            wordFlag = 0;
            /* Reset charCount in order to be able to build a new word */
            charCount = 0;
        }
    }
    /* Don't forget about the last word in the file! */
    if (wordFlag > 0 && put(wordHash, word) == 1) {
        numWords++;
    }
    /* The file is fully consumed; nothing below reads from it. */
    fclose(dataFile);

    printf("numWords = %d\n", numWords);

    /* Array in which all different words (completely different -- not
       different variations) will be stored in order to be sorted.
       Heap-allocated, with at least one slot, so an empty file does not
       create a zero-length array. */
    wordsToSort = malloc((size_t)(numWords > 0 ? numWords : 1) * sizeof *wordsToSort);
    if (wordsToSort == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return EXIT_FAILURE;
    }

    /* Fill wordsToSort with all of the words that need to be sorted */
    fillWordsToSort(wordHash, wordsToSort);
    printHashmap(wordHash);
    /* Sort the array of words */
    sortWords(wordHash, wordsToSort, numWords);
    if (numWords == 0) {
        printf("File is empty.");
    }
    else {
        printLexico(wordHash, wordsToSort, numWords);
    }

    free(wordsToSort);
    return 0;
}
#ifndef WORDSTAT_H
#define WORDSTAT_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_LENGTH 1024 /*Maximum length of any word*/
#define HASH_MAP_SIZE 104729/*Size of the hashmap array (Prime).*/
/************************
*** STRUCTS ***
************************/
/*
 * One stored spelling of a word in the hashmap.
 *
 * put() links nodes in two directions: nextWord chains the different words
 * that share a bucket, and nextVar chains the additional spellings
 * (case variations) of one word behind the first spelling that was seen.
 * Nodes are zero-filled when put() creates them.
 */
typedef struct wordNode{
/*Index of the bucket this node was hashed to (sum of the word's characters
modulo HASH_MAP_SIZE, as computed by put).*/
int hashcode;
/*Number of times this word, in any of its variations, occurs in the data file.
put() only maintains this on the first node of a word; nodes reached through
nextVar are left at 0.*/
int occurrences;
/*Number of distinct variations of this word in the data file (ie. word and WORD).
put() only maintains this on the first node of a word (starting at 1); nodes
reached through nextVar are left at 0.*/
int numVariations;
/*key: the spelling itself, stored as a NUL-terminated string inside the node.*/
char word[MAX_LENGTH];
/*Pointer to the next variation of this particular word, or NULL.
put() inserts a new variation directly after the first node.*/
struct wordNode *nextVar;
/*Pointer to the next word in the bucket's linked list, or NULL. The next word is
never a variation of its predecessor.*/
struct wordNode *nextWord;
} wordNode;
/**************************
*** FUNCTION SIGNATURES ***
**************************/
/* String comparison used by put() to decide whether two spellings are the same
   word; put() treats a return value > 0 as a match. Presumably case-insensitive,
   as the name says -- definition not shown here, confirm. */
int equalsIgnoreCase(char* str1, char* str2);
/* NOTE(review): neither the definition nor a caller is visible here; presumably
   stores c in wordPtr at index charCount -- confirm. */
void addLetterToWord(char* wordPtr, char c, int charCount);
/* Character tests used by main() to find word boundaries; main() treats a
   return value > 0 as "yes". */
int isLetter(char c);
int isDigit(char c);
/* Records one occurrence of word in the hashmap. Returns 1 if the word was brand
   new and -1 if a variation of it already existed. */
int put(wordNode *wordHash[], char *word);
/* NOTE(review): definition not visible here; presumably looks up the node stored
   for word -- confirm what is returned when the word is absent. */
wordNode* get(wordNode *wordHash[], char *word);
/* Prints word, number of variations and number of occurrences for every node on
   every bucket's nextWord list, in bucket order. */
void printHashmap(wordNode *wordHash[]);
/* main() calls these in the order fillWordsToSort, sortWords, printLexico, passing
   an array with room for numWords entries. Definitions are not shown here;
   presumably they fill, sort and print that array respectively -- confirm. */
void sortWords(wordNode *wordHash[], char *wordsToSort[], int numWords);
void fillWordsToSort(wordNode *wordHash[], char *wordsToSort[]);
void printLexico(wordNode *wordHash[], char *wordsToSort[], int numWords);
/* NOTE(review): the signature has the shape of a qsort comparison callback;
   presumably used by sortWords -- confirm. */
int compareIgnoreCase(const void* item1, const void* item2);
#endif
#include "wordStat.h"
/*
 * Dumps the hashmap in bucket order (i.e. unsorted).
 *
 * For every node reachable through a bucket's nextWord list this prints the
 * stored word, its number of variations and its number of occurrences.
 * Nodes that are only reachable through nextVar are not printed.
 */
void printHashmap(wordNode *wordHash[]){
    int bucket = 0;

    while (bucket < HASH_MAP_SIZE) {
        const wordNode *node;

        for (node = wordHash[bucket]; node != NULL; node = node->nextWord) {
            printf("Word: %s\n", node->word);
            printf("number of variations: %d\n", node->numVariations);
            printf("number of occurences: %d\n\n", node->occurrences);
        }
        bucket++;
    }
}

New Topic/Question
Reply



MultiQuote



|