// David Ferris - Capstone 2014 - Hash.h // Last Updated 5/10/2014 #ifndef HASH_H #define HASH_H #include #include #include #include "Word.h" using namespace std; const double FACTOR = 26; const int DEFAULT_SIZE = 10; class Hash { public: //------------Construction/Destruction------------ Hash(); Hash(int s); virtual ~Hash(); //---------------Hashing Arithmetic--------------- int HashFcn(string s); //Determines array index //---------------Accessors/Mutators--------------- int IsPresent(string text, Word * ptr, int size); void Add(Word wrd); int GetTotalWords(); void GetWordData(string * text, int * occurrances); Word * FindWordByText(string text); // Returns a pointer to the word with the given text void Dump(); private: Word ** words; int * array_sizes; // Contains sizes of each of the arrays pointed to by the pointers in the words array int hash_size; }; Hash::Hash() { // Using default SIZE of 10 for arrays words = new Word * [DEFAULT_SIZE]; array_sizes = new int [DEFAULT_SIZE]; hash_size = DEFAULT_SIZE; // Sets all pointers to null for(int i = 0; i < DEFAULT_SIZE; i++) { words[i] = 0; array_sizes[i] = 0; } } Hash::Hash(int s) { hash_size = s; // Using given hash_size for words and array_size arrays words = new Word * [hash_size]; array_sizes = new int [hash_size]; // Set all pointers to null in both arrays for(int i = 0; i < hash_size; i++) { words[i] = 0; array_sizes[i] = 0; } } Hash::~Hash() { // (a) First, delete each of the arrays pointed to by the pointers contained in the words array. // The Words contained in these arrays and their subarrays will be deleted using the Word class destructor // (b) The words and array_sizes arrays are dynamic, and thus must also be deleted. // Pointers are also set to null, just to be safe for(int i = 0; i < hash_size; i++) { // (a) Delete subarrays if(words[i] != 0) delete [] words[i]; } // (b) delete [] words; words = 0; delete [] array_sizes; array_sizes = 0; } /************************************************************ --------------------------HashFcn---------------------------- The HashFcn will be used to determine at which index to insert each word into the hash table. The value is determined using a calculation based upon the chars that make up each word's text string. ************************************************************/ int Hash::HashFcn(string s) { int total = 0; // Uses the sum of the int values of each char in the Word for(int i = 0; i < s.length(); i++) total += tolower(s[i]); //For now, simply adding int values together //TESTING //cout << s << ": " << total << " % " << hash_size << " = " << total % hash_size << endl; // Returns an int between 0 and hash_size - 1 return total % hash_size; } /************************************************************ -------------------------IsPresent--------------------------- The function is given a string and a pointer to the index of the words array that was generated by the HashFcn. It will then check to see if the string is already present in the array. If it is not, -1 will be returned. If it is, the corresponding index will be returned. ************************************************************/ int Hash::IsPresent(string text, Word * ptr, int size) { for(int i = 0; i < size; i++) { if(text == ptr[i].GetText()) return i; } return -1; } /************************************************************ -----------------------------Add----------------------------- (a) Uses the HashFcn to find the index at which the Word should be inserted. Uses Word * subarray to point to the subarray at this index. size is used to contain the size of this subarray. (b) The IsPresent function is used to determine whether the given word already exists in the subarray. If it returns -1 to presence, then the Word was not found in the subarray. Any other result indicates the index at which the Word was found in the subarray. (c) If the Word was not found in the subarray, it must be added. This is done by dynamically allocating one more array index and inserting the given Word. (d) If the Word was found in the subarray, then we must find the line # (of the sentences file) of the sentence in which the Word appears. Since the Word has just been constructed, this will be the first (and only) entry in the appearances array. This index is then added to the appearances array of the existing Word ************************************************************/ void Hash::Add(Word wrd) { // (a) int index = HashFcn(wrd.GetText()); Word * subarray = words[index]; //Used for easy access to data int size = array_sizes[index]; // (b) int presence = IsPresent(wrd.GetText(), subarray, size); // (c) if(presence == -1) { // Create a temp array to hold the new array contents Word * temp = new Word [size + 1]; // Copy array contents to the new array for(int i = 0; i < size; i++) temp[i] = subarray[i]; // Add the new Word to the end of the new array temp[size] = wrd; // Delete the old array and point subarray to null delete [] subarray; // Point words[index] to the new array, and temp to null words[index] = temp; temp = 0; // Increment the counter in array_sizes array_sizes[index]++; } // (d) else { // Contains the sentence index at which the Word appears (appearances[0]) int appears_in_sentence = wrd.GetFirstAppearanceIndex(); if(appears_in_sentence == -1) cout << "ERROR: WORD HAS NO APPEARANCES" << endl; else { // arr[presence] is the Word that has been encountered again. // This will add another sentence index to its appearances array. subarray[presence].AddSentenceIndex(appears_in_sentence); } } } /************************************************************ ------------------------GetTotalWords------------------------ This function's purpose is to return the total number of words contained in the Hash. It adds together each of the indices of the array_sizes array ************************************************************/ int Hash::GetTotalWords() { int total = 0; for(int i = 0; i < hash_size; i++) total += array_sizes[i]; return total; } /************************************************************ ----------------------GetWordData----------------------- Receives two pointers and points them to arrays containing data about the contents of the Hash. One array will contain the text of each word, and the second array will contain the number of times that each word occurred. The two arrays will be parallel. ************************************************************/ void Hash::GetWordData(string * text, int * occurrances) { const int N = GetTotalWords(); Word * subarray; int insertion_index = 0; for(int i = 0; i < hash_size; i++) { if(array_sizes[i] != 0) { subarray = words[i]; for(int j = 0; j < array_sizes[i]; j++) { text[insertion_index] = subarray[j].GetText(); occurrances[insertion_index] = subarray[j].GetAppearancesCount(); insertion_index++; } } } } /************************************************************ -----------------------FindWordByText------------------------ Given a string, this function will return a pointer to the word with the matching text. Returns a null and displays a message if the word is not found. ************************************************************/ Word * Hash::FindWordByText(string text) { // Begins by finding the appropriate index in the Hash int index = HashFcn(text); Word * Subarray = words[index]; int size = array_sizes[index]; int presence = IsPresent(text, Subarray, size); if(presence == -1) { cout << "ERROR: A word with the given text was not found." << endl; return 0; } else return &(Subarray[presence]); } void Hash::Dump() { Word * subarray; // Provides a little padding for the table cout << endl; for(int i = 0; i < hash_size; i++) { cout << "**************** Hash Index #" << i << " - " << array_sizes[i] << " Words ****************" << endl; // If the subarray is not empty if(array_sizes[i] != 0) { subarray = words[i]; for(int j = 0; j < array_sizes[i]; j++) { cout << " -------- Word #" << j << " --------" << endl; subarray[j].Dump(); cout << " ----------------------------" << endl; } } else cout << " No words at this Hash Index" << endl; // Provides a little bit of extra space for readability cout << endl; } } #endif