/** * file : Speller.java */ import java.io.*; // for PrintWriter // import our classes import List.*; import HashTable.*; import Timer.*; public class Speller { static final String dictFilename = "adverbs.in"; static final String docFilename = "somewords.in"; static final String outFilename = "timings.dat"; static FileWordList dictionary; // dictionary of Words static FileWordList document; // document to check static PrintWriter timings; // timing results /** * post : Runs a hashing algorithm race for a rudimentary * spell checker. */ public static void main( String[] argv ) throws FileNotFoundException, IOException { // open various files dictionary = new FileWordList( dictFilename ); document = new FileWordList( docFilename ); timings = new PrintWriter( new FileOutputStream( outFilename ) ); doTiming(); // clean-up timings.close(); } /** * pre : dictionary, document, and timings opened * post : Runs a standard race on several data structures. * Outputs timing results to the file given by * `timings'. */ private static void doTiming() throws FileNotFoundException, IOException { int cnt; int lookupMultiplier = 10; // how many more lookups than adds List lst; HashTable ht = null; // print a header for output file timings.println( "# of\t# of\tbuild\tfind" ); timings.println( "adds\tfinds\t(msec)\t(msec)" ); timings.println( "----\t-----\t-----------" ); for (cnt = 100; cnt <= 700; cnt +=150) { ht = new HashTable( 16 ); // Record the count timings.print( cnt + "\t" + cnt*lookupMultiplier + "\t" ); System.out.println( "Hash table results (" + cnt + "): " ); runRace( ht, cnt, lookupMultiplier, true ); timings.println(""); } // print a histogram of the distribution to the output file if( ht != null ) { timings.println( "\nHash table distribution of keys\n" + ht.toStringDistrib() ); } } /** * pre : dsize and multiplier are non-negative * three I/O files valid * * post : Runs a spelling check by inserting the first `dsize' * words from `dictionary' into `dict'. Then spell * checks the first `dsize' words in `document' * `multiplier' times. If either file has fewer words * than indicated, then the check is run using the * number of words in the file. The resulting times * are recorded in `timings'. If `showErrors' is true, * the misspelled words are printed on the standard * output. */ private static void runRace( HashTable dict, int dsize, int multiplier, boolean showErrors ) throws FileNotFoundException, IOException { Timer stopWatch = new Timer(); int cnt; long elapse; // build dictionary stopWatch.start(); buildDictionary( dict, dsize ); elapse = stopWatch.stop(); System.out.println( " Time to build dictionary: " + elapse + " msec" ); timings.print( elapse + "\t" ); // Read in document: // I/O is expensive, so rather than counting that in the // running time, we'll save the words in a list and print // them out when we're done. List spellWordList = new List(); cnt = 0; while( cnt < dsize && !document.isEOF() ) { Word checkWord = document.nextWord(); if( document.isEOF() ) break; spellWordList.insert( checkWord ); cnt++; } // do the look-ups stopWatch.reset(); stopWatch.start(); List badWordList = doSpellCheck( dict, spellWordList, multiplier ); elapse = stopWatch.stop(); System.out.println( " Time to spell check: " + elapse + " msec" ); timings.println( elapse + "\t" ); // print mistakes caught if (showErrors) { printMistakes( badWordList ); } // Reset to allow then next contestant to use the streams. dictionary.reset(); document.reset(); } /** * pre : dict is a valid HashTable (non-null), dsize >= 0 * post : Reads up to dsize words from the file `dictionary' * and stores them as (Word, length of Word) pairs in * `dict'. */ private static void buildDictionary( HashTable dict, int dsize ) throws IOException { int cnt = 0; while( cnt < dsize && !dictionary.isEOF() ) { Word newWord = dictionary.nextWord(); if( dictionary.isEOF() ) break; dict.insert( newWord, new Integer(newWord.length()) ); cnt++; } } /** * pre : spellWordList is a List of Words to check * dict is the dictionary against which to check * multiplier == number of times to spell check * * post : Performs the spell check `multiplier' times, and * returns a List of the misspelled words. Returns * null <==> multiplier <= 0 */ private static List doSpellCheck( HashTable dict, List spellWordList, int multiplier ) { List badWordList = null; for (int i = 0; i < multiplier-1; i++) { badWordList = new List(); ListEnum words = spellWordList.getElements(); while( words.hasMoreElements() ) { Word nextWord = (Word)words.nextElement(); Integer foundData = (Integer)dict.find( nextWord ); if( foundData == null ) badWordList.insert( nextWord ); } } return badWordList; } private static void printMistakes( List badWordList ) { System.out.println( " Misspellings:" ); ListEnum badWords = badWordList.getElements(); while( badWords.hasMoreElements() ) { Word nextWord = (Word)badWords.nextElement(); System.out.println( "\t" + nextWord ); } } } // eof