//An instance of this class maintains information on every word that occurs //in a text file: the word, its frequency, the line-numbers where it occurs // import java.util.*; import java.io.*; public class Concordance { //Instance Variables private String fileName; private ArrayList table; //Constructor public Concordance(String fileName) { //Initialize Instance Variables this.fileName = fileName; this.table = new ArrayList(); //Construct this Concordance by scanning words from the text file try { //Open the text file, and create a scanner over the file Scanner source = new Scanner( new FileReader(fileName) ); //Scan words from the file to create the concordance this.scanFile( source ); } catch (FileNotFoundException fnfe) { throw new RuntimeException( fnfe.getMessage() ); } } //Constructor Helper //Update this concordance table with each word from the text file private void scanFile(Scanner source) { int lineNumber = 0; //Scan all lines from the text file while ( source.hasNext() ) { //Input and count the next line from the file //Create a Scanner to process the input line lineNumber++; String input = source.nextLine().trim(); Scanner stream = new Scanner( input ); //Scan all "words" from the current input line while ( stream.hasNext() ) { //Extract, standardize the next "word" String word = prune(stream.next()).toLowerCase() ; //Update the concordance with the "word" if (word.length() > 0) this.update( word, lineNumber ); } } } //Accessor // @return: the name of the text file of this Concordance public String getFileName() { return this.fileName; } //Accessor // @return: the number of entries/words recorded in this Concordance public int getTableSize() { return this.table.size(); } //Accessor // @return: a list of all (unique) words recorded in this Concordance public String[] getWords() { String[] words = new String[this.table.size()]; for (int i = 0; i < this.table.size(); i++) words[i] = this.table.get(i).getWord(); return words; } //File Query Method // @return: the total number of words processed in constructing this Concordance public int numberOfWordsInFile() { int count = 0; for (WordRecord record : this.table) count += record.getFrequency(); return count; } //File Query Method // @return: the total number of text lines scanned in constructing this Concordance public int numberOfLinesInFile() { int maxLineNumber = 0; for (WordRecord record : this.table) { int hiIndex = record.getLineNumbers().length - 1; if (record.getLineNumbers()[hiIndex] > maxLineNumber) maxLineNumber = record.getLineNumbers()[hiIndex]; } return maxLineNumber; } //Word Query Method // @param word: a word to be searched in this Concordance // @return: true iff parameter word is recorded in an entry of this Concordance public boolean contains(String word) { return this.lookUp( word ) != null; } //Word Query Method // @param word: a word to be searched in this Concordance // @return: the number of times (parameter) word occurs in the text file of this Concordance public int frequency(String word) { WordRecord record = this.lookUp( word ); if (record != null) return record.getFrequency(); return 0; } //Word Query Method // @param word: a word to be searched in this Concordance // @return: a list of the line-numbers of the text file of this Concordance where (parameter) word occurs public int[] lineNumbers(String word) { WordRecord record = this.lookUp( word ); if (record != null) return record.getLineNumbers(); return null; } //Override public String toString() { String image = "CONCORDANCE File: " + this.fileName; for (WordRecord entry : this.table) image += "\n" + entry; return image.trim(); } //Eliminate leading and trailing white-space and punction symbols // @param word: a word to be "pruned" of white-space and punctuation // @return: the "pruned" word, meeting the following post-conditions // 2) a substring of the original word // 1) no LEADING or TRAILING white-space // 2) no LEADING or TRAILING punctuation symbols private static String prune(String word) { word = word.trim(); if ( word.length() == 0 ) return word; if ( isPunctuationSymbol(word.charAt(0)) ) return prune( word.substring(1) ); int endIndex = word.length() - 1 ; if ( isPunctuationSymbol(word.charAt(endIndex)) ) return prune( word.substring(0, endIndex) ); return word; } private static boolean isPunctuationSymbol(char ch) { return ".,;:!?()'\"".indexOf(ch) != -1; } //Helper Method - Update a concordance with a word from the text file //If the word is already in the concordance it's word-record is updated // with the line-number, otherwise a new word-record is created for the // new word and added to the concordance private void update(String word, int lineNumber) { WordRecord record = new WordRecord(word, lineNumber); //Locate the word in the Concordance table int index = this.table.size() - 1; while (index >= 0 && record.compareTo(this.table.get(index)) < 0) index--; //Update an existing WrodRecord, or insert a new WordRecord if (index < 0 || record.compareTo(this.table.get(index)) > 0) this.table.add(index + 1, record); //Insert a new WordRecord else this.table.get(index).update(lineNumber); //Update an existing WordRecord } //Helper Method - Find a given word in this Concordance // @param: a word to be searched in this Concordance // @return: a reference to the WordRecord for the given word in this Concordance // null if the parameter word is not recorded in this Concordance private WordRecord lookUp(String word) { WordRecord target = new WordRecord(prune(word).toLowerCase(), 1); int loIndex = 0; int hiIndex = this.table.size() - 1; while (loIndex <= hiIndex) { int middle = (loIndex + hiIndex)/2; int comparison = this.table.get(middle).compareTo(target); if ( comparison == 0 ) return this.table.get(middle); if ( comparison < 0 ) loIndex = middle + 1; else hiIndex = middle - 1; } return null; } }