import java.util.ArrayList; import java.util.HashSet; import com.decontextualize.a2z.TextFilter; public class InformationOverlord extends TextFilter { public static void main(String[] args) { InformationOverlord io = new InformationOverlord(); for (int i = 0; i < args.length; i++) { io.addCategory(args[i]); } io.run(); } private ArrayList categories = new ArrayList(); private HashSet uniqueWords = new HashSet(); // add a category, training it with words from a particular file public void addCategory(String fname) { BayesCategory cat = new BayesCategory(fname); String[] lines = new TextFilter().collectLines(fromFile(fname)); for (String line: lines) { String[] tokens = line.split(" "); for (String token: tokens) { cat.train(token); } } categories.add(cat); } public void eachLine(String line) { String[] tokens = line.split(" "); for (String token: tokens) { uniqueWords.add(token); } } public void end() { // calculate total number of words in all categories (needed for bayes // formula) int categoryWordTotal = 0; for (BayesCategory bcat: categories) { categoryWordTotal += bcat.getTotal(); } // the following loop will print out the "relevance" of each word in // assigning the text to a particular category (for long texts, will // produce a lot of output) /* for (String word: uniqueWords) { for (BayesCategory bcat: categories) { double wordProb = bcat.relevance(word, categories); println(word + "/" + bcat.getName() + ": " + String.valueOf(wordProb)); } } */ // print out scores for each category (higher is better) for (BayesCategory bcat: categories) { double score = bcat.score(uniqueWords, categoryWordTotal); if (score >= 404000){ println("You should definitely read this."); } if (score<=-404000){ println("You should definitely read this."); } else { println("Don't bother reading this drivel."); } //println(bcat.getName() + ": " + String.valueOf(score)); } } }