Package treeroot.util.Wordcount;
/**
 * An element of the word set returned by {@code WordCount}.
 *
 * <p>Each instance holds a word value and the number of times that word has appeared.
 * <strong>Note</strong>: words ignore case, so "hello", "Hello" and "HELLO" are the same word.
 *
 * @author treeroot
 * @version 1.0, 04/12/06
 * @see WordCount
 */
public class Word {

    /** The word, always stored in lower case so comparisons ignore case. */
    private final String value;

    /** Number of appearances recorded so far; a new word has been seen once. */
    private int count = 1;

    /**
     * Constructs a Word object with a count of 1.
     *
     * @param value the word text; it is converted to lower case before being stored
     * @throws NullPointerException if {@code value} is {@code null}
     */
    public Word(String value) {
        this.value = value.toLowerCase();
    }

    /**
     * Adds one to the appearance count.
     *
     * <p>Intended to be invoked only by the WordCount class, hence the restricted visibility.
     */
    protected void increase() {
        count++;
    }

    /**
     * @return the word in lower case.
     */
    public String getWord() {
        return value;
    }

    /**
     * @return the number of times this word has appeared.
     */
    public int getCount() {
        return count;
    }

    /**
     * @return {@code true} if {@code o} is a {@code Word} holding the same word, ignoring case.
     */
    @Override
    public boolean equals(Object o) {
        // Both values were lower-cased in the constructor, so a plain equals() is case-insensitive.
        return (o instanceof Word) && ((Word) o).value.equals(value);
    }

    /**
     * @return the hash code of the lower-cased word, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return value.hashCode();
    }
}

// ---------------------------------------------------------------------------
// The listing continues with a second compilation unit (WordCount.java), which
// opens with the declaration:
//
//     package treeroot.util.wordcount;
//
// It is kept as a comment here because a package declaration is only legal as
// the first statement of a source file.
// ---------------------------------------------------------------------------

/**
 * WordCount provides a static method to count the words of a text.
 *
 * <p>You can give the hyphens that words may use to connect letters; otherwise
 * the default hyphens will be used. The default hyphens are '-', '_' and the
 * apostrophe, but the first letter must be an English letter (a-z, A-Z).
 * So: a-b, a_b and it's are words, but --ab, _ab and 'as are not.
 *
 * <p>You can use a comparator to sort the set, by dictionary or by frequency;
 * if you don't give a comparator, the dictionary comparator will be used.
 */
Import java.util.map; import java.util.hashmap; import java.util.treeset; import java.util.comparator; import java.util.collections;
public class WordCount {// the default hyphen collection private static String regex = "// -_ '";. / ** * return the words as a Set by default comparator and hyphens * @see #getWordCount (String, String, Comparator ) * / public static Set getWordCount (String text) {return getWordCount (text, regex, WordCount.DICTIONARY_ORDER);} / ** * return the words as a Set by the default comparator * @see #getWordCount (String, String, Comparator ) * / public static Set getWordCount (String text, String regex) {return getWordCount (text, regex, WordCount.DICTIONARY_ORDER);} / ** * return the words as a Set by the default hyphens * @see #getWordConut (String, String, Comparer) * / public static set getWordcount (String text, comparator order) {return getWordcount (Text, regex, order);} / ** * returnid, the word, the word, the word, the words Lower case. * @Param text the English text you want to split. * @Param regex The Hyphens That The Word Can Use. * @Param order the order of the Set returned by * @return the word Set that the text contains * / public static Set getWordCount (String text, String regex, Comparator order) {Map map = new HashMap ();.. String split1 = "[^ A-ZA-Z" Regex "] "; string split2 = "[^ A-ZA-Z] " Regex "[^ a-za-z] *"; string split3 = "[ ^ a-za-z] * " regex " [^ a-za-z] "; string reg =" (" split2 ") | (" split1 ") "; String [] Words = text.split (reg); for (int i = 0; i