/*
This exercise is in the field of bibliometric (words and text) analysis. You will be supplied with a text
file, which consists of several paragraphs of English text. Your task is to write a program which will
analyze the text and output a range of statistics about the text.
Your program should do the following:
• Print a list of all the words that occur, in alphabetical order. You must not print the same word twice.
• Print out the 20 most common words in the text, together with the number of instances of each word,
with the most common word at the top.
*/
//
// readxt.cpp
// by mythma
// Lists the words in the article in alphabetical order, without repeats,
// and prints the 20 words with the highest frequency.
// compiler: g++
// Cannot compile this program with VC6
//
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/// Separator printed between report sections: a newline followed by a rule of dashes.
const std::string str_interval = "\n" + std::string(59, '-');

/// Occurrence count of every normalised word seen so far.
/// std::map iterates its keys in ascending order, which is what gives the
/// alphabetical word list without an extra sort.
std::map<std::string, int> gMap;

/// A (word, occurrence count) entry as used by the frequency report.
typedef std::pair<std::string, int> WordCount;

/// Returns true when c is one of the ASCII lower-case letters 'a'..'z'.
static bool IsLowerLetter(char c)
{
    return c >= 'a' && c <= 'z';
}

/// Normalises one whitespace-delimited token and counts it in gMap.
///
/// The token is lower-cased, then every leading and trailing character that
/// is not in 'a'..'z' (punctuation, digits, ...) is stripped. Characters
/// between the first and the last letter are kept untouched, so "don't"
/// stays "don't".
///
/// @param strWord  raw token as read from the input.
/// @return true if a word was counted; false if the token is empty or
///         contains no letter at all.
bool AddWordToList(std::string strWord)
{
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so go through unsigned char to stay safe on bytes >= 0x80.
    for (std::string::size_type i = 0; i < strWord.size(); ++i)
        strWord[i] = static_cast<char>(
            std::tolower(static_cast<unsigned char>(strWord[i])));

    // First letter from the front; none at all means there is nothing to count
    // (this also covers the empty token).
    const std::string::iterator first =
        std::find_if(strWord.begin(), strWord.end(), IsLowerLetter);
    if (first == strWord.end())
        return false;

    // Last letter from the back. base() yields the position one past that
    // letter, so [first, last) is exactly the trimmed word and only the
    // trailing non-letters are dropped.
    const std::string::iterator last =
        std::find_if(strWord.rbegin(), strWord.rend(), IsLowerLetter).base();

    // operator[] value-initialises the count to 0 for a word not seen before.
    ++gMap[std::string(first, last)];
    return true;
}

/// Prints the number of distinct words followed by all of them in
/// alphabetical order, five left-aligned 15-character columns per line.
void OutputWordsList()
{
    const int kWordsPerLine = 5;
    const int kColumnWidth = 15;

    std::cout << str_interval
              << "\n-- The total number of words in the file is:" << gMap.size()
              << ", arrangements according to the letters"
              << str_interval << std::endl;

    int column = 0;
    for (std::map<std::string, int>::const_iterator it = gMap.begin();
         it != gMap.end(); ++it)
    {
        std::cout << std::left << std::setw(kColumnWidth) << it->first;
        if (++column == kWordsPerLine)
        {
            std::cout << '\n';
            column = 0;
        }
    }
    std::cout << str_interval << std::endl;
}

/// Ordering predicate for the frequency report: p1 goes before p2 when it
/// has the strictly larger occurrence count.
bool cmp(const std::pair<std::string, int>& p1,
         const std::pair<std::string, int>& p2)
{
    return p1.second > p2.second;
}

/// Prints up to the 20 most frequent words with their counts, most frequent
/// first.
void OutputCount()
{
    const std::vector<WordCount>::size_type kTopCount = 20;
    const int kColumnWidth = 15;

    // The copy starts out in the map's alphabetical order; a stable sort
    // keeps that order among words with equal counts, so the report is
    // deterministic.
    std::vector<WordCount> wd(gMap.begin(), gMap.end());
    std::stable_sort(wd.begin(), wd.end(), cmp);

    std::cout << str_interval
              << "\n - There are several words with the most frequent frequency:"
              << str_interval;

    const std::vector<WordCount>::size_type shown =
        std::min(wd.size(), kTopCount);
    for (std::vector<WordCount>::size_type i = 0; i < shown; ++i)
    {
        // The newline is written before setw so the field width is applied
        // to the word rather than being consumed by the '\n' character.
        std::cout << '\n'
                  << std::left << std::setw(kColumnWidth) << wd[i].first
                  << "----" << wd[i].second;
    }
    std::cout << str_interval;
}

/// Entry point. Reads whitespace-separated tokens from the file named by the
/// single command-line argument (or from the built-in default path when the
/// argument count is anything else), then prints both reports.
///
/// @return 0 on success, 1 if the input file cannot be opened.
int main(int argc, char* argv[])
{
    const char* strPath = (argc == 2) ? argv[1] : "c://words.txt";

    std::ifstream inFile(strPath);
    if (!inFile)
    {
        // Without this check a stream that failed to open never reaches EOF,
        // and an eof()-driven read loop would spin forever.
        std::cerr << "Cannot open input file: " << strPath << std::endl;
        return 1;
    }

    // Testing the extraction itself stops the loop exactly when no further
    // token can be read.
    std::string strWord;
    while (inFile >> strWord)
        AddWordToList(strWord);

    OutputWordsList();
    OutputCount();
    return 0;
}