The program is as follows:
Import java.io. *; import java.net.URL; import java.util.StringTokenizer;
Public class htmlparse {// *************** Analysis news article start *************** Private static string url = "http: // Sports.Tom.com/1019/1042/2005313-552400.html";//(NEW JSPIDER ()). getnewslink ();
// http connection // Get HTML code, then return to the acquired HTML line public static string gethtml () {buffredreader reader = null; string tit = ""; string Title = ""; string time = ""; string content = "" "" "; String temp =" "; try {url url = new url (URL); // crete the url reader = new buffredreader (new inputReader (Url.OpenStream ())); line = reader.readline (); While (line! = null) {if (line.indexof ("")! = -1) {title = line;} if (line.indexof (" ")! = -1) {Time = line;} if (line.indexof (" ")! = -1) {content = line;} system.out.println (line) Line = reader.readline ();} temp = title time content; //system.out.println (t);} catch (exception e) {system.err.println (e);} finally {Try { Reader.close ();} catCH (Exception E) {}} Return Temp;
// Save the acquired HTML to the file public static void savetohtml (String line) {Try {// Get HTML, generate a file PrintWriter newfile = new printwriter (New FileWriter (New FileWriter (GetName ()))); // Write the acquired html newfile.print (line); newfile.close ();} catch (ooException e) {E.PrintStackTrace ();}}
/ / Get the file name to save public static string getName () {StringTokenizer st = new StringTokenizer (URL, "/"); int CT = st.countToKens () - 1; for (int i = 0; i
If the URL is http://sports.tom.com/1019/1042/2005313-552400.html, the printed output is garbled; if the URL is changed to http://www.tcren.org/bbs, the output prints correctly. With the URL changed back to http://sports.tom.com/1019/1042/2005313-552400.html the output can print correctly, but if it is changed to http://sports.tom.com/1019/1094/2005119-512557.html, it is garbled again. This is after changing reader = new BufferedReader(new InputStreamReader(url.openStream())); to reader = new BufferedReader(new InputStreamReader(url.openStream(), "GB2312")); What is going on?