Code to extract the e-mail addresses and absolute links from a web page ...

xiaoxiao2021-03-06  80

// Value returned by every method below when there is nothing to report
// (page could not be fetched, or no match was found).
private const string NoResult = "null";

// Absolute http:// or https:// URL: a dotted host name followed by an
// optional path/query. The hyphen is placed last in each character class so
// it is always read as a literal and never as a range operator.
private static readonly Regex LinkPattern = new Regex(
    @"https?://([\w-]+\.)+[\w-]+(/[\w./?%&=-]*)?",
    RegexOptions.IgnoreCase);

// E-mail address: a local part made of word characters optionally separated
// by '-' or '.', an '@', then a domain of at least two dot-separated labels.
private static readonly Regex EmailPattern = new Regex(
    @"\w+((-\w+)|(\.\w+))*@\w+((\.|-)\w+)*\.\w+",
    RegexOptions.IgnoreCase);

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The page content, or the literal string "null" when the address is
/// missing or the page cannot be read.
/// </returns>
private string FetchPage(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return NoResult;
    }

    try
    {
        // Nested using blocks dispose the client, stream and reader even
        // when the read fails part-way through.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        // Unreachable host, invalid address or HTTP error: fetching is best-effort.
        return NoResult;
    }
    catch (IOException)
    {
        // The connection dropped while the body was being read.
        return NoResult;
    }
    catch (ArgumentException)
    {
        // The address could not be interpreted at all.
        return NoResult;
    }
}

/// <summary>
/// Extracts the absolute links found in the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// Every matched link followed by ';', or the literal string "null" when the
/// page yields no link (including when it could not be fetched).
/// </returns>
public string GetLink(string url)
{
    string content = this.FetchPage(url);
    return JoinMatches(LinkPattern.Matches(content));
}

/// <summary>
/// Extracts the e-mail addresses found in the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// Every matched address followed by ';', or the literal string "null" when
/// the page yields no address (including when it could not be fetched).
/// </returns>
public string GetEmailAddr(string url)
{
    string content = this.FetchPage(url);
    return JoinMatches(EmailPattern.Matches(content));
}

// Concatenates the matched texts, terminating each one with ';'.
// Returns "null" when nothing was matched.
private static string JoinMatches(MatchCollection matches)
{
    StringBuilder sb = new StringBuilder();
    foreach (Match match in matches)
    {
        sb.Append(match.Value);
        sb.Append(';');
    }

    if (sb.Length == 0)
    {
        sb.Append(NoResult);
    }

    return sb.ToString();
}

Some of this code is thanks to my friend Fan Fan.

转载请注明原文地址:https://www.9cbs.com/read-93515.html

New Post(0)