本文实例总结了三种常用的C#网页信息采集方法。分享给大家供大家参考。具体实现方法如下:
一、通过 HttpWebResponse 来获取
/// <summary>
/// Requests <paramref name="url"/> using the current default credentials and returns
/// the beginning of the response body (at most 4000 characters).
/// </summary>
/// <param name="url">URL to request; it is also sent as the Referer header.</param>
/// <returns>
/// Up to the first 4000 characters of the body. The body is decoded as UTF-8 when the
/// Content-Type header contains "utf" (case-insensitive), otherwise with
/// <see cref="System.Text.Encoding.Default"/>.
/// </returns>
/// <exception cref="ApplicationException">
/// Thrown with an "HTTP 403 Access denied" message when creating or sending the request
/// fails for any reason, and with an "HTTP 404 Page not found" message when reading the
/// response fails for any reason. The original exception is attached as InnerException;
/// the message text does not necessarily reflect the real HTTP status.
/// </exception>
public static string CheckTeamSiteUrl(string url)
{
    const int MaxChars = 4000;
    HttpWebResponse httpResponse;

    try
    {
        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(url);
        httpRequest.Headers.Set("Pragma", "no-cache");
        httpRequest.CookieContainer = new CookieContainer();
        httpRequest.Referer = url;
        httpRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        httpRequest.Credentials = System.Net.CredentialCache.DefaultCredentials;
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 403 Access denied, URL: " + url, ex);
    }

    // If we get here, the request succeeded and a response is available.
    try
    {
        // Dispose the response and the reader even when reading throws.
        using (httpResponse)
        {
            // Ordinal, case-insensitive search avoids culture-dependent lowercasing
            // and tolerates a missing Content-Type header.
            string contentType = httpResponse.ContentType;
            bool isUtf = contentType != null
                && contentType.IndexOf("utf", StringComparison.OrdinalIgnoreCase) != -1;

            System.Text.Encoding encoding = isUtf
                ? System.Text.Encoding.UTF8
                : System.Text.Encoding.Default;

            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream(), encoding))
            {
                char[] buffer = new char[MaxChars];
                int charsRead = reader.ReadBlock(buffer, 0, MaxChars);

                // Only convert the characters actually read; converting the whole
                // buffer would pad short pages with trailing '\0' characters.
                return new string(buffer, 0, charsRead);
            }
        }
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 404 Page not found, URL: " + url, ex);
    }
}
/// <summary>
/// Downloads the complete body of <paramref name="url"/> as text, passes it to
/// <c>js.alert</c>, and returns it.
/// </summary>
/// <param name="url">URL to download. The request uses a 30 second timeout.</param>
/// <returns>
/// The whole response body, decoded as UTF-8 when the Content-Type contains "utf"
/// (case-insensitive), otherwise with <see cref="System.Text.Encoding.Default"/>.
/// </returns>
/// <exception cref="Exception">
/// Thrown when any step fails; the underlying failure is attached as InnerException.
/// </exception>
public static string getPage(String url)
{
    WebResponse result = null;
    string resultstring = "";

    try
    {
        WebRequest req = WebRequest.Create(url);
        req.Timeout = 30000;
        result = req.GetResponse();

        // Ordinal, case-insensitive search avoids culture-dependent lowercasing
        // and tolerates a missing Content-Type.
        string contentType = result.ContentType;
        bool isUtf = contentType != null
            && contentType.IndexOf("utf", StringComparison.OrdinalIgnoreCase) != -1;

        System.Text.Encoding encoding = isUtf
            ? System.Text.Encoding.UTF8
            : System.Text.Encoding.Default;

        // Dispose the reader (and the underlying stream) as soon as the body is read.
        using (StreamReader sr = new StreamReader(result.GetResponseStream(), encoding))
        {
            resultstring = sr.ReadToEnd();
        }

        // NOTE(review): `js` is declared outside this snippet; presumably it emits a
        // client-side alert with the page content - confirm this is still wanted.
        js.alert(resultstring);
    }
    catch (Exception ex)
    {
        // Keep throwing plain Exception for existing callers, but preserve the cause
        // instead of discarding it.
        throw new Exception("Failed to download page, URL: " + url, ex);
    }
    finally
    {
        if (result != null)
        {
            result.Close();
        }
    }

    return resultstring;
}
/// <summary>
/// Downloads the page at <c>url</c> and returns at most <paramref name="length"/>
/// characters from the start of its body.
/// </summary>
/// <param name="length">Maximum number of characters to return.</param>
/// <returns>
/// The leading characters of the page (fewer than <paramref name="length"/> when the
/// page is shorter). If anything fails, the exception's Message is returned instead
/// of the page content, so callers cannot distinguish an error from content by type.
/// </returns>
/// <remarks>
/// Reads the <c>url</c>, <c>encode</c> and <c>encoding</c> members, which are declared
/// outside this snippet.
/// </remarks>
public string get(int length)
{
    try
    {
        getEncodeing();

        // Dispose the client, stream and reader even when reading throws.
        using (WebClient wb = new WebClient())
        using (Stream response = wb.OpenRead(url))
        using (StreamReader reader = new StreamReader(response, this.encoding, true, 256000))
        {
            char[] buffer = new char[length];

            // ReadBlock keeps reading until `length` characters are available or the
            // stream ends; a single Read may return early on a slow network stream.
            int charsRead = reader.ReadBlock(buffer, 0, length);

            // Only convert the characters actually read; converting the whole buffer
            // would pad short pages with trailing '\0' characters.
            return new string(buffer, 0, charsRead);
        }
    }
    catch (Exception e)
    {
        // Existing contract: report the failure text instead of throwing.
        return e.Message;
    }
}

/// <summary>
/// Sets <c>encoding</c> from the encoding name stored in <c>encode</c>.
/// </summary>
/// <remarks>
/// "UTF-8", "GB2312" and "ASCII" are matched explicitly; any other value is passed to
/// <see cref="Encoding.GetEncoding(string)"/>, which throws for unknown or null names.
/// </remarks>
private void getEncodeing()
{
    switch (this.encode)
    {
        case "UTF-8":
            encoding = Encoding.UTF8;
            break;
        case "GB2312":
            encoding = Encoding.GetEncoding("GB2312");
            break;
        case "ASCII":
            encoding = Encoding.ASCII;
            break;
        default:
            encoding = Encoding.GetEncoding(encode);
            break;
    }
}
希望本文所述对大家的C#程序设计有所帮助。