C#网页信息采集方法汇总

本文通过实例总结了三种常用的 C# 网页信息采集方法,供大家参考。具体实现方法如下:

一、通过 HttpWebResponse 来获取

// Requests the page at `url` with the caller's default credentials and returns
// at most the first 4000 characters of the response body.
//
// url: absolute address of the page to fetch; it is also sent as the Referer.
//
// Returns the decoded text actually read (shorter than 4000 characters when the
// page is shorter; never padded with '\0').
//
// Throws ApplicationException:
//   - "HTTP 403 Access denied ..." when creating or sending the request fails;
//   - "HTTP 404 Page not found ..." when reading the response fails.
// The messages are kept for compatibility and do not necessarily reflect the
// real HTTP status; the real cause is available through InnerException.
public static string CheckTeamSiteUrl(string url)
{
    // Upper bound on the number of characters returned to the caller.
    const int MaxChars = 4000;

    HttpWebResponse httpResponse;

    try
    {
        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(url);
        httpRequest.Headers.Set("Pragma", "no-cache");
        httpRequest.CookieContainer = new CookieContainer();
        httpRequest.Referer = url;
        httpRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        httpRequest.Credentials = System.Net.CredentialCache.DefaultCredentials;
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 403 Access denied, URL: " + url, ex);
    }

    // If we get here, the request succeeded and a response is available.
    try
    {
        // Dispose the response even when decoding or reading throws.
        using (httpResponse)
        {
            // Decode as UTF-8 when the Content-Type header mentions it,
            // otherwise fall back to the system default code page.
            string contentType = httpResponse.ContentType;
            System.Text.Encoding encoding =
                contentType.IndexOf("utf", StringComparison.OrdinalIgnoreCase) != -1
                    ? System.Text.Encoding.UTF8
                    : System.Text.Encoding.Default;

            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream(), encoding))
            {
                char[] buff = new char[MaxChars];

                // ReadBlock reports how many characters were really read; use
                // that count so the unused tail of the buffer is not returned.
                int charsRead = reader.ReadBlock(buff, 0, buff.Length);
                return new string(buff, 0, charsRead);
            }
        }
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 404 Page not found, URL: " + url, ex);
    }
}

 
二、通过 WebResponse 来获取

// Downloads the whole page at `url` and returns its body as a string.
//
// url: address of the page to fetch. The request times out after 30 seconds.
//
// The body is decoded as UTF-8 when the response Content-Type mentions "utf",
// otherwise with the system default code page. The downloaded text is also
// passed to js.alert (a helper that is not defined in this snippet).
//
// Throws Exception when the request, the read, or js.alert fails; the original
// failure is preserved as InnerException.
public static string getPage(String url)
{
    WebResponse result = null;
    string resultstring = "";

    try
    {
        WebRequest req = WebRequest.Create(url);
        req.Timeout = 30000;
        result = req.GetResponse();

        // Some WebResponse implementations may not report a content type;
        // treat a missing value as "not UTF".
        string contentType = result.ContentType;
        bool isUtf = contentType != null
            && contentType.IndexOf("utf", StringComparison.OrdinalIgnoreCase) != -1;
        System.Text.Encoding encoding = isUtf
            ? System.Text.Encoding.UTF8
            : System.Text.Encoding.Default;

        // Read the stream into a string; the reader is disposed even on failure.
        using (StreamReader sr = new StreamReader(result.GetResponseStream(), encoding))
        {
            resultstring = sr.ReadToEnd();
        }

        js.alert(resultstring);
    }
    catch (Exception ex)
    {
        // Keep the thrown type callers already catch, but do not lose the cause.
        throw new Exception("Failed to get page, URL: " + url, ex);
    }
    finally
    {
        if (result != null)
        {
            result.Close();
        }
    }

    return resultstring;
}

 
三、通过 WebClient 来获取

// Downloads the page at this instance's `url` and returns at most `length`
// characters of it, decoded with the encoding selected by getEncodeing().
//
// length: maximum number of characters to return.
//
// Returns the text actually read (shorter than `length` when the page is
// shorter; never padded with '\0'). On any failure the exception's message is
// returned instead of the page content, so callers cannot tell the two apart
// by type alone.
public string get(int length)
{
    try
    {
        getEncodeing();

        // Dispose the client, stream and reader even when reading throws.
        using (WebClient wb = new WebClient())
        using (Stream response = wb.OpenRead(url))
        using (StreamReader reader = new StreamReader(response, this.encoding, true, 256000))
        {
            char[] buffer = new char[length];

            // ReadBlock keeps reading until `length` characters are available
            // or the stream ends, and reports how many were really read.
            int charsRead = reader.ReadBlock(buffer, 0, length);
            return new string(buffer, 0, charsRead);
        }
    }
    catch (Exception e)
    {
        return e.Message;
    }
}

// Resolves the encoding name stored in `encode` and stores the matching
// Encoding object in `encoding`.
private void getEncodeing()
{
    string requested = this.encode;

    if (requested == "UTF-8")
    {
        encoding = Encoding.UTF8;
    }
    else if (requested == "GB2312")
    {
        encoding = Encoding.GetEncoding("GB2312");
    }
    else if (requested == "ASCII")
    {
        encoding = Encoding.ASCII;
    }
    else
    {
        // Any other name is looked up as-is by the framework.
        encoding = Encoding.GetEncoding(requested);
    }
}

希望本文所述对大家的C#程序设计有所帮助。