资源简介
C#网页爬虫，抓取全国省、市、区/县三级行政区划数据；如需抓取更多层级，稍作改动即可实现。数据来源为国家统计局《统计用区划和城乡划分代码》2018年版，数据截止于2018年10月31日。
代码片段和文件信息
using System;
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;
using System.Net;
using System.Text;
using System.IO;
using HtmlAgilityPack;
namespace Ioc.Web.Common
{
/// <summary>
/// Plain data holder for one scraped area record.
/// </summary>
public class Address
{
    /// <summary>Identifier of this record.</summary>
    public Guid Id { get; set; }

    /// <summary>
    /// Identifier of the parent record.
    /// NOTE(review): presumably the enclosing higher-level area - confirm against the code that fills it.
    /// </summary>
    public Guid ParentId { get; set; }

    /// <summary>Code of the area.</summary>
    public string AreaCode { get; set; }

    /// <summary>Name of the area.</summary>
    public string AreaName { get; set; }

    /// <summary>
    /// Depth of the area in the hierarchy.
    /// NOTE(review): the numbering scheme is not visible here - confirm against the code that fills it.
    /// </summary>
    public int AreaLevel { get; set; }

    /// <summary>Ordering value for display.</summary>
    public int DisplayOrder { get; set; }

    /// <summary>
    /// Deletion marker.
    /// NOTE(review): presumably a soft-delete flag (0 = active) - confirm with the consumer of this type.
    /// </summary>
    public int Deleted { get; set; }
}
public class GetAddress
{
// Base URL of the 2018 pages on stats.gov.cn; page names such as "index.html" are appended to it.
private const string urlStr = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/";
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its body decoded with
/// <paramref name="encod"/>.
/// </summary>
/// <param name="url">Absolute http/https address of the page to fetch.</param>
/// <param name="encod">Encoding used to decode the response stream.</param>
/// <returns>
/// The decoded page text, or <see cref="string.Empty"/> when the request cannot be created
/// as an HTTP request, the response status is not 200 OK, or any error occurs while
/// requesting or reading the page.
/// </returns>
private static string DownloadHtml(string url, Encoding encod)
{
    try
    {
        // Set up the request. WebRequest.Create only yields an HttpWebRequest for
        // http/https URLs, so any other scheme is treated as "nothing downloaded".
        HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            return string.Empty;
        }

        request.Timeout = 10 * 1000; // 10 s timeout
        request.ContentType = "text/html;charset=utf-8";
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36";

        // Fetch the result.
        using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
        {
            if (resp.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            // Both the stream and the reader are disposed even if ReadToEnd throws.
            using (Stream body = resp.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, encod))
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort download: callers receive an empty string on any failure,
        // but the cause is traced instead of being silently discarded.
        System.Diagnostics.Trace.TraceWarning("DownloadHtml failed for '{0}': {1}", url, ex.Message);
        return string.Empty;
    }
}
/// <summary>
/// Downloads a page and selects the table cells of the rows that carry the given CSS class.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="classStr">Value of the <c>class</c> attribute of the <c>tr</c> rows to read.</param>
/// <param name="level">Optional XPath fragment appended after <c>/td</c> to narrow the selection.</param>
/// <returns>
/// The nodes matched by <c>//tr[@class='classStr']/td</c> followed by <paramref name="level"/>,
/// or <c>null</c> when nothing matches (HtmlAgilityPack's <c>SelectNodes</c> returns null
/// rather than an empty collection), which includes the case where the download failed
/// and produced an empty document.
/// </returns>
private static HtmlNodeCollection GetAddressList(string url, string classStr, string level = "")
{
    // NOTE(review): Encoding.Default depends on the machine/runtime; confirm it matches
    // the charset the target pages are actually served in.
    string html = DownloadHtml(url, Encoding.Default);

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    string cellPath = "//tr[@class='" + classStr + "']/td" + level;
    return doc.DocumentNode.SelectNodes(cellPath);
}
public static void GraspAddress()
{
var provinceUrl = urlStr + “index.html“;
- 上一篇:C#多元线性回归算法
- 下一篇:C#新中新DKQ-116D二次开发代码
评论
共有 条评论