资源简介
C# 蜘蛛爬虫源代码,上传让大家评评,谢谢。
代码片段和文件信息
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace chinaz
{
/// <summary>
/// Console tool that downloads a numbered range of pages and collects every
/// distinct e-mail address found in them into <c>mail.txt</c>.
/// </summary>
/// <remarks>
/// Input is read from the console, one value per line, in this order:
/// first page number, last page number (inclusive), URL format string
/// (page number is substituted for <c>{0}</c>), and the name of the text
/// encoding passed to <see cref="Encoding.GetEncoding(string)"/>.
/// The cookie text sent with each request is read from <c>cookie.txt</c>.
/// </remarks>
class Program
{
    /// <summary>
    /// E-mail address pattern. Built once and reused for every page instead
    /// of being re-specified on each call.
    /// </summary>
    private static readonly Regex EmailPattern = new Regex(
        @"\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Pages whose length does not exceed this many characters are skipped.
    /// NOTE(review): presumably filters out error/empty responses - confirm.
    /// </summary>
    private const int MinPageLength = 1000;

    /// <summary>
    /// Program entry point; see the class remarks for the expected console input.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string cookie = File.ReadAllText("cookie.txt");

        int a = int.Parse(Console.ReadLine());
        int b = int.Parse(Console.ReadLine());
        string url = Console.ReadLine();
        Encoding encoding = Encoding.GetEncoding(Console.ReadLine());

        // Addresses already written, so each one is emitted only once per run.
        Dictionary<string, bool> seen = new Dictionary<string, bool>();

        // Open the output file once in append mode rather than once per match.
        // AutoFlush keeps every address on disk even if the run is interrupted.
        using (StreamWriter sw = new StreamWriter("mail.txt", true))
        {
            sw.AutoFlush = true;

            for (int i = a; i <= b; i++)
            {
                string html = SRWebClient.GetPage(string.Format(url, i), encoding, cookie);
                if (html == null || html.Length <= MinPageLength)
                {
                    continue;
                }

                foreach (Match m in EmailPattern.Matches(html))
                {
                    string address = m.Value;
                    if (address.Trim() == string.Empty || seen.ContainsKey(address))
                    {
                        continue;
                    }

                    Console.WriteLine(address);
                    sw.WriteLine(address);
                    seen.Add(address, true);
                }
            }
        }

        Console.Write("完成");
        Console.ReadLine();
    }
}
public class SRWebClient
{
public CookieCollection cookie;
/// <summary>
/// Creates a client whose <c>cookie</c> collection is initially null.
/// </summary>
public SRWebClient()
{
    this.cookie = null;
}
#region 从包含多个 Cookie 的字符串读取到 CookieCollection 集合中
/// <summary>
/// Splits a cookie string on ';' and adds each cookie that
/// <c>GetCookieFromString</c> can build from a segment to the collection.
/// </summary>
/// <param name="cookieCol">
/// Target collection; a new <see cref="CookieCollection"/> is created and
/// handed back through the <c>ref</c> parameter when it is null.
/// </param>
/// <param name="cookieHead">
/// String holding the ';'-separated cookies. When null, nothing is added
/// (the collection is still created if it was null).
/// </param>
/// <param name="defaultDomain">
/// Passed through unchanged to <c>GetCookieFromString</c> for every segment.
/// </param>
private static void AddCookieWithCookieHead(ref CookieCollection cookieCol, string cookieHead, string defaultDomain)
{
    if (cookieCol == null)
    {
        cookieCol = new CookieCollection();
    }

    if (cookieHead == null)
    {
        return;
    }

    foreach (string part in cookieHead.Split(';'))
    {
        Cookie ck = GetCookieFromString(part.Trim(), defaultDomain);

        // Segments for which no cookie was produced are skipped.
        if (ck != null)
        {
            cookieCol.Add(ck);
        }
    }
}
#endregion
#region 读取某一个 Cookie 字
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 2106 2008-08-11 14:25 chinaz.csproj
文件 16466 2008-08-11 14:39 Program.cs
文件 1350 2008-08-11 14:25 Properties\AssemblyInfo.cs
目录 0 2008-08-11 14:25 Properties
----------- --------- ---------- ----- ----
19922 4
- 上一篇:C#高校教材管理系统
- 下一篇:C# -大地测量学-高斯投影正反算公式程序
评论
共有 条评论