Jsoup网络爬虫

大小: 2.26MB

文件类型: .rar

金币: 2

下载: 0 次

发布日期: 2023-10-27
语言: 其他
标签: Jsoup 网络爬虫

高速下载

资源简介

Jsoup网络爬虫

资源截图

小图大图

代码片段和文件信息

package com.github.webcrawder;

import java.io.IOException;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class CrawderDemo {
	public static void main（String[] args） throws ClientProtocolException IOException {
		// 创建httpClient客户端
		HttpClient hClient = new DefaultHttpClient（）;
		// 创建http发送请求对象，Httpget
		HttpGet hget = new HttpGet（“http://www.itcast.cn“）;
		// 发送请求
		HttpResponse response = hClient.execute（hget）;
		// 获取网页内容
		String content = EntityUtils.toString（response.getEntity（） “utf-8“）;
		// 使用Jsoup解析网页内容
		Document document = Jsoup.parse（content）;
		// 使用元素选择器选择网页的内容
		Elements elements = document.select（“ul.nav_li a“）;
		System.out.println（elements.text（））;
		System.out.println（elements）;

	}

}

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        859  2017-03-15 17:08  WebCrawler\.classpath

     文件        386  2017-03-15 17:04  WebCrawler\.project

     文件        670  2017-03-15 17:05  WebCrawler\.settings\org.eclipse.jdt.core.prefs

     文件       1887  2017-03-15 19:16  WebCrawler\bin\com\github\webcrawder\CrawderDemo.class

     文件       2679  2017-03-15 18:04  WebCrawler\bin\com\github\webcrawder\HttpClientCrawder.class

     文件       2718  2017-03-15 18:27  WebCrawler\bin\com\github\webcrawder\HttpClientJsoup.class

     文件       1786  2017-03-15 17:31  WebCrawler\bin\com\github\webcrawder\JsoupCrawder.class

     文件       1963  2017-03-15 19:06  WebCrawler\bin\com\github\webcrawder\Jsouptest.class

     文件       1891  2017-03-15 17:47  WebCrawler\bin\com\github\webcrawder\MyHttpClient.class

     文件       1707  2017-03-15 17:32  WebCrawler\bin\com\github\webcrawder\MyJsoup.class

     文件     345035  2017-03-15 17:07  WebCrawler\lib\apache-mime4j-0.6.jar

     文件      58160  2017-03-15 17:07  WebCrawler\lib\commons-codec-1.4.jar

     文件      60841  2017-03-15 17:07  WebCrawler\lib\commons-logging-1.1.1.jar

     文件     291039  2017-03-15 17:07  WebCrawler\lib\httpclient-4.0.1.jar

     文件     172888  2017-03-15 17:07  WebCrawler\lib\httpcore-4.0.1.jar

     文件      25443  2017-03-15 17:07  WebCrawler\lib\httpmime-4.0.1.jar

     文件     119888  2017-03-15 17:07  WebCrawler\lib\json.jar

     文件     293672  2017-03-15 17:07  WebCrawler\lib\jsoup-1.7.2.jar

     文件     489884  2017-03-15 17:07  WebCrawler\lib\log4j-1.2.17.jar

     文件     724225  2017-03-15 17:07  WebCrawler\lib\mysql-connector-java-5.1.10-bin.jar

     文件       1175  2017-03-15 19:16  WebCrawler\src\com\github\webcrawder\CrawderDemo.java

     文件       2213  2017-03-15 18:04  WebCrawler\src\com\github\webcrawder\HttpClientCrawder.java

     文件       2214  2017-03-15 18:27  WebCrawler\src\com\github\webcrawder\HttpClientJsoup.java

     文件       1595  2017-03-15 17:31  WebCrawler\src\com\github\webcrawder\JsoupCrawder.java

     文件       1282  2017-03-15 19:06  WebCrawler\src\com\github\webcrawder\Jsouptest.java

     文件       1546  2017-03-15 17:47  WebCrawler\src\com\github\webcrawder\MyHttpClient.java

     文件        824  2017-03-15 17:32  WebCrawler\src\com\github\webcrawder\MyJsoup.java

     目录          0  2017-03-15 19:09  WebCrawler\bin\com\github\webcrawder

     目录          0  2017-03-15 19:09  WebCrawler\src\com\github\webcrawder

     目录          0  2017-03-15 17:09  WebCrawler\bin\com\github

............此处省略11个文件信息

上一篇：opencv依靠颜色识别和跟踪物体
下一篇：SVM工具箱，用于数据的回归、预测以及分类

共有条评论

Jsoup网络爬虫

资源简介

资源截图

代码片段和文件信息

评论

相关资源