资源简介
自己利用空闲时间写的一款在DOS下运行的简单搜索引擎,可以在自己给定的网页范围内查找信息,并且下载指定网页上的内容。内中包含简单的工程文档,代码还算规范,所以不需要太多注释就基本能看懂。学习Python没多久的同学可以看一下,对于学习Python能够给出一定的启发。
代码片段和文件信息
#-*- coding:utf-8 -*-
import urllib
import os
import re
import HtmlToText
import SearchEngineLog
class SearchEngine:
def __init__(self):
    """Initialise search state, paging defaults, the logger and commands.

    NOTE(review): the quote characters and indentation of this method were
    lost when the snippet was extracted; the statements below are restored
    in their original order.
    """
    # Search results and the keywords that produced them.
    self.lstSearchedItems = []
    self.lstKeywords = []
    self.strKeywords = ''

    # Paging values read by searchSurface().
    self.iItemsEachPage = 10
    self.iCurrentPage = 1

    self.strConfigFile = './config.ini'
    self.log = SearchEngineLog.SearchEngineLog()
    self.initCommand()
    self.readConfigFile()

    # These two are assigned AFTER readConfigFile(), as in the original.
    # NOTE(review): if readConfigFile() sets either attribute, the value is
    # overwritten here -- confirm that this ordering is intended.
    self.headWidth = 80
    self.strCurrentSite = ''
def initCommand(self):
    """Store the literal token for each console command on the instance.

    The code that interprets these tokens is outside this section.
    """
    self.cmdCommand = 'command'
    self.cmdKeywords = 'keywords'
    self.cmdQuit = 'q'
    self.cmdBack = 'b'
    self.cmdNextPage = 'n'
    self.cmdPrevPage = 'l'
    self.cmdRefresh = 'r'
    self.cmdSavePage = 's'
def work(self):
    """Run the interactive loop: draw the main screen, then read keywords.

    The loop has no exit condition of its own; it ends only when one of the
    called methods raises or terminates the process.
    """
    while True:
        self.mainSurface('')
        self.useKeywordsInput()
def mainSurface(self, info):
    """Clear the console and draw the main banner.

    Args:
        info: Text shown on the line below the "SEARCH ENGINE" title,
            padded towards the centre of a ``self.headWidth``-wide line.
    """
    self.strSurface = 'MAIN_SURFACE'
    # 'cls' only exists on Windows/DOS shells; fall back to 'clear' elsewhere.
    os.system('cls' if os.name == 'nt' else 'clear')

    rule = '=' * self.headWidth
    print(rule)
    for text in ('SEARCH ENGINE', info):
        # Floor division keeps the Python 2 integer result on Python 3 too.
        pad = ' ' * ((self.headWidth - len(text)) // 2)
        # The original `print pad, text` put one separator space between them.
        print(pad + ' ' + text)
    print(rule)
def searchSurface(self, info):
    """Clear the console and print the current page of search results.

    Each entry of ``self.lstSearchedItems`` is indexed as ``item[0]``
    (1-based position used for paging), ``item[1]`` (first printed line) and
    ``item[2]`` (second printed line).

    Args:
        info: Accepted for signature parity with mainSurface(); not used.
    """
    self.strSurface = 'SEARCH_SURFACE'
    # 'cls' only exists on Windows/DOS shells; fall back to 'clear' elsewhere.
    os.system('cls' if os.name == 'nt' else 'clear')

    print('=' * self.headWidth)
    # The original `print a, b` put one separator space between the two parts.
    print('The search result of : ' + ' ' + self.strKeywords)
    print('-' * self.headWidth)

    items = self.lstSearchedItems
    per_page = self.iItemsEachPage
    if not items:
        print('Cannot find "%s"!' % self.strKeywords)
    else:
        # Positions on the current page satisfy first < position <= last.
        first = (self.iCurrentPage - 1) * per_page
        last = self.iCurrentPage * per_page
        shown = 0
        for item in items:
            if first < item[0] <= last:
                shown += 1
                print('%d %s' % (shown, item[1]))
                print(' ' + ' ' + item[2])
                print('')

    # NOTE(review): the original indentation was lost; the footer is assumed
    # to be printed for both the empty and non-empty case -- confirm.
    print('=' * self.headWidth)
    # Ceiling division with a minimum of one page. The previous
    # `len/items + 1` over-counted by one whenever the result count was an
    # exact multiple of the page size.
    total_pages = max(1, (len(items) + per_page - 1) // per_page)
    print('Current page: %d/%d' % (self.iCurrentPage, total_pages))
def downWebsite(selfurlpath):
print urlpath ‘is downloading...‘
####创建存放网页内容的文件夹
regex = r‘(.*//www.)(.*)(.com|.cn|.net)‘
res = re.match(regexurlpath)
saveFolder = res.group(2)
cmd = ‘md ‘ + saveFolder
os.system(cmd)
textFilePath = saveFolder+ ‘/‘ +res.group(2)+‘.html‘
####下载文本网页
print ‘downloading the html file...‘
webContex = ‘‘
try:
ul = urllib.urlopen(urlpath)
webContext = ul.read()
ul.close()
except Exceptionerr:
print ‘Cannot open %splease check your network!‘ % urlpath
self.log.errorLog(‘download website “%s“ fail‘ % urlpath)
exit(-1)
try:
file = open(textFilePath‘w‘)
file.write(webContext)
file.close()
except Exceptionerr:
print ‘Create file “%s“ fail!‘ % textFilePath
self.log.errorLog(‘create file “%s“ fail‘ % textFilePath)
####下载图片
print ‘downloading pictures...‘
regex = r‘(http:.+?\.png|http:.+?\.jpg|http:.+?\.jpeg|http:.+?\.gif|http:.+?\.bmp)‘
lstPictures = re.findall(regexwebContext)
for picPath in lstPictures:
regex = r‘(.*)(<|>|“)(.*)‘
if re.match(regexpicPath): continue
regex = r‘(.*)(/.*)‘
picName
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2016-12-28 17:25 SearchEngine\
文件 49374 2016-12-28 17:12 SearchEngine\Capture.PNG
文件 45274 2016-12-28 17:12 SearchEngine\Capture1.PNG
文件 61643 2016-12-28 17:13 SearchEngine\Capture2.PNG
文件 34963 2016-12-28 17:15 SearchEngine\Capture3.PNG
文件 33687 2016-12-28 17:20 SearchEngine\Capture4.PNG
文件 105 2016-12-28 17:14 SearchEngine\config.ini
文件 3873 2016-12-28 16:58 SearchEngine\document.txt
文件 2673 2016-12-28 17:20 SearchEngine\SearchEngine.log
文件 8351 2016-12-28 17:19 SearchEngine\SearchEngine.py
文件 0 2016-12-28 16:54 SearchEngine\SearchEngine.pyc
文件 1757 2016-12-28 15:35 SearchEngine\SearchEngineLog.py
文件 3525 2016-12-28 15:36 SearchEngine\SearchEngineLog.pyc
- 上一篇:自制验证码数据集生成程序
- 下一篇:python3_爬取网上资源存入数据库中
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论