资源简介
pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies. It aims to provide an accurate rendering while remaining optimized for Web display.
pdf2htmlEX is best for text-based PDF files, for example scientific papers with complicated formulas and figures. Text, fonts and formats are natively preserved in HTML such that you can still search and copy. The generated HTML file is static, with optional features powered by JavaScript.
代码片段和文件信息
#!/usr/bin/env python
import os
import shutil
import subprocess
import unittest

from PIL import Image, ImageChops

from test import Common
class BrowserTests(Common):
# Directory holding the input PDFs and, per test case, a folder of
# reference HTML output (see run_test_case).
TEST_DATA_DIR = os.path.join(Common.TEST_DIR, 'browser_tests')

# Options placed before the per-test options on every pdf2htmlEX run.
DEFAULT_PDF2HTMLEX_ARGS = [
    '--fit-width', 800,
    '--last-page', 1,
    '--embed', 'fi',  # avoid base64 to make it faster
]

# NOTE(review): presumably the browser window size used for screenshots;
# not referenced in the visible code -- confirm against generate_image.
BROWSER_WIDTH = 800
BROWSER_HEIGHT = 1200
@classmethod
def setUpClass(cls):
    """Class-level set-up hook; currently a no-op."""
@classmethod
def tearDownClass(cls):
    """Class-level tear-down hook; currently a no-op."""
def run_test_case(self, filename, pdf2htmlEX_args=(), page_must_load=True):
    """Convert one PDF and compare its rendering with the stored reference.

    The PDF is converted with pdf2htmlEX. In generating mode the output
    directory replaces the reference folder and the method returns.
    Otherwise both the fresh HTML and the reference HTML are rendered to
    PNG via ``self.generate_image`` and the test fails if the two images
    differ anywhere.

    Args:
        filename: Name of a PDF inside ``TEST_DATA_DIR``; the extension
            must be ``.pdf`` (case-insensitive).
        pdf2htmlEX_args: Extra command-line options, inserted after
            ``DEFAULT_PDF2HTMLEX_ARGS`` and before the input/output paths.
        page_must_load: Forwarded to ``generate_image`` for the reference
            page only.
    """
    basefilename, extension = os.path.splitext(filename)
    htmlfilename = basefilename + '.html'

    ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
    ref_htmlfilename = os.path.join(ref_htmlfolder, htmlfilename)
    out_htmlfilename = os.path.join(self.cur_output_dir, htmlfilename)

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(extension.lower(), '.pdf', 'Input file is not PDF')

    pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS \
        + list(pdf2htmlEX_args) + [
            os.path.join(self.TEST_DATA_DIR, filename),
            htmlfilename,
        ]

    result = self.run_pdf2htmlEX(pdf2htmlEX_args)

    self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')

    if self.GENERATING_MODE:
        # copy generated html files
        # (second positional argument is ignore_errors, so a missing
        # reference folder is not an error)
        shutil.rmtree(ref_htmlfolder, True)
        shutil.copytree(self.cur_output_dir, ref_htmlfolder)
        return

    png_out_dir = os.path.join(self.cur_temp_dir, 'png_out')
    os.mkdir(png_out_dir)

    pngfilename_out_fullpath = os.path.join(png_out_dir, basefilename + '.out.png')
    self.generate_image(out_htmlfilename, pngfilename_out_fullpath)
    out_img = Image.open(pngfilename_out_fullpath)

    pngfilename_ref_fullpath = os.path.join(png_out_dir, basefilename + '.ref.png')
    self.generate_image(ref_htmlfilename, pngfilename_ref_fullpath, page_must_load=page_must_load)
    ref_img = Image.open(pngfilename_ref_fullpath)

    diff_img = ImageChops.difference(ref_img, out_img)

    # getbbox() returns None when the difference image is entirely zero,
    # i.e. the two screenshots are identical.
    diff_bbox = diff_img.getbbox()
    if diff_bbox is not None:
        # Area of the bounding box around all differing pixels: an upper
        # bound on the number of differing pixels, hence "<=" below.
        diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1])
        img_size = ref_img.size[0] * ref_img.size[1]
        if self.SAVE_TMP:
            # save the diff image
            # http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png
            diff_img.convert('RGB').save(os.path.join(png_out_dir, basefilename + '.diff.png'))
        # Scale by 100 so the value printed before the '%' sign really is
        # a percentage rather than a raw fraction.
        self.fail('PNG files differ by <= %d pixels (%f%% of %d pixels in total)' % (diff_size, 100.0 * diff_size / img_size, img_size))
@unittest.skipIf(Common.GENERATING_MODE ‘Do not auto generate reference for test_fail‘)
def test_fail(self):
# The
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 1019 2015-06-22 10:30 AUTHORS
文件 4550 2015-06-22 10:30 ChangeLog
目录 0 2015-06-22 10:30 data\
文件 2389 2015-06-22 10:30 data\ba
文件 29208 2015-06-22 10:30 data\pdf2htmlEX.js.in
文件 3422 2015-06-22 10:30 data\pdf2htmlEX-64x64.png
文件 940 2015-06-22 10:30 data\build_js.sh
文件 29099 2015-06-22 10:30 data\pdf2htmlEX.js
文件 10516 2015-06-22 10:30 data\pdf2htmlEX.min.js
文件 5457 2015-06-22 10:30 data\ba
文件 2554 2015-06-22 10:30 data\manifest
文件 638 2015-06-22 10:30 data\build_css.sh
文件 1195 2015-06-22 10:30 data\LICENSE
文件 4156 2015-06-22 10:30 data\fancy.css.in
文件 3189 2015-06-22 10:30 data\fancy.min.css
文件 4101 2015-06-22 10:30 data\fancy.css
文件 5113 2015-06-22 10:30 data\ba
文件 967 2015-06-22 10:31 LICENSE
文件 35147 2015-06-22 10:31 LICENSE_GPLv3
文件 7536128 2015-06-22 16:40 pdf2htmlEX.exe
文件 4010 2015-06-22 10:31 README.md
目录 0 2015-06-22 10:31 test\
文件 27 2015-06-22 10:31 test\.gitattributes
文件 4437 2015-06-22 10:31 test\test_remote_browser.py
目录 0 2015-06-22 10:31 test\browser_tests\
目录 0 2015-06-22 10:31 test\browser_tests\basic_text\
文件 1620 2015-06-22 10:31 test\browser_tests\basic_text\f2.woff
文件 7055 2015-06-22 10:31 test\browser_tests\basic_text\basic_text.html
文件 3492 2015-06-22 10:31 test\browser_tests\basic_text\f1.woff
文件 118971 2015-06-22 10:31 test\browser_tests\invalid_unicode_issue477.pdf
文件 137669 2015-06-22 10:31 test\browser_tests\fontfile3_opentype.pdf
............此处省略64个文件信息
- 上一篇:用Django 技术搭建的弹球游戏网站
- 下一篇:html学校网站
评论
共有 条评论