• 大小: 9.08MB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-10-22
  • 语言: Html/CSS
  • 标签: 在线预览  

资源简介

pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies. It aims to provide an accurate rendering while remaining optimized for Web display. pdf2htmlEX is best for text-based PDF files, for example scientific papers with complicated formulas and figures. Text, fonts and formats are natively preserved in HTML, so you can still search and copy the text. The generated HTML file is static, with optional features powered by JavaScript.

资源截图

代码片段和文件信息

#!/usr/bin/env python

import os
import subprocess
import shutil
import unittest

from PIL import Image ImageChops
from test import Common

class BrowserTests(Common):
    TEST_DATA_DIR = os.path.join(Common.TEST_DIR ‘browser_tests‘)

    DEFAULT_PDF2HTMLEX_ARGS = [
        ‘--fit-width‘ 800
        ‘--last-page‘ 1
        ‘--embed‘ ‘fi‘ # avoid base64 to make it faster
    ]

    BROWSER_WIDTH=800
    BROWSER_HEIGHT=1200

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def run_test_case(self filename pdf2htmlEX_args=[] page_must_load=True):
        basefilename extension = os.path.splitext(filename)
        htmlfilename = basefilename + ‘.html‘

        ref_htmlfolder = os.path.join(self.TEST_DATA_DIR basefilename)
        ref_htmlfilename = os.path.join(ref_htmlfolder htmlfilename)

        out_htmlfilename = os.path.join(self.cur_output_dir htmlfilename)

        self.assertEquals(extension.lower() ‘.pdf‘ ‘Input file is not PDF‘)

        pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS \
            + list(pdf2htmlEX_args) + [
                os.path.join(self.TEST_DATA_DIR filename)
                htmlfilename
            ]

        result = self.run_pdf2htmlEX(pdf2htmlEX_args)
        self.assertIn(htmlfilename result[‘output_files‘] ‘HTML file is not generated‘)

        if self.GENERATING_MODE:
            # copy generated html files
            shutil.rmtree(ref_htmlfolder True)
            shutil.copytree(self.cur_output_dir ref_htmlfolder)
            return

        png_out_dir = os.path.join(self.cur_temp_dir ‘png_out‘)
        os.mkdir(png_out_dir)

        pngfilename_out_fullpath = os.path.join(png_out_dir basefilename + ‘.out.png‘)
        self.generate_image(out_htmlfilename pngfilename_out_fullpath)
        out_img = Image.open(pngfilename_out_fullpath)

        pngfilename_ref_fullpath = os.path.join(png_out_dir basefilename + ‘.ref.png‘)
        self.generate_image(ref_htmlfilename pngfilename_ref_fullpath page_must_load=page_must_load)
        ref_img = Image.open(pngfilename_ref_fullpath)

        diff_img = ImageChops.difference(ref_img out_img);

        diff_bbox = diff_img.getbbox()
        if diff_bbox is not None:
            diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1])
            img_size = ref_img.size[0] * ref_img.size[1]
            if self.SAVE_TMP:
                # save the diff image
                # http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png
                diff_img.convert(‘RGB‘).save(os.path.join(png_out_dir basefilename + ‘.diff.png‘))
            self.fail(‘PNG files differ by <= %d pixels (%f%% of %d pixels in total)‘ % (diff_size 1.0*diff_size/img_size img_size))

    @unittest.skipIf(Common.GENERATING_MODE ‘Do not auto generate reference for test_fail‘)
    def test_fail(self):
        # The

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     文件        1019  2015-06-22 10:30  AUTHORS
     文件        4550  2015-06-22 10:30  ChangeLog
     目录           0  2015-06-22 10:30  data\
     文件        2389  2015-06-22 10:30  data\base.min.css
     文件       29208  2015-06-22 10:30  data\pdf2htmlEX.js.in
     文件        3422  2015-06-22 10:30  data\pdf2htmlEX-64x64.png
     文件         940  2015-06-22 10:30  data\build_js.sh
     文件       29099  2015-06-22 10:30  data\pdf2htmlEX.js
     文件       10516  2015-06-22 10:30  data\pdf2htmlEX.min.js
     文件        5457  2015-06-22 10:30  data\base.css.in
     文件        2554  2015-06-22 10:30  data\manifest
     文件         638  2015-06-22 10:30  data\build_css.sh
     文件        1195  2015-06-22 10:30  data\LICENSE
     文件        4156  2015-06-22 10:30  data\fancy.css.in
     文件        3189  2015-06-22 10:30  data\fancy.min.css
     文件        4101  2015-06-22 10:30  data\fancy.css
     文件        5113  2015-06-22 10:30  data\base.css
     文件         967  2015-06-22 10:31  LICENSE
     文件       35147  2015-06-22 10:31  LICENSE_GPLv3
     文件     7536128  2015-06-22 16:40  pdf2htmlEX.exe
     文件        4010  2015-06-22 10:31  README.md
     目录           0  2015-06-22 10:31  test\
     文件          27  2015-06-22 10:31  test\.gitattributes
     文件        4437  2015-06-22 10:31  test\test_remote_browser.py
     目录           0  2015-06-22 10:31  test\browser_tests\
     目录           0  2015-06-22 10:31  test\browser_tests\basic_text\
     文件        1620  2015-06-22 10:31  test\browser_tests\basic_text\f2.woff
     文件        7055  2015-06-22 10:31  test\browser_tests\basic_text\basic_text.html
     文件        3492  2015-06-22 10:31  test\browser_tests\basic_text\f1.woff
     文件      118971  2015-06-22 10:31  test\browser_tests\invalid_unicode_issue477.pdf
     文件      137669  2015-06-22 10:31  test\browser_tests\fontfile3_opentype.pdf
............此处省略64个文件信息

评论

共有 条评论