• 大小: 22.74MB
    文件类型: .rar
    金币: 2
    下载: 1 次
    发布日期: 2023-07-15
  • 语言: Python
  • 标签: python  论文  caj  

资源简介

用法:python caj2pdf convert [input_file] -o/--output [output_file](注意:运行该 Python 工程需要先安装并配置 PyPDF2)

资源截图

代码片段和文件信息

# Standard library.
import os
import struct
from shutil import copy
from subprocess import check_output, STDOUT, CalledProcessError

# Project-local helpers (utils.py ships alongside this module).
from utils import fnd, fnd_all, add_outlines, fnd_rvrs, fnd_unuse_no

# NOTE(review): presumably the passphrase used when decoding KDH files; the
# code that consumes it is not visible in this excerpt -- confirm.
KDH_PASSPHRASE = b"FZHMEI"


class CAJParser(object):
    """Identify a CAJ-family file by its 4-byte magic and read its metadata.

    The constructor inspects the first four bytes of the file and sets
    ``self.format`` to one of ``"CAJ"``, ``"HN"``, ``"PDF"`` or ``"KDH"``.
    Only the ``"CAJ"`` and ``"HN"`` formats define the header offsets used by
    ``page_num``, ``toc_num`` and ``get_toc``; calling those on a ``"PDF"``
    or ``"KDH"`` file raises ``AttributeError``.
    """

    def __init__(self, filename):
        """Open *filename*, read its magic bytes and record the format.

        Raises:
            SystemExit: with the message "Unknown file type." when the magic
                is not recognised, cannot be decoded as gb18030, or the file
                holds fewer than four bytes.
        """
        self.filename = filename
        try:
            with open(filename, "rb") as caj:
                magic = struct.unpack("4s", caj.read(4))[0]
            # NUL padding is stripped so that b"CAJ\x00" compares as "CAJ".
            fmt = magic.replace(b'\x00', b'').decode("gb18030")
        except (struct.error, UnicodeDecodeError):
            # struct.error: the file is shorter than the 4-byte magic.
            raise SystemExit("Unknown file type.")

        if fmt == "CAJ":
            self.format = "CAJ"
            self._PAGE_NUMBER_OFFSET = 0x10
            self._TOC_NUMBER_OFFSET = 0x110
        elif fmt == "HN":
            self.format = "HN"
            self._PAGE_NUMBER_OFFSET = 0x90
            self._TOC_NUMBER_OFFSET = 0x158
        elif fmt == "%PDF":
            self.format = "PDF"
        elif fmt == "KDH ":
            self.format = "KDH"
        else:
            self.format = None
            raise SystemExit("Unknown file type.")

    @property
    def page_num(self):
        """Return the 4-byte integer stored at ``_PAGE_NUMBER_OFFSET``."""
        with open(self.filename, "rb") as caj:
            caj.seek(self._PAGE_NUMBER_OFFSET)
            [page_num] = struct.unpack("i", caj.read(4))
            return page_num

    @property
    def toc_num(self):
        """Return the 4-byte integer stored at ``_TOC_NUMBER_OFFSET``."""
        with open(self.filename, "rb") as caj:
            caj.seek(self._TOC_NUMBER_OFFSET)
            [toc_num] = struct.unpack("i", caj.read(4))
            return toc_num

    def get_toc(self):
        """Read the table-of-contents records that follow the TOC counter.

        Each record is 0x134 bytes long and is unpacked as a 256-byte title,
        a 24-byte field, a 12-byte page field, another 12-byte field and an
        integer level.

        Returns:
            list of dict: one ``{"title", "page", "level"}`` entry per
            record, where ``title`` is UTF-8 encoded ``bytes`` (decoded from
            gb18030), ``page`` is an ``int`` and ``level`` is an ``int``.
        """
        toc = []
        with open(self.filename, "rb") as caj:
            for i in range(self.toc_num):
                # Records start right after the 4-byte TOC counter.
                caj.seek(self._TOC_NUMBER_OFFSET + 4 + 0x134 * i)
                toc_bytes = struct.unpack("256s24s12s12si", caj.read(0x134))
                # Cut each text field at its first NUL.  split() keeps the
                # whole field when no NUL is present, whereas slicing with
                # find()'s -1 result would silently drop the last byte.
                raw_title = toc_bytes[0].split(b"\x00", 1)[0]
                raw_page = toc_bytes[2].split(b"\x00", 1)[0]
                toc_entry = {
                    "title": raw_title.decode("gb18030").encode("utf-8"),
                    "page": int(raw_page),
                    "level": toc_bytes[4],
                }
                toc.append(toc_entry)
        return toc

    def output_toc(self, dest):
        """Write the table of contents to *dest*, one entry per line.

        Each line is the title indented by four spaces per level above 1,
        followed by four spaces and the page number.
        """
        toc_items = self.get_toc()
        with open(dest, "wb") as f:
            for toc in toc_items:
                f.write(b'    ' * (toc["level"] - 1) + toc["title"]
                        + b'    ' + str(toc["page"]).encode("utf-8") + b'\n')

    def convert(self, dest):
        """Dispatch to the converter matching ``self.format``.

        Does nothing when ``self.format`` is not one of the four known
        formats.
        """
        if self.format == "CAJ":
            self._convert_caj(dest)
        elif self.format == "HN":
            self._convert_hn(dest)
        elif self.format == "PDF":
            self._convert_pdf(dest)
        elif self.format == "KDH":
            self._convert_kdh(dest)
    def _convert_caj(self dest):
        caj = open(self.filename “rb“)

        # Extract original PDF data (and add header)
        caj.seek(self._PAGE_NUMBER_OFFSET + 4)
      

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件    1274316  2020-12-08 20:28  caj2pdf-master\.github\111.caj

    .......        30  2020-01-18 22:00  caj2pdf-master\.github\FUNDING.yml

    .......      1774  2020-01-18 22:00  caj2pdf-master\.gitignore

     文件        464  2020-12-08 20:45  caj2pdf-master\.idea\caj2pdf-master.iml

     文件        174  2020-12-08 20:45  caj2pdf-master\.idea\inspectionProfiles\profiles_settings.xml

     文件        204  2020-12-08 20:45  caj2pdf-master\.idea\misc.xml

     文件        287  2020-12-08 20:45  caj2pdf-master\.idea\modules.xml

     文件       3194  2020-12-08 21:09  caj2pdf-master\.idea\workspace.xml

     文件       1965  2020-12-08 21:09  caj2pdf-master\caj2pdf

     文件      11354  2020-12-08 21:00  caj2pdf-master\cajparser.py

    .......       958  2020-01-18 22:00  caj2pdf-master\LICENSE

     文件   37354496  2020-10-07 19:31  caj2pdf-master\mutool.exe

    .......      3199  2020-01-18 22:00  caj2pdf-master\README.md

    .......      5894  2020-01-18 22:00  caj2pdf-master\utils.py

     文件       7870  2020-12-08 21:00  caj2pdf-master\__pycache__\cajparser.cpython-37.pyc

     文件       5934  2020-12-08 20:48  caj2pdf-master\__pycache__\utils.cpython-37.pyc

     目录          0  2020-12-08 20:45  caj2pdf-master\.idea\inspectionProfiles

     目录          0  2020-12-08 20:47  caj2pdf-master\.github

     目录          0  2020-12-08 21:09  caj2pdf-master\.idea

     目录          0  2020-12-08 21:00  caj2pdf-master\__pycache__

     目录          0  2020-12-08 21:09  caj2pdf-master

----------- ---------  ---------- -----  ----

             38672113                    21


评论

共有 条评论