资源简介
用法：python caj2pdf convert [input_file] -o/--output [output_file]（该 Python 工程需要先安装 PyPDF2）
代码片段和文件信息
import os
import struct
from shutil import copy
from subprocess import check_output, STDOUT, CalledProcessError

from utils import fnd, fnd_all, add_outlines, fnd_rvrs, fnd_unuse_no
# Passphrase constant; presumably used when handling "KDH" files — its use
# is not visible in this excerpt, confirm against the rest of the module.
KDH_PASSPHRASE = b"FZHMEI"
class CAJParser(object):
    """Parser that detects a file's format and exposes its header fields.

    The format is detected from the first four bytes of the file. Only the
    "CAJ" and "HN" formats define the header offsets that ``page_num``,
    ``toc_num`` and ``get_toc`` rely on; for "PDF" and "KDH" files those
    offsets are never set, so using these members raises ``AttributeError``.
    """

    # Layout of one table-of-contents record: a 256-byte title, a 24-byte
    # field, a 12-byte page field, another 12-byte field and a native int
    # level. Only the title, page and level are used here.
    _TOC_ENTRY_FORMAT = "256s24s12s12si"
    _TOC_ENTRY_SIZE = 0x134

    def __init__(self, filename):
        """Record ``filename`` and detect its format from the leading magic.

        Args:
            filename: Path of the file to inspect.

        Raises:
            SystemExit: If the magic is shorter than four bytes, cannot be
                decoded as gb18030, or names an unknown format.
        """
        self.filename = filename
        with open(filename, "rb") as caj:
            magic = caj.read(4)
        if len(magic) < 4:
            # Too short to carry a magic; report it like any other
            # unrecognised file rather than leaking a struct.error.
            raise SystemExit("Unknown file type.")
        try:
            fmt = magic.replace(b"\x00", b"").decode("gb18030")
        except UnicodeDecodeError:
            raise SystemExit("Unknown file type.")

        if fmt == "CAJ":
            self.format = "CAJ"
            self._PAGE_NUMBER_OFFSET = 0x10
            self._TOC_NUMBER_OFFSET = 0x110
        elif fmt == "HN":
            self.format = "HN"
            self._PAGE_NUMBER_OFFSET = 0x90
            self._TOC_NUMBER_OFFSET = 0x158
        elif fmt == "%PDF":
            self.format = "PDF"
        elif fmt == "KDH ":
            self.format = "KDH"
        else:
            self.format = None
            raise SystemExit("Unknown file type.")

    def _read_int32(self, offset):
        """Return the native-order 4-byte int stored at ``offset``."""
        with open(self.filename, "rb") as caj:
            caj.seek(offset)
            (value,) = struct.unpack("i", caj.read(4))
        return value

    @property
    def page_num(self):
        """Integer read from the file at the page-number offset."""
        return self._read_int32(self._PAGE_NUMBER_OFFSET)

    @property
    def toc_num(self):
        """Integer read from the file at the TOC-number offset."""
        return self._read_int32(self._TOC_NUMBER_OFFSET)

    def get_toc(self):
        """Read the table-of-contents records that follow the TOC count.

        Returns:
            A list of dicts with keys ``"title"`` (UTF-8 encoded bytes,
            transcoded from gb18030), ``"page"`` (int) and ``"level"`` (int).
        """
        toc = []
        with open(self.filename, "rb") as caj:
            for i in range(self.toc_num):
                caj.seek(self._TOC_NUMBER_OFFSET + 4 + self._TOC_ENTRY_SIZE * i)
                raw_title, _, raw_page, _, level = struct.unpack(
                    self._TOC_ENTRY_FORMAT, caj.read(self._TOC_ENTRY_SIZE)
                )
                # partition() keeps the whole field when it holds no NUL
                # terminator; slicing with find()'s -1 result would have
                # silently dropped the last byte.
                title_bytes = raw_title.partition(b"\x00")[0]
                page_bytes = raw_page.partition(b"\x00")[0]
                toc.append({
                    "title": title_bytes.decode("gb18030").encode("utf-8"),
                    "page": int(page_bytes),
                    "level": level,
                })
        return toc

    def output_toc(self, dest):
        """Write the table of contents to ``dest``, one entry per line.

        Each line is the title indented by one space per level below the
        first, followed by a space and the page number.
        """
        toc_items = self.get_toc()
        with open(dest, "wb") as f:
            for toc in toc_items:
                f.write(b' ' * (toc["level"] - 1) + toc["title"]
                        + b' ' + str(toc["page"]).encode("utf-8") + b'\n')

    def convert(self, dest):
        """Dispatch to the converter matching the detected format.

        Args:
            dest: Destination passed through to the format-specific method.
        """
        if self.format == "CAJ":
            self._convert_caj(dest)
        elif self.format == "HN":
            self._convert_hn(dest)
        elif self.format == "PDF":
            self._convert_pdf(dest)
        elif self.format == "KDH":
            self._convert_kdh(dest)
def _convert_caj(self dest):
caj = open(self.filename “rb“)
# Extract original PDF data (and add header)
caj.seek(self._PAGE_NUMBER_OFFSET + 4)
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 1274316 2020-12-08 20:28 caj2pdf-master\.github\111.caj
....... 30 2020-01-18 22:00 caj2pdf-master\.github\FUNDING.yml
....... 1774 2020-01-18 22:00 caj2pdf-master\.gitignore
文件 464 2020-12-08 20:45 caj2pdf-master\.idea\caj2pdf-master.iml
文件 174 2020-12-08 20:45 caj2pdf-master\.idea\inspectionProfiles\profiles_settings.xml
文件 204 2020-12-08 20:45 caj2pdf-master\.idea\misc.xml
文件 287 2020-12-08 20:45 caj2pdf-master\.idea\modules.xml
文件 3194 2020-12-08 21:09 caj2pdf-master\.idea\workspace.xml
文件 1965 2020-12-08 21:09 caj2pdf-master\caj2pdf
文件 11354 2020-12-08 21:00 caj2pdf-master\cajparser.py
....... 958 2020-01-18 22:00 caj2pdf-master\LICENSE
文件 37354496 2020-10-07 19:31 caj2pdf-master\mutool.exe
....... 3199 2020-01-18 22:00 caj2pdf-master\README.md
....... 5894 2020-01-18 22:00 caj2pdf-master\utils.py
文件 7870 2020-12-08 21:00 caj2pdf-master\__pycache__\cajparser.cpython-37.pyc
文件 5934 2020-12-08 20:48 caj2pdf-master\__pycache__\utils.cpython-37.pyc
目录 0 2020-12-08 20:45 caj2pdf-master\.idea\inspectionProfiles
目录 0 2020-12-08 20:47 caj2pdf-master\.github
目录 0 2020-12-08 21:09 caj2pdf-master\.idea
目录 0 2020-12-08 21:00 caj2pdf-master\__pycache__
目录 0 2020-12-08 21:09 caj2pdf-master
----------- --------- ---------- ----- ----
38672113 21
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论