资源简介

从 PDF 中提取图片的项目实例，可用 VS2008 直接打开运行。

资源截图

代码片段和文件信息

using System;
using System.Drawing.Imaging;
using System.IO;
using iTextSharp.text.pdf;

namespace itextsharp.ExtractImagesFromPDF
{
    public class PDFParser
    {
        public static void ExtractImagesFromPDF(string sourcePdf string outputPath)
        {
            // NOTE:  This will only get the first image it finds per page.
            PdfReader pdf = new PdfReader(sourcePdf);
            RandomAccessFileOrArray raf = new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);

            try
            {
                for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
                {
                    PdfDictionary pg = pdf.GetPageN(pageNumber);

                    // recursively search pages forms and groups for images.
                    Pdfobject obj = FindImageInPDFDictionary(pg);
                    if (obj != null)
                    {

                        int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
                        Pdfobject pdfObj = pdf.GetPdfobject(XrefIndex);
                        PdfStream pdfStrem = (PdfStream)pdfObj;
                        byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStrem);
                        if ((bytes != null))
                        {
                            using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                            {
                                memStream.Position = 0;
                                System.Drawing.Image img = System.Drawing.Image.FromStream(memStream);
                                // must save the file while stream is open.
                                if (!Directory.Exists(outputPath))
                                    Directory.CreateDirectory(outputPath);

                                string path = Path.Combine(outputPath String.Format(@“{0}.jpg“ pageNumber));
                                System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                                parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression 0);
                                System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder(“JPEG“);
                                img.Save(path jpegEncoder parms);
                            }
                        }
                    }
                }
            }
            catch
            {
                throw;
            }
            finally
            {
                pdf.Close();
                raf.Close();
            }


        }

        private static Pdfobject FindImageInPDFDictionary(PdfDictionary pg)
        {
            PdfDictionary res = (PdfDictionary)PdfReader.GetPdfobject(pg.Get(PdfName.RESOURCES));


            PdfDictionary xob

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2012-02-16 19:30  itextsharp.ExtractImagesFromPDF\
     文件        2735  2012-02-16 19:24  itextsharp.ExtractImagesFromPDF\itextsharp.ExtractImagesFromPDF.csproj
     文件         168  2012-02-11 14:55  itextsharp.ExtractImagesFromPDF\itextsharp.ExtractImagesFromPDF.csproj.user
     文件        5126  2012-02-16 19:21  itextsharp.ExtractImagesFromPDF\Program.cs
     目录           0  2012-02-16 19:17  itextsharp.ExtractImagesFromPDF\Properties\
     文件        1384  2012-02-11 14:37  itextsharp.ExtractImagesFromPDF\Properties\AssemblyInfo.cs

评论

共有 条评论