14 Mart 2016 Pazartesi

C#.net - Extract image from PDF file

In this article I will show you how to extract images from a PDF file.

Step 1
First, you need to download "ITextSharp.dll" from the following link.
http://sourceforge.net/projects/itextsharp/

Step 2
Create a Console application and name the solution ConExtractImagefromPDF.

Step 3
Add two assembly references to the project from Solution Explorer.

1.ITextSharp.dll




2.System.Drawing.dll






Step 4
Write a static method that extracts the images from a PDF file. It looks like this:






/// <summary>
        ///  Scans every object of a PDF file and decodes each image stream into a
        ///  <see cref="System.Drawing.Image"/>.
        /// </summary>
        /// <param name="PDFSourcePath">Path of the PDF file to read.</param>
        /// <returns>
        ///  The decoded images, in the order their objects are indexed by the reader
        ///  (0 to XrefSize - 1). Image streams that cannot be decoded are skipped.
        ///  The caller owns the returned images and should dispose them.
        /// </returns>
        /// <exception cref="Exception">
        ///  Thrown when the file cannot be opened or parsed. The message is that of the
        ///  underlying error, which is also available through InnerException.
        /// </exception>
        private static List<System.Drawing.Image> ExtractImages(String PDFSourcePath)
        {
            List<System.Drawing.Image> ImgList = new List<System.Drawing.Image>();

            iTextSharp.text.pdf.RandomAccessFileOrArray RAFObj = null;
            iTextSharp.text.pdf.PdfReader PDFReaderObj = null;

            try
            {
                RAFObj = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
                PDFReaderObj = new iTextSharp.text.pdf.PdfReader(RAFObj, null);

                for (int i = 0; i < PDFReaderObj.XrefSize; i++)
                {
                    iTextSharp.text.pdf.PdfObject PDFObj = PDFReaderObj.GetPdfObject(i);

                    // Only stream objects can carry image data.
                    if (PDFObj == null || !PDFObj.IsStream())
                    {
                        continue;
                    }

                    iTextSharp.text.pdf.PdfStream PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
                    iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);

                    // Keep only streams whose /Subtype entry is /Image.
                    if (subtype == null || subtype.ToString() != iTextSharp.text.pdf.PdfName.IMAGE.ToString())
                    {
                        continue;
                    }

                    try
                    {
                        iTextSharp.text.pdf.parser.PdfImageObject PdfImageObj =
                            new iTextSharp.text.pdf.parser.PdfImageObject((iTextSharp.text.pdf.PRStream)PDFStremObj);

                        ImgList.Add(PdfImageObj.GetDrawingImage());
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: an image stream that cannot be decoded
                        // is skipped so the remaining images are still extracted.
                    }
                }
            }
            catch (Exception ex)
            {
                // The caller never receives the partial list, so release what was decoded so far.
                foreach (System.Drawing.Image Img in ImgList)
                {
                    Img.Dispose();
                }

                // Same exception type and message as before, but the original error
                // (with its stack trace) is preserved as the inner exception.
                throw new Exception(ex.Message, ex);
            }
            finally
            {
                // Release the underlying file on both the success and the failure path.
                if (PDFReaderObj != null)
                {
                    PDFReaderObj.Close();
                }
                else if (RAFObj != null)
                {
                    // The reader was never constructed, so the file source must be closed directly.
                    RAFObj.Close();
                }
            }
            return ImgList;
        }


Step 5
Write a static method that stores the extracted images as files in a folder. It looks like this:

 /// <summary>
        ///  Extracts the images of the hard-coded sample PDF and saves each one as
        ///  "Image&lt;index&gt;.jpeg" in the "ImageStore" folder under the application's
        ///  base directory, reporting progress on the console.
        /// </summary>
        /// <exception cref="Exception">
        ///  Thrown when extraction fails or the output folder cannot be created. The
        ///  underlying error is available through InnerException. A failure to save a
        ///  single image is reported on the console and does not stop the remaining images.
        /// </exception>
        private static void WriteImageFile()
        {
            try
            {
                System.Console.WriteLine("Wait for extracting image from PDF file....");

                // Get a List of Image
                List<System.Drawing.Image> ListImage = ExtractImages(@"C:\Users\Kishor\Desktop\TuterPDF\ASP.net\ASP.NET 3.5 Unleashed.pdf");

                // Image.Save does not create missing folders, so make sure the target exists;
                // otherwise every save fails. CreateDirectory is a no-op when it already exists.
                string OutputFolder = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageStore");
                System.IO.Directory.CreateDirectory(OutputFolder);

                for (int i = 0; i < ListImage.Count; i++)
                {
                    string FileName = "Image" + i + ".jpeg";

                    try
                    {
                        // Write Image File
                        ListImage[i].Save(System.IO.Path.Combine(OutputFolder, FileName), System.Drawing.Imaging.ImageFormat.Jpeg);
                        System.Console.WriteLine("Image" + i + ".jpeg write sucessfully");
                    }
                    catch (Exception saveError)
                    {
                        // Keep going with the other images, but do not hide the failure.
                        System.Console.WriteLine(FileName + " could not be written: " + saveError.Message);
                    }
                    finally
                    {
                        // The image is not used after this point; release its GDI+ resources.
                        ListImage[i].Dispose();
                    }
                }
            }
            catch (Exception ex)
            {
                // Same exception type and message as before, with the original error kept as InnerException.
                throw new Exception(ex.Message, ex);
            }
        }

Step 6
Call the above method from the Main method. It looks like this:

/// <summary>
        ///  Program entry point: runs the image export and prints the message of any
        ///  error that reaches this level.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // Extract the images and write them to disk.
                WriteImageFile();
            }
            catch (Exception failure)
            {
                string message = failure.Message;
                System.Console.WriteLine(message);
            }
        }

Full Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConExtractImagefromPDF
{
    /// <summary>
    ///  Console program that extracts the images embedded in a PDF file with
    ///  iTextSharp and saves them as JPEG files.
    /// </summary>
    class Program
    {
        /// <summary>
        ///  Program entry point: runs the image export and prints the message of any
        ///  error that reaches this level.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                WriteImageFile(); // write image file
            }
            catch (Exception ex)
            {
                System.Console.WriteLine(ex.Message);
            }
        }

        #region Methods

        /// <summary>
        ///  Scans every object of a PDF file and decodes each image stream into a
        ///  <see cref="System.Drawing.Image"/>.
        /// </summary>
        /// <param name="PDFSourcePath">Path of the PDF file to read.</param>
        /// <returns>
        ///  The decoded images, in the order their objects are indexed by the reader
        ///  (0 to XrefSize - 1). Image streams that cannot be decoded are skipped.
        ///  The caller owns the returned images and should dispose them.
        /// </returns>
        /// <exception cref="Exception">
        ///  Thrown when the file cannot be opened or parsed. The message is that of the
        ///  underlying error, which is also available through InnerException.
        /// </exception>
        private static List<System.Drawing.Image> ExtractImages(String PDFSourcePath)
        {
            List<System.Drawing.Image> ImgList = new List<System.Drawing.Image>();

            iTextSharp.text.pdf.RandomAccessFileOrArray RAFObj = null;
            iTextSharp.text.pdf.PdfReader PDFReaderObj = null;

            try
            {
                RAFObj = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
                PDFReaderObj = new iTextSharp.text.pdf.PdfReader(RAFObj, null);

                for (int i = 0; i < PDFReaderObj.XrefSize; i++)
                {
                    iTextSharp.text.pdf.PdfObject PDFObj = PDFReaderObj.GetPdfObject(i);

                    // Only stream objects can carry image data.
                    if (PDFObj == null || !PDFObj.IsStream())
                    {
                        continue;
                    }

                    iTextSharp.text.pdf.PdfStream PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
                    iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);

                    // Keep only streams whose /Subtype entry is /Image.
                    if (subtype == null || subtype.ToString() != iTextSharp.text.pdf.PdfName.IMAGE.ToString())
                    {
                        continue;
                    }

                    try
                    {
                        iTextSharp.text.pdf.parser.PdfImageObject PdfImageObj =
                            new iTextSharp.text.pdf.parser.PdfImageObject((iTextSharp.text.pdf.PRStream)PDFStremObj);

                        ImgList.Add(PdfImageObj.GetDrawingImage());
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: an image stream that cannot be decoded
                        // is skipped so the remaining images are still extracted.
                    }
                }
            }
            catch (Exception ex)
            {
                // The caller never receives the partial list, so release what was decoded so far.
                foreach (System.Drawing.Image Img in ImgList)
                {
                    Img.Dispose();
                }

                // Same exception type and message as before, but the original error
                // (with its stack trace) is preserved as the inner exception.
                throw new Exception(ex.Message, ex);
            }
            finally
            {
                // Release the underlying file on both the success and the failure path.
                if (PDFReaderObj != null)
                {
                    PDFReaderObj.Close();
                }
                else if (RAFObj != null)
                {
                    // The reader was never constructed, so the file source must be closed directly.
                    RAFObj.Close();
                }
            }
            return ImgList;
        }


        /// <summary>
        ///  Extracts the images of the hard-coded sample PDF and saves each one as
        ///  "Image&lt;index&gt;.jpeg" in the "ImageStore" folder under the application's
        ///  base directory, reporting progress on the console.
        /// </summary>
        /// <exception cref="Exception">
        ///  Thrown when extraction fails or the output folder cannot be created. The
        ///  underlying error is available through InnerException. A failure to save a
        ///  single image is reported on the console and does not stop the remaining images.
        /// </exception>
        private static void WriteImageFile()
        {
            try
            {
                System.Console.WriteLine("Wait for extracting image from PDF file....");

                // Get a List of Image
                List<System.Drawing.Image> ListImage = ExtractImages(@"C:\Users\Kishor\Desktop\TuterPDF\ASP.net\ASP.NET 3.5 Unleashed.pdf");

                // Image.Save does not create missing folders, so make sure the target exists;
                // otherwise every save fails. CreateDirectory is a no-op when it already exists.
                string OutputFolder = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageStore");
                System.IO.Directory.CreateDirectory(OutputFolder);

                for (int i = 0; i < ListImage.Count; i++)
                {
                    string FileName = "Image" + i + ".jpeg";

                    try
                    {
                        // Write Image File
                        ListImage[i].Save(System.IO.Path.Combine(OutputFolder, FileName), System.Drawing.Imaging.ImageFormat.Jpeg);
                        System.Console.WriteLine("Image" + i + ".jpeg write sucessfully");
                    }
                    catch (Exception saveError)
                    {
                        // Keep going with the other images, but do not hide the failure.
                        System.Console.WriteLine(FileName + " could not be written: " + saveError.Message);
                    }
                    finally
                    {
                        // The image is not used after this point; release its GDI+ resources.
                        ListImage[i].Dispose();
                    }
                }
            }
            catch (Exception ex)
            {
                // Same exception type and message as before, with the original error kept as InnerException.
                throw new Exception(ex.Message, ex);
            }
        }
        #endregion
    }
}


Download
Download Source Code