import java.io.PrintStream;
import java.io.UnsupportedEncodingException;

import com.pdflib.TETException;
import com.pdflib.TET;

/**
 * PDF image extractor based on PDFlib TET.
 * <p>
 * Required software: TET 3
 * <p>
 * Required data: PDF document
 * <p>
 * Usage: {@code image_extractor <filename>}. Every image found on every page
 * of the input document is written to a file whose name is derived from the
 * input file name, the page number and a per-page image counter.
 *
 * @version $Id: image_extractor.java,v 1.6 2008/12/15 10:50:54 stm Exp $
 */
public class image_extractor {
    /**
     * Global option list
     */
    static final String GLOBAL_OPTLIST = "searchpath={../resource/cmap "
            + "../resource/glyphlist ../input}";

    /**
     * Document-specific option list
     */
    static final String DOC_OPTLIST = "";

    /**
     * Page-specific option list
     */
    static final String PAGE_OPTLIST = "granularity=page";

    /**
     * Basic image extract options (the output file name is appended per
     * image in {@link #main(String[])})
     */
    static final String BASE_IMAGE_OPTLIST = "compression=auto format=auto";

    /**
     * The encoding in which the output is sent to System.out. For running the
     * example in a Windows command window, you can set this for example to
     * "windows-1252" for getting Latin-1 output.
     */
    private static final String OUTPUT_ENCODING = System
            .getProperty("file.encoding");

    /**
     * For printing to System.out in the encoding specified via
     * OUTPUT_ENCODING. Assigned at the start of {@link #main(String[])}.
     */
    private static PrintStream out;

    /**
     * Extracts all images of the PDF document named on the command line.
     * <p>
     * Problems with a single page or a single image are reported on
     * System.err and processing continues with the next page or image.
     * A missing argument, a document that cannot be opened, or a
     * TETException ends processing with a message on System.err.
     *
     * @param argv
     *            exactly one element: the name of the input PDF document,
     *            which is also used as the base of the generated image file
     *            names
     * @throws UnsupportedEncodingException
     *             if OUTPUT_ENCODING is not a supported encoding name
     */
    public static void main(String[] argv) throws UnsupportedEncodingException {
        System.out.println("Using output encoding \"" + OUTPUT_ENCODING + "\"");
        out = new PrintStream(System.out, true, OUTPUT_ENCODING);

        TET tet = null;

        try {
            /*
             * Both checks below throw a plain Exception that is caught at the
             * end of this method, where only its message is printed.
             */
            if (argv.length != 1) {
                throw new Exception("usage: image_extractor <filename>");
            }

            String outfilebase = argv[0];

            tet = new TET();
            tet.set_option(GLOBAL_OPTLIST);

            int doc = tet.open_document(argv[0], DOC_OPTLIST);
            if (doc == -1) {
                throw new Exception("Error " + tet.get_errnum() + " in "
                        + tet.get_apiname() + "(): " + tet.get_errmsg());
            }

            /* get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* loop over pages (page numbers are 1-based) */
            for (int pageno = 1; pageno <= n_pages; ++pageno) {
                int page = tet.open_page(doc, pageno, PAGE_OPTLIST);
                if (page < 0) {
                    print_tet_error(tet, pageno);
                    continue; /* try next page */
                }

                /*
                 * Retrieve all images on the page; the counter restarts at 0
                 * on every page and numbers the images in retrieval order.
                 */
                for (int imageno = 0; tet.get_image_info(page) == 1; imageno++) {
                    /*
                     * Use the name of the input file and generate image
                     * names from it.
                     */
                    String imagename = outfilebase + "_p" + pageno + "_"
                            + imageno;
                    String imageoptlist = BASE_IMAGE_OPTLIST + " filename={"
                            + imagename + "}";

                    out.println("Extracting image " + imagename);

                    /* Fetch the image data and write it to a disk file */
                    if (tet.write_image_file(doc, tet.imageid, imageoptlist) == -1) {
                        /* report and carry on with the next image */
                        print_tet_error(tet, pageno);
                    }
                }

                /*
                 * A non-zero error number after the image loop is reported
                 * as an error for this page.
                 */
                if (tet.get_errnum() != 0) {
                    print_tet_error(tet, pageno);
                }

                tet.close_page(page);
            }

            tet.close_document(doc);
        } catch (TETException e) {
            System.err.println("TET exception occurred in extractor sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname()
                    + ": " + e.get_errmsg());
        } catch (Exception e) {
            System.err.println(e.getMessage());
        } finally {
            /* release the TET object regardless of how processing ended */
            if (tet != null) {
                tet.delete();
            }
        }
    }

    /**
     * Report a TET error.
     *
     * @param tet
     *            The TET object
     * @param pageno
     *            The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno) {
        System.err.println("Error " + tet.get_errnum() + " in "
                + tet.get_apiname() + "() on page " + pageno + ": "
                + tet.get_errmsg());
    }
}