PDFlib TET サンプル集(クックブック)
PDFlib TET により PDF イメージを抽出するサンプルプログラムです。
必要な製品:PDFlib TET
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import com.pdflib.TETException;
import com.pdflib.TET;
/**
* PDF image extractor based on PDFlib TET.
* <p>
* Required software: TET 3
* <p>
* Required data: PDF document
*
* @version $Id: image_extractor.java,v 1.6 2008/12/15 10:50:54 stm Exp $
*/
public class image_extractor
{
    /**
     * Global option list, passed to TET.set_option() before the document is
     * opened. It sets the search path for resources and input files.
     */
    static final String GLOBAL_OPTLIST = "searchpath={../resource/cmap "
            + "../resource/glyphlist ../input}";

    /**
     * Document-specific option list, passed to TET.open_document().
     */
    static final String DOC_OPTLIST = "";

    /**
     * Page-specific option list, passed to TET.open_page().
     */
    static final String PAGE_OPTLIST = "granularity=page";

    /**
     * Basic image extract options. A "filename" option naming the output
     * file is appended per image before calling TET.write_image_file().
     */
    static final String BASE_IMAGE_OPTLIST = "compression=auto format=auto";

    /**
     * The encoding in which the output is sent to System.out. For running the
     * example in a Windows command window, you can set this for example to
     * "windows-1252" for getting Latin-1 output.
     */
    private static final String OUTPUT_ENCODING = System
            .getProperty("file.encoding");

    /**
     * For printing to System.out in the encoding specified via OUTPUT_ENCODING.
     * Initialized at the start of main().
     */
    private static PrintStream out;

    /**
     * Extracts all images of a PDF document to disk files.
     * <p>
     * Expects exactly one argument, the name of the input document. The same
     * string is used as the base name of the generated image files
     * ("&lt;filename&gt;_p&lt;page&gt;_&lt;image&gt;"). Errors are reported
     * on System.err; the TET object is always deleted before returning.
     *
     * @param argv
     *            Command line arguments; argv[0] is the input file name
     * @throws UnsupportedEncodingException
     *             If OUTPUT_ENCODING is not supported by the PrintStream
     */
    public static void main(String[] argv) throws UnsupportedEncodingException {
        System.out.println("Using output encoding \"" + OUTPUT_ENCODING + "\"");
        out = new PrintStream(System.out, true, OUTPUT_ENCODING);

        TET tet = null;

        try {
            if (argv.length != 1) {
                throw new Exception("usage: image_extractor <filename>");
            }

            /* The input file name doubles as the base for image file names */
            String outfilebase = argv[0];

            tet = new TET();
            tet.set_option(GLOBAL_OPTLIST);

            int doc = tet.open_document(argv[0], DOC_OPTLIST);
            if (doc == -1) {
                /*
                 * Note the blank before "in": without it the error number
                 * and the word "in" ran together in the message.
                 */
                throw new Exception("Error " + tet.get_errnum() + " in "
                        + tet.get_apiname() + "(): " + tet.get_errmsg());
            }

            /* get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* loop over pages; a page that fails to open is skipped */
            for (int pageno = 1; pageno <= n_pages; ++pageno) {
                extract_page_images(tet, doc, pageno, outfilebase);
            }

            tet.close_document(doc);
        }
        catch (TETException e) {
            System.err.println("TET exception occurred in extractor sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname()
                    + ": " + e.get_errmsg());
        }
        catch (Exception e) {
            /*
             * Exceptions without a message would otherwise be reported as
             * the bare string "null"; fall back to the exception's
             * toString() so that at least the exception type is visible.
             */
            String message = e.getMessage();
            System.err.println(message != null ? message : e.toString());
        }
        finally {
            if (tet != null) {
                tet.delete();
            }
        }
    }

    /**
     * Writes every image found on one page to a disk file.
     * <p>
     * Images are numbered from 0 per page and named
     * "&lt;outfilebase&gt;_p&lt;pageno&gt;_&lt;imageno&gt;". Failures to open
     * the page or to write an image are reported via print_tet_error() and
     * do not abort the extraction.
     *
     * @param tet
     *            The TET object
     * @param doc
     *            The document handle returned by TET.open_document()
     * @param pageno
     *            The number of the page to process
     * @param outfilebase
     *            The base name for the generated image files
     * @throws TETException
     *             If a TET call throws
     */
    private static void extract_page_images(TET tet, int doc, int pageno,
            String outfilebase) throws TETException {
        int page = tet.open_page(doc, pageno, PAGE_OPTLIST);
        if (page < 0) {
            print_tet_error(tet, pageno);
            return; /* caller continues with the next page */
        }

        /* Retrieve all images on the page */
        int imageno = -1;
        while (tet.get_image_info(page) == 1) {
            imageno++;

            /*
             * Use the name of the input file and generate image
             * names from it.
             */
            String imagename = outfilebase + "_p" + pageno + "_" + imageno;
            String imageoptlist = BASE_IMAGE_OPTLIST
                    + " filename={" + imagename + "}";

            out.println("Extracting image " + imagename);

            /* Fetch the image data and write it to a disk file */
            if (tet.write_image_file(doc, tet.imageid, imageoptlist) == -1) {
                print_tet_error(tet, pageno);
            }
        }

        /* Report an error that is still pending once the loop has ended */
        if (tet.get_errnum() != 0) {
            print_tet_error(tet, pageno);
        }

        tet.close_page(page);
    }

    /**
     * Report a TET error.
     *
     * @param tet
     *            The TET object
     * @param pageno
     *            The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno) {
        System.err.println("Error " + tet.get_errnum() + " in "
                + tet.get_apiname() + "() on page " + pageno + ": "
                + tet.get_errmsg());
    }
}
(May 6, 2010 - Oct 25, 2022)