package com.pdflib.cookbook.tet.tet_and_pdflib; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import com.pdflib.PDFlibException; import com.pdflib.TET; import com.pdflib.TETException; import com.pdflib.pdflib; /** * Extract some text from a PDF based on certain typographic criteria (font, * fontsize) along with the corresponding page numbers, and use PDFlib to create * a table of contents (TOC) for the original document, possibly enriched with * active links to the respective pages. With PDFlib+PDI the TOC could be * prepended to the original pages. With plain PDFlib a stand-alone TOC can be * created. *
* Required software: TET 3 and PDFlib+PDI 8 or PDFlib 8 *
* Required data: PDF document
*
*/
class create_table_of_contents {
/**
* Common search path for PDI and TET to find the input document. The path is
* relative (presumably to the working directory the example is started from;
* confirm against the build/run scripts).
*/
private static final String DOC_SEARCH_PATH = "../input";
/**
* Global option list. The program expects the "resource" directory parallel to
* the "java" directory. The "searchpath" option lists the cmap and glyphlist
* resource directories followed by DOC_SEARCH_PATH.
*/
private static final String GLOBAL_OPTLIST = "searchpath={../resource/cmap ../resource/glyphlist " + DOC_SEARCH_PATH
+ "}";
/**
* Document-specific option list. Empty: no document options are set here.
*/
private static final String DOC_OPTLIST = "";
/**
* Page-specific option list. Requests "granularity=page".
*/
private static final String PAGE_OPTLIST = "granularity=page";
/**
* The encoding in which the output is sent to System.out. Initialized from the
* "file.encoding" system property. For running the example in a Windows
* command window, you can set this for example to "windows-1252" for getting
* Latin-1 output.
*/
private static final String OUTPUT_ENCODING = System.getProperty("file.encoding");
/**
* For printing to System.out in the encoding specified via OUTPUT_ENCODING.
* Not initialized at declaration; it is assigned elsewhere in the class.
*/
private static PrintStream out;
/**
* The name of the input file (per-instance).
*/
private String infilename;
/**
* The name of the output file (per-instance).
*/
private String outfilename;
/**
* The name of the font to search for. font_matches() compares it for exact
* equality with the name retrieved for the font of the current character.
*/
private static final String FONT_NAME = "TheSansBold-Plain";
/**
* The font size to search for in points.
*/
private static final double FONT_SIZE = 9;
/**
* The tolerance for the font size in points. font_matches() accepts a font
* size whose absolute difference from FONT_SIZE is at most this value.
*/
private static final double FONT_SIZE_TOLERANCE = 0.01;
/**
* Nudge factor for ascender height of the Web links (relative to the font size)
*
* NOTE(review): the use of this constant is not visible in this part of the
* file; confirm that "Web links" is still the accurate term for this example.
*/
private static final double ASCENDER = 0.85;
/**
* Whether to use PDI to create a new document that consists of the original
* document and the TOC prepended to it. Set this to false in order not to use
* PDI, and in order to produce a document that only contains the TOC.
*/
private static final boolean USE_PDI = true;
/**
* The page width for the TOC pages (see "width" option for begin_page_ext() in
* the PDFlib Reference Manual)
*/
private static final String TOC_WIDTH = "a4.width";
/**
* The page height for the TOC pages (see "height" option for begin_page_ext()
* in the PDFlib Reference Manual)
*/
private static final String TOC_HEIGHT = "a4.height";
/**
* The title for the TOC.
*/
private static final String TOC_TITLE = "Table of Contents";
/**
* The font to use for the headline that is placed on each page of the TOC.
*/
private static final String TOC_TITLE_FONT = "Helvetica-Bold";
/**
* The font size to use for the headline that is placed on each page of the
* TOC.
*/
private static final int TOC_TITLE_FONTSIZE = 18;
/**
* The font to use for the TOC entries.
*/
private static final String TOC_FONT = "Helvetica";
/**
* The font size for the TOC entries.
*/
private static final int TOC_FONTSIZE = 12;
/**
* x-position of the lower-left corner of the TOC fitbox.
*/
private static final int TOC_LLX = 110;
/**
* y-position of the lower-left corner of the TOC fitbox.
*/
private static final int TOC_LLY = 100;
/**
* x-position of the upper-right corner of the TOC fitbox.
*/
private static final int TOC_URX = 450;
/**
* y-position of the upper-right corner of the TOC fitbox.
*/
private static final int TOC_URY = 700;
/**
* Lower-left y-position for the TOC headline.
*/
private static final int TOC_TITLE_LLY = 740;
/**
* The prefix for a destination name.
*/
private static final String TOC_DESTINATION_PREFIX = "tmx";
/**
* The text flow (including options) for the TOC contents. add_toc_entry()
* appends to this buffer for each TOC entry.
*/
private StringBuffer tocTextflow = new StringBuffer();
/**
* The current destination number. Used to generate unique destination names;
* add_toc_entry() passes it to get_destination_name().
*/
private int destNumber = 0;
/**
* Import the current page from the PDI import document and place it in the
* output document.
*
* @param p the pdflib object
* @param pdiHandle the PDI handle for the input document
* @param pageno the current page number
*
* @return the page handle returned by open_pdi_page(); the page is only placed
* when this handle is not -1
*
* @throws PDFlibException an error occurred in the PDFlib API
*/
private int put_pdi_page(pdflib p, int pdiHandle, int pageno) throws PDFlibException {
    // Open the output page with a placeholder size of 10 x 10; the real size
    // is taken from the imported page when it is placed below.
    p.begin_page_ext(10, 10, "group content");

    final int importedPage = p.open_pdi_page(pdiHandle, pageno, "");
    if (importedPage == -1) {
        // Nothing to place; hand the -1 handle back to the caller unchanged.
        return importedPage;
    }

    // Put the imported page at the origin and let "adjustpage" resize the
    // output page to the size of the input page.
    p.fit_pdi_page(importedPage, 0, 0, "adjustpage");
    return importedPage;
}
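/**
* Tests whether the current character matches the criteria for text that shall
* get an entry in the TOC. get_char_info must have been called before in
* order to ensure that the TET object contains the information for the current
* character.
*
* @param tet The TET object
* @param doc The TET document handle
* @return true if the font name of the current character equals FONT_NAME and
* its font size differs from FONT_SIZE by at most FONT_SIZE_TOLERANCE
* @throws TETException an error occurred in the TET API
*/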
private boolean font_matches(TET tet, final int doc) throws TETException {
    // Resolve the name of the current character's font through the pCOS
    // path "fonts[<fontid>]/name".
    final String fontNamePath = "fonts[" + tet.fontid + "]/name";
    final String currentFontName = tet.pcos_get_string(doc, fontNamePath);

    // The font name must match exactly before the size is considered.
    if (!currentFontName.equals(FONT_NAME)) {
        return false;
    }

    // Accept sizes within FONT_SIZE_TOLERANCE of the wanted FONT_SIZE.
    final double sizeDeviation = Math.abs(tet.fontsize - FONT_SIZE);
    return sizeDeviation <= FONT_SIZE_TOLERANCE;
}
/**
* Add text and options to the textflow for the current TOC entry. The options
* ensure that the TOC is properly formatted (TOC entries are not split
* across page boundaries).
*
* @param p The pdflib object
* @param tocText The text of the TOC entry to add
* @param pageno The page number for the TOC entry
* @param ulx x-position of the identified text on the page
* @param uly y-position of the identified text on the page
*
* @throws PDFlibException An error occurred in the PDFlib API
*/
private void add_toc_entry(pdflib p, String tocText, int pageno, double ulx, double uly) throws PDFlibException {
/*
* The same name is used for the matchbox name in the TOC and for the named
* destination that is the target of the "GoTo" action in the TOC.
*/
String destName = get_destination_name(destNumber);
/*
* We need pairwise marks that enclose the text that shall be kept together.
*/
tocTextflow.append("").append(tocText).append("