package com.pdflib.cookbook.tet.tet_and_pdflib; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import com.pdflib.PDFlibException; import com.pdflib.TET; import com.pdflib.TETException; import com.pdflib.pdflib; /** * Highlight unmapped glyphs: Find all glyphs for which TET could not determine * a Unicode mapping, and make them visible with the "Highlight" annotation. * Note that the counts of unmappable glyphs may include blanks, as blanks may * be undistinguishable from other glyphs for which no Unicode mapping is * available. *
* Required software: TET 5.2 and PDFlib+PDI 8 * (the code can be used with older TET versions if the constants * TET.CT_INSERTED etc. are replaced with the corresponding numerical values) *
* Required data: PDF document
*
*/
class highlight_unmapped_glyphs {
/**
 * Common search path for PDI and TET to find the input document. The value
 * is also appended to the "searchpath" entry of {@code GLOBAL_OPTLIST}.
 */
private static final String DOC_SEARCH_PATH = "../input";
/**
 * Global option list. It sets "searchpath" to the cmap and glyphlist resource
 * directories followed by {@code DOC_SEARCH_PATH}. The program expects the
 * "resource" directory parallel to the "java" directory.
 */
private static final String GLOBAL_OPTLIST = "searchpath={../resource/cmap ../resource/glyphlist " + DOC_SEARCH_PATH
+ "}";
/**
 * Document specific option list. Empty, i.e. no document-level options are
 * supplied.
 */
private static final String DOC_OPTLIST = "";
/**
 * Page-specific option list; passed to TET.open_page() when a page is
 * processed.
 */
private static final String PAGE_OPTLIST = "granularity=page";
/**
 * The encoding in which the output is sent to System.out. The value is taken
 * from the "file.encoding" system property. For running the example in a
 * Windows command window, you can set this for example to "windows-1252" for
 * getting Latin-1 output.
 */
private static final String OUTPUT_ENCODING = System.getProperty("file.encoding");
/**
 * For printing to System.out in the encoding specified via OUTPUT_ENCODING.
 * Not initialized at its declaration.
 * NOTE(review): presumably assigned before first use elsewhere in this file
 * - confirm.
 */
private static PrintStream out;
/**
 * The name of the input file
 */
private String infilename;
/**
 * The name of the output file
 */
private String outfilename;
/**
 * Nudge factor for ascender height of the annotations (relative to the font
 * size)
 */
private static final double ASCENDER = 0.85;
/**
 * Nudge factor for descender height of annotations (relative to the font size)
 */
private static final double DESCENDER = 0.25;
/**
 * Counter for unmapped glyphs in document. Kept per instance and starts at
 * zero.
 */
private int unmappedGlyphs = 0;
/**
 * Begin a new page in the output document and place the requested page of
 * the PDI import document on it.
 *
 * The output page is begun with a placeholder size of 10 x 10; the page size
 * is adjusted to the input page when that page is placed with the
 * "adjustpage" option. This method only begins the output page, it does not
 * end it.
 *
 * @param p the pdflib object
 * @param pdiHandle the PDI handle for the input document
 * @param pageno the current page number
 *
 * @return true if the input page was opened and placed; false if the input
 *         page could not be opened, in which case the PDFlib error message
 *         is printed to System.err
 *
 * @throws PDFlibException an error occurred in the PDFlib API
 */
private boolean importPdiPage(pdflib p, int pdiHandle, int pageno) throws PDFlibException {
    /* Placeholder dimensions; the real size is taken over from the input page below */
    p.begin_page_ext(10, 10, "");

    final int importedPage = p.open_pdi_page(pdiHandle, pageno, "");
    final boolean opened = importedPage != -1;

    if (opened) {
        /* Place the input page, let PDFlib adjust the page size, then release the handle */
        p.fit_pdi_page(importedPage, 0, 0, "adjustpage");
        p.close_pdi_page(importedPage);
    } else {
        System.err.println("Error: " + p.get_errmsg());
    }

    return opened;
}
/**
* Helper class to store rectangle data.
*/
private class rectangle {
rectangle(int unmappedGlyphs, double llx, double lly, double urx, double ury) {
this.llx = llx;
this.lly = lly;
this.urx = urx;
this.ury = ury;
this.unmappedGlyphs = unmappedGlyphs;
}
double llx;
double lly;
double urx;
double ury;
int unmappedGlyphs = 0;
}
/**
 * Look up the name of a font through its pCOS id.
 *
 * The name is read from the pCOS path {@code fonts[pcosId]/name} of the
 * given document.
 *
 * @param tet The TET object
 * @param doc The TET document handle
 * @param pcosId The pCOS id of the font to check
 * @return The name of the font
 * @throws TETException An error occurred in the TET API
 */
private String getFontName(TET tet, int doc, int pcosId) throws TETException {
    final String namePath = "fonts[" + pcosId + "]/name";
    return tet.pcos_get_string(doc, namePath);
}
/**
* Create annotations for a given list of rectangles.
*
* @param tet The TET object
* @param doc The TET handle
* @param p The pdflib object
* @param rectangles The list of rectangles
* @throws TETException An error occurred in the TET API
* @throws PDFlibException An error occurred in the PDFlib API
*/
private void create_annotations(TET tet, final int doc, pdflib p, List
* The code assumes that all glyphs of a sequence of unmapped glyphs have the
* same font. It also ignores any angle of the text.
*
* @param tet TET object
* @param doc TET document handle
* @param p pdflib object
* @param pdiHandle PDI document handle
* @param pageno The current page number
* @throws TETException An error occurred in the TET API
* @throws PDFlibException An error occurred in the PDFlib API
*/
private void process_page(TET tet, final int doc, pdflib p, int pdiHandle, int pageno)
throws TETException, PDFlibException {
/*
* Copy page from input document to output document.
*/
importPdiPage(p, pdiHandle, pageno);
final int page = tet.open_page(doc, pageno, PAGE_OPTLIST);
if (page == -1) {
System.err.println("Error " + tet.get_errnum() + " in " + tet.get_apiname() + "(): " + tet.get_errmsg());
} else {
/* Retrieve all text fragments for the page */
for (String text = tet.get_text(page); text != null; text = tet.get_text(page)) {
/*
* List for collecting the rectangles that belong to an instance of the search
* term
*/
List