/**
 * Simple PDF glyph dumper based on PDFlib TET
 *
 * Required software: TET 5.2 (if you define the constants TET.CT_SEQ_START
 * etc. the code can also be used with TET 5.0)
 *
 * Required data: PDF document
 *
 */
package com.pdflib.cookbook.tet.text;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Formatter;
import java.util.Locale;

import com.pdflib.TET;
import com.pdflib.TETException;

public class glyphinfo {
    /**
     * Global option list
     */
    static final String globaloptlist = "searchpath={{../input} {../resource/cmap}}";

    /**
     * Document-specific option list
     */
    static final String docoptlist = "";

    /**
     * Page-specific option list
     */
    static final String pageoptlist = "granularity=word";

    /**
     * Print a description of the fill color identified by a color id:
     * pattern information (if any), the color space name, color space
     * specific details, and the color component values.
     *
     * @param formatter The formatter receiving the output
     * @param tet       The TET object
     * @param doc       The document handle
     * @param colorid   The color id to describe
     *
     * @throws TETException if a TET or pCOS call fails
     */
    private static void print_color_value(Formatter formatter, TET tet, int doc, int colorid)
            throws TETException {
        /*
         * We handle only the fill color, but ignore the stroke color. The
         * stroke color can be retrieved analogously with the keyword "stroke".
         * The fields tet.colorspaceid, tet.patternid and tet.components are
         * read after this call.
         */
        tet.get_color_info(doc, colorid, "usage=fill");

        if (tet.colorspaceid == -1 && tet.patternid == -1) {
            formatter.format(" (not filled)");
            return;
        }

        formatter.format(" (");

        if (tet.patternid != -1) {
            String pattern = "patterns[" + tet.patternid + "]";
            int patterntype = (int) tet.pcos_get_number(doc, pattern + "/PatternType");

            if (patterntype == 1) { /* Tiling pattern */
                int painttype = (int) tet.pcos_get_number(doc, pattern + "/PaintType");
                if (painttype == 1) {
                    formatter.format("colored Pattern)");
                    return;
                }
                else if (painttype == 2) {
                    formatter.format("uncolored Pattern, base color: ");
                    /* FALLTHROUGH to colorspaceid output */
                }
            }
            else if (patterntype == 2) { /* Shading pattern */
                int shadingtype = (int) tet.pcos_get_number(doc, pattern + "/Shading/ShadingType");
                formatter.format("shading Pattern, ShadingType=%d)", shadingtype);
                return;
            }
        }

        String colorspace = "colorspaces[" + tet.colorspaceid + "]";
        String csname = tet.pcos_get_string(doc, colorspace + "/name");
        formatter.format("%s", csname);

        /* Emit more details depending on the colorspace type */
        if (csname.equals("ICCBased")) {
            int iccprofileid = (int) tet.pcos_get_number(doc, colorspace + "/iccprofileid");
            String iccprofile = "iccprofiles[" + iccprofileid + "]";
            String errormessage = tet.pcos_get_string(doc, iccprofile + "/errormessage");

            /*
             * Check whether the embedded profile is damaged: a non-empty
             * error message is reported instead of the profile details.
             */
            if (!errormessage.equals("")) {
                formatter.format(" (%s)", errormessage);
            }
            else {
                String profilename = tet.pcos_get_string(doc, iccprofile + "/profilename");
                formatter.format(" '%s'", profilename);

                String profilecs = tet.pcos_get_string(doc, iccprofile + "/profilecs");
                formatter.format(" '%s'", profilecs);
            }
        }
        else if (csname.equals("Separation")) {
            String colorantname = tet.pcos_get_string(doc, colorspace + "/colorantname");
            formatter.format(" '%s'", colorantname);
        }
        else if (csname.equals("DeviceN")) {
            formatter.format(" ");
            for (int i = 0; i < tet.components.length; i++) {
                String colorantname = tet.pcos_get_string(doc,
                        colorspace + "/colorantnames[" + i + "]");
                formatter.format("%s", colorantname);
                if (i != tet.components.length - 1) {
                    formatter.format("/");
                }
            }
        }
        else if (csname.equals("Indexed")) {
            int baseid = (int) tet.pcos_get_number(doc, colorspace + "/baseid");
            String basename = tet.pcos_get_string(doc, "colorspaces[" + baseid + "]/name");
            formatter.format(" %s", basename);
        }

        /* Color component values, separated by slashes */
        formatter.format(" ");
        for (int i = 0; i < tet.components.length; i++) {
            formatter.format("%g", tet.components[i]);
            if (i != tet.components.length - 1) {
                formatter.format("/");
            }
        }
        formatter.format(")");
    }

    /**
     * Print the "type" member and the bit flags in the "attributes" member
     * of the glyph most recently retrieved with get_char_info().
     *
     * @param formatter The formatter receiving the output
     * @param tet       The TET object holding the current glyph details
     */
    private static void print_type_and_attributes(Formatter formatter, TET tet) {
        /* Examine the "type" member */
        if (tet.type == TET.CT_SEQ_START) {
            formatter.format(" ligature_start");
        }
        else if (tet.type == TET.CT_SEQ_CONT) {
            formatter.format(" ligature_cont");
        }
        /* Separators are only inserted for granularity > word */
        else if (tet.type == TET.CT_INSERTED) {
            formatter.format(" inserted");
        }

        /* Examine the bit flags in the "attributes" member */
        if (tet.attributes != TET.ATTR_NONE) {
            if ((tet.attributes & TET.ATTR_SUB) != 0) {
                formatter.format("/sub");
            }
            if ((tet.attributes & TET.ATTR_SUP) != 0) {
                formatter.format("/sup");
            }
            if ((tet.attributes & TET.ATTR_DROPCAP) != 0) {
                formatter.format("/dropcap");
            }
            if ((tet.attributes & TET.ATTR_SHADOW) != 0) {
                formatter.format("/shadow");
            }
            if ((tet.attributes & TET.ATTR_DEHYPHENATION_PRE) != 0) {
                formatter.format("/dehyphenation_pre");
            }
            if ((tet.attributes & TET.ATTR_DEHYPHENATION_ARTIFACT) != 0) {
                formatter.format("/dehyphenation_artifact");
            }
            if ((tet.attributes & TET.ATTR_DEHYPHENATION_POST) != 0) {
                formatter.format("/dehyphenation_post");
            }
            if ((tet.attributes & TET.ATTR_ARTIFACT) != 0) {
                formatter.format("/Artifact");
            }
        }
    }

    /**
     * Dump the text of a PDF document together with per-glyph details
     * (Unicode value, font, size, position, color, type and attributes)
     * to standard output as UTF-8 with a leading BOM. Errors are reported
     * on standard error.
     *
     * @param argv Command line arguments; exactly one is expected, the name
     *             of the PDF document
     */
    public static void main(String argv[]) {
        TET tet = null;
        Writer outfp = null;

        try {
            if (argv.length != 1) {
                throw new Exception("usage: glyphinfo <filename>");
            }

            /* print UTF-8 BOM */
            byte[] bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
            System.out.write(bom);

            outfp = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
            Formatter formatter = new Formatter(outfp, Locale.US);

            tet = new TET();
            tet.set_option(globaloptlist);

            int doc = tet.open_document(argv[0], docoptlist);
            if (doc == -1) {
                throw new Exception("Error " + tet.get_errnum() + " in " + tet.get_apiname()
                        + "(): " + tet.get_errmsg());
            }

            /* get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* loop over pages in the document */
            for (int pageno = 1; pageno <= n_pages; ++pageno) {
                String text;
                int previouscolor = -1;

                int page = tet.open_page(doc, pageno, pageoptlist);
                if (page == -1) {
                    print_tet_error(tet, pageno);
                    continue; /* try next page */
                }

                /*
                 * Administrative information. The file name is passed as an
                 * argument, not spliced into the format string, so that a '%'
                 * in the name cannot be misread as a format specifier.
                 */
                formatter.format("\n[ Document: '%s' ]\n", tet.pcos_get_string(doc, "filename"));
                formatter.format("[ Document options: '%s' ]\n", docoptlist);
                formatter.format("[ Page options: '%s' ]\n", pageoptlist);
                formatter.format("[ ----- Page %d ----- ]\n", pageno);

                /* Retrieve all text fragments */
                while ((text = tet.get_text(page)) != null) {
                    /* print the retrieved text */
                    outfp.write("[" + text + "]\n");

                    /* Loop over all glyphs and print their details */
                    while (tet.get_char_info(page) != -1) {
                        /* Fetch the font name with pCOS (based on its ID) */
                        final String fontname = tet.pcos_get_string(doc,
                                "fonts[" + tet.fontid + "]/name");

                        /* Print the Unicode value of the character */
                        formatter.format("U+%04X", tet.uv);

                        /* ...and the character itself */
                        formatter.format(" '%c'", tet.uv);

                        /* Print font name, size, and position */
                        formatter.format(" %s size=%.2f x=%.2f y=%.2f", fontname, tet.fontsize,
                                tet.x, tet.y);

                        /* Print the color id */
                        formatter.format(" colorid=%d", tet.colorid);

                        /* Check whether the text color changed */
                        if (tet.colorid != previouscolor) {
                            print_color_value(formatter, tet, doc, tet.colorid);
                            previouscolor = tet.colorid;
                        }

                        print_type_and_attributes(formatter, tet);

                        formatter.format("\n");
                    }

                    formatter.format("\n");
                }

                if (tet.get_errnum() != 0) {
                    print_tet_error(tet, pageno);
                }

                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e) {
            System.err.println("TET exception occurred in glyphinfo sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() + ": "
                    + e.get_errmsg());
        }
        catch (Exception e) {
            System.err.println(e);
        }
        finally {
            /*
             * Close (and thereby flush) the output on every path so that text
             * buffered before an exception is not lost.
             */
            if (outfp != null) {
                try {
                    outfp.close();
                }
                catch (IOException e) {
                    System.err.println(e);
                }
            }
            if (tet != null) {
                tet.delete();
            }
        }
    }

    /**
     * Report a TET error.
     *
     * @param tet    The TET object
     * @param pageno The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno) {
        System.err.println("Error " + tet.get_errnum() + " in " + tet.get_apiname()
                + "() on page " + pageno + ": " + tet.get_errmsg());
    }
}