Index: src/main/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizer.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizer.java (revision 0)
+++ src/main/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizer.java (working copy)
@@ -0,0 +1,86 @@
+package com.itextpdf.text.pdf;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Splits a text into the pieces that match one of a fixed set of literal strings and
+ * the stretches of text between those matches. Concatenating the returned pieces in
+ * order yields the original text again.
+ *
+ * @author Palash Ray
+ */
+public class ArrayBasedStringTokenizer {
+
+    /** Alternation of all usable tokens, or {@code null} when there is nothing to match. */
+    private final Pattern regex;
+
+    /**
+     * Creates a tokenizer for the given literal strings.
+     *
+     * @param tokens the strings to look for; they are matched literally, and when several
+     *            of them match at the same position the one listed first wins.
+     *            {@code null} and empty entries are ignored
+     */
+    public ArrayBasedStringTokenizer(String[] tokens) {
+        String expression = getRegexFromTokens(tokens);
+        this.regex = expression.length() == 0 ? null : Pattern.compile(expression);
+    }
+
+    /**
+     * Splits the text at every occurrence of one of the tokens.
+     *
+     * @param text the text to split
+     * @return the matched tokens and the unmatched stretches between them, in the order
+     *         in which they occur in the text; empty strings are never included
+     */
+    public String[] tokenize(String text) {
+        List<String> pieces = new ArrayList<String>();
+        int endOfPreviousMatch = 0;
+
+        if (regex != null) {
+            Matcher matcher = regex.matcher(text);
+            while (matcher.find()) {
+                // keep the unmatched text in front of this match
+                if (matcher.start() > endOfPreviousMatch) {
+                    pieces.add(text.substring(endOfPreviousMatch, matcher.start()));
+                }
+                pieces.add(matcher.group());
+                endOfPreviousMatch = matcher.end();
+            }
+        }
+
+        // whatever follows the last match
+        if (endOfPreviousMatch < text.length()) {
+            pieces.add(text.substring(endOfPreviousMatch));
+        }
+
+        return pieces.toArray(new String[pieces.size()]);
+    }
+
+    /**
+     * Builds a regular expression that matches any one of the tokens. Every token is
+     * quoted so that regex metacharacters in it are matched literally.
+     *
+     * @param tokens the literal strings to match
+     * @return the alternation of all non-empty tokens; empty when there is none
+     */
+    private String getRegexFromTokens(String[] tokens) {
+        StringBuilder regexBuilder = new StringBuilder(100);
+
+        for (String token : tokens) {
+            if (token == null || token.length() == 0) {
+                continue;
+            }
+            if (regexBuilder.length() > 0) {
+                regexBuilder.append('|');
+            }
+            regexBuilder.append('(').append(Pattern.quote(token)).append(')');
+        }
+
+        return regexBuilder.toString();
+    }
+
+}
Index: src/main/java/com/itextpdf/text/pdf/FontDetails.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/FontDetails.java (revision 5617)
+++ src/main/java/com/itextpdf/text/pdf/FontDetails.java (working copy)
@@ -44,10 +44,16 @@
package com.itextpdf.text.pdf;
import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
import com.itextpdf.text.ExceptionConverter;
import com.itextpdf.text.Utilities;
+import com.itextpdf.text.pdf.languages.IndicCompositeCharacterComparator;
/**
* Each font in the document will have an instance of this class
@@ -207,43 +213,98 @@
}
case BaseFont.FONT_TYPE_TTUNI: {
try {
- int len = text.length();
- int metrics[] = null;
- char glyph[] = new char[len];
- int i = 0;
- if (symbolic) {
- b = PdfEncodings.convertToBytes(text, "symboltt");
- len = b.length;
- for (int k = 0; k < len; ++k) {
- metrics = ttu.getMetricsTT(b[k] & 0xff);
- if (metrics == null)
- continue;
- longTag.put(Integer.valueOf(metrics[0]), new int[]{metrics[0], metrics[1], ttu.getUnicodeDifferences(b[k] & 0xff)});
- glyph[i++] = (char)metrics[0];
+
+ Map glyphSubstitutionMap = ttu.getGlyphSubstitutionMap();
+
+ if (glyphSubstitutionMap == null) {
+
+ int len = text.length();
+ int metrics[] = null;
+ char glyph[] = new char[len];
+ int i = 0;
+ if (symbolic) {
+ b = PdfEncodings.convertToBytes(text, "symboltt");
+ len = b.length;
+ for (int k = 0; k < len; ++k) {
+ metrics = ttu.getMetricsTT(b[k] & 0xff);
+ if (metrics == null)
+ continue;
+ longTag.put(Integer.valueOf(metrics[0]), new int[]{metrics[0], metrics[1], ttu.getUnicodeDifferences(b[k] & 0xff)});
+ glyph[i++] = (char)metrics[0];
+ }
}
- }
- else {
- for (int k = 0; k < len; ++k) {
- int val;
- if (Utilities.isSurrogatePair(text, k)) {
- val = Utilities.convertToUtf32(text, k);
- k++;
- }
- else {
- val = text.charAt(k);
- }
- metrics = ttu.getMetricsTT(val);
- if (metrics == null)
- continue;
- int m0 = metrics[0];
- Integer gl = Integer.valueOf(m0);
- if (!longTag.containsKey(gl))
- longTag.put(gl, new int[]{m0, metrics[1], val});
- glyph[i++] = (char)m0;
- }
+ else {
+ for (int k = 0; k < len; ++k) {
+ int val;
+ if (Utilities.isSurrogatePair(text, k)) {
+ val = Utilities.convertToUtf32(text, k);
+ k++;
+ }
+ else {
+ val = text.charAt(k);
+ }
+ metrics = ttu.getMetricsTT(val);
+ if (metrics == null)
+ continue;
+ int m0 = metrics[0];
+ Integer gl = Integer.valueOf(m0);
+ if (!longTag.containsKey(gl))
+ longTag.put(gl, new int[]{m0, metrics[1], val});
+ glyph[i++] = (char)m0;
+ }
+ }
+ String s = new String(glyph, 0, i);
+ b = s.getBytes(CJKFont.CJK_ENCODING);
+
+ } else {
+ // generate a regex from the characters to be substituted
+
+ // for Indic languages: push back the CompositeCharacters with smaller length
+ Set compositeCharacters = new TreeSet(new IndicCompositeCharacterComparator());
+ compositeCharacters.addAll(glyphSubstitutionMap.keySet());
+
+ // convert the text to a list of Glyph, also take care of the substitution
+ ArrayBasedStringTokenizer tokenizer = new ArrayBasedStringTokenizer(compositeCharacters.toArray(new String[0]));
+ String[] tokens = tokenizer.tokenize(text);
+
+ List glyphList = new ArrayList(50);
+
+                        for (String token : tokens) {
+
+                            // first check whether this is in the substitution map
+                            Glyph subsGlyph = glyphSubstitutionMap.get(token);
+
+                            if (subsGlyph != null) {
+                                glyphList.add(subsGlyph);
+                            } else {
+                                // break up the string into individual characters
+                                for (char c : token.toCharArray()) {
+                                    int[] metrics = ttu.getMetricsTT(c);
+                                    // no glyph for this character: skip it, as the branch without substitutions does
+                                    if (metrics == null)
+                                        continue;
+                                    glyphList.add(new Glyph(metrics[0], metrics[1], String.valueOf(c)));
+                                }
+                            }
+                        }
+
+ char[] charEncodedGlyphCodes = new char[glyphList.size()];
+
+ // process each Glyph thus obtained
+ for (int i = 0; i < glyphList.size(); i++) {
+ Glyph glyph = glyphList.get(i);
+ charEncodedGlyphCodes[i] = (char) glyph.code;
+ Integer glyphCode = Integer.valueOf(glyph.code);
+
+ if (!longTag.containsKey(glyphCode)) {
+ // FIXME: this is buggy as the 3rd arg. should be a String as a Glyph can represent more than 1 char
+ longTag.put(glyphCode, new int[]{glyph.code, glyph.width, glyph.chars.charAt(0)});
+ }
+ }
+
+ b = new String(charEncodedGlyphCodes).getBytes(CJKFont.CJK_ENCODING);
+
}
- String s = new String(glyph, 0, i);
- b = s.getBytes(CJKFont.CJK_ENCODING);
}
catch (UnsupportedEncodingException e) {
throw new ExceptionConverter(e);
Index: src/main/java/com/itextpdf/text/pdf/Glyph.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/Glyph.java (revision 0)
+++ src/main/java/com/itextpdf/text/pdf/Glyph.java (working copy)
@@ -0,0 +1,65 @@
+package com.itextpdf.text.pdf;
+
+/**
+ * Value object describing one glyph of a font: its code in the font file, its
+ * normalized width and the Unicode text it represents. All fields are final.
+ *
+ * @author Palash Ray
+ */
+public class Glyph {
+
+    /** The code or id by which this glyph is represented in the font file. */
+    public final int code;
+
+    /** The normalized width of this glyph. */
+    public final int width;
+
+    /** The Unicode text represented by this glyph. */
+    public final String chars;
+
+    /**
+     * Creates a glyph from its three properties.
+     *
+     * @param code the code or id of the glyph in the font file
+     * @param width the normalized width of the glyph
+     * @param chars the Unicode text represented by the glyph
+     */
+    public Glyph(int code, int width, String chars) {
+        this.code = code;
+        this.width = width;
+        this.chars = chars;
+    }
+
+    /** Computes the hash from the text, the code and the width, in that order. */
+    @Override
+    public int hashCode() {
+        int hash = 31 + (chars == null ? 0 : chars.hashCode());
+        hash = 31 * hash + code;
+        return 31 * hash + width;
+    }
+
+    /** Two glyphs are equal when they are of the same class and all three fields match. */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        Glyph that = (Glyph) obj;
+        boolean sameChars = chars == null ? that.chars == null : chars.equals(that.chars);
+        return sameChars && code == that.code && width == that.width;
+    }
+
+    /** Renders the glyph as {@code Glyph [id=..., width=..., chars=...]}. */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder(Glyph.class.getSimpleName());
+        text.append(" [id=").append(code);
+        text.append(", width=").append(width);
+        text.append(", chars=").append(chars).append("]");
+        return text.toString();
+    }
+
+}
Index: src/main/java/com/itextpdf/text/pdf/GlyphSubstitutionTableReader.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/GlyphSubstitutionTableReader.java (revision 0)
+++ src/main/java/com/itextpdf/text/pdf/GlyphSubstitutionTableReader.java (working copy)
@@ -0,0 +1,343 @@
+package com.itextpdf.text.pdf;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.itextpdf.text.io.RandomAccessSourceFactory;
+import com.itextpdf.text.log.Logger;
+import com.itextpdf.text.log.LoggerFactory;
+
+/**
+ * Parses an OpenType font file and reads its Glyph Substitution (GSUB) table. This
+ * table governs how two or more glyphs should be merged to a single glyph, which is
+ * especially useful for Asian languages like Bangla, Hindi, etc.
+ * <p>
+ * This has been written according to the OpenType font specification. Offsets, counts
+ * and glyph ids are unsigned 16 bit values in that format and are read as such.
+ *
+ * @author Palash Ray
+ */
+public class GlyphSubstitutionTableReader {
+
+    private static final Logger LOG = LoggerFactory.getLogger(GlyphSubstitutionTableReader.class);
+
+    private final RandomAccessFileOrArray rf;
+    private final int gsubTableLocation;
+    private final int[] glyphWidthByIndex;
+    private final Map<Integer, Character> glyphToCharacterMap;
+    /** Maps a substitute glyph id to the ids of the glyphs it replaces, in order. */
+    private Map<Integer, List<Integer>> rawLigatureSubstitutionMap;
+    /** The map built by the first read; kept because the font file is closed afterwards. */
+    private Map<String, Glyph> glyphSubstitutionMap;
+
+    /**
+     * Opens the font file for reading its GSUB table.
+     *
+     * @param fontFilePath the location of the font file
+     * @param gsubTableLocation the position of the GSUB table in the font file
+     * @param glyphToCharacterMap maps the id of a simple glyph to its character
+     * @param glyphWidthByIndex the glyph widths, indexed by glyph id
+     * @throws IOException if the font file cannot be opened
+     */
+    public GlyphSubstitutionTableReader(String fontFilePath, int gsubTableLocation, Map<Integer, Character> glyphToCharacterMap, int[] glyphWidthByIndex) throws IOException {
+        rf = new RandomAccessFileOrArray(new RandomAccessSourceFactory().createBestSource(fontFilePath));
+        this.gsubTableLocation = gsubTableLocation;
+        this.glyphWidthByIndex = glyphWidthByIndex;
+        this.glyphToCharacterMap = glyphToCharacterMap;
+    }
+
+    /**
+     * Reads the GSUB table and resolves every substitution to the text it stands for.
+     * The table is read on the first call only; the font file is closed after that read.
+     *
+     * @return an unmodifiable map from the text to be replaced to the replacing glyph
+     * @throws IOException if the font file cannot be read
+     * @throws IllegalArgumentException if a glyph cannot be resolved to text or the
+     *             table has an unexpected layout
+     */
+    public Map<String, Glyph> getGlyphSubstitutionMap() throws IOException {
+        if (glyphSubstitutionMap != null) {
+            return glyphSubstitutionMap;
+        }
+
+        try {
+            readGsubTable(gsubTableLocation);
+        } finally {
+            // the constructor opened the file for this read only; do not leak the handle
+            rf.close();
+        }
+
+        Map<String, Glyph> substitutions = new HashMap<String, Glyph>();
+
+        for (Map.Entry<Integer, List<Integer>> entry : rawLigatureSubstitutionMap.entrySet()) {
+            int glyphIdToReplace = entry.getKey();
+            List<Integer> constituentGlyphs = entry.getValue();
+            StringBuilder chars = new StringBuilder(constituentGlyphs.size());
+
+            for (Integer constituentGlyphId : constituentGlyphs) {
+                chars.append(getTextFromGlyph(constituentGlyphId, glyphToCharacterMap));
+            }
+
+            Glyph glyph = new Glyph(glyphIdToReplace, glyphWidthByIndex[glyphIdToReplace], chars.toString());
+            substitutions.put(glyph.chars, glyph);
+        }
+
+        glyphSubstitutionMap = Collections.unmodifiableMap(substitutions);
+        return glyphSubstitutionMap;
+    }
+
+    /**
+     * Resolves a glyph to its text: the mapped character for a simple glyph, or the
+     * concatenated text of its constituents for a compound glyph.
+     */
+    private String getTextFromGlyph(int glyphId, Map<Integer, Character> glyphToCharacterMap) {
+        Character c = glyphToCharacterMap.get(glyphId);
+        if (c != null) {
+            return String.valueOf(c.charValue());
+        }
+
+        // no character of its own: this represents a compound glyph
+        List<Integer> constituentGlyphs = rawLigatureSubstitutionMap.get(glyphId);
+        if (constituentGlyphs == null || constituentGlyphs.isEmpty()) {
+            throw new IllegalArgumentException("No corresponding character or simple glyphs found for GlyphID=" + glyphId);
+        }
+
+        StringBuilder chars = new StringBuilder(constituentGlyphs.size());
+        for (int constituentGlyphId : constituentGlyphs) {
+            chars.append(getTextFromGlyph(constituentGlyphId, glyphToCharacterMap));
+        }
+        return chars.toString();
+    }
+
+    /** Reads the GSUB header and, from there, the lookup list. */
+    private void readGsubTable(int gsubTableLocation) throws IOException {
+        rawLigatureSubstitutionMap = new HashMap<Integer, List<Integer>>();
+
+        rf.seek(gsubTableLocation);
+        // 32 bit signed
+        int version = rf.readInt();
+        // 16 bit unsigned
+        int scriptListOffset = rf.readUnsignedShort();
+        int featureListOffset = rf.readUnsignedShort();
+        int lookupListOffset = rf.readUnsignedShort();
+
+        LOG.debug("version=" + version);
+        LOG.debug("scriptListOffset=" + scriptListOffset);
+        LOG.debug("featureListOffset=" + featureListOffset);
+        LOG.debug("lookupListOffset=" + lookupListOffset);
+
+        LOG.debug("************************************");
+
+        // only the lookups are used; the script and feature lists are not read
+        readLookupListTable(gsubTableLocation + lookupListOffset);
+
+        LOG.debug("************************************");
+    }
+
+    /** Reads every lookup table referenced by the lookup list. */
+    private void readLookupListTable(int lookupListTableLocation) throws IOException {
+        rf.seek(lookupListTableLocation);
+        final int lookupCount = rf.readUnsignedShort();
+        LOG.debug("lookupCount=" + lookupCount);
+
+        for (int lookupTableOffset : readOffsets(lookupCount)) {
+            LOG.debug("lookupTableOffset=" + lookupTableOffset);
+            LOG.debug("--------------------");
+            readLookupTable(lookupListTableLocation + lookupTableOffset);
+            LOG.debug("--------------------");
+        }
+    }
+
+    /** Reads {@code count} consecutive unsigned 16 bit values, e.g. an offset array. */
+    private List<Integer> readOffsets(int count) throws IOException {
+        List<Integer> offsets = new ArrayList<Integer>(count);
+        for (int i = 0; i < count; i++) {
+            offsets.add(rf.readUnsignedShort());
+        }
+        return offsets;
+    }
+
+    /**
+     * Reads one lookup table. Only the lookup types 1 (single substitution) and 4
+     * (ligature substitution) are handled; any other type is logged and skipped.
+     */
+    private void readLookupTable(int lookupTableLocation) throws IOException {
+        rf.seek(lookupTableLocation);
+        int lookupType = rf.readUnsignedShort();
+        LOG.debug("lookupType=" + lookupType);
+
+        if (lookupType == 1) {// LookupType 1: Single Substitution Subtable
+
+            // NOTE(review): in the OpenType Lookup table the two fields following
+            // LookupType are LookupFlag and SubTableCount, so these reads probably do not
+            // see a real SingleSubst subtable. Deliberately left as is: parsing it properly
+            // would start applying single substitutions to all text - confirm the intent.
+            int coverage = rf.readUnsignedShort();
+            LOG.debug("coverage=" + coverage);
+
+            // the delta is the one signed 16 bit field here
+            int deltaGlyphID = rf.readShort();
+            LOG.debug("deltaGlyphID=" + deltaGlyphID);
+
+            for (int coverageGlyphId : readCoverageFormat(lookupTableLocation + coverage)) {
+                int substituteGlyphId = coverageGlyphId + deltaGlyphID;
+                rawLigatureSubstitutionMap.put(substituteGlyphId, Arrays.asList(coverageGlyphId));
+            }
+
+        } else if (lookupType == 4) {// LookupType4: Ligature Substitution Subtable
+
+            int lookupFlag = rf.readUnsignedShort();
+            LOG.debug("lookupFlag=" + lookupFlag);
+            int subTableCount = rf.readUnsignedShort();
+            LOG.debug("subTableCount=" + subTableCount);
+
+            for (int subTableOffset : readOffsets(subTableCount)) {
+                LOG.debug("subTableOffset=" + subTableOffset);
+                LOG.debug("^^^^^^^^^^^^^");
+                readLigatureSubstitutionSubtable(lookupTableLocation + subTableOffset);
+                LOG.debug("^^^^^^^^^^^^^");
+            }
+
+        } else {
+            // report through the logger like everything else instead of writing to stderr
+            LOG.warn("The lookup type " + lookupType + " is not yet handled");
+        }
+    }
+
+    /**
+     * Reads a ligature substitution subtable: one ligature set per covered glyph.
+     *
+     * @throws IllegalArgumentException if the subtable is not of format 1 or the number
+     *             of ligature sets differs from the number of covered glyphs
+     */
+    private void readLigatureSubstitutionSubtable(int ligatureSubstitutionSubtableLocation) throws IOException {
+        rf.seek(ligatureSubstitutionSubtableLocation);
+        int substFormat = rf.readUnsignedShort();
+        LOG.debug("substFormat=" + substFormat);
+
+        if (substFormat != 1) {
+            throw new IllegalArgumentException("The expected SubstFormat is 1");
+        }
+
+        int coverage = rf.readUnsignedShort();
+        LOG.debug("coverage=" + coverage);
+
+        int ligSetCount = rf.readUnsignedShort();
+        LOG.debug("^^^^^^^^^^^^^^^^^^^^^^^^^^^^ligSetCount=" + ligSetCount);
+
+        List<Integer> ligatureOffsets = readOffsets(ligSetCount);
+
+        LOG.debug("::::::::::::::::::::::::::::::::::");
+
+        List<Integer> coverageGlyphIds = readCoverageFormat(ligatureSubstitutionSubtableLocation + coverage);
+
+        if (ligSetCount != coverageGlyphIds.size()) {
+            throw new IllegalArgumentException("According to the OpenTypeFont specifications, the coverage count should be equal to the no. of LigatureSetTables");
+        }
+
+        for (int i = 0; i < ligSetCount; i++) {
+            int coverageGlyphId = coverageGlyphIds.get(i);
+            int ligatureOffset = ligatureOffsets.get(i);
+            LOG.debug("ligatureOffset=" + ligatureOffset);
+            readLigatureSetTable(ligatureSubstitutionSubtableLocation + ligatureOffset, coverageGlyphId);
+        }
+    }
+
+    /** Reads all ligatures that start with the given covered glyph. */
+    private void readLigatureSetTable(int ligatureSetTableLocation, int coverageGlyphId) throws IOException {
+        rf.seek(ligatureSetTableLocation);
+        int ligatureCount = rf.readUnsignedShort();
+        LOG.debug("ligatureCount=" + ligatureCount);
+
+        for (int ligatureOffset : readOffsets(ligatureCount)) {
+            readLigatureTable(ligatureSetTableLocation + ligatureOffset, coverageGlyphId);
+        }
+    }
+
+    /**
+     * Reads one ligature and records which glyphs it replaces: the covered glyph
+     * followed by the remaining components listed in the ligature table.
+     */
+    private void readLigatureTable(int ligatureTableLocation, int coverageGlyphId) throws IOException {
+        rf.seek(ligatureTableLocation);
+        int ligGlyph = rf.readUnsignedShort();
+        LOG.debug("@@@@@@@@@@@@@@ ligGlyph=" + ligGlyph);
+        int compCount = rf.readUnsignedShort();
+
+        List<Integer> glyphIdList = new ArrayList<Integer>();
+        glyphIdList.add(coverageGlyphId);
+
+        // the component count includes the covered glyph, which is not stored again
+        for (int i = 0; i < compCount - 1; i++) {
+            int glyphId = rf.readUnsignedShort();
+            glyphIdList.add(glyphId);
+            LOG.debug("############################glyphId=" + glyphId);
+        }
+
+        rawLigatureSubstitutionMap.put(ligGlyph, glyphIdList);
+    }
+
+    /**
+     * Reads a coverage table of format 1 (glyph list) or format 2 (glyph ranges).
+     *
+     * @return the covered glyph ids in coverage order, as an unmodifiable list
+     * @throws UnsupportedOperationException for any other coverage format
+     */
+    private List<Integer> readCoverageFormat(int coverageLocation) throws IOException {
+        rf.seek(coverageLocation);
+        int coverageFormat = rf.readUnsignedShort();
+
+        List<Integer> glyphIds;
+
+        if (coverageFormat == 1) {
+            int glyphCount = rf.readUnsignedShort();
+
+            LOG.debug("^^^^^^^^^coverageCount=" + glyphCount);
+
+            glyphIds = new ArrayList<Integer>(glyphCount);
+
+            for (int i = 0; i < glyphCount; i++) {
+                int coverageGlyphId = rf.readUnsignedShort();
+                LOG.debug("############################coverageGlyphId=" + coverageGlyphId);
+                glyphIds.add(coverageGlyphId);
+            }
+
+        } else if (coverageFormat == 2) {
+
+            int rangeCount = rf.readUnsignedShort();
+
+            LOG.debug("rangeCount=" + rangeCount);
+
+            glyphIds = new ArrayList<Integer>();
+
+            for (int i = 0; i < rangeCount; i++) {
+                readRangeRecord(glyphIds);
+            }
+
+        } else {
+            throw new UnsupportedOperationException("The coverage format " + coverageFormat + " is not yet supported");
+        }
+
+        return Collections.unmodifiableList(glyphIds);
+    }
+
+    /** Reads one range record and appends every glyph id of the range to the list. */
+    private void readRangeRecord(List<Integer> glyphIds) throws IOException {
+        int startGlyphId = rf.readUnsignedShort();
+        LOG.debug("startGlyphId=" + startGlyphId);
+        int endGlyphId = rf.readUnsignedShort();
+        LOG.debug("endGlyphId=" + endGlyphId);
+        int startCoverageIndex = rf.readUnsignedShort();
+        LOG.debug("startCoverageIndex=" + startCoverageIndex);
+
+        for (int glyphId = startGlyphId; glyphId <= endGlyphId; glyphId++) {
+            glyphIds.add(glyphId);
+        }
+    }
+
+}
Index: src/main/java/com/itextpdf/text/pdf/TrueTypeFont.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/TrueTypeFont.java (revision 5617)
+++ src/main/java/com/itextpdf/text/pdf/TrueTypeFont.java (working copy)
@@ -190,6 +190,8 @@
protected HashMap cmap31;
protected HashMap cmapExt;
+
+ private Map glyphSubstitutionMap;
/** The map containing the kerning information. It represents the content of
* table 'kern'. The key is an Integer
where the top 16 bits
@@ -667,6 +669,27 @@
readCMaps();
readKerning();
readBbox();
+
+ /////////////////////////////////////////////
+
+            // cmap31 is declared without an initializer; presumably it stays null for fonts
+            // without a Unicode (3,1) cmap - without it glyphs cannot be mapped back to characters
+            if (tables.get("GSUB") != null && cmap31 != null) {
+                // invert the cmap: glyph code -> character
+                Map<Integer, Character> glyphToCharacterMap = new HashMap<Integer, Character>(cmap31.size());
+                for (Integer charCode : cmap31.keySet()) {
+                    char c = (char) charCode.intValue();
+                    int glyphCode = cmap31.get(charCode)[0];
+                    glyphToCharacterMap.put(glyphCode, c);
+                }
+
+                GlyphSubstitutionTableReader openTypeFontReader = new GlyphSubstitutionTableReader(fileName, tables.get("GSUB")[0], glyphToCharacterMap, GlyphWidths);
+
+                glyphSubstitutionMap = openTypeFontReader.getGlyphSubstitutionMap();
+            }
+
+ ////////////////////////////////////////////
+
GlyphWidths = null;
}
}
@@ -1572,4 +1595,9 @@
return null;
return bboxes[metric[0]];
}
+ /** Returns the glyph substitution map read from the font's GSUB table; null if the field was never assigned. */
+ protected Map getGlyphSubstitutionMap() {
+ return glyphSubstitutionMap;
+ }
+
}
Index: src/main/java/com/itextpdf/text/pdf/languages/IndicCompositeCharacterComparator.java
===================================================================
--- src/main/java/com/itextpdf/text/pdf/languages/IndicCompositeCharacterComparator.java (revision 0)
+++ src/main/java/com/itextpdf/text/pdf/languages/IndicCompositeCharacterComparator.java (working copy)
@@ -0,0 +1,42 @@
+package com.itextpdf.text.pdf.languages;
+
+import java.util.Comparator;
+
+/**
+ * Orders the composite characters (Juktakshars) of Indian languages like Bangla,
+ * Hindi, etc. Composite characters are single glyphs consisting of more than one
+ * character.
+ * <p>
+ * Strings having a higher number of characters are placed before the ones with a
+ * lower number; strings of equal length keep their natural order. This is necessary
+ * to properly display composite characters when they occur side by side.
+ * <p>
+ * Examples of composite characters from Bangla:
+ * <ul>
+ * <li>ঙ্গ</li>
+ * <li>ঙ্</li>
+ * <li>ক্ষ্ম</li>
+ * <li>ক্ষ</li>
+ * </ul>
+ *
+ * @author Palash Ray
+ */
+public class IndicCompositeCharacterComparator implements Comparator<String> {
+
+    /**
+     * Compares two strings, longest first.
+     *
+     * @param o1 the first string
+     * @param o2 the second string
+     * @return a negative value when {@code o1} is longer than {@code o2}, a positive
+     *         value when it is shorter, and the result of {@code o1.compareTo(o2)}
+     *         when both have the same length
+     */
+    public int compare(String o1, String o2) {
+        if (o1.length() != o2.length()) {
+            return o1.length() > o2.length() ? -1 : 1;
+        }
+        return o1.compareTo(o2);
+    }
+
+}
Index: src/test/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizerTest.java
===================================================================
--- src/test/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizerTest.java (revision 0)
+++ src/test/java/com/itextpdf/text/pdf/ArrayBasedStringTokenizerTest.java (working copy)
@@ -0,0 +1,78 @@
+package com.itextpdf.text.pdf;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@link ArrayBasedStringTokenizer}: every test checks that the tokens
+ * concatenate back to the input and that the expected tokens are split out.
+ *
+ * @author Palash Ray
+ */
+public class ArrayBasedStringTokenizerTest {
+
+    private static final String[] LATIN_TOKENS = {"aZx", "b2c", "bc2"};
+
+    @Test
+    public void testTokenize_happyPath() {
+        String text = "12345aZxxabbccb2cxxxcfb1245678bc2889000";
+
+        List<String> tokens = tokenize(LATIN_TOKENS, text);
+
+        assertEquals(text, join(tokens));
+        assertEquals(Arrays.asList("12345", "aZx", "xabbcc", "b2c", "xxxcfb1245678", "bc2", "889000"), tokens);
+    }
+
+    @Test
+    public void testTokenize_regexAtStart() {
+        String text = "bc2e12345aZxxabbccb2cxxxcfb1245678bc2889000";
+
+        List<String> tokens = tokenize(LATIN_TOKENS, text);
+
+        assertEquals(text, join(tokens));
+        assertEquals(0, tokens.indexOf("bc2"));
+    }
+
+    @Test
+    public void testTokenize_regexAtEnd() {
+        String text = "bc2e12345aZxxabbccb2cxxxcfb1245678bc2889000aZx";
+
+        List<String> tokens = tokenize(LATIN_TOKENS, text);
+
+        assertEquals(text, join(tokens));
+        assertEquals(0, tokens.indexOf("bc2"));
+        assertEquals(2, tokens.indexOf("aZx"));
+        assertEquals(tokens.size() - 1, tokens.lastIndexOf("aZx"));
+    }
+
+    @Test
+    public void testTokenize_Bangla() {
+        String text = "আমি কোন পথে ক্ষীরের ষন্ড পুতুল রুপো গঙ্গা ঋষি";
+
+        List<String> tokens = tokenize(new String[]{"\u0995\u09cd\u09b7", "পু"}, text);
+
+        assertEquals(text, join(tokens));
+        assertTrue(tokens.contains("ক্ষ"));
+        assertTrue(tokens.contains("পু"));
+    }
+
+    /** Runs a tokenizer built from the given token strings over the text. */
+    private static List<String> tokenize(String[] tokenStrings, String text) {
+        return Arrays.asList(new ArrayBasedStringTokenizer(tokenStrings).tokenize(text));
+    }
+
+    /** Concatenates the tokens in order. */
+    private static String join(List<String> tokens) {
+        StringBuilder sb = new StringBuilder();
+        for (String token : tokens) {
+            sb.append(token);
+        }
+        return sb.toString();
+    }
+
+}