Skip to content

Commit 388f0d7

Browse files
author
obo
committed
feat: add support for Ideographic Variation Sequences (IVS) in TrueType fonts
1 parent ffdb1b9 commit 388f0d7

File tree

3 files changed

+221
-1
lines changed

3 files changed

+221
-1
lines changed

openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import com.lowagie.text.Utilities;
5555
import java.awt.font.GlyphVector;
5656
import java.io.UnsupportedEncodingException;
57+
import java.util.Arrays;
5758
import java.util.HashMap;
5859

5960
/**
@@ -222,6 +223,70 @@ byte[] convertToBytes(String text, TextRenderingOptions options) {
222223
b = s.getBytes(CJKFont.CJK_ENCODING);
223224

224225
} else {
226+
//ivs font handler,Simply judge whether it is IVS font or not
227+
if (mayContainIVS(text)) {
228+
glyph = new char[len * 2];
229+
for (int k = 0; k < len; ) {
230+
int baseCp;
231+
int charCount;
232+
if (k < len - 1 && Character.isHighSurrogate(text.charAt(k))
233+
&& Character.isLowSurrogate(text.charAt(k + 1))) {
234+
baseCp = Character.toCodePoint(text.charAt(k), text.charAt(k + 1));
235+
charCount = 2;
236+
} else {
237+
baseCp = text.charAt(k);
238+
charCount = 1;
239+
}
240+
241+
int vsCp = -1;
242+
int vsCharCount = 0;
243+
int nextIndex = k + charCount;
244+
245+
if (nextIndex < len) {
246+
int potentialVs = text.charAt(nextIndex);
247+
if (isVariationSelector(potentialVs)) {
248+
vsCp = potentialVs;
249+
vsCharCount = 1;
250+
}
251+
else if (nextIndex < len - 1
252+
&& Character.isHighSurrogate(text.charAt(nextIndex))
253+
&& Character.isLowSurrogate(text.charAt(nextIndex + 1))) {
254+
int potentialVsPair = Character.toCodePoint(
255+
text.charAt(nextIndex), text.charAt(nextIndex + 1));
256+
if (isVariationSelector(potentialVsPair)) {
257+
vsCp = potentialVsPair;
258+
vsCharCount = 2;
259+
}
260+
}
261+
}
262+
263+
if (vsCp != -1) {
264+
int[] format14Metrics = this.ttu.getFormat14MetricsTT(baseCp, vsCp);
265+
if (format14Metrics != null) {
266+
int gl = format14Metrics[0];
267+
if (!this.longTag.containsKey(gl)) {
268+
this.longTag.put(gl, new int[]{gl, format14Metrics[1], baseCp, vsCp});
269+
}
270+
glyph[i++] = (char) gl;
271+
k += charCount + vsCharCount;
272+
continue;
273+
}
274+
}
275+
metrics = this.ttu.getMetricsTT(baseCp);
276+
if (metrics != null) {
277+
int gl = metrics[0];
278+
if (!this.longTag.containsKey(gl)) {
279+
this.longTag.put(gl, new int[]{gl, metrics[1], baseCp});
280+
}
281+
glyph[i++] = (char) gl;
282+
}
283+
284+
k += charCount;
285+
}
286+
glyph = Arrays.copyOfRange(glyph, 0, i);
287+
b = convertCharsToBytes(glyph);
288+
break;
289+
}
225290
String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName;
226291
if (options.isGlyphSubstitutionEnabled() && FopGlyphProcessor.isFopSupported()
227292
&& (fileName != null && fileName.length() > 0
@@ -241,6 +306,43 @@ byte[] convertToBytes(String text, TextRenderingOptions options) {
241306
return b;
242307
}
243308

309+
private static boolean isVariationSelector(int codePoint) {
310+
return (codePoint >= 0xFE00 && codePoint <= 0xFE0F)
311+
|| (codePoint >= 0xE0100 && codePoint <= 0xE01EF);
312+
}
313+
314+
/**
315+
* Quickly determine whether the text may contain IVS (to decide whether to use the IVS dedicated path)
316+
* Note: This means "may contain," not "must contain"—err on the side of caution to avoid omissions
317+
*/
318+
private static boolean mayContainIVS(String text) {
319+
if (text == null) return false;
320+
321+
for (int i = 0; i < text.length(); i++) {
322+
char c = text.charAt(i);
323+
324+
if (c >= '\uFE00' && c <= '\uFE0F') {
325+
return true;
326+
}
327+
328+
if (c >= '\udb40' && c <= '\udb43') {
329+
return true;
330+
}
331+
}
332+
return false;
333+
}
334+
335+
private byte[] convertCharsToBytes(char[] chars) {
336+
byte[] result = new byte[chars.length * 2];
337+
338+
for (int i = 0; i < chars.length; ++i) {
339+
result[2 * i] = (byte) (chars[i] / 256);
340+
result[2 * i + 1] = (byte) (chars[i] % 256);
341+
}
342+
343+
return result;
344+
}
345+
244346
private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException {
245347
int len = text.length();
246348
int[] metrics = null;

openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ class TrueTypeFont extends BaseFont {
206206

207207
protected HashMap<Integer, int[]> cmapExt;
208208

209+
protected HashMap<String, int[]> cmap05;
210+
209211
/**
210212
* The map containing the kerning information. It represents the content of table 'kern'. The key is an
211213
* <CODE>Integer</CODE> where the top 16 bits are the glyph number for the first character and the lower 16 bits
@@ -797,6 +799,7 @@ void readCMaps() throws DocumentException, IOException {
797799
int map31 = 0;
798800
int map30 = 0;
799801
int mapExt = 0;
802+
int map05 = 0;
800803
for (int k = 0; k < num_tables; ++k) {
801804
int platId = rf.readUnsignedShort();
802805
int platSpecId = rf.readUnsignedShort();
@@ -808,6 +811,8 @@ void readCMaps() throws DocumentException, IOException {
808811
map31 = offset;
809812
} else if (platId == 3 && platSpecId == 10) {
810813
mapExt = offset;
814+
} else if (platId == 0 && platSpecId == 5) {
815+
map05 = offset;
811816
}
812817
if (platId == 1 && platSpecId == 0) {
813818
map10 = offset;
@@ -860,6 +865,87 @@ void readCMaps() throws DocumentException, IOException {
860865
break;
861866
}
862867
}
868+
if (map05 > 0) {
869+
int format14Location = table_location[0] + map05;
870+
this.rf.seek((long) format14Location);
871+
int format = this.rf.readUnsignedShort();
872+
if (format == 14) {
873+
this.cmap05 = this.readFormat14(format14Location);
874+
}
875+
}
876+
}
877+
878+
HashMap<String, int[]> readFormat14(int format14Location) throws IOException {
879+
HashMap<String, int[]> result = new HashMap<>();
880+
long startPosition = this.rf.getFilePointer() - 4; // 回退到读取 byteLength 之前的位置
881+
882+
int byteLength = this.rf.readInt();
883+
int numVarSelectorRecords = this.rf.readInt();
884+
885+
if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) {
886+
throw new IOException("Invalid numVarSelectorRecords: " + numVarSelectorRecords);
887+
}
888+
889+
Map<Integer, Integer> nonDefaultOffsetMap = new HashMap<>();
890+
891+
for (int i = 0; i < numVarSelectorRecords; ++i) {
892+
byte[] input = new byte[3];
893+
this.rf.read(input);
894+
int selectorUnicodeValue = this.byte2int(input, 3);
895+
int defaultUVSOffset = this.rf.readInt();
896+
int nonDefaultUVSOffset = this.rf.readInt();
897+
898+
if (nonDefaultUVSOffset > 0) { // 只处理非零偏移
899+
nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset);
900+
}
901+
}
902+
903+
for (Map.Entry<Integer, Integer> entry : nonDefaultOffsetMap.entrySet()) {
904+
Integer selectorUnicodeValue = entry.getKey();
905+
int nonDefaultUVSOffset = entry.getValue();
906+
907+
this.rf.seek((long) (format14Location + nonDefaultUVSOffset));
908+
int mappingNums = this.rf.readInt();
909+
910+
if (mappingNums < 0 || mappingNums > 10000) {
911+
// invalid mapping
912+
continue;
913+
}
914+
915+
for (int i = 0; i < mappingNums; ++i) {
916+
byte[] input = new byte[3];
917+
this.rf.read(input);
918+
int unicodeValue = this.byte2int(input, 3);
919+
int glyphId = this.rf.readUnsignedShort();
920+
result.put(unicodeValue + "_" + selectorUnicodeValue,
921+
new int[]{glyphId, this.getGlyphWidth(glyphId)});
922+
}
923+
}
924+
return result;
925+
}
926+
927+
/**
928+
* 将大端序(Big-Endian)的字节数组转换为无符号整数(最多4字节)
929+
* @param data 输入字节数组
930+
* @param n 读取字节数(1~4)
931+
* @return 对应的非负整数值
932+
*/
933+
public int byte2int(byte[] data, int n) {
934+
if (data == null || n <= 0 || n > 4 || data.length < n) {
935+
return 0;
936+
}
937+
int result = 0;
938+
for (int i = 0; i < n; i++) {
939+
result = (result << 8) | (data[i] & 0xFF); // & 0xFF 确保无符号
940+
}
941+
return result;
942+
}
943+
944+
public int[] getFormat14MetricsTT(int char1, int char2) {
945+
if (this.cmap05 != null) {
946+
return this.cmap05.get(char1 + "_" + char2);
947+
}
948+
return null;
863949
}
864950

865951
HashMap<Integer, int[]> readFormat12() throws IOException {
@@ -1419,6 +1505,9 @@ public int[] getMetricsTT(int c) {
14191505
if (cmap10 != null) {
14201506
return cmap10.get(c);
14211507
}
1508+
if (cmap05 != null) {
1509+
return cmap05.get(c);
1510+
}
14221511
return null;
14231512
}
14241513

openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,13 @@ private PdfStream getToUnicode(int[][] metrics) {
268268
--size;
269269
int[] metric = metrics[k];
270270
String fromTo = toHex(metric[0]);
271-
buf.append(fromTo).append(fromTo).append(toHex(metric[2])).append('\n');
271+
String hexString;
272+
if (metric.length == 4) {
273+
hexString = toHex(metric[2], metric[3]);
274+
} else {
275+
hexString = toHex(metric[2]);
276+
}
277+
buf.append(fromTo).append(fromTo).append(hexString).append('\n');
272278
}
273279
buf.append(
274280
"endbfrange\n" +
@@ -585,4 +591,27 @@ public int[] getCharBBox(int c) {
585591
return bboxes[m[0]];
586592
}
587593

594+
private String toHex(int char1, int char2) {
595+
String hex1;
596+
int high;
597+
int low;
598+
if (char1 < 65536) {
599+
hex1 = toHex4(char1);
600+
} else {
601+
char1 -= 65536;
602+
high = char1 / 1024 + '\ud800';
603+
low = char1 % 1024 + '\udc00';
604+
hex1 = toHex4(high) + toHex4(low);
605+
}
606+
String hex2;
607+
if (char2 < 65536) {
608+
hex2 = toHex4(char2);
609+
} else {
610+
char2 -= 65536;
611+
high = char2 / 1024 + '\ud800';
612+
low = char2 % 1024 + '\udc00';
613+
hex2 = toHex4(high) + toHex4(low);
614+
}
615+
return "[<" + hex1 + hex2 + ">]";
616+
}
588617
}

0 commit comments

Comments
 (0)