Character sets are limited to GB2312, Shift_JIS, the Unicode encodings (UTF-8/UTF-16/UTF-32), and all single-byte charsets.

Fixed font code point handling - characters are now displayed, added, etc. properly.
This commit is contained in:
Jindra Petřík
2022-11-12 11:03:36 +01:00
parent c266037785
commit 4c6ccf0d63
20 changed files with 21146 additions and 34 deletions

View File

@@ -361,12 +361,12 @@ public class DefineFont2Tag extends FontTag {
@Override
public char glyphToChar(int glyphIndex) {
return (char) (int) codeTable.get(glyphIndex);
return Utf8Helper.codePointToChar(codeTable.get(glyphIndex), getCodesCharset());
}
@Override
public int charToGlyph(char c) {
return codeTable.indexOf((int) c);
return codeTable.indexOf(Utf8Helper.charToCodePoint(c, getCodesCharset()));
}
@Override
@@ -452,8 +452,13 @@ public class DefineFont2Tag extends FontTag {
int fontStyle = getFontStyle();
SHAPE shp = SHAPERECORD.fontCharacterToSHAPE(font, (int) Math.round(getDivider() * 1024), character);
int code = (int) character;
int code = (int) Utf8Helper.charToCodePoint(character, getCodesCharset());
if (code == -1) { //Fixme - throw exception, etc.
code = 0;
}
int pos = -1;
boolean exists = false;
for (int i = 0; i < codeTable.size(); i++) {
@@ -472,7 +477,7 @@ public class DefineFont2Tag extends FontTag {
if (!exists) {
shiftGlyphIndices(fontID, pos, true);
glyphShapeTable.add(pos, shp);
codeTable.add(pos, (int) character);
codeTable.add(pos, code);
} else {
glyphShapeTable.set(pos, shp);
}
@@ -488,25 +493,25 @@ public class DefineFont2Tag extends FontTag {
}
for (int k = 0; k < fontKerningTable.size(); k++) {
if (fontKerningTable.get(k).fontKerningCode1 == character
|| fontKerningTable.get(k).fontKerningCode2 == character) {
if (fontKerningTable.get(k).fontKerningCode1 == code
|| fontKerningTable.get(k).fontKerningCode2 == code) {
fontKerningTable.remove(k);
k--;
}
}
List<FontHelper.KerningPair> kerning = getFontKerningPairs(font, (int) (getDivider() * 1024));
for (FontHelper.KerningPair pair : kerning) {
if (pair.char1 != character && pair.char2 != character) {
if (pair.char1 != code && pair.char2 != code) {
continue;
}
int glyph1 = charToGlyph(pair.char1);
if (pair.char1 == character) {
if (pair.char1 == code) {
} else if (glyph1 == -1) {
continue;
}
int glyph2 = charToGlyph(pair.char2);
if (pair.char2 == character) {
if (pair.char2 == code) {
} else if (glyph2 == -1) {
continue;
@@ -596,7 +601,8 @@ public class DefineFont2Tag extends FontTag {
public String getCharacters() {
StringBuilder ret = new StringBuilder(codeTable.size());
for (int i : codeTable) {
ret.append((char) i);
Character c = Utf8Helper.codePointToChar(i, getCodesCharset());
ret.append(c == null ? "?" : c);
}
return ret.toString();
}
@@ -616,8 +622,10 @@ public class DefineFont2Tag extends FontTag {
@Override
public int getCharKerningAdjustment(char c1, char c2) {
int kerningAdjustment = 0;
int c1Code = Utf8Helper.charToCodePoint(c1, getCodesCharset());
int c2Code = Utf8Helper.charToCodePoint(c2, getCodesCharset());
for (KERNINGRECORD ker : fontKerningTable) {
if (ker.fontKerningCode1 == c1 && ker.fontKerningCode2 == c2) {
if (ker.fontKerningCode1 == c1Code && ker.fontKerningCode2 == c2Code) {
kerningAdjustment = ker.fontKerningAdjustment;
break;
}
@@ -687,4 +695,12 @@ public class DefineFont2Tag extends FontTag {
return hasLayout();
}
/**
 * Gets the name of the charset used to convert this font's code table
 * entries to and from characters.
 *
 * @return "Shift_JIS" when the fontFlagsShiftJIS flag is set, otherwise
 * the result of getCharset()
 */
@Override
public String getCodesCharset() {
    return fontFlagsShiftJIS ? "Shift_JIS" : getCharset();
}
}

View File

@@ -327,12 +327,12 @@ public class DefineFont3Tag extends FontTag {
@Override
public char glyphToChar(int glyphIndex) {
return (char) (int) codeTable.get(glyphIndex);
return Utf8Helper.codePointToChar(codeTable.get(glyphIndex), getCodesCharset());
}
@Override
public int charToGlyph(char c) {
return codeTable.indexOf((int) c);
return codeTable.indexOf(Utf8Helper.charToCodePoint(c, getCodesCharset()));
}
@Override
@@ -444,7 +444,11 @@ public class DefineFont3Tag extends FontTag {
}
int fontStyle = getFontStyle();
SHAPE shp = SHAPERECORD.fontCharacterToSHAPE(font, (int) Math.round(getDivider() * 1024), character);
int code = (int) character;
int code = (int) Utf8Helper.charToCodePoint(character, getCodesCharset());
if (code == -1) { //Fixme - throw exception, etc.
code = 0;
}
int pos = -1;
boolean exists = false;
for (int i = 0; i < codeTable.size(); i++) {
@@ -463,7 +467,7 @@ public class DefineFont3Tag extends FontTag {
if (!exists) {
shiftGlyphIndices(fontID, pos, true);
glyphShapeTable.add(pos, shp);
codeTable.add(pos, (int) character);
codeTable.add(pos, code);
} else {
glyphShapeTable.set(pos, shp);
}
@@ -479,25 +483,25 @@ public class DefineFont3Tag extends FontTag {
}
for (int k = 0; k < fontKerningTable.size(); k++) {
if (fontKerningTable.get(k).fontKerningCode1 == character
|| fontKerningTable.get(k).fontKerningCode2 == character) {
if (fontKerningTable.get(k).fontKerningCode1 == code
|| fontKerningTable.get(k).fontKerningCode2 == code) {
fontKerningTable.remove(k);
k--;
}
}
List<FontHelper.KerningPair> kerning = getFontKerningPairs(font, (int) (getDivider() * 1024));
for (FontHelper.KerningPair pair : kerning) {
if (pair.char1 != character && pair.char2 != character) {
if (pair.char1 != code && pair.char2 != code) {
continue;
}
int glyph1 = charToGlyph(pair.char1);
if (pair.char1 == character) {
if (pair.char1 == code) {
} else if (glyph1 == -1) {
continue;
}
int glyph2 = charToGlyph(pair.char2);
if (pair.char2 == character) {
if (pair.char2 == code) {
} else if (glyph2 == -1) {
continue;
@@ -599,7 +603,8 @@ public class DefineFont3Tag extends FontTag {
public String getCharacters() {
StringBuilder ret = new StringBuilder(codeTable.size());
for (int i : codeTable) {
ret.append((char) i);
Character c = Utf8Helper.codePointToChar(i, getCodesCharset());
ret.append(c == null ? "?" : c);
}
return ret.toString();
}
@@ -629,9 +634,12 @@ public class DefineFont3Tag extends FontTag {
@Override
public int getCharKerningAdjustment(char c1, char c2) {
int c1Code = Utf8Helper.charToCodePoint(c1, getCodesCharset());
int c2Code = Utf8Helper.charToCodePoint(c2, getCodesCharset());
int kerningAdjustment = 0;
for (KERNINGRECORD ker : fontKerningTable) {
if (ker.fontKerningCode1 == c1 && ker.fontKerningCode2 == c2) {
if (ker.fontKerningCode1 == c1Code && ker.fontKerningCode2 == c2Code) {
kerningAdjustment = ker.fontKerningAdjustment;
break;
}
@@ -701,4 +709,12 @@ public class DefineFont3Tag extends FontTag {
public boolean isLeadingEditable() {
return hasLayout();
}
/**
 * Gets the name of the charset used to convert this font's code table
 * entries to and from characters.
 *
 * @return "Shift_JIS" when the fontFlagsShiftJIS flag is set, otherwise
 * the result of getCharset()
 */
@Override
public String getCodesCharset() {
    return fontFlagsShiftJIS ? "Shift_JIS" : getCharset();
}
}

View File

@@ -179,4 +179,14 @@ public class DefineFontInfo2Tag extends FontInfoTag {
public void setFontFlagsItalic(boolean value) {
fontFlagsItalic = value;
}
/**
 * Tells whether the ANSI flag of this font info tag is set.
 *
 * @return value of the fontFlagsANSI field
 */
@Override
public boolean isAnsi() {
return fontFlagsANSI;
}
/**
 * Tells whether the Shift-JIS flag of this font info tag is set.
 *
 * @return value of the fontFlagsShiftJIS field
 */
@Override
public boolean isShiftJIS() {
return fontFlagsShiftJIS;
}
}

View File

@@ -59,7 +59,8 @@ public class DefineFontInfoTag extends FontInfoTag {
public boolean fontFlagsWideCodes;
@SWFType(value = BasicType.UI8, alternateValue = BasicType.UI16, alternateCondition = "fontFlagsWideCodes")
//@SWFType(value = BasicType.UI8, alternateValue = BasicType.UI16, alternateCondition = "fontFlagsWideCodes")
@SWFType(value = BasicType.UI16)
public List<Integer> codeTable;
/**
@@ -179,5 +180,15 @@ public class DefineFontInfoTag extends FontInfoTag {
@Override
public void setFontFlagsItalic(boolean value) {
fontFlagsItalic = value;
}
/**
 * Tells whether the Shift-JIS flag of this font info tag is set.
 *
 * @return value of the fontFlagsShiftJIS field
 */
@Override
public boolean isShiftJIS() {
return fontFlagsShiftJIS;
}
/**
 * Tells whether the ANSI flag of this font info tag is set.
 *
 * @return value of the fontFlagsANSI field
 */
@Override
public boolean isAnsi() {
return fontFlagsANSI;
}
}

View File

@@ -29,9 +29,11 @@ import com.jpexs.decompiler.flash.types.annotations.SWFType;
import com.jpexs.decompiler.flash.types.annotations.SWFVersion;
import com.jpexs.decompiler.flash.types.shaperecords.SHAPERECORD;
import com.jpexs.helpers.ByteArrayRange;
import com.jpexs.helpers.utf8.Utf8Helper;
import java.awt.Font;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
@@ -156,7 +158,7 @@ public class DefineFontTag extends FontTag {
public char glyphToChar(int glyphIndex) {
ensureFontInfo();
if (fontInfoTag != null) {
return (char) (int) fontInfoTag.getCodeTable().get(glyphIndex);
return Utf8Helper.codePointToChar(fontInfoTag.getCodeTable().get(glyphIndex), getCodesCharset());
} else {
return '?';
}
@@ -166,7 +168,7 @@ public class DefineFontTag extends FontTag {
public int charToGlyph(char c) {
ensureFontInfo();
if (fontInfoTag != null) {
return fontInfoTag.getCodeTable().indexOf((int) c);
return fontInfoTag.getCodeTable().indexOf(Utf8Helper.charToCodePoint(c, getCodesCharset()));
}
return -1;
@@ -265,11 +267,25 @@ public class DefineFontTag extends FontTag {
return 1;
}
/**
 * Gets the name of the charset used to convert the code table of the
 * associated font info tag to and from characters.
 *
 * @return "Shift_JIS" when a font info tag is present and reports
 * Shift-JIS, otherwise the result of getCharset()
 */
@Override
public String getCodesCharset() {
    boolean shiftJisInfo = fontInfoTag != null && fontInfoTag.isShiftJIS();
    return shiftJisInfo ? "Shift_JIS" : getCharset();
}
@Override
public void addCharacter(char character, Font font) {
SHAPE shp = SHAPERECORD.fontCharacterToSHAPE(font, (int) Math.round(getDivider() * 1024), character);
ensureFontInfo();
int code = (int) character;
int code = (int) Utf8Helper.charToCodePoint(character, getCodesCharset());
if (code == -1) { //Fixme - throw exception, etc.
code = 0;
}
int pos = -1;
boolean exists = false;
if (fontInfoTag != null) {
@@ -296,7 +312,7 @@ public class DefineFontTag extends FontTag {
shiftGlyphIndices(fontId, pos, true);
glyphShapeTable.add(pos, shp);
if (fontInfoTag != null) {
fontInfoTag.addFontCharacter(pos, (int) character);
fontInfoTag.addFontCharacter(pos, code);
}
} else {
glyphShapeTable.set(pos, shp);
@@ -362,8 +378,8 @@ public class DefineFontTag extends FontTag {
if (fontInfoTag != null) {
List<Integer> codeTable = fontInfoTag.getCodeTable();
StringBuilder ret = new StringBuilder(codeTable.size());
for (int i : codeTable) {
ret.append((char) i);
for (int i : codeTable) {
ret.append(Utf8Helper.codePointToChar(i, getCodesCharset()));
}
return ret.toString();
}

View File

@@ -62,6 +62,10 @@ public abstract class FontInfoTag extends Tag implements CharacterIdTag {
public abstract void setFontFlagsItalic(boolean value);
/**
 * Tells whether this font info is flagged as Shift-JIS.
 *
 * @return true when the Shift-JIS flag is set
 */
public abstract boolean isShiftJIS();
/**
 * Tells whether this font info is flagged as ANSI.
 *
 * @return true when the ANSI flag is set
 */
public abstract boolean isAnsi();
@Override
public String toString() {
return super.toString() + " (" + fontID + ")";

View File

@@ -531,4 +531,6 @@ public abstract class FontTag extends DrawableTag implements AloneTag {
return installedFontsByName;
}
/**
 * Gets the name of the charset in which the character codes of this font
 * are expressed. Font tags pass this name to
 * Utf8Helper.codePointToChar / Utf8Helper.charToCodePoint when converting
 * between stored codes and Java chars.
 *
 * @return charset name
 */
public abstract String getCodesCharset();
}

View File

@@ -534,4 +534,9 @@ public final class DefineCompactedFont extends FontTag {
public RECT getRectWithStrokes() {
return getRect();
}
/**
 * Gets the name of the charset used for the character codes of this font.
 * This tag type has no charset flag of its own here, so it simply
 * delegates to getCharset().
 *
 * @return result of getCharset()
 */
@Override
public String getCodesCharset() {
return getCharset();
}
}

View File

@@ -16,10 +16,21 @@
*/
package com.jpexs.helpers.utf8;
import com.jpexs.helpers.Helper;
import com.jpexs.helpers.utf8.charset.Gb2312;
import com.jpexs.helpers.utf8.charset.ShiftJis;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
@@ -28,8 +39,44 @@ import java.nio.charset.Charset;
public class Utf8Helper {
public static String charsetName = "UTF-8";
public static Charset charset = Charset.forName("UTF-8");
// Multi-byte charsets that are accepted in addition to the single-byte ones.
// The names are compared against the keys of Charset.availableCharsets(),
// so they must be spelled exactly as the canonical charset names
// (e.g. "UTF-16BE", not "UTF16-BE").
private static List<String> allowedVariableLengthCharsets = Arrays.asList(
        "GB2312", "Shift_JIS", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32LE", "UTF-32BE");
/**
* Allowed charsets. They are limited to single byte charsets + allowedVariableLengthCharsets
*/
public static List<String> allowedCharsets = new ArrayList<>();
static {
    // Probe every charset offered by the JVM: encode each of the 65536
    // char values and record the shortest and longest encoded length.
    // A charset is allowed when every char encodes to exactly one byte,
    // or when it is explicitly listed in allowedVariableLengthCharsets.
    for (Map.Entry<String, Charset> entry : Charset.availableCharsets().entrySet()) {
        String name = entry.getKey();
        Charset candidate = entry.getValue();
        int longest = 0;
        int shortest = Integer.MAX_VALUE;
        try {
            for (int code = 0; code < 65536; code++) {
                int encodedLength = candidate.encode("" + (char) code).remaining();
                longest = Math.max(longest, encodedLength);
                shortest = Math.min(shortest, encodedLength);
            }
            boolean alwaysSingleByte = shortest == 1 && longest == 1;
            if (alwaysSingleByte || allowedVariableLengthCharsets.contains(name)) {
                allowedCharsets.add(name);
            }
        } catch (UnsupportedOperationException ex) {
            // The charset refused to encode; it is left out of the allowed list.
        }
    }
}
public static String urlDecode(String s) {
try {
@@ -55,4 +102,73 @@ public class Utf8Helper {
// todo: make it faster without actually writing it to an array
return string.getBytes(charset).length;
}
/**
 * Converts a character code expressed in the given charset to a Java char.
 *
 * GB2312 and Shift_JIS codes are translated through their converter
 * classes, the listed UTF charsets are taken as Unicode already, and any
 * other charset name is treated as a single-byte charset.
 *
 * @param codePoint character code in the given charset
 * @param charsetName name of the charset the code belongs to
 * @return the converted char, or '?' when the conversion yields a
 * negative value
 */
public static char codePointToChar(int codePoint, String charsetName) {
    int unicode;
    switch (charsetName) {
        case "GB2312":
            unicode = new Gb2312().toUnicode(codePoint);
            break;
        case "Shift_JIS":
            unicode = new ShiftJis().toUnicode(codePoint);
            break;
        case "UTF-8":
        case "UTF-16":
        case "UTF-16BE":
        case "UTF-16LE":
        case "UTF-32":
        case "UTF-32BE":
        case "UTF-32LE":
            unicode = codePoint;
            break;
        default:
            unicode = decodeSingleByteCode(codePoint, charsetName);
            break;
    }
    return unicode >= 0 ? (char) unicode : '?';
}

/**
 * Decodes the low byte of the code with the named charset (assumed to be
 * a single-byte, ANSI-like charset).
 *
 * @return first code point of the decoded text, or -1 when the charset
 * name is not supported (the failure is logged)
 */
private static int decodeSingleByteCode(int code, String singleByteCharsetName) {
    try {
        byte[] raw = {(byte) code};
        return new String(raw, singleByteCharsetName).codePointAt(0);
    } catch (UnsupportedEncodingException ex) {
        Logger.getLogger(Utf8Helper.class.getName()).log(Level.SEVERE, null, ex);
        return -1;
    }
}
/**
 * Converts a Java char to its character code in the given charset.
 *
 * GB2312 and Shift_JIS are translated through their converter classes,
 * the listed UTF charsets use the char value itself, and any other
 * charset name is treated as a single-byte charset.
 *
 * @param character char to convert
 * @param charsetName name of the target charset
 * @return character code in the target charset, or -1 when the charset is
 * unknown or (single-byte branch) the character cannot be represented as
 * exactly one byte
 */
public static int charToCodePoint(char character, String charsetName) {
    int unicodeCodePoint = (int) character;
    int codePoint;
    switch (charsetName) {
        case "GB2312":
            codePoint = new Gb2312().fromUnicode(unicodeCodePoint);
            break;
        case "Shift_JIS":
            codePoint = new ShiftJis().fromUnicode(unicodeCodePoint);
            break;
        case "UTF-8":
        case "UTF-16":
        case "UTF-16BE":
        case "UTF-16LE":
        case "UTF-32":
        case "UTF-32BE":
        case "UTF-32LE":
            codePoint = unicodeCodePoint;
            break;
        default: {
            //assuming single byte ANSI
            codePoint = -1;
            try {
                Charset singleByteCharset = Charset.forName(charsetName);
                // String.getBytes() silently substitutes '?' for unmappable
                // characters, which would make every unsupported character
                // look like code 63. Ask the encoder first so that callers
                // really get -1 for characters the charset cannot express.
                if (singleByteCharset.canEncode()
                        && singleByteCharset.newEncoder().canEncode(character)) {
                    ByteBuffer encoded = singleByteCharset.encode(String.valueOf(character));
                    // Anything other than exactly one byte does not fit the
                    // single-byte assumption of this branch.
                    if (encoded.remaining() == 1) {
                        codePoint = encoded.get() & 0xff;
                    }
                }
            } catch (IllegalArgumentException ex) {
                // Unknown or illegal charset name.
                Logger.getLogger(Utf8Helper.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return codePoint;
}
}

View File

@@ -0,0 +1,102 @@
package com.jpexs.helpers.utf8.charset;
import com.jpexs.decompiler.flash.action.parser.ActionParseException;
import com.jpexs.decompiler.flash.action.parser.script.ActionScriptLexer;
import com.jpexs.decompiler.flash.action.parser.script.ParsedSymbol;
import com.jpexs.decompiler.flash.action.parser.script.SymbolType;
import java.io.IOException;
import java.util.Map;
/**
 * Base class of the charset converters. Besides the two conversion methods
 * it offers helpers that fill lookup tables from a token stream produced by
 * an {@link ActionScriptLexer}.
 *
 * The helpers do not validate the token stream: they consume tokens in a
 * fixed order and cast literal values to Long, so malformed input or tables
 * that are too small surface as runtime exceptions.
 *
 * @author JPEXS
 */
public abstract class AbstractCharsetConverter {

    /**
     * Reads key/value pairs into the given map. Judging by the inline
     * token comments the expected layout is
     * {@code identifier = { {key, value}, {key, value}, ... };}.
     *
     * @param data map that receives the pairs
     * @param lexer token source positioned before the identifier
     * @throws IOException when the lexer fails to read
     * @throws ActionParseException when the lexer fails to tokenize
     */
    protected static void readMap(Map<Integer, Integer> data, ActionScriptLexer lexer) throws IOException, ActionParseException {
        ParsedSymbol s;
        lexer.lex(); //identifier;
        lexer.lex(); //=
        lexer.lex(); // {
        do {
            s = lexer.lex(); //{
            if (s.type == SymbolType.CURLY_CLOSE) {
                // closing brace of the whole list instead of a new pair
                break;
            }
            s = lexer.lex();
            int key = (int) (long) (Long) s.value;
            lexer.lex(); //,
            s = lexer.lex();
            int value = (int) (long) (Long) s.value;
            data.put(key, value);
            lexer.lex(); //}
            s = lexer.lex(); // separator after the pair decides whether to continue
        } while (s.type == SymbolType.COMMA);
        lexer.lex(); //;
    }

    /**
     * Reads a flat list of integers into the given array, starting at
     * index 0. A MINUS token in front of a value negates it. Judging by the
     * inline token comments the expected layout is
     * {@code identifier = { v0, v1, ... };}.
     *
     * @param data array that receives the values; it must be large enough
     * for all values in the stream
     * @param lexer token source positioned before the identifier
     * @throws IOException when the lexer fails to read
     * @throws ActionParseException when the lexer fails to tokenize
     */
    protected static void readOneDimensionalInt(int[] data, ActionScriptLexer lexer) throws IOException, ActionParseException {
        ParsedSymbol s;
        lexer.lex(); //identifier
        lexer.lex(); //=
        lexer.lex(); // {
        int pos = 0;
        do {
            s = lexer.lex();
            if (s.type == SymbolType.CURLY_CLOSE) {
                break;
            }
            boolean negative = false;
            if (s.type == SymbolType.MINUS) {
                negative = true;
                s = lexer.lex();
            }
            data[pos] = (int) (long) (Long) s.value;
            if (negative) {
                data[pos] = -data[pos];
            }
            s = lexer.lex(); // separator after the value decides whether to continue
            pos++;
        } while (s.type == SymbolType.COMMA);
        lexer.lex(); //;
    }

    /**
     * Reads a list of integer rows into the given two-dimensional array.
     * Rows are stored at increasing first indices, values at increasing
     * second indices; a MINUS token in front of a value negates it. Judging
     * by the inline token comments the expected layout is
     * {@code identifier = { {v, v, ...}, {v, v, ...}, ... };}.
     *
     * @param data array that receives the values; both dimensions must be
     * large enough for the stream contents
     * @param lexer token source positioned before the identifier
     * @throws IOException when the lexer fails to read
     * @throws ActionParseException when the lexer fails to tokenize
     */
    protected static void readTwoDimensionalInt(int[][] data, ActionScriptLexer lexer) throws IOException, ActionParseException {
        ParsedSymbol s;
        lexer.lex(); //identifier;
        lexer.lex(); //=
        lexer.lex(); // {
        int pos1 = 0;
        do {
            lexer.lex(); //{
            int pos2 = 0;
            do {
                s = lexer.lex();
                if (s.type == SymbolType.CURLY_CLOSE) {
                    break;
                }
                boolean negative = false;
                if (s.type == SymbolType.MINUS) {
                    negative = true;
                    s = lexer.lex();
                }
                data[pos1][pos2] = (int) (long) (Long) s.value;
                if (negative) {
                    data[pos1][pos2] = -data[pos1][pos2];
                }
                s = lexer.lex(); // separator inside the row
                pos2++;
            } while (s.type == SymbolType.COMMA);
            s = lexer.lex(); // separator between rows
            pos1++;
        } while (s.type == SymbolType.COMMA);
        lexer.lex(); //;
    }

    /**
     * Converts a character code of this charset to a Unicode code point.
     *
     * @param codePoint character code in this charset
     * @return Unicode code point; the value used for unmapped codes is
     * defined by the implementation
     */
    public abstract int toUnicode(int codePoint);

    /**
     * Converts a Unicode code point to a character code of this charset.
     *
     * @param codePoint Unicode code point
     * @return character code in this charset; the value used for unmapped
     * code points is defined by the implementation
     */
    public abstract int fromUnicode(int codePoint);
}

View File

@@ -0,0 +1,97 @@
package com.jpexs.helpers.utf8.charset;
import com.jpexs.decompiler.flash.action.parser.ActionParseException;
import com.jpexs.decompiler.flash.action.parser.script.ActionScriptLexer;
import com.jpexs.decompiler.flash.action.parser.script.ParsedSymbol;
import static com.jpexs.helpers.utf8.charset.AbstractCharsetConverter.readOneDimensionalInt;
import static com.jpexs.helpers.utf8.charset.AbstractCharsetConverter.readTwoDimensionalInt;
import com.jpexs.helpers.utf8.Utf8Helper;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * GB18030 lookup tables and conversion helpers. The tables are loaded once,
 * when the class is initialized, from the bundled Gb18030data.bin resource.
 *
 * @author JPEXS
 */
public class Gb18030 extends AbstractCharsetConverter {

    /** Classpath location of the table data. */
    private static final String DATA_RESOURCE = "/com/jpexs/helpers/utf8/charset/Gb18030data.bin";

    private static int[][] gb18030_index_to_cp_len2_record = new int[126][191];

    /** Value returned for entries without a mapping. */
    private static final int GB18030_NULL = 0;

    // Per first-byte-pair entry: a negative value is an arithmetic shift,
    // a non-negative value selects a row of gb18030_index_to_cp_len4_record.
    private static int[] gb18030_len4_record_shift = new int[]{0, -1546, -2806, -4066, -5326, -6586,
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -41987, -43247, -44507, -45767, -47027, -48287, -49547, -50807,
        -52067, -53327, 11, -59963, -61223, -62483, 12, 13};

    private static int[][] gb18030_index_to_cp_len4_record = new int[14][1260];

    private static int[] gb18030_cp_to_index_record = new int[61339];

    static {
        //Since data is too long to save it directly into Java source, load it from bin
        try (InputStream is = Gb18030.class.getResourceAsStream(DATA_RESOURCE)) {
            if (is == null) {
                // A missing resource is a packaging error; fail loudly instead
                // of terminating the JVM with a "success" exit code.
                throw new IllegalStateException("Missing charset data resource: " + DATA_RESOURCE);
            }
            ActionScriptLexer lexer = new ActionScriptLexer(new InputStreamReader(is, Utf8Helper.charset));
            readTwoDimensionalInt(gb18030_index_to_cp_len2_record, lexer);
            readTwoDimensionalInt(gb18030_index_to_cp_len4_record, lexer);
            readOneDimensionalInt(gb18030_cp_to_index_record, lexer);
        } catch (IOException | ActionParseException ex) {
            Logger.getLogger(Gb18030.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Looks up the table entry for a two-byte sequence.
     *
     * @param byte1 first byte, valid range 0x81-0xfe
     * @param byte2 second byte, valid range 0x40-0xfe
     * @return table entry, or 0 when a byte is outside its valid range
     */
    public static int gb18030_index_to_cp_len2(int byte1, int byte2) {
        if (0x81 <= byte1 && byte1 <= 0xfe && 0x40 <= byte2 && byte2 <= 0xfe) {
            return gb18030_index_to_cp_len2_record[byte1 - 0x81][byte2 - 0x40];
        } else {
            return 0;
        }
    }

    /**
     * Looks up the value for a four-byte sequence.
     *
     * @param byte1 first byte (offset 0x81)
     * @param byte2 second byte (offset 0x30)
     * @param byte3 third byte (offset 0x81)
     * @param byte4 fourth byte (offset 0x30)
     * @return computed or table value, or 0 when the bytes address a
     * position outside the tables
     */
    public static int gb18030_index_to_cp_len4(int byte1, int byte2, int byte3, int byte4) {
        int pos_1, pos_2;
        byte1 -= 0x81;
        byte2 -= 0x30;
        byte3 -= 0x81;
        byte4 -= 0x30;
        pos_1 = byte1 * 10 + byte2;
        pos_2 = byte3 * 10 + byte4;
        // Lower bounds are checked as well so that bytes below the offsets
        // yield 0 instead of a negative array index.
        if (0 <= pos_1 && pos_1 <= 31 && 0 <= pos_2 && pos_2 <= 1259) {
            if (gb18030_len4_record_shift[pos_1] < 0) {
                return pos_2 - gb18030_len4_record_shift[pos_1];
            } else {
                return gb18030_index_to_cp_len4_record[gb18030_len4_record_shift[pos_1]][pos_2];
            }
        } else {
            return 0;
        }
    }

    /**
     * Looks the given value up in gb18030_cp_to_index_record. Values
     * 55296-59492 are not covered by the table; values above that range are
     * shifted down by 4197 to close the gap.
     *
     * NOTE(review): the table name ("cp_to_index") suggests it maps Unicode
     * code points to GB18030 indices, i.e. the opposite direction of what
     * this method's name promises - confirm against the data file.
     *
     * @param codepoint value to look up
     * @return table entry, or 0 (GB18030_NULL) when there is none
     */
    @Override
    public int toUnicode(int codepoint) {
        int result = 0;
        if (0 <= codepoint && codepoint <= 55295) {
            result = gb18030_cp_to_index_record[codepoint];
        } else if (59493 <= codepoint && codepoint <= 65535) {
            result = gb18030_cp_to_index_record[codepoint - 4197];
        }
        return result == 0 ? GB18030_NULL : result;
    }

    /**
     * Not implemented.
     *
     * @param codePoint ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public int fromUnicode(int codePoint) {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,88 @@
package com.jpexs.helpers.utf8.charset;
import com.jpexs.decompiler.flash.action.parser.ActionParseException;
import com.jpexs.decompiler.flash.action.parser.script.ActionScriptLexer;
import com.jpexs.decompiler.flash.action.parser.script.ParsedSymbol;
import com.jpexs.decompiler.flash.action.parser.script.SymbolType;
import com.jpexs.helpers.utf8.Utf8Helper;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 *
 * GB2312 to unicode and back conversion.
 * Based on https://github.com/MoarVM/MoarVM/blob/master/src/strings/gb2312_codeindex.h
 *
 * The tables are loaded once, when the class is initialized, from the
 * bundled Gb2312data.bin resource.
 */
public class Gb2312 extends AbstractCharsetConverter {

    /** Classpath location of the table data. */
    private static final String DATA_RESOURCE = "/com/jpexs/helpers/utf8/charset/Gb2312data.bin";

    /** Value returned when there is no mapping. */
    static final int GB2312_NULL = -1;

    /* Conversion tables are generated according to mapping from
     * unicode.org-mappings/EASTASIA/GB/GB2312.TXT
     * at https://haible.de/bruno/charsets/conversion-tables/GB2312.html
     * The following tables use EUC form for GB2312 characters.
     * Unicode indexes 1106 - 8212, 9795 - 12287, 12842 - 19967,
     * and 40865 - 65280 don't correspond to gb2312 codepoint.
     * To reduce code length and save memory, these intervals are omitted
     * in the conversion table and indexes are shifted in the function. */
    private static int[][] gb2312_index_to_cp_record = new int[87][94];

    private static int[] gb2312_cp_to_index_record = new int[24380];

    static {
        //Since data is too long to save it directly into Java source, load it from bin
        try (InputStream is = Gb2312.class.getResourceAsStream(DATA_RESOURCE)) {
            if (is == null) {
                // A missing resource is a packaging error; fail loudly instead
                // of terminating the JVM with a "success" exit code.
                throw new IllegalStateException("Missing charset data resource: " + DATA_RESOURCE);
            }
            ActionScriptLexer lexer = new ActionScriptLexer(new InputStreamReader(is, Utf8Helper.charset));
            readTwoDimensionalInt(gb2312_index_to_cp_record, lexer);
            readOneDimensionalInt(gb2312_cp_to_index_record, lexer);
        } catch (IOException | ActionParseException ex) {
            Logger.getLogger(Gb2312.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Converts a GB2312 code (EUC form, high byte * 256 + low byte) to a
     * Unicode code point. Codes below 128 are returned unchanged.
     *
     * @param codePoint GB2312 code
     * @return table entry for the code, or -1 when the code lies outside
     * the 87 x 94 table
     */
    @Override
    public int toUnicode(int codePoint) {
        if (codePoint < 128) {
            return codePoint;
        }
        // Both bytes start at 0xA1 (161) in EUC form.
        int zone = codePoint / 256 - 161;
        int point = codePoint % 256 - 161;
        if (0 <= zone && zone < 87 && 0 <= point && point < 94) {
            return gb2312_index_to_cp_record[zone][point];
        } else {
            return GB2312_NULL;
        }
    }

    /**
     * Converts a Unicode code point to a GB2312 code (EUC form).
     * Code points 0-127 are returned unchanged, mirroring the pass-through
     * in {@link #toUnicode(int)}, so that a round trip of such characters
     * is lossless.
     *
     * @param codePoint Unicode code point
     * @return GB2312 code, or -1 when the code point has no mapping
     */
    @Override
    public int fromUnicode(int codePoint) {
        if (0 <= codePoint && codePoint < 128) {
            return codePoint;
        }
        int result = 0;
        // The table omits the unmapped Unicode intervals; each branch
        // subtracts the total size of the gaps that precede its range.
        if (0 <= codePoint && codePoint <= 1105) {
            result = gb2312_cp_to_index_record[codePoint];
        } else if (8213 <= codePoint && codePoint <= 9794) {
            result = gb2312_cp_to_index_record[codePoint - 7107];
        } else if (12288 <= codePoint && codePoint <= 12841) {
            result = gb2312_cp_to_index_record[codePoint - 9600];
        } else if (19968 <= codePoint && codePoint <= 40864) {
            result = gb2312_cp_to_index_record[codePoint - 16726];
        } else if (65281 <= codePoint && codePoint <= 65510) {
            result = gb2312_cp_to_index_record[codePoint - 41142];
        }
        // A zero table entry marks "no mapping".
        return result == 0 ? GB2312_NULL : result;
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
package com.jpexs.helpers.utf8.charset;
import com.jpexs.decompiler.flash.action.parser.ActionParseException;
import com.jpexs.decompiler.flash.action.parser.script.ActionScriptLexer;
import com.jpexs.decompiler.flash.action.parser.script.ParsedSymbol;
import com.jpexs.decompiler.flash.action.parser.script.SymbolType;
import com.jpexs.helpers.utf8.Utf8Helper;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * ShiftJis to unicode and back conversion.
 * Based on https://github.com/MoarVM/MoarVM/blob/master/src/strings/shiftjis_codeindex.h
 *
 * The tables are loaded once, when the class is initialized, from the
 * bundled ShiftJisdata.bin resource.
 *
 * NOTE(review): the table names suggest that shiftjis_index_to_cp_codepoints
 * maps an index to a Unicode code point and shiftjis_cp_to_index the other
 * way round, yet toUnicode reads the latter and fromUnicode the former.
 * Whether the bundled data was generated to match this usage cannot be told
 * from this class - confirm against the data file.
 */
public class ShiftJis extends AbstractCharsetConverter {

    /** Classpath location of the table data. */
    private static final String DATA_RESOURCE = "/com/jpexs/helpers/utf8/charset/ShiftJisdata.bin";

    // Not referenced by the conversion methods of this class.
    public static final int[][] shiftjis_offset_values = {
        {107, 11},
        {126, 8},
        {141, 11},
        {167, 7},
        {182, 4},
        {187, 15},
        {212, 7},
        {245, 6},
        {277, 4},
        {364, 11},
        {461, 8},
        {493, 8},
        {525, 38},
        {596, 15},
        {644, 13},
        {689, 438},
        {1157, 1},
        {1181, 8},
        {1219, 190},
        {4374, 43},
        {7807, 2908}
    };

    public static final int SHIFTJIS_OFFSET_VALUES_ELEMS = 21;

    public static final int SHIFTJIS_INDEX_TO_CP_CODEPOINTS_ELEMS = 7350;

    public static final int SHIFTJIS_MAX_INDEX = 11103;

    /** Value returned when there is no mapping. */
    private static final int SHIFTJIS_NULL = -1;

    private static int[] shiftjis_index_to_cp_codepoints = new int[7350];

    private static Map<Integer, Integer> shiftjis_cp_to_index = new HashMap<>();

    static {
        //Since data is too long to save it directly into Java source, load it from bin
        try (InputStream is = ShiftJis.class.getResourceAsStream(DATA_RESOURCE)) {
            if (is == null) {
                // A missing resource is a packaging error; fail loudly instead
                // of terminating the JVM with a "success" exit code.
                throw new IllegalStateException("Missing charset data resource: " + DATA_RESOURCE);
            }
            ActionScriptLexer lexer = new ActionScriptLexer(new InputStreamReader(is, Utf8Helper.charset));
            readOneDimensionalInt(shiftjis_index_to_cp_codepoints, lexer);
            readMap(shiftjis_cp_to_index, lexer);
        } catch (IOException | ActionParseException ex) {
            Logger.getLogger(ShiftJis.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Converts a Shift-JIS code to a Unicode code point. Codes below 128
     * are returned unchanged; other codes are looked up in the
     * shiftjis_cp_to_index map.
     *
     * @param codePoint Shift-JIS code
     * @return mapped value, or -1 when the map has no entry for the code
     */
    @Override
    public int toUnicode(int codePoint) {
        if (codePoint < 128) {
            return codePoint;
        }
        Integer mapped = shiftjis_cp_to_index.get(codePoint);
        // readMap only stores non-null values, so null means "no entry".
        return mapped != null ? mapped : SHIFTJIS_NULL;
    }

    /**
     * Converts a Unicode code point to a Shift-JIS code by reading the
     * shiftjis_index_to_cp_codepoints table at that position.
     *
     * @param codePoint Unicode code point
     * @return table entry, or -1 when the code point is negative or beyond
     * the end of the table
     */
    @Override
    public int fromUnicode(int codePoint) {
        if (0 <= codePoint && codePoint < shiftjis_index_to_cp_codepoints.length) {
            return shiftjis_index_to_cp_codepoints[codePoint];
        }
        return SHIFTJIS_NULL;
    }
}

File diff suppressed because it is too large Load Diff