diff --git a/CHANGELOG.md b/CHANGELOG.md
index 219a16928..71c4d02e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file.
 ### Added
 - [#2090] Support for Mochicrypt packed binarydata tags - loading SWF as subtree
 - [#2079] Replace DefineSprite with GIF, Bulk import sprites from GIFs, also from commandline
+- [#116] Show invalid UTF-8 bytes in strings as `{invalid_utf8:xxx}`
 
 ### Fixed
 - Close action on SWF inside DefineBinaryData
diff --git a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFInputStream.java b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFInputStream.java
index e544ddba5..0b26d534f 100644
--- a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFInputStream.java
+++ b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFInputStream.java
@@ -508,7 +508,10 @@ public class SWFInputStream implements AutoCloseable {
             r = readEx();
             if (r == 0) {
                 endDumpLevel();
-                return new String(baos.toByteArray(), swf == null ? Utf8Helper.charsetName : swf.getCharset());
+                if (swf == null || "UTF-8".equals(swf.getCharset())) {
+                    return Utf8Helper.decode(baos.toByteArray());
+                }
+                return new String(baos.toByteArray(), swf.getCharset());
             }
             baos.write(r);
         }
diff --git a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFOutputStream.java b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFOutputStream.java
index d10bf162c..5d9f0365f 100644
--- a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFOutputStream.java
+++ b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/SWFOutputStream.java
@@ -80,6 +80,7 @@ import com.jpexs.decompiler.flash.types.shaperecords.SHAPERECORD;
 import com.jpexs.decompiler.flash.types.shaperecords.StraightEdgeRecord;
 import com.jpexs.decompiler.flash.types.shaperecords.StyleChangeRecord;
 import com.jpexs.helpers.ByteArrayRange;
+import com.jpexs.helpers.utf8.Utf8Helper;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -179,7 +180,12 @@ public class SWFOutputStream extends OutputStream {
      * @throws IOException
      */
     public void writeString(String value) throws IOException {
-        byte[] data = value.getBytes(charset);
+        byte[] data;
+        if ("UTF-8".equals(charset)) {
+            data = Utf8Helper.getBytes(value);
+        } else {
+            data = value.getBytes(charset);
+        }
         for (int i = 0; i < data.length; i++) {
             if (data[i] == 0) {
                 throw new IOException("String should not contain null character.");
diff --git a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/abc/ABCInputStream.java b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/abc/ABCInputStream.java
index 50bda6791..a7517b22f 100644
--- a/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/abc/ABCInputStream.java
+++ b/libsrc/ffdec_lib/src/com/jpexs/decompiler/flash/abc/ABCInputStream.java
@@ -34,6 +34,7 @@ import com.jpexs.decompiler.flash.abc.types.traits.Traits;
 import com.jpexs.decompiler.flash.dumpview.DumpInfo;
 import com.jpexs.decompiler.flash.dumpview.DumpInfoSpecial;
 import com.jpexs.decompiler.flash.dumpview.DumpInfoSpecialType;
+import com.jpexs.helpers.Helper;
 import com.jpexs.helpers.MemoryInputStream;
 import com.jpexs.helpers.utf8.Utf8Helper;
 import java.io.ByteArrayOutputStream;
@@ -523,8 +524,8 @@ public class ABCInputStream implements AutoCloseable {
             stringDataBuffer = new byte[newLength];
         }
 
-        safeRead(length, stringDataBuffer);
-        String r = new String(stringDataBuffer, 0, length, Utf8Helper.charset);
+        safeRead(length, stringDataBuffer);
+        String r = Utf8Helper.decode(stringDataBuffer, 0, length);
         endDumpLevel(r);
         return r;
     }
diff --git a/libsrc/ffdec_lib/src/com/jpexs/helpers/Helper.java b/libsrc/ffdec_lib/src/com/jpexs/helpers/Helper.java
index 9d7d613bc..764d2a9e5 100644
--- a/libsrc/ffdec_lib/src/com/jpexs/helpers/Helper.java
+++ b/libsrc/ffdec_lib/src/com/jpexs/helpers/Helper.java
@@ -1757,4 +1757,5 @@ public class Helper {
         }
         return Integer.parseInt(version);
     }
+
 }
diff --git a/libsrc/ffdec_lib/src/com/jpexs/helpers/utf8/Utf8Helper.java b/libsrc/ffdec_lib/src/com/jpexs/helpers/utf8/Utf8Helper.java
index 55ccc7eb2..c79675099 100644
--- a/libsrc/ffdec_lib/src/com/jpexs/helpers/utf8/Utf8Helper.java
+++ b/libsrc/ffdec_lib/src/com/jpexs/helpers/utf8/Utf8Helper.java
@@ -18,6 +18,7 @@ package com.jpexs.helpers.utf8;
 
 import com.jpexs.helpers.utf8.charset.Gb2312;
 import com.jpexs.helpers.utf8.charset.ShiftJis;
+import java.io.ByteArrayOutputStream;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
@@ -29,6 +30,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  *
@@ -98,13 +101,174 @@ public class Utf8Helper {
         }
     }
 
-    public static byte[] getBytes(String string) {
-        return string.getBytes(charset);
+    /**
+     * Prefix of the textual escape that stands for one undecodable byte.
+     */
+    private static final String INVALID_UTF8_PREFIX = "{invalid_utf8:";
+
+    /**
+     * One complete escape; group 1 holds the decimal value of the byte.
+     * Compiled once because getBytes runs for every string that is written.
+     */
+    private static final Pattern INVALID_UTF8_PATTERN = Pattern.compile("\\{invalid_utf8:([0-9]+)\\}");
+
+    /**
+     * Encodes a string to UTF-8 bytes and turns every
+     * {@code {invalid_utf8:N}} escape with N in 0-255 back into the raw
+     * byte N, so that text produced by {@link #decode(byte[])} is written
+     * back unchanged. Escapes with a larger N are kept as ordinary text.
+     *
+     * @param string Text to encode, must not be null
+     * @return Encoded bytes
+     */
+    public static byte[] getBytes(String string) {
+        if (!string.contains(INVALID_UTF8_PREFIX)) {
+            return string.getBytes(charset);
+        }
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream(string.length());
+        Matcher m = INVALID_UTF8_PATTERN.matcher(string);
+        int textStart = 0;
+        while (m.find()) {
+            int v = parseByteValue(m.group(1));
+            if (v < 0) {
+                // Not a byte value: keep the escape as ordinary text.
+                continue;
+            }
+            // Encode whole runs of text, never single chars, so that
+            // surrogate pairs stay together.
+            byte[] text = string.substring(textStart, m.start()).getBytes(charset);
+            baos.write(text, 0, text.length);
+            baos.write(v);
+            textStart = m.end();
+        }
+        byte[] tail = string.substring(textStart).getBytes(charset);
+        baos.write(tail, 0, tail.length);
+        return baos.toByteArray();
     }
 
     public static int getBytesLength(String string) {
         // todo: make it faster without actually writing it to an array
-        return string.getBytes(charset).length;
+        return getBytes(string).length;
+    }
+
+    /**
+     * Parses the digits of an escape without overflowing on long input.
+     *
+     * @param digits Decimal digits, at least one
+     * @return Value in 0-255, or -1 when the number does not fit a byte
+     */
+    private static int parseByteValue(String digits) {
+        int value = 0;
+        for (int i = 0; i < digits.length(); i++) {
+            value = value * 10 + (digits.charAt(i) - '0');
+            if (value > 0xff) {
+                return -1;
+            }
+        }
+        return value;
+    }
+
+    /**
+     * Decodes a whole array, see {@link #decode(byte[], int, int)}.
+     *
+     * @param data UTF-8 encoded bytes
+     * @return Decoded text
+     */
+    public static String decode(byte[] data) {
+        return decode(data, 0, data.length);
+    }
+
+    private static String escapeInvalidUtf8Char(int v) {
+        return INVALID_UTF8_PREFIX + v + "}";
+    }
+
+    /**
+     * Checks for a well-formed UTF-8 sequence at the given position.
+     * Overlong forms, surrogates and values above U+10FFFF are rejected,
+     * because String decoding would replace them with U+FFFD and the
+     * original bytes would be lost.
+     *
+     * @param data Bytes to inspect
+     * @param pos Index of the first byte of the sequence
+     * @param end Index just after the last usable byte
+     * @return Length of the sequence (1-4), or 0 when it is not well-formed
+     */
+    private static int validSequenceLength(byte[] data, int pos, int end) {
+        int lead = data[pos] & 0xff;
+        if (lead < 0x80) { //0xxxxxxx
+            return 1;
+        }
+        int len;
+        // Allowed range of the second byte; narrower for some lead bytes.
+        int low = 0x80;
+        int high = 0xBF;
+        if (lead >= 0xC2 && lead <= 0xDF) { //110xxxxx, C0 and C1 are overlong
+            len = 2;
+        } else if ((lead & 0xF0) == 0xE0) { //1110xxxx
+            len = 3;
+            if (lead == 0xE0) {
+                low = 0xA0; //below is overlong
+            } else if (lead == 0xED) {
+                high = 0x9F; //above are surrogates
+            }
+        } else if (lead >= 0xF0 && lead <= 0xF4) { //11110xxx up to U+10FFFF
+            len = 4;
+            if (lead == 0xF0) {
+                low = 0x90; //below is overlong
+            } else if (lead == 0xF4) {
+                high = 0x8F; //above is beyond U+10FFFF
+            }
+        } else { //stray 10xxxxxx or unusable lead byte
+            return 0;
+        }
+        if (pos + len > end) { //truncated sequence
+            return 0;
+        }
+        int second = data[pos + 1] & 0xff;
+        if (second < low || second > high) {
+            return 0;
+        }
+        for (int i = 2; i < len; i++) {
+            if ((data[pos + i] & 0xC0) != 0x80) { //must be 10xxxxxx
+                return 0;
+            }
+        }
+        return len;
+    }
+
+    /**
+     * Decodes UTF-8 bytes; every byte that is not part of a well-formed
+     * sequence is shown as {@code {invalid_utf8:N}}, N being its unsigned
+     * decimal value, so that {@link #getBytes(String)} can restore it.
+     *
+     * @param data Buffer with UTF-8 encoded bytes
+     * @param start Index of the first byte to decode
+     * @param length Number of bytes to decode
+     * @return Decoded text
+     * @throws IndexOutOfBoundsException When the range is outside of data
+     */
+    public static String decode(byte[] data, int start, int length) {
+        if (start < 0 || length < 0 || start > data.length - length) {
+            throw new IndexOutOfBoundsException("start=" + start + ", length=" + length + ", data.length=" + data.length);
+        }
+        int end = start + length;
+        StringBuilder sb = new StringBuilder(length);
+        int runStart = start;
+        int pos = start;
+        while (pos < end) {
+            int len = validSequenceLength(data, pos, end);
+            if (len > 0) {
+                pos += len;
+                continue;
+            }
+            sb.append(new String(data, runStart, pos - runStart, charset));
+            sb.append(escapeInvalidUtf8Char(data[pos] & 0xff));
+            pos++;
+            runStart = pos;
+        }
+        sb.append(new String(data, runStart, end - runStart, charset));
+        return sb.toString();
     }
 
     public static char codePointToChar(int codePoint, String charsetName) {
diff --git a/libsrc/ffdec_lib/test/com/jpexs/helpers/Utf8HelperTest.java b/libsrc/ffdec_lib/test/com/jpexs/helpers/Utf8HelperTest.java
new file mode 100644
index 000000000..e3b21d180
--- /dev/null
+++ b/libsrc/ffdec_lib/test/com/jpexs/helpers/Utf8HelperTest.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010-2023 JPEXS, All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.
+ */
+package com.jpexs.helpers;
+
+import com.jpexs.helpers.utf8.Utf8Helper;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests of decoding and encoding strings that contain invalid UTF-8 bytes.
+ *
+ * @author JPEXS
+ */
+public class Utf8HelperTest {
+
+    @DataProvider(name = "samples")
+    public static Object[][] provideSamples() {
+        return new Object[][]{
+            {new byte[] {'A'}, "A"},
+            {new byte[] {'A', (byte)0b10000111, 'B'}, "A{invalid_utf8:135}B"},
+            {new byte[] {'A', (byte)0b11000101, (byte)0b10011001, 'B'}, "AřB"},
+            {new byte[] {'A', (byte)0b11100000, (byte)0b10100000, (byte)0b10000000, 'B'}, "A" + (char)0x0800 + "B"},
+            {new byte[] {'A', (byte)0b11110000, (byte)0b10011101, (byte) 0b10010011, (byte)0b10101100, 'B'}, "A𝓬B"},
+            {new byte[] {'A', (byte)0b11000101}, "A{invalid_utf8:197}"},
+            {new byte[] {'A', (byte)0b11000101, 'B'}, "A{invalid_utf8:197}B"},
+            // overlong encoding of U+0000
+            {new byte[] {(byte)0xC0, (byte)0x80}, "{invalid_utf8:192}{invalid_utf8:128}"},
+            // encoded UTF-16 surrogate U+D800
+            {new byte[] {(byte)0xED, (byte)0xA0, (byte)0x80}, "{invalid_utf8:237}{invalid_utf8:160}{invalid_utf8:128}"},
+            // broken lead byte directly followed by a valid sequence
+            {new byte[] {(byte)0xE0, (byte)0xC5, (byte)0x99}, "{invalid_utf8:224}ř"}
+        };
+    }
+
+    @Test(dataProvider = "samples")
+    public void testInvalidBytes(byte[] data, String expected) {
+        Assert.assertEquals(Utf8Helper.decode(data), expected);
+    }
+
+    @Test(dataProvider = "samples")
+    public void testRoundTrip(byte[] data, String expected) {
+        Assert.assertEquals(Utf8Helper.getBytes(expected), data);
+    }
+
+    @Test
+    public void testDecodeHonorsStart() {
+        byte[] data = {'X', 'Y', (byte)0xC5, (byte)0x99, 'Z'};
+        Assert.assertEquals(Utf8Helper.decode(data, 2, 2), "ř");
+    }
+
+    @Test
+    public void testGetBytesKeepsSurrogatePairs() {
+        byte[] expected = {(byte)0xF0, (byte)0x9D, (byte)0x93, (byte)0xAC, (byte)0x87};
+        Assert.assertEquals(Utf8Helper.getBytes("𝓬{invalid_utf8:135}"), expected);
+    }
+
+    @Test
+    public void testGetBytesKeepsOutOfRangeEscapeAsText() {
+        String text = "{invalid_utf8:256}";
+        Assert.assertEquals(Utf8Helper.getBytes(text), text.getBytes(Utf8Helper.charset));
+    }
+}