mirror of
https://github.com/neoStudiosLCE/neoLegacy.git
synced 2026-05-22 00:08:42 +00:00
feat: add 4JLibs comparison tooling
Tools for comparing 4JLibs binary changes between git refs: - compare-4jlibs.py: Extracts libs, parses symbol tables via dumpbin, demangles with undname, generates structured diff reports - extract_lib.py: Extracts .obj members from COFF .lib archives - ExportLibInfo.java: Ghidra headless script for non-LTCG object files - list-lib-symbols.sh / compare-4jlibs.sh: Shell wrappers
This commit is contained in:
2
tools/ghidra/.gitignore
vendored
Normal file
2
tools/ghidra/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Ghidra analysis output (generated, not tracked)
|
||||
output/report-*/
|
||||
139
tools/ghidra/ExportLibInfo.java
Normal file
139
tools/ghidra/ExportLibInfo.java
Normal file
@@ -0,0 +1,139 @@
|
||||
// Export symbols, functions, and external references from a COFF .lib to a JSON report.
|
||||
// Designed for headless mode: pass output path as first script argument.
|
||||
//
|
||||
// Usage with analyzeHeadless:
|
||||
// analyzeHeadless <projDir> <projName> -import <file.lib> \
|
||||
// -postScript ExportLibInfo.java <output.json> -deleteProject
|
||||
//
|
||||
//@category 4JLibs
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.listing.*;
|
||||
import ghidra.program.model.symbol.*;
|
||||
|
||||
public class ExportLibInfo extends GhidraScript {
|
||||
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
String[] args = getScriptArgs();
|
||||
if (args.length < 1) {
|
||||
printerr("Usage: ExportLibInfo.java <output.json>");
|
||||
return;
|
||||
}
|
||||
|
||||
File outputFile = new File(args[0]);
|
||||
outputFile.getParentFile().mkdirs();
|
||||
|
||||
PrintWriter pw = new PrintWriter(new FileWriter(outputFile, true));
|
||||
|
||||
String programName = currentProgram.getName();
|
||||
Listing listing = currentProgram.getListing();
|
||||
SymbolTable symbolTable = currentProgram.getSymbolTable();
|
||||
ExternalManager extMgr = currentProgram.getExternalManager();
|
||||
|
||||
// Collect functions
|
||||
List<String> functions = new ArrayList<>();
|
||||
FunctionIterator funcIter = listing.getFunctions(true);
|
||||
while (funcIter.hasNext() && !monitor.isCancelled()) {
|
||||
Function f = funcIter.next();
|
||||
String sig = f.getPrototypeString(false, false);
|
||||
String callingConv = f.getCallingConventionName();
|
||||
long size = f.getBody().getNumAddresses();
|
||||
|
||||
functions.add(String.format(
|
||||
" {\"name\": %s, \"entry\": %s, \"signature\": %s, \"callingConvention\": %s, \"size\": %d, \"paramCount\": %d}",
|
||||
jsonStr(f.getName()),
|
||||
jsonStr(f.getEntryPoint().toString()),
|
||||
jsonStr(sig),
|
||||
jsonStr(callingConv),
|
||||
size,
|
||||
f.getParameterCount()
|
||||
));
|
||||
}
|
||||
|
||||
// Collect all symbols (non-function)
|
||||
List<String> symbols = new ArrayList<>();
|
||||
SymbolIterator symIter = symbolTable.getAllSymbols(true);
|
||||
while (symIter.hasNext() && !monitor.isCancelled()) {
|
||||
Symbol sym = symIter.next();
|
||||
if (sym.getSymbolType() == SymbolType.FUNCTION) {
|
||||
continue; // already captured above
|
||||
}
|
||||
if (sym.isExternal()) {
|
||||
continue; // captured below
|
||||
}
|
||||
symbols.add(String.format(
|
||||
" {\"name\": %s, \"type\": %s, \"address\": %s, \"source\": %s}",
|
||||
jsonStr(sym.getName(true)),
|
||||
jsonStr(sym.getSymbolType().toString()),
|
||||
jsonStr(sym.getAddress().toString()),
|
||||
jsonStr(sym.getSource().toString())
|
||||
));
|
||||
}
|
||||
|
||||
// Collect external symbols (imports from other libraries)
|
||||
List<String> externals = new ArrayList<>();
|
||||
symIter = symbolTable.getExternalSymbols();
|
||||
while (symIter.hasNext() && !monitor.isCancelled()) {
|
||||
Symbol sym = symIter.next();
|
||||
String extLib = "";
|
||||
ExternalLocation extLoc = extMgr.getExternalLocation(sym);
|
||||
if (extLoc != null && extLoc.getLibraryName() != null) {
|
||||
extLib = extLoc.getLibraryName();
|
||||
}
|
||||
externals.add(String.format(
|
||||
" {\"name\": %s, \"type\": %s, \"library\": %s}",
|
||||
jsonStr(sym.getName(true)),
|
||||
jsonStr(sym.getSymbolType().toString()),
|
||||
jsonStr(extLib)
|
||||
));
|
||||
}
|
||||
|
||||
// Write JSON object for this program/object-file
|
||||
pw.println("{");
|
||||
pw.println(" \"program\": " + jsonStr(programName) + ",");
|
||||
pw.println(" \"language\": " + jsonStr(currentProgram.getLanguageID().toString()) + ",");
|
||||
pw.println(" \"compiler\": " + jsonStr(currentProgram.getCompilerSpec().getCompilerSpecID().toString()) + ",");
|
||||
|
||||
pw.println(" \"functionCount\": " + functions.size() + ",");
|
||||
pw.println(" \"functions\": [");
|
||||
pw.println(String.join(",\n", functions));
|
||||
pw.println(" ],");
|
||||
|
||||
pw.println(" \"symbolCount\": " + symbols.size() + ",");
|
||||
pw.println(" \"symbols\": [");
|
||||
pw.println(String.join(",\n", symbols));
|
||||
pw.println(" ],");
|
||||
|
||||
pw.println(" \"externalCount\": " + externals.size() + ",");
|
||||
pw.println(" \"externals\": [");
|
||||
pw.println(String.join(",\n", externals));
|
||||
pw.println(" ]");
|
||||
|
||||
pw.println("}");
|
||||
|
||||
pw.flush();
|
||||
pw.close();
|
||||
|
||||
println("ExportLibInfo: wrote " + functions.size() + " functions, " +
|
||||
symbols.size() + " symbols, " + externals.size() + " externals for " +
|
||||
programName + " -> " + outputFile.getAbsolutePath());
|
||||
}
|
||||
|
||||
private String jsonStr(String s) {
|
||||
if (s == null) return "null";
|
||||
return "\"" + s.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\r", "\\r")
|
||||
.replace("\t", "\\t") + "\"";
|
||||
}
|
||||
}
|
||||
632
tools/ghidra/compare-4jlibs.py
Normal file
632
tools/ghidra/compare-4jlibs.py
Normal file
@@ -0,0 +1,632 @@
|
||||
"""Compare 4JLibs between two git refs.
|
||||
|
||||
Extracts .lib files from both refs, parses their symbol tables (using dumpbin
|
||||
or direct ar-archive parsing), demangles MSVC symbols, and generates a
|
||||
structured diff report.
|
||||
|
||||
Usage:
|
||||
    python compare-4jlibs.py [OLD_REF] [NEW_REF] [--filter PATTERN] [--no-demangle] [--json]
|
||||
|
||||
Defaults:
|
||||
OLD_REF = HEAD
|
||||
NEW_REF = upstream/main
|
||||
|
||||
Output:
|
||||
tools/ghidra/output/report-<timestamp>/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Reports are written next to this script, under "output".
OUTPUT_BASE = Path(__file__).resolve().parent / "output"

# The repository root is three levels above this script's location.
REPO_ROOT = OUTPUT_BASE.parent.parent.parent

# Repository-relative directory that holds the 4JLibs .lib files.
LIB_PATH = "Minecraft.Client/Windows64/4JLibs/libs"

# MSVC tool discovery: toolset roots that are searched in order.
_VS2022_ROOT = r"C:\Program Files\Microsoft Visual Studio\2022"
MSVC_SEARCH_PATHS = [
    Path(r"C:\Program Files (x86)\Microsoft Visual Studio\18\BuildTools\VC\Tools\MSVC"),
] + [
    Path(_VS2022_ROOT + "\\" + edition + r"\VC\Tools\MSVC")
    for edition in ("Community", "Professional", "Enterprise")
]
|
||||
|
||||
|
||||
def find_msvc_tool(name):
    """Find an MSVC tool (dumpbin.exe, undname.exe) in VS installations.

    Each directory in MSVC_SEARCH_PATHS is scanned for toolset version
    sub-directories, newest version first, and the first
    ``<version>/bin/Hostx64/x64/<name>`` file found is returned.

    Args:
        name: Executable file name, e.g. ``"dumpbin.exe"``.

    Returns:
        The tool path as a string, or None when no installation provides it.
    """
    def version_key(entry):
        # Compare version components numerically so 14.10 ranks above 14.9;
        # non-numeric components sort below any number.
        parts = tuple(int(p) if p.isdigit() else -1 for p in entry.name.split("."))
        return (parts, entry.name)

    for base in MSVC_SEARCH_PATHS:
        if not base.is_dir():
            continue
        for version_dir in sorted(base.iterdir(), key=version_key, reverse=True):
            tool = version_dir / "bin" / "Hostx64" / "x64" / name
            if tool.is_file():
                return str(tool)
    return None
|
||||
|
||||
|
||||
# Resolved once at import time; either value is None when the tool is not installed.
DUMPBIN, UNDNAME = (find_msvc_tool(exe) for exe in ("dumpbin.exe", "undname.exe"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Symbol table parsing (direct, no external tools needed)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_lib_symbols_direct(lib_path):
    """Parse the first linker member of a .lib to get all public symbols.

    The first archive member must be named "/". Its payload is a big-endian
    symbol count, that many 4-byte offsets, then NUL-terminated symbol names.

    Args:
        lib_path: Path of the archive to read.

    Returns:
        List of symbol names in table order. Empty when the file is not an
        ar archive, has no leading "/" member, or is truncated/corrupt.
    """
    with open(lib_path, "rb") as f:
        if f.read(8) != b"!<arch>\n":
            return []

        header = f.read(60)
        if len(header) < 60:
            return []  # archive ends before the first member header
        if header[0:16].decode("ascii", errors="replace").strip() != "/":
            return []
        try:
            size = int(header[48:58].decode("ascii").strip())
        except ValueError:
            return []  # size field is not a decimal number
        if size < 4:
            return []
        data = f.read(size)

    if len(data) < 4:
        return []

    # First linker member (big-endian)
    num_symbols = struct.unpack(">I", data[0:4])[0]
    string_data = data[4 + num_symbols * 4:]

    # Only NUL-terminated names count: split() leaves the unterminated tail
    # (or an empty string) as the final element, which is dropped.
    terminated = string_data.split(b"\x00")[:-1]
    return [raw.decode("ascii", errors="replace") for raw in terminated[:num_symbols]]
|
||||
|
||||
|
||||
def parse_lib_symbols_dumpbin(lib_path):
    """Use dumpbin /LINKERMEMBER to get symbols (more reliable for edge cases).

    Returns:
        List of symbol names parsed from the "public symbols" section of the
        dumpbin output, or None when dumpbin is unavailable, cannot be
        started, or times out.
    """
    if not DUMPBIN:
        return None

    hex_digits = frozenset("0123456789ABCDEFabcdef")
    command = [DUMPBIN, "/LINKERMEMBER:2", str(lib_path)]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=60)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    found = []
    collecting = False
    for raw in proc.stdout.splitlines():
        text = raw.strip()
        if "public symbols" in text:
            collecting = True
            continue
        if not (collecting and text):
            continue
        # Format: " offset symbol_name"
        fields = text.split(None, 1)
        if len(fields) == 2 and hex_digits.issuperset(fields[0]):
            found.append(fields[1])
        elif not text[0].isdigit():
            # Any other line that does not start with a digit ends the section.
            collecting = False
    return found
|
||||
|
||||
|
||||
def parse_lib_members(lib_path):
    """Extract member (object file) names from a .lib archive.

    Walks the ar member headers, skipping "/" linker members and using the
    "//" long-names member to resolve "/<offset>" references.

    Args:
        lib_path: Path of the archive to read.

    Returns:
        List of dicts with keys "name", "size", "machine" (first two bytes of
        the member as a little-endian hex string) and "is_ltcg". Parsing
        stops quietly at the first malformed or truncated header.
    """
    members = []
    with open(lib_path, "rb") as f:
        if f.read(8) != b"!<arch>\n":
            return members

        long_names = b""

        while True:
            header = f.read(60)
            # Validate the end marker before trusting any other header field.
            if len(header) < 60 or header[58:60] != b"\x60\x0a":
                break
            try:
                size = int(header[48:58].decode("ascii").strip())
            except ValueError:
                break  # corrupt size field
            if size < 0:
                break

            padding = size % 2  # member data is padded to an even offset
            raw_name = header[0:16].decode("ascii", errors="replace").rstrip()

            if raw_name == "/":
                f.seek(size + padding, 1)
                continue
            if raw_name == "//":
                long_names = f.read(size)
                f.seek(padding, 1)
                continue

            if raw_name.startswith("/") and raw_name[1:].isdigit():
                # "/<offset>" points into the long-names member.
                offset = int(raw_name[1:])
                end = long_names.find(b"\x00", offset)
                if end == -1:
                    end = long_names.find(b"\n", offset)
                if end == -1:
                    end = len(long_names)
                name = long_names[offset:end].decode("ascii", errors="replace").rstrip("/")
            elif raw_name.endswith("/"):
                # Short names carry a "/" terminator; drop it so short and
                # long names are reported in the same form.
                name = raw_name[:-1]
            else:
                name = raw_name

            # Read first 2 bytes to check machine type
            member_data = f.read(min(size, 2))
            machine = struct.unpack("<H", member_data)[0] if len(member_data) == 2 else 0
            f.seek(size - len(member_data) + padding, 1)

            members.append({
                "name": name,
                "size": size,
                "machine": f"0x{machine:04x}",
                # NOTE(review): 0x01f2 is treated as the LTCG marker here;
                # confirm against the toolchain that produced these libs.
                "is_ltcg": machine == 0x01f2,
            })

    return members
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Demangling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def demangle_symbols(mangled_symbols):
    """Demangle MSVC-mangled symbols using undname.exe.

    undname takes symbols as command-line arguments (not stdin) and prints,
    for each one:
        Undecoration of :- "??0CProfile@@QAA@XZ"
        is :- "public: __cdecl CProfile::CProfile(void)"

    Args:
        mangled_symbols: Sequence of mangled names.

    Returns:
        Dict mapping mangled name -> demangled text for every symbol undname
        reported. Empty when undname is unavailable or no symbols are given.
        Processing stops early if undname disappears or times out.
    """
    if not UNDNAME or not mangled_symbols:
        return {}

    # Batches are capped both by count and by total length: long template
    # names can exceed the Windows command-line limit (about 32K characters)
    # well before 100 symbols are reached.
    max_batch_count = 100
    max_batch_chars = 30000

    def batches():
        batch, chars = [], 0
        for sym in mangled_symbols:
            cost = len(sym) + 3  # quotes plus separating space
            if batch and (len(batch) >= max_batch_count or chars + cost > max_batch_chars):
                yield batch
                batch, chars = [], 0
            batch.append(sym)
            chars += cost
        if batch:
            yield batch

    demangled = {}
    for batch in batches():
        try:
            result = subprocess.run(
                [UNDNAME] + batch,
                capture_output=True, text=True, timeout=60
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            break
        except OSError:
            # This batch could not be launched (e.g. a single over-long
            # symbol); skip it and keep demangling the rest.
            continue

        current_mangled = None
        for line in result.stdout.splitlines():
            line = line.strip()
            if line.startswith('Undecoration of :- "'):
                current_mangled = line.split('"')[1]
            elif line.startswith('is :- "') and current_mangled:
                demangled[current_mangled] = line.split('"')[1]
                current_mangled = None

    return demangled
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Classification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def classify_symbol(mangled, demangled=None):
    """Classify a symbol into a category for organized reporting.

    Args:
        mangled: The MSVC-mangled symbol name.
        demangled: Optional demangled text; when given it is used for the
            name-based checks ("std::", "vftable").

    Returns:
        One of "std/compiler", "vtable", "constructor", "destructor",
        "operator", "static_data", "4j_interface", "4j_class" or "other".
    """
    name = demangled or mangled

    # Filter out std:: library symbols and string literals (??_C@)
    if "std::" in name or mangled.startswith("??_C@"):
        return "std/compiler"

    # Virtual function table. This must run before the generic "??" check
    # below, which would otherwise claim every "??_7" symbol as an operator.
    if mangled.startswith("??_7") or "vftable" in name.lower():
        return "vtable"

    # Constructor/destructor
    if mangled.startswith("??0"):
        return "constructor"
    if mangled.startswith("??1"):
        return "destructor"

    # Operators (any remaining special name)
    if mangled.startswith("??"):
        return "operator"

    # Static data
    if mangled.startswith("?_") and "@" in mangled:
        return "static_data"

    # Check class membership
    if "@C_4J" in mangled or "@C_4j" in mangled:
        return "4j_interface"

    known_classes = ("CAwardManager", "CProfile", "CProfileData", "CRichPresence",
                     "CSys", "CStorage", "CInput", "CRender", "CRenderer")
    # "@Name@" also covers the "@Name@@" form.
    if any(f"@{prefix}@" in mangled for prefix in known_classes):
        return "4j_class"

    return "other"
|
||||
|
||||
|
||||
def extract_class_name(mangled):
    """Try to extract the class name from a mangled symbol.

    Args:
        mangled: The MSVC-mangled symbol name.

    Returns:
        The class name, or None when the symbol matches neither pattern.
    """
    # Pattern: ?Method@ClassName@@...
    m = re.match(r"\?\??\d?(\w+)@(\w+)@@", mangled)
    if m:
        return m.group(2)

    # Special members have no method component, so the pattern above cannot
    # match them: ??0Class@@ (constructor), ??1Class@@ (destructor),
    # ??4Class@@ (operator), ??_7Class@@ (vftable).
    m = re.match(r"\?\?_?[0-9A-Z](\w+)@@", mangled)
    if m:
        return m.group(1)

    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Git operations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def git_extract_lib(ref, lib_rel_path, output_path):
    """Extract a file from a git ref to a local path.

    Args:
        ref: Git ref to read from.
        lib_rel_path: Repository-relative path of the file.
        output_path: Destination path; its parent directory is created.

    Returns:
        True when the file was written. False when the object does not exist
        at that ref, git fails, or any exception occurs (a warning is printed
        to stderr in the exception case).
    """
    blob_spec = f"{ref}:{lib_rel_path}"
    repo = str(REPO_ROOT)
    try:
        # Probe for existence first, then fetch the blob contents.
        probe = subprocess.run(
            ["git", "cat-file", "-e", blob_spec],
            capture_output=True, cwd=repo
        )
        if probe.returncode != 0:
            return False

        shown = subprocess.run(
            ["git", "show", blob_spec],
            capture_output=True, cwd=repo
        )
        if shown.returncode != 0:
            return False

        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "wb") as out:
            out.write(shown.stdout)
    except Exception as e:
        print(f" WARNING: Failed to extract {lib_rel_path} from {ref}: {e}", file=sys.stderr)
        return False
    return True
|
||||
|
||||
|
||||
def git_changed_libs(old_ref, new_ref):
    """Get list of .lib files that changed between two refs.

    When ``git diff`` fails or reports nothing, every file under LIB_PATH at
    ``new_ref`` is listed instead.

    Returns:
        Repository-relative paths ending in ".lib".
    """
    git_kwargs = dict(capture_output=True, text=True, cwd=str(REPO_ROOT))

    listing = subprocess.run(
        ["git", "diff", "--name-only", old_ref, new_ref, "--", f"{LIB_PATH}/*.lib"],
        **git_kwargs
    )
    diff_failed = listing.returncode != 0
    if diff_failed or not listing.stdout.strip():
        # Fallback: list all libs at new ref
        listing = subprocess.run(
            ["git", "ls-tree", "--name-only", "-r", new_ref, "--", f"{LIB_PATH}/"],
            **git_kwargs
        )

    paths = listing.stdout.strip().splitlines()
    return [path for path in paths if path.endswith(".lib")]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _symbol_section_lines(title, marker, symbols, demangled):
    """Render one added/removed section: symbols grouped by class, sorted."""
    section = [title]
    by_class = defaultdict(list)
    for s in symbols:
        cls = extract_class_name(s) or "(global)"
        by_class[cls].append(demangled.get(s, s))
    for cls in sorted(by_class):
        section.append(f" [{cls}]")
        section.extend(f" {marker} {d}" for d in sorted(by_class[cls]))
    section.append("")
    return section


def generate_lib_report(lib_name, old_syms, new_syms, old_demangled, new_demangled,
                        old_members, new_members, old_size, new_size):
    """Generate a detailed comparison report for one library.

    Args:
        lib_name: Library name used as the report title.
        old_syms, new_syms: Symbol lists, or None when that side is absent.
        old_demangled, new_demangled: Dicts mapping mangled -> demangled name.
        old_members, new_members: Member dicts (each with a "name"), or None.
        old_size, new_size: File sizes in bytes, or None.

    Returns:
        The report as a single newline-joined string.
    """
    old_demangled = old_demangled or {}
    new_demangled = new_demangled or {}

    lines = []
    lines.append(f"{'=' * 70}")
    lines.append(f" {lib_name}")
    lines.append(f"{'=' * 70}")
    lines.append("")

    # Status
    if old_syms is None and new_syms is not None:
        lines.append("STATUS: ADDED (new library)")
    elif old_syms is not None and new_syms is None:
        lines.append("STATUS: DELETED")
    else:
        lines.append("STATUS: MODIFIED")
    lines.append("")

    # Size
    if old_size and new_size:
        delta = new_size - old_size
        # Truncate toward zero so a tiny shrink reads 0%, not -1%
        # (floor division rounds negative values away from zero).
        pct = int(delta * 100 / old_size)
        sign = "+" if delta > 0 else ""
        lines.append(f"SIZE: {old_size:,} -> {new_size:,} bytes ({sign}{delta:,}, {sign}{pct}%)")
    elif new_size:
        lines.append(f"SIZE: (new) {new_size:,} bytes")
    elif old_size:
        lines.append(f"SIZE: {old_size:,} bytes (deleted)")
    lines.append("")

    # Members
    if old_members or new_members:
        old_member_names = {m["name"] for m in (old_members or [])}
        new_member_names = {m["name"] for m in (new_members or [])}
        lines.append(f"OBJECT FILES: {len(old_member_names)} -> {len(new_member_names)}")
        added_m = new_member_names - old_member_names
        removed_m = old_member_names - new_member_names
        if added_m:
            lines.append(f" + Added: {', '.join(sorted(added_m))}")
        if removed_m:
            lines.append(f" - Removed: {', '.join(sorted(removed_m))}")
        lines.append("")

    old_set = set(old_syms or [])
    new_set = set(new_syms or [])

    # Filter out std/compiler symbols for the main diff. The demangled name
    # is passed along because the "std::" test can only match demangled text.
    old_user = {s for s in old_set
                if classify_symbol(s, old_demangled.get(s)) != "std/compiler"}
    new_user = {s for s in new_set
                if classify_symbol(s, new_demangled.get(s)) != "std/compiler"}

    old_std = old_set - old_user
    new_std = new_set - new_user

    lines.append(f"SYMBOLS: {len(old_set)} -> {len(new_set)} total")
    lines.append(f" User symbols: {len(old_user)} -> {len(new_user)}")
    lines.append(f" Std/compiler: {len(old_std)} -> {len(new_std)}")
    lines.append("")

    # Added / removed symbols (grouped by class)
    added = sorted(new_user - old_user)
    removed = sorted(old_user - new_user)
    unchanged = old_user & new_user

    if added:
        lines.extend(_symbol_section_lines(
            f"+++ ADDED SYMBOLS ({len(added)}) +++", "+", added, new_demangled))

    if removed:
        lines.extend(_symbol_section_lines(
            f"--- REMOVED SYMBOLS ({len(removed)}) ---", "-", removed, old_demangled))

    lines.append(f"UNCHANGED: {len(unchanged)} symbols")
    lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Command-line entry point: compare 4JLibs between two git refs.

    Steps: find changed .lib files, extract both versions from git, parse
    symbols and archive members, optionally demangle, then write per-library
    reports, a combined summary.txt and (with --json) data.json into a
    timestamped directory under OUTPUT_BASE.
    """
    parser = argparse.ArgumentParser(description="Compare 4JLibs between git refs")
    parser.add_argument("old_ref", nargs="?", default="HEAD", help="Old git ref (default: HEAD)")
    parser.add_argument("new_ref", nargs="?", default="upstream/main", help="New git ref (default: upstream/main)")
    parser.add_argument("--filter", "-f", default="", help="Only compare libs matching this pattern")
    parser.add_argument("--no-demangle", action="store_true", help="Skip demangling")
    parser.add_argument("--json", action="store_true", help="Also output JSON data")
    args = parser.parse_args()

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    report_dir = OUTPUT_BASE / f"report-{timestamp}"
    report_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 56)
    print(" 4JLibs Comparison Tool")
    print("=" * 56)
    print(f" Old ref: {args.old_ref}")
    print(f" New ref: {args.new_ref}")
    print(f" Filter: {args.filter or '<all>'}")
    print(f" Demangle: {not args.no_demangle}")
    print(f" dumpbin: {'found' if DUMPBIN else 'not found (using direct parsing)'}")
    print(f" undname: {'found' if UNDNAME else 'not found (no demangling)'}")
    print(f" Output: {report_dir}")
    print()

    # Step 1: Find changed libs
    print("[1/4] Finding changed libraries...")
    changed_libs = git_changed_libs(args.old_ref, args.new_ref)
    if args.filter:
        changed_libs = [lib for lib in changed_libs if args.filter in os.path.basename(lib)]

    if not changed_libs:
        print(" No matching .lib changes found.")
        return

    for lib in changed_libs:
        print(f" {os.path.basename(lib)}")
    print()

    # Step 2: Extract libs from git
    print("[2/4] Extracting libraries from git...")
    old_dir = report_dir / "old"
    new_dir = report_dir / "new"

    # name -> dict with old_/new_ path, size, syms and members
    libs = {}
    for lib_rel in changed_libs:
        # Strip only the extension; replace() would also remove ".lib"
        # occurring in the middle of a file name.
        name = os.path.splitext(os.path.basename(lib_rel))[0]
        old_path = old_dir / f"{name}.lib"
        new_path = new_dir / f"{name}.lib"

        old_ok = git_extract_lib(args.old_ref, lib_rel, str(old_path))
        new_ok = git_extract_lib(args.new_ref, lib_rel, str(new_path))

        print(f" {name}: old={'found' if old_ok else 'N/A'} new={'found' if new_ok else 'N/A'}")
        libs[name] = {
            "old_path": str(old_path) if old_ok else None,
            "new_path": str(new_path) if new_ok else None,
            "old_size": old_path.stat().st_size if old_ok else None,
            "new_size": new_path.stat().st_size if new_ok else None,
            "old_syms": None,
            "new_syms": None,
            "old_members": None,
            "new_members": None,
        }
    print()

    # Step 3: Parse symbols and generate diffs
    print("[3/4] Parsing symbols...")
    all_reports = []
    json_data = {}
    all_mangled_to_demangle = set()

    for name in sorted(libs):
        info = libs[name]
        print(f" Parsing {name}...")

        for side, label in (("old", "Old"), ("new", "New")):
            path = info[f"{side}_path"]
            if not path:
                continue
            # dumpbin is preferred; fall back to direct parsing when it is
            # unavailable or finds nothing.
            syms = parse_lib_symbols_dumpbin(path) or parse_lib_symbols_direct(path)
            members = parse_lib_members(path)
            print(f" {label}: {len(syms)} symbols, {len(members)} objects")
            info[f"{side}_syms"] = syms
            info[f"{side}_members"] = members

            # Collect symbols needing demangling
            if not args.no_demangle and syms:
                all_mangled_to_demangle.update(syms)
    print()

    # Step 3b: Batch demangle (one shared table serves both sides)
    demangled = {}
    if not args.no_demangle and all_mangled_to_demangle:
        print(f" Demangling {len(all_mangled_to_demangle)} unique symbols...")
        demangled = demangle_symbols(sorted(all_mangled_to_demangle))
        print(f" Demangled {len(demangled)} symbols")
        print()

    # Step 4: Generate reports
    print("[4/4] Generating reports...")
    diff_dir = report_dir / "diff"
    diff_dir.mkdir(exist_ok=True)

    for name in sorted(libs):
        info = libs[name]
        old_syms, new_syms = info["old_syms"], info["new_syms"]

        report = generate_lib_report(
            name, old_syms, new_syms, demangled, demangled,
            info["old_members"], info["new_members"],
            info["old_size"], info["new_size"]
        )
        all_reports.append(report)

        # Write individual report
        (diff_dir / f"{name}.txt").write_text(report, encoding="utf-8")

        if args.json:
            json_data[name] = {
                "old_size": info["old_size"],
                "new_size": info["new_size"],
                "old_symbol_count": len(old_syms) if old_syms else 0,
                "new_symbol_count": len(new_syms) if new_syms else 0,
                "added": sorted(set(new_syms or []) - set(old_syms or [])),
                "removed": sorted(set(old_syms or []) - set(new_syms or [])),
                "old_members": info["old_members"],
                "new_members": info["new_members"],
            }

    # Write combined report
    summary = []
    summary.append("=" * 70)
    summary.append(" 4JLibs Comparison Report")
    summary.append("=" * 70)
    summary.append(f" Old ref: {args.old_ref}")
    summary.append(f" New ref: {args.new_ref}")
    summary.append(f" Generated: {datetime.now().isoformat()}")
    summary.append("")
    summary.append("-" * 70)
    summary.append(" Quick Summary")
    summary.append("-" * 70)

    for name in sorted(libs):
        old_syms, new_syms = libs[name]["old_syms"], libs[name]["new_syms"]
        old_set = set(old_syms or [])
        new_set = set(new_syms or [])
        added = len(new_set - old_set)
        removed = len(old_set - new_set)

        # Same rules as the per-library report, so both always agree.
        if old_syms is None and new_syms is not None:
            status = "ADDED"
        elif old_syms is not None and new_syms is None:
            status = "DELETED"
        else:
            status = "MODIFIED"

        summary.append(f" {name:30s} {status:10s} +{added} -{removed} symbols")

    summary.append("")
    summary.append("=" * 70)
    summary.append("")

    full_report = "\n".join(summary) + "\n\n" + "\n\n".join(all_reports)
    summary_path = report_dir / "summary.txt"
    summary_path.write_text(full_report, encoding="utf-8")

    if args.json:
        json_path = report_dir / "data.json"
        json_path.write_text(json.dumps(json_data, indent=2), encoding="utf-8")

    print()
    print("\n".join(summary))
    print()
    print(f"Full report: {summary_path}")
    print(f"Per-lib diffs: {report_dir / 'diff'}")
    if args.json:
        print(f"JSON data: {report_dir / 'data.json'}")
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
344
tools/ghidra/compare-4jlibs.sh
Normal file
344
tools/ghidra/compare-4jlibs.sh
Normal file
@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env bash
|
||||
# compare-4jlibs.sh - Compare 4JLibs between two git refs using Ghidra headless analysis.
|
||||
#
|
||||
# Extracts .lib files from both refs, runs Ghidra headless to export symbols/functions,
|
||||
# then generates a structured diff report.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/ghidra/compare-4jlibs.sh [OLD_REF] [NEW_REF] [LIB_FILTER]
|
||||
#
|
||||
# Arguments:
|
||||
# OLD_REF - Git ref for the old version (default: HEAD)
|
||||
# NEW_REF - Git ref for the new version (default: upstream/main)
|
||||
# LIB_FILTER - Optional: only compare libs matching this pattern (e.g. "4J_Input")
|
||||
#
|
||||
# Environment:
|
||||
# GHIDRA_HOME - Path to Ghidra installation
|
||||
# (default: C:/Users/revela/Documents/Minecraft/Libraries/ghidra_12.0.4_PUBLIC)
|
||||
#
|
||||
# Output:
|
||||
# tools/ghidra/output/report-<timestamp>/
|
||||
# old/ - Extracted old .lib files
|
||||
# new/ - Extracted new .lib files
|
||||
# analysis/ - Ghidra JSON exports (old_*.json, new_*.json)
|
||||
# diff/ - Per-library diff reports
|
||||
# summary.txt - Overall summary of changes
|
||||
|
||||
set -euo pipefail

# Resolve everything relative to this script so it can be launched from any
# working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
GHIDRA_HOME="${GHIDRA_HOME:-C:/Users/revela/Documents/Minecraft/Libraries/ghidra_12.0.4_PUBLIC}"
HEADLESS="$GHIDRA_HOME/support/analyzeHeadless"

# Fail fast when Ghidra cannot be found. Without this check every library
# analysis below only logs a warning and the final report is silently empty.
if [ ! -f "$HEADLESS" ]; then
    echo "ERROR: analyzeHeadless not found at: $HEADLESS" >&2
    echo "       Set GHIDRA_HOME to your Ghidra installation directory." >&2
    exit 1
fi

# Positional arguments (all optional).
OLD_REF="${1:-HEAD}"
NEW_REF="${2:-upstream/main}"
LIB_FILTER="${3:-}"

# Each run writes into its own timestamped report directory.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
OUTPUT_DIR="$SCRIPT_DIR/output/report-$TIMESTAMP"
OLD_DIR="$OUTPUT_DIR/old"
NEW_DIR="$OUTPUT_DIR/new"
ANALYSIS_DIR="$OUTPUT_DIR/analysis"
DIFF_DIR="$OUTPUT_DIR/diff"
PROJECT_DIR="$OUTPUT_DIR/ghidra-projects"

mkdir -p "$OLD_DIR" "$NEW_DIR" "$ANALYSIS_DIR" "$DIFF_DIR" "$PROJECT_DIR"

# Repository-relative directory that holds the 4JLibs .lib archives.
LIB_PATH="Minecraft.Client/Windows64/4JLibs/libs"

echo "============================================"
echo " 4JLibs Comparison Tool (Ghidra Headless)"
echo "============================================"
echo ""
echo " Old ref: $OLD_REF"
echo " New ref: $NEW_REF"
echo " Filter: ${LIB_FILTER:-<all>}"
echo " Output: $OUTPUT_DIR"
echo " Ghidra: $GHIDRA_HOME"
echo ""
|
||||
|
||||
# -------------------------------------------------------
# Step 1: Extract .lib files from both git refs
# -------------------------------------------------------
echo "[1/4] Extracting .lib files from git..."

cd "$REPO_ROOT"

# Validate both refs up front. Otherwise a typo (or a missing "upstream"
# remote) is swallowed by the error suppression below and shows up only as
# the misleading "No .lib files found." message.
for ref in "$OLD_REF" "$NEW_REF"; do
    if ! git rev-parse --verify --quiet "${ref}^{commit}" >/dev/null; then
        echo "ERROR: '$ref' is not a valid git ref in $REPO_ROOT" >&2
        exit 1
    fi
done

# Get list of .lib files that changed between the two refs
CHANGED_LIBS=$(git diff --name-only "$OLD_REF" "$NEW_REF" -- "$LIB_PATH/*.lib" 2>/dev/null || true)

if [ -z "$CHANGED_LIBS" ]; then
    echo " No .lib file changes found between $OLD_REF and $NEW_REF"
    echo " Falling back to listing all libs at $NEW_REF..."
    CHANGED_LIBS=$(git ls-tree --name-only -r "$NEW_REF" -- "$LIB_PATH/" | grep '\.lib$' || true)
fi

if [ -z "$CHANGED_LIBS" ]; then
    echo "ERROR: No .lib files found."
    exit 1
fi

echo " Changed libraries:"
# Read one path per line instead of word-splitting the list, so paths that
# contain spaces stay intact.
while IFS= read -r lib; do
    [ -n "$lib" ] || continue

    basename "$lib"
    LIBNAME=$(basename "$lib" .lib)

    # Apply filter if specified
    if [ -n "$LIB_FILTER" ] && [[ "$LIBNAME" != *"$LIB_FILTER"* ]]; then
        continue
    fi

    # Extract old version (may not exist if newly added)
    if git cat-file -e "$OLD_REF:$lib" 2>/dev/null; then
        git show "$OLD_REF:$lib" > "$OLD_DIR/$LIBNAME.lib"
        echo " old: extracted $LIBNAME.lib ($(wc -c < "$OLD_DIR/$LIBNAME.lib") bytes)"
    else
        echo " old: $LIBNAME.lib does not exist at $OLD_REF"
    fi

    # Extract new version (may not exist if deleted)
    if git cat-file -e "$NEW_REF:$lib" 2>/dev/null; then
        git show "$NEW_REF:$lib" > "$NEW_DIR/$LIBNAME.lib"
        echo " new: extracted $LIBNAME.lib ($(wc -c < "$NEW_DIR/$LIBNAME.lib") bytes)"
    else
        echo " new: $LIBNAME.lib does not exist at $NEW_REF (deleted)"
    fi
done <<< "$CHANGED_LIBS"
echo ""
|
||||
|
||||
# -------------------------------------------------------
# Step 2: Run Ghidra headless analysis on each .lib
# -------------------------------------------------------
echo "[2/4] Running Ghidra headless analysis..."

# analyze_lib LIB_FILE LABEL
#
# Runs Ghidra headless on LIB_FILE and exports its symbols through
# ExportLibInfo.java to "$ANALYSIS_DIR/<LABEL>_<libname>.json".
#   LIB_FILE - path to the extracted .lib archive
#   LABEL    - "old" or "new"; used as the prefix of every output file
# Ghidra's console output goes to "<LABEL>_<libname>_ghidra.log" in the same
# directory. A failing Ghidra run is reported as a warning and does not abort
# the script.
analyze_lib() {
    local lib_file="$1"
    local label="$2" # "old" or "new"
    local libname
    libname=$(basename "$lib_file" .lib)
    local out_json="$ANALYSIS_DIR/${label}_${libname}.json"
    local proj_dir="$PROJECT_DIR/${label}_${libname}"

    mkdir -p "$proj_dir"

    echo " Analyzing ${label}/${libname}.lib ..."

    # Run Ghidra headless: import the .lib, analyze, run our export script, then delete the project
    "$HEADLESS" "$proj_dir" "proj" \
        -import "$lib_file" \
        -postScript ExportLibInfo.java "$out_json" \
        -scriptPath "$SCRIPT_DIR" \
        -deleteProject \
        -analysisTimeoutPerFile 300 \
        -max-cpu 4 \
        > "$ANALYSIS_DIR/${label}_${libname}_ghidra.log" 2>&1 || {
        echo " WARNING: Ghidra analysis had issues for ${label}/${libname}. Check log."
    }

    if [ -f "$out_json" ]; then
        local func_count
        # grep -c already prints "0" when nothing matches but exits with
        # status 1; falling back to `echo "0"` would yield "0<newline>0".
        # Only neutralise the exit status and default an empty result.
        func_count=$(grep -c '"name"' "$out_json" 2>/dev/null || true)
        func_count=${func_count:-0}
        echo " Done: $out_json ($func_count entries)"
    else
        echo " WARNING: No output generated for ${label}/${libname}"
    fi
}
|
||||
|
||||
# Analyze every extracted archive: the old set first, then the new set.
for label in old new; do
    if [ "$label" = "old" ]; then
        lib_dir="$OLD_DIR"
    else
        lib_dir="$NEW_DIR"
    fi

    for lib_file in "$lib_dir"/*.lib; do
        # An unmatched glob stays literal; skip anything that is not a file.
        if [ ! -f "$lib_file" ]; then
            continue
        fi
        analyze_lib "$lib_file" "$label"
    done
done
echo ""
|
||||
|
||||
# -------------------------------------------------------
# Step 3: Generate diff reports
# -------------------------------------------------------
echo "[3/4] Generating diff reports..."

# extract_json_strings KEY FILE
#
# Prints the sorted, de-duplicated `"KEY": "value"` pairs found in FILE.
# Uses POSIX ERE (no grep -P) so it also works with non-GNU grep, and never
# fails when there are no matches, which would otherwise abort the script
# under `set -e -o pipefail`.
extract_json_strings() {
    local key="$1"
    local file="$2"
    { grep -oE "\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" "$file" || true; } | sort -u
}

# generate_diff LIBNAME
#
# Writes "$DIFF_DIR/<LIBNAME>.diff.txt" by comparing the old and new JSON
# exports in "$ANALYSIS_DIR". The report starts with a STATUS line
# (DELETED / ADDED / MODIFIED); for MODIFIED libraries it lists removed and
# added names, signature differences and the archive size change.
# The status is decided by which JSON exports exist, so a library whose
# analysis produced no JSON is reported the same way as a missing library.
generate_diff() {
    local libname="$1"
    local old_json="$ANALYSIS_DIR/old_${libname}.json"
    local new_json="$ANALYSIS_DIR/new_${libname}.json"
    local diff_file="$DIFF_DIR/${libname}.diff.txt"

    echo " Diffing $libname..."
    echo "=== $libname ===" > "$diff_file"
    echo "" >> "$diff_file"

    # Handle deleted libs
    if [ ! -f "$new_json" ]; then
        echo "STATUS: DELETED (library removed in new version)" >> "$diff_file"
        echo "" >> "$diff_file"
        if [ -f "$old_json" ]; then
            echo "--- Functions that were in old version ---" >> "$diff_file"
            # `|| true`: zero matches, or grep receiving SIGPIPE once head has
            # read 200 lines, must not abort the script under pipefail.
            { grep '"name"' "$old_json" || true; } | head -200 >> "$diff_file" || true
        fi
        return
    fi

    # Handle newly added libs
    if [ ! -f "$old_json" ]; then
        echo "STATUS: ADDED (library is new in new version)" >> "$diff_file"
        echo "" >> "$diff_file"
        echo "--- Functions in new version ---" >> "$diff_file"
        { grep '"name"' "$new_json" || true; } | head -200 >> "$diff_file" || true
        return
    fi

    # Both exist - compare
    echo "STATUS: MODIFIED" >> "$diff_file"
    echo "" >> "$diff_file"

    # Scratch files holding the sorted name / signature lists for comm(1).
    local old_funcs new_funcs old_sigs new_sigs
    old_funcs=$(mktemp)
    new_funcs=$(mktemp)
    old_sigs=$(mktemp)
    new_sigs=$(mktemp)

    # Extract function names from each
    extract_json_strings "name" "$old_json" > "$old_funcs"
    extract_json_strings "name" "$new_json" > "$new_funcs"

    local old_count new_count
    old_count=$(wc -l < "$old_funcs")
    new_count=$(wc -l < "$new_funcs")

    echo "Old function/symbol count: $old_count" >> "$diff_file"
    echo "New function/symbol count: $new_count" >> "$diff_file"
    echo "" >> "$diff_file"

    # Functions only in old (removed)
    local removed
    removed=$(comm -23 "$old_funcs" "$new_funcs")
    if [ -n "$removed" ]; then
        echo "--- REMOVED (in old, not in new) ---" >> "$diff_file"
        echo "$removed" >> "$diff_file"
        echo "" >> "$diff_file"
    fi

    # Functions only in new (added)
    local added
    added=$(comm -13 "$old_funcs" "$new_funcs")
    if [ -n "$added" ]; then
        echo "+++ ADDED (in new, not in old) +++" >> "$diff_file"
        echo "$added" >> "$diff_file"
        echo "" >> "$diff_file"
    fi

    # Functions in both (check for signature changes)
    local common
    common=$(comm -12 "$old_funcs" "$new_funcs")
    if [ -n "$common" ]; then
        local common_count
        common_count=$(echo "$common" | wc -l)
        echo "=== UNCHANGED names: $common_count ===" >> "$diff_file"
        echo "(Signature changes require detailed Ghidra comparison)" >> "$diff_file"
        echo "" >> "$diff_file"
    fi

    # Extract signatures for comparison
    extract_json_strings "signature" "$old_json" > "$old_sigs"
    extract_json_strings "signature" "$new_json" > "$new_sigs"

    local sig_removed sig_added
    sig_removed=$(comm -23 "$old_sigs" "$new_sigs" 2>/dev/null || true)
    sig_added=$(comm -13 "$old_sigs" "$new_sigs" 2>/dev/null || true)

    if [ -n "$sig_removed" ] || [ -n "$sig_added" ]; then
        echo "--- SIGNATURE CHANGES ---" >> "$diff_file"
        if [ -n "$sig_removed" ]; then
            echo " Old signatures no longer present:" >> "$diff_file"
            echo "$sig_removed" >> "$diff_file"
        fi
        if [ -n "$sig_added" ]; then
            echo " New signatures:" >> "$diff_file"
            echo "$sig_added" >> "$diff_file"
        fi
        echo "" >> "$diff_file"
    fi

    # Size comparison
    if [ -f "$OLD_DIR/${libname}.lib" ] && [ -f "$NEW_DIR/${libname}.lib" ]; then
        local old_size new_size delta pct
        # Arithmetic expansion strips the padding some wc implementations add.
        old_size=$(( $(wc -c < "$OLD_DIR/${libname}.lib") ))
        new_size=$(( $(wc -c < "$NEW_DIR/${libname}.lib") ))
        delta=$((new_size - old_size))
        pct=0
        if [ "$old_size" -gt 0 ]; then
            pct=$(( (delta * 100) / old_size ))
        fi
        # %+d prints an explicit sign for the delta; the previous
        # ${delta:+$delta} expansion was a no-op and never added one.
        printf 'SIZE: %d -> %d bytes (%+d bytes, %d%%)\n' \
            "$old_size" "$new_size" "$delta" "$pct" >> "$diff_file"
    fi

    rm -f "$old_funcs" "$new_funcs" "$old_sigs" "$new_sigs"
}
|
||||
|
||||
# Get unique lib names across old and new.
# Iterate over the globs directly instead of parsing `ls` output through
# xargs, which mangles names containing quotes or whitespace.
ALL_LIBS=$(
    for lib_path in "$OLD_DIR"/*.lib "$NEW_DIR"/*.lib; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [ -f "$lib_path" ] || continue
        basename "$lib_path" .lib
    done | sort -u
)

for libname in $ALL_LIBS; do
    generate_diff "$libname"
done
echo ""
|
||||
|
||||
# -------------------------------------------------------
# Step 4: Generate summary
# -------------------------------------------------------
echo "[4/4] Generating summary..."

SUMMARY="$OUTPUT_DIR/summary.txt"
{
    echo "============================================"
    echo " 4JLibs Comparison Report"
    echo "============================================"
    echo ""
    echo " Old ref: $OLD_REF"
    echo " New ref: $NEW_REF"
    echo " Generated: $(date)"
    echo ""
    echo "--------------------------------------------"
    echo " Library Status"
    echo "--------------------------------------------"

    for libname in $ALL_LIBS; do
        local_diff="$DIFF_DIR/${libname}.diff.txt"
        if [ -f "$local_diff" ]; then
            status=$(grep "^STATUS:" "$local_diff" | head -1 | cut -d: -f2 | xargs)
            # The SIZE line is only present for MODIFIED libraries.
            size_line=$(grep "^SIZE:" "$local_diff" | head -1 || true)
            echo ""
            echo " $libname: $status"
            if [ -n "$size_line" ]; then
                echo " $size_line"
            fi

            # Count added/removed sections.
            # Match the exact section headers: a bare '^---' also counts the
            # "--- Functions ..." and "--- SIGNATURE CHANGES ---" headers as
            # removals. grep -c prints "0" itself on no match (exit status 1),
            # so only the status is neutralised; `|| echo "0"` would produce
            # "0<newline>0" and break the -gt tests below.
            added_count=$(grep -c '^+++ ADDED' "$local_diff" 2>/dev/null || true)
            removed_count=$(grep -c '^--- REMOVED' "$local_diff" 2>/dev/null || true)
            added_count=${added_count:-0}
            removed_count=${removed_count:-0}
            if [ "$added_count" -gt 0 ] || [ "$removed_count" -gt 0 ]; then
                echo " Sections: +$added_count added, -$removed_count removed"
            fi
        fi
    done

    echo ""
    echo "--------------------------------------------"
    echo " Detailed reports in: $DIFF_DIR/"
    echo " Raw Ghidra JSON in: $ANALYSIS_DIR/"
    echo " Ghidra logs in: $ANALYSIS_DIR/*_ghidra.log"
    echo "--------------------------------------------"
} > "$SUMMARY"

cat "$SUMMARY"

echo ""
echo "Done. Full report: $OUTPUT_DIR"
|
||||
104
tools/ghidra/extract_lib.py
Normal file
104
tools/ghidra/extract_lib.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Extract .obj members from a COFF .lib (ar archive) into a directory.
|
||||
|
||||
Usage:
|
||||
python extract_lib.py <input.lib> <output_dir>
|
||||
|
||||
Each member is written as <output_dir>/<name>.obj. The first/second linker
|
||||
members and the long-name string table are skipped.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def extract_lib(lib_path, out_dir):
    """Extract the object members of a COFF ``.lib`` (ar archive) into ``out_dir``.

    Args:
        lib_path: Path of the archive to read.
        out_dir: Directory to write the members into; created if missing.

    Returns:
        The number of object files written.

    The linker members (named ``/``) and the long-name table (``//``) are
    skipped. Exits the process with status 1 if ``lib_path`` does not start
    with the ar magic. Parsing stops with a warning on stderr at the first
    malformed or truncated member; members read before that point are still
    written.
    """
    os.makedirs(out_dir, exist_ok=True)

    with open(lib_path, "rb") as f:
        magic = f.read(8)
        if magic != b"!<arch>\n":
            print(f"ERROR: Not an ar archive: {lib_path}", file=sys.stderr)
            sys.exit(1)

        # Long-name string table (the "//" member), filled in once encountered.
        long_names = b""
        members = []

        while True:
            pos = f.tell()
            header = f.read(60)
            if len(header) < 60:
                break

            if header[58:60] != b"\x60\x0a":
                print(f"WARNING: Bad end marker at offset {pos}, stopping.", file=sys.stderr)
                break

            # A corrupt size field must not crash the extraction with a
            # traceback (UnicodeDecodeError is a ValueError subclass).
            try:
                size = int(header[48:58].decode("ascii").strip())
            except ValueError:
                size = -1
            if size < 0:
                print(f"WARNING: Bad size field at offset {pos}, stopping.", file=sys.stderr)
                break

            data = f.read(size)
            if len(data) < size:
                print(f"WARNING: Truncated member at offset {pos}, stopping.", file=sys.stderr)
                break

            # Member data is padded to an even boundary
            if size % 2 != 0:
                f.read(1)

            name = header[0:16].decode("ascii", errors="replace").rstrip()

            # Skip first and second linker members (both named "/")
            if name == "/":
                continue

            # Long-name string table
            if name == "//":
                long_names = data
                continue

            if name.startswith("/") and name[1:].isdigit():
                # Long name reference like "/26": offset into the string table
                offset = int(name[1:])
                end = long_names.find(b"\x00", offset)
                if end == -1:
                    # Fall back to newline-terminated entries
                    end = long_names.find(b"\n", offset)
                if end == -1:
                    end = len(long_names)
                name = long_names[offset:end].decode("ascii", errors="replace").rstrip("/")
            elif name.endswith("/"):
                # Short names are stored with a terminating "/"; drop it so
                # "foo.obj/" is written as "foo.obj" rather than "foo.obj_.obj".
                name = name[:-1]

            # Clean the name for filesystem use. ":" is replaced as well: on
            # Windows a drive prefix such as "C:" would make os.path.join
            # discard out_dir and write outside the output directory.
            safe_name = (
                name.replace("/", "_")
                .replace("\\", "_")
                .replace(":", "_")
                .replace("..", "_")
            )
            if not safe_name.endswith(".obj"):
                safe_name += ".obj"

            members.append((safe_name, data))

    # Write members
    written = 0
    for safe_name, data in members:
        out_path = os.path.join(out_dir, safe_name)

        # Handle duplicate names by appending a counter
        if os.path.exists(out_path):
            base, ext = os.path.splitext(safe_name)
            counter = 2
            while os.path.exists(os.path.join(out_dir, f"{base}_{counter}{ext}")):
                counter += 1
            out_path = os.path.join(out_dir, f"{base}_{counter}{ext}")

        with open(out_path, "wb") as out_f:
            out_f.write(data)
        written += 1

    print(f"Extracted {written} object files from {os.path.basename(lib_path)} -> {out_dir}")
    return written
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are expected,
    # the archive to read and the directory to extract into.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print(f"Usage: {sys.argv[0]} <input.lib> <output_dir>", file=sys.stderr)
        sys.exit(1)

    source_lib, destination = cli_args
    extract_lib(source_lib, destination)
|
||||
44
tools/ghidra/list-lib-symbols.sh
Normal file
44
tools/ghidra/list-lib-symbols.sh
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env bash
|
||||
# list-lib-symbols.sh - Quick symbol listing for a single .lib file using Ghidra headless.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/ghidra/list-lib-symbols.sh <path-to-lib-file> [output.json]
|
||||
#
|
||||
# If no output path is given, writes to tools/ghidra/output/<libname>.json
|
||||
|
||||
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
GHIDRA_HOME="${GHIDRA_HOME:-C:/Users/revela/Documents/Minecraft/Libraries/ghidra_12.0.4_PUBLIC}"
HEADLESS="$GHIDRA_HOME/support/analyzeHeadless"

LIB_FILE="${1:?Usage: list-lib-symbols.sh <path-to-lib-file> [output.json]}"
LIBNAME=$(basename "$LIB_FILE" .lib)

OUTPUT="${2:-$SCRIPT_DIR/output/${LIBNAME}.json}"
mkdir -p "$(dirname "$OUTPUT")"

# Throw-away Ghidra project directory. It is removed by an EXIT trap so it is
# also cleaned up when the Ghidra pipeline fails and `set -e` ends the script
# before the end is reached.
PROJ_DIR=$(mktemp -d)
trap 'rm -rf "$PROJ_DIR"' EXIT

echo "Analyzing $LIB_FILE ..."
echo " Output: $OUTPUT"

# Only the last lines of Ghidra's console output are shown.
"$HEADLESS" "$PROJ_DIR" "proj" \
    -import "$LIB_FILE" \
    -postScript ExportLibInfo.java "$OUTPUT" \
    -scriptPath "$SCRIPT_DIR" \
    -deleteProject \
    -analysisTimeoutPerFile 300 \
    -max-cpu 4 \
    2>&1 | tail -5

if [ -f "$OUTPUT" ]; then
    # grep -c prints "0" itself when nothing matches but exits with status 1;
    # `|| echo "0"` would therefore produce "0<newline>0".
    func_count=$(grep -c '"signature"' "$OUTPUT" 2>/dev/null || true)
    func_count=${func_count:-0}
    echo ""
    echo "Done. $func_count function entries exported to $OUTPUT"
else
    echo "ERROR: No output was generated."
    exit 1
fi
|
||||
0
tools/ghidra/output/.gitkeep
Normal file
0
tools/ghidra/output/.gitkeep
Normal file
Reference in New Issue
Block a user