/// Core disassembler module. /// /// The API was inspired by fopen and uses Capstone for its backend. /// /// Tested with Capstone 4.0.2. /// /// Authors: dd86k <dd@dax.moe> /// Copyright: © dd86k <dd@dax.moe> /// License: BSD-3-Clause-Clear module adbg.disassembler; import adbg.include.capstone; import adbg.include.c.stdarg; import adbg.error; import adbg.debugger.process : adbg_process_t; import adbg.machines : AdbgMachine; import adbg.debugger.exception : adbg_exception_t; import adbg.debugger.memory : adbg_memory_read; import core.stdc.string : memcpy; import core.stdc.stdlib : malloc, free; //TODO: Capstone CS_MODE_BIG_ENDIAN // Depending on target endianness, Capstone may need this bit //TODO: Function to format machine code //TODO: Close function should close CS lib too // Make sure we have a function to reconfigure machine // NOTE: Longest architectural instruction contest // x86: 15 bytes // AArch32: 2 or 4 bytes // AArch64: 4 bytes // Power: 4 bytes // MIPS: 4 bytes // RISC-V: 24 bytes (reserved) // SPARC: 4 bytes // IA64: 16 bytes // Alpha: 4 bytes /// Maximum instruction size in bytes. 
enum MAX_INSTR_SIZE = 16; // Fits the longest x86 (15 bytes) and IA64 (16 bytes) encodings.

// Default Capstone architecture and mode for the platform this module
// is compiled for. Used when AdbgMachine.native is requested.
version (X86) { // CS_OPT_SYNTAX_DEFAULT
	private enum {
		CS_DEFAULT_PLATFORM = CS_ARCH_X86,	/// Platform default architecture
		CS_DEFAULT_MODE = CS_MODE_32,	/// Platform default mode
	}
} else version (X86_64) {
	private enum {
		CS_DEFAULT_PLATFORM = CS_ARCH_X86,	/// Ditto
		CS_DEFAULT_MODE = CS_MODE_64,	/// Ditto
	}
} else version (Thumb) {
	private enum {
		CS_DEFAULT_PLATFORM = CS_ARCH_ARM,	/// Ditto
		CS_DEFAULT_MODE = CS_MODE_THUMB,	/// Ditto
	}
} else version (ARM) {
	private enum {
		CS_DEFAULT_PLATFORM = CS_ARCH_ARM,	/// Ditto
		CS_DEFAULT_MODE = CS_MODE_V8, // or CS_MODE_ARM?
	}
} else version (AArch64) {
	private enum {
		CS_DEFAULT_PLATFORM = CS_ARCH_ARM64,	/// Ditto
		CS_DEFAULT_MODE = CS_MODE_ARM,	/// Ditto
	}
} else version (RISCV32) {
	// NOTE(review): negative placeholder values, presumably because the
	//               targeted Capstone release has no RISC-V backend, so
	//               opening a native instance is expected to fail. Confirm.
	private enum {
		CS_DEFAULT_PLATFORM = -3,	/// Ditto
		CS_DEFAULT_MODE = -3,	/// Ditto
	}
} else version (RISCV64) {
	// NOTE(review): placeholder differs from RISCV32 (-1 vs -3); confirm
	//               whether that is intentional.
	private enum {
		CS_DEFAULT_PLATFORM = -1,	/// Ditto
		CS_DEFAULT_MODE = -3,	/// Ditto
	}
} else {
	static assert(0, "Set CS_DEFAULT_PLATFORM and CS_DEFAULT_MODE");
}

private {
	/// Value stored in adbg_disassembler_t.magic by adbg_dis_open and
	/// checked by the other functions to detect unopened instances.
	enum ADBG_MAGIC = 0xcafebabe;
}

extern (C):

/// Disassembler structure.
///
/// All fields are used internally, do not touch.
struct adbg_disassembler_t {
	/// Used internally.
	int magic;
	
	//
	// User settings
	//
	
	/// User input buffer pointer.
	/// Adjusted when called.
	void *buffer;
	/// User input buffer size.
	/// Adjusted when called.
	size_t buffer_size;
	/// Base address for current disassembled instruction.
	ulong address_base;
	/// Attached process.
	adbg_process_t *process;
	
	//
	// Stats
	//
	
	/// Number of successfully decoded instructions.
	int decoded_count;
	
	//
	// Capstone
	//
	
	/// CS handle.
	/// Used internally.
	csh cs_handle;
	/// CS instruction instance.
	/// Used internally.
	cs_insn *cs_inst;
}

/// Decoded instruction information.
struct adbg_opcode_t {
	ulong address;	/// Base instruction address.
	int size;	/// Instruction size in Bytes.
	ubyte[MAX_INSTR_SIZE] machine;	/// Machine bytes.
	const(char) *mnemonic;	/// Instruction mnemonic.
	const(char) *operands;	/// Instruction operands.
}

/// Assembler syntax.
enum AdbgDisSyntax {
	/// Default option for platform.
	native,
	/// Intel syntax
	/// Year: 1978
	/// Destination: Left
	///
	/// Similar to the Macro/Microsoft Assembler (MASM) syntax.
	/// This is the reference syntax for the x86 instruction set.
	/// For more information, consult the Intel and AMD reference manuals.
	///
	/// Example:
	/// ---
	/// mov edx, dword ptr ss:[eax+ecx*2-0x20]
	/// ---
	intel,
	/// AT&T syntax
	/// Year: 1960s
	/// Destination: Right
	///
	/// For more information, consult the IAS/RSX-11 MACRO-11 Reference
	/// Manual and the GNU Assembler documentation.
	///
	/// Example:
	/// ---
	/// mov %ss:-0x20(%eax,%ecx,2), %edx
	/// ---
	att,
}

/// Disassembler options.
enum AdbgDisOpt {
	/// Change syntax.
	/// Type: int
	/// Default: AdbgDisSyntax.native
	syntax = 2,
	//TODO: Only get size, etc.
	//mode = 3,
}

// Translate an AdbgMachine value into Capstone's architecture and mode.
//
// On success, cs_arch and cs_mode are both written and 0 is returned.
// For an unsupported machine, neither output is written and the result of
// adbg_oops(AdbgError.disasmUnsupportedMachine) is returned.
private
int adbg_dis_lib_a2cs(ref int cs_arch, ref int cs_mode, AdbgMachine platform) {
	switch (platform) with (AdbgMachine) {
	case native: // equals 0
		cs_arch = CS_DEFAULT_PLATFORM;
		cs_mode = CS_DEFAULT_MODE;
		break;
	//
	// x86
	//
	case i8086:
		cs_arch = CS_ARCH_X86;
		cs_mode = CS_MODE_16;
		break;
	case x86:
		cs_arch = CS_ARCH_X86;
		cs_mode = CS_MODE_32;
		break;
	case amd64:
		cs_arch = CS_ARCH_X86;
		cs_mode = CS_MODE_64;
		break;
	//
	// Arm
	//
	case thumb:
		cs_arch = CS_ARCH_ARM;
		cs_mode = CS_MODE_THUMB;
		break;
	case thumb32:
		cs_arch = CS_ARCH_ARM;
		cs_mode = CS_MODE_THUMB | CS_MODE_V8;
		break;
	case arm:
		cs_arch = CS_ARCH_ARM;
		cs_mode = CS_MODE_ARM | CS_MODE_V8;
		break;
	case aarch64:
		// CS_MODE_V8 is a 32-bit Arm mode flag; Capstone's cs_open refuses
		// it for CS_ARCH_ARM64 (CS_ERR_MODE). Use the same mode as the
		// AArch64 platform default.
		cs_arch = CS_ARCH_ARM64;
		cs_mode = CS_MODE_ARM;
		break;
	//
	// Others
	//
	default:
		return adbg_oops(AdbgError.disasmUnsupportedMachine);
	}
	return 0;
}

/// Open a disassembler instance.
/// Params:
/// 	machine = Machine architecture.
/// Returns: Disassembler instance, or null on error.
adbg_disassembler_t* adbg_dis_open(AdbgMachine machine = AdbgMachine.native) {
	//TODO: static if (CAPSTONE_DYNAMIC)
	if (libcapstone_dynload())
		return null;
	
	int cs_arch = void, cs_mode = void;
	if (adbg_dis_lib_a2cs(cs_arch, cs_mode, machine))
		return null;
	
	adbg_disassembler_t *dasm = cast(adbg_disassembler_t*)malloc(adbg_disassembler_t.sizeof);
	if (dasm == null) {
		adbg_oops(AdbgError.crt);
		return null;
	}
	
	// NOTE(review): on the two failure paths below, the error is recorded
	//               before the instance is released so that the handle
	//               address is valid during the adbg_oops call. If adbg_oops
	//               retains the pointer for later use, it still dangles once
	//               the instance is freed; confirm against adbg.error.
	if (cs_open(cs_arch, cs_mode, &dasm.cs_handle)) {
		adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
		free(dasm);
		return null;
	}
	
	dasm.cs_inst = cs_malloc(dasm.cs_handle);
	if (dasm.cs_inst == null) {
		adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
		cs_close(&dasm.cs_handle); // Do not leak the opened CS handle
		free(dasm);
		return null;
	}
	
	// malloc does not clear memory: set every remaining field explicitly.
	dasm.decoded_count = 0;
	dasm.address_base = 0;
	dasm.buffer_size = 0;
	dasm.buffer = null;
	dasm.process = null;
	dasm.magic = ADBG_MAGIC;
	return dasm;
}

/// Closes a disassembler instance.
///
/// Does nothing if the instance is null or was not opened by adbg_dis_open.
/// Params: dasm = Reference to disassembler instance.
void adbg_dis_close(adbg_disassembler_t *dasm) {
	if (dasm == null || dasm.magic != ADBG_MAGIC)
		return;
	if (dasm.cs_inst)
		cs_free(dasm.cs_inst, 1);
	cs_close(&dasm.cs_handle);
	dasm.magic = 0; // Best effort: make a stale pointer fail the magic check
	free(dasm);
}

/// Configure an option to the disassembler.
///
/// Always end the list of options with 0.
///
/// Example:
/// ---
/// adbg_dis_options(dasm,
///   AdbgDisOpt.syntax, AdbgDisSyntax.intel,
///   0);
/// ---
/// Params:
/// 	dasm = Reference to disassembler instance.
/// 	... = Options, as (AdbgDisOpt, int value) pairs terminated by 0.
/// Returns: Error code.
int adbg_dis_options(adbg_disassembler_t *dasm, ...) {
	if (dasm == null)
		return adbg_oops(AdbgError.invalidArgument);
	if (dasm.magic != ADBG_MAGIC)
		return adbg_oops(AdbgError.uninitiated);
	
	// NOTE(review): va_end is never called on `args`. Confirm whether
	//               adbg.include.c.stdarg exposes va_end before adding it.
	va_list args = void;
	va_start(args, dasm);
	
	// Each handled option jumps back here to read the next option id.
L_OPTION:
	switch (va_arg!int(args)) {
	case 0: // End of list
		break;
	case AdbgDisOpt.syntax:
		int cs_syntax = void;
		switch (va_arg!int(args)) {
		case AdbgDisSyntax.native:
			cs_syntax = CS_OPT_SYNTAX_DEFAULT;
			break;
		case AdbgDisSyntax.intel:
			cs_syntax = CS_OPT_SYNTAX_INTEL;
			break;
		case AdbgDisSyntax.att:
			cs_syntax = CS_OPT_SYNTAX_ATT;
			break;
		default:
			return adbg_oops(AdbgError.invalidValue);
		}
		if (cs_option(dasm.cs_handle, CS_OPT_SYNTAX, cs_syntax))
			return adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
		goto L_OPTION;
	default:
		return adbg_oops(AdbgError.invalidOption);
	}
	
	return 0;
}

/// Start a disassembler session from user data.
///
/// This is typically used before entering a loop.
/// Detaches any process previously set with adbg_dis_process_start.
/// Params:
/// 	dasm = Reference to disassembler instance.
/// 	data = Reference to user data.
/// 	size = Size of the user data.
/// 	base_address = Base address.
/// Returns: Error code.
int adbg_dis_start(adbg_disassembler_t *dasm, void *data, size_t size, ulong base_address = 0) {
	if (dasm == null || data == null)
		return adbg_oops(AdbgError.invalidArgument);
	if (dasm.magic != ADBG_MAGIC)
		return adbg_oops(AdbgError.uninitiated);
	
	dasm.address_base = base_address;
	dasm.buffer = data;
	dasm.buffer_size = size;
	dasm.process = null;
	// adbg_dis_step uses a zero count to tell an illegal first instruction
	// from the end of the data; a count left over from a previous session
	// would make it misreport the former as the latter.
	dasm.decoded_count = 0;
	return 0;
}

/// Disassemble one instruction.
///
/// On success, the session buffer, its remaining size, and the base
/// address are advanced past the decoded instruction.
/// Params:
/// 	dasm = Disassembler instance.
/// 	opcode = Opcode instance.
/// Returns: Error code.
int adbg_dis_step(adbg_disassembler_t *dasm, adbg_opcode_t *opcode) {
	if (dasm == null || opcode == null)
		return adbg_oops(AdbgError.invalidArgument);
	if (dasm.magic != ADBG_MAGIC)
		return adbg_oops(AdbgError.uninitiated);
	
	version (Trace) trace("buffer_size=%u", cast(uint)dasm.buffer_size);
	
	opcode.address = dasm.address_base; // Save before CS modifies it
	
	//TODO: Consider making a specific error code if decoded count is zero.
	//      Use case:
	//      If cs_disasm_iter returns false and cs_errno
	//      returns CS_ERR_OK, this could mean that an invalid
	//      machine type was specified when opening the instance.
	//TODO: Consider replacing mnemonic by "error"
	//      Needs to be something specific (e.g. .bytes 0x11 0x22)
	// NOTE: CS modifies buffer, buffer_size, and address_base.
	if (cs_disasm_iter(dasm.cs_handle,
		cast(const(ubyte*)*)&dasm.buffer,
		&dasm.buffer_size,
		&dasm.address_base,
		dasm.cs_inst) == false) {
		if (cs_errno(dasm.cs_handle) != CS_ERR_OK)
			return adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
		
		// NOTE: Can't reliably check buffer_size left.
		
		// Can't decode instruction but no errors happened?
		// If there were no other instructions decoded, must be illegal
		if (dasm.decoded_count == 0)
			return adbg_oops(AdbgError.disasmIllegalInstruction);
		
		return adbg_oops(AdbgError.disasmEndOfData);
	}
	
	++dasm.decoded_count;
	
	//TODO: disasm modes
	opcode.size = dasm.cs_inst.size;
	opcode.mnemonic = cs_insn_name(dasm.cs_handle, dasm.cs_inst.id);
	opcode.operands = dasm.cs_inst.op_str.ptr;
	
	// CS moved the buffer pointer past the instruction: step back to its
	// first byte to copy the machine code out.
	void *first = dasm.buffer - opcode.size;
	// Never write past the fixed-size machine array.
	size_t length = opcode.size;
	if (length > MAX_INSTR_SIZE)
		length = MAX_INSTR_SIZE;
	// The process wrappers decode straight out of opcode.machine, in which
	// case source and destination are the same memory: nothing to copy, and
	// memcpy must not be given overlapping regions.
	if (first != cast(void*)opcode.machine.ptr)
		memcpy(opcode.machine.ptr, first, length);
	return 0;
}

/// Setup buffer and disassemble one instruction.
/// Params:
/// 	dasm = Disassembler instance.
/// 	opcode = Opcode instance.
/// 	data = Pointer to user buffer.
/// 	size = Size of user buffer.
/// 	base_address = Base address.
/// Returns: Error code.
int adbg_dis_once(adbg_disassembler_t *dasm, adbg_opcode_t *opcode, void *data, size_t size, ulong base_address = 0) {
	int e = adbg_dis_start(dasm, data, size, base_address);
	return e ? e : adbg_dis_step(dasm, opcode);
}

//
// Process wrappers
//

/// Start a disassembler session that reads from a process.
///
/// Follow with calls to adbg_dis_process_step.
/// Params:
/// 	dasm = Disassembler instance.
/// 	process = Process to read memory from.
/// 	location = Process virtual memory location to start at.
/// Returns: Error code.
int adbg_dis_process_start(adbg_disassembler_t *dasm, adbg_process_t *process, ulong location) {
	if (dasm == null || process == null)
		return adbg_oops(AdbgError.invalidArgument);
	if (dasm.magic != ADBG_MAGIC)
		return adbg_oops(AdbgError.uninitiated);
	
	dasm.address_base = location;
	dasm.process = process;
	dasm.decoded_count = 0; // New session, see adbg_dis_start
	return 0;
}

/// Read memory from the session's process at the current address and
/// disassemble one instruction.
/// Params:
/// 	dasm = Disassembler instance.
/// 	opcode = Opcode instance.
/// Returns: Error code.
int adbg_dis_process_step(adbg_disassembler_t *dasm, adbg_opcode_t *opcode) {
	if (dasm == null || opcode == null)
		return adbg_oops(AdbgError.invalidArgument);
	if (dasm.magic != ADBG_MAGIC)
		return adbg_oops(AdbgError.uninitiated);
	// No process attached: adbg_dis_process_start was not called, or a
	// later adbg_dis_start detached it.
	if (dasm.process == null)
		return adbg_oops(AdbgError.uninitiated);
	
	// Read the largest possible instruction directly into the opcode's
	// machine bytes, then decode from there.
	if (adbg_memory_read(dasm.process, cast(size_t)dasm.address_base, opcode.machine.ptr, MAX_INSTR_SIZE))
		return adbg_errno;
	dasm.buffer = opcode.machine.ptr;
	dasm.buffer_size = MAX_INSTR_SIZE;
	return adbg_dis_step(dasm, opcode);
}

/// Wrapper that reads memory from process that disassembles one instruction.
/// Params:
/// 	dasm = Disassembler instance.
/// 	opcode = Opcode instance.
/// 	tracee = Debuggee process.
/// 	address = Process virtual memory location.
/// Returns: Error code.
int adbg_dis_process_once(adbg_disassembler_t *dasm, adbg_opcode_t *opcode, adbg_process_t *tracee, ulong address) {
	if (dasm == null || tracee == null || opcode == null)
		return adbg_oops(AdbgError.invalidArgument);
	
	if (adbg_memory_read(tracee, cast(size_t)address, opcode.machine.ptr, MAX_INSTR_SIZE))
		return adbg_errno;
	// Pass the address along so that opcode.address reports where the
	// instruction lives in the process instead of 0.
	if (adbg_dis_once(dasm, opcode, opcode.machine.ptr, MAX_INSTR_SIZE, address))
		return adbg_errno;
	return 0;
}