Skip to content

Unicorn

Unicorn

Unicorn is a lightweight multi-platform, multi-architecture CPU emulator framework.

Example

Challenge from BreizhCTF 2024 - New world 2 (misc) by Itarow.

The goal was to implement the following syscalls:

Custom mmap - number 31337

Create a memory area at the address 0x13370000 with a size of 0x10000.

md5sum - number 31338

Compute the MD5 digest of a memory area.

Parameters:

  • rdi - memory area on which the digest is computed
  • rsi - destination area for the digest
  • rbx - length to digest

sha1sum - number 31339

Compute the SHA1 digest of a memory area.

Parameters:

  • rdi - memory area on which the digest is computed
  • rsi - destination area for the digest
  • rbx - length to digest

sha256sum - number 31340

Compute the SHA256 digest of a memory area.

Parameters:

  • rdi - memory area on which the digest is computed
  • rsi - destination area for the digest
  • rbx - length to digest

Modified write - number 31341

Perform an output display with encoding based on an index passed as a parameter.

Parameters:

  • rdi - area to display
  • rsi - length of the area taken into account for display (before encoding)
  • rbx - index for encoding
  • 1: base64
  • 2: base32
  • 3: base16
  • 4: base85

Python Script

import base64
import hashlib

from Hellf import ELF

from unicorn import *
from unicorn.x86_const import *


# Load the challenge binary and grab the raw bytes of its ".shellcode" section;
# these bytes are the program that gets emulated.
elf = ELF("./elf.bin")
SHELLCODE = elf.get_section_by_name(".shellcode").data

# Address at which the shellcode is mapped and at which execution begins.
BASE = 0x4001000
# Byte encoding that hook_code compares each executed instruction against
# in order to recognise a syscall.
X86_CODE64_SYSCALL = b'\x0f\x05' # SYSCALL

# Initialize emulator in X86-64bit mode
mu = Uc(UC_ARCH_X86, UC_MODE_64)    

# Map & write the code at base address
# (a 1 MiB region is mapped; the shellcode is copied to its start and RIP
# is pointed at the first byte).
mu.mem_map(BASE, 1024 * 1024)
mu.mem_write(BASE, SHELLCODE)
mu.reg_write(UC_X86_REG_RIP, BASE)

def syscall_allocate_mem(mu):
    """Handle the custom mmap syscall (31337).

    Maps the fixed region required by the challenge into the emulator:
    0x10000 bytes starting at address 0x13370000.

    Args:
        mu: emulator instance exposing ``mem_map(address, size)``.
    """
    region_start, region_length = 0x13370000, 0x10000
    mu.mem_map(region_start, region_length)

def syscall_exit(mu):
    """Handle the exit syscall (60): stop the emulator, then report it.

    Args:
        mu: emulator instance exposing ``emu_stop()``.
    """
    # Stop first so the message is only printed once the request was issued.
    mu.emu_stop()
    print("=> Exit")

def syscall_base(mu, rdi, rsi, rbx):
    """Handle the modified write syscall (31341).

    Reads ``rsi`` bytes of emulator memory starting at ``rdi``, encodes
    them with the scheme selected by ``rbx`` and prints the result.

    Args:
        mu: emulator instance exposing ``mem_read(address, size)``.
        rdi: address of the area to display.
        rsi: number of bytes to read (length before encoding).
        rbx: encoding index - 1: base64, 2: base32, 3: base16, 4: base85.

    Raises:
        KeyError: if ``rbx`` is not one of the supported indexes.
    """
    payload = mu.mem_read(rdi, rsi)
    # Encoding index -> radix; the matching stdlib encoder is b<radix>encode.
    radix = {1: 64, 2: 32, 3: 16, 4: 85}[rbx]
    encode = getattr(base64, f"b{radix}encode")
    print("=> Base:", encode(payload).decode())

def syscall_hash(mu, hash_func, rdi, rsi, rbx):
    """Handle the digest syscalls (md5sum / sha1sum / sha256sum).

    Hashes ``rbx`` bytes of emulator memory starting at ``rdi``, prints the
    digest in hexadecimal and stores the raw digest bytes at ``rsi``.

    Args:
        mu: emulator instance exposing ``mem_read`` and ``mem_write``.
        hash_func: hashlib-style constructor (e.g. ``hashlib.md5``).
        rdi: address of the memory area to digest.
        rsi: destination address for the raw digest.
        rbx: number of bytes to digest.
    """
    digest = hash_func(mu.mem_read(rdi, rbx)).digest()
    print("=> Hash:", digest.hex())
    mu.mem_write(rsi, digest)

def hook_code(mu, address, size, user_data):
    """Code hook: intercept SYSCALL instructions and emulate the custom syscalls.

    The bytes of the instruction at ``address`` are compared with the
    SYSCALL encoding; any other instruction is ignored. For a syscall, the
    number is read from EAX and the arguments from RDI, RSI and RBX, the
    call is logged, and the matching handler is invoked.

    Args:
        mu: emulator instance the hook is attached to.
        address: address of the instruction about to be executed.
        size: size in bytes of that instruction.
        user_data: hook user data (unused).

    Raises:
        NotImplementedError: if the syscall number has no handler.
    """
    # Guard clause: only SYSCALL instructions are of interest.
    if mu.mem_read(address, size) != X86_CODE64_SYSCALL:
        return

    syscall, rdi, rsi, rbx = (
        mu.reg_read(reg)
        for reg in (
            UC_X86_REG_EAX,
            UC_X86_REG_RDI,
            UC_X86_REG_RSI,
            UC_X86_REG_RBX,
        )
    )
    print(f"[0x{address:x}] Syscall: {syscall} (rdi: {rdi} rsi: {rsi} rbx: {rbx})")

    # The three digest syscalls differ only in the hash constructor used.
    digest_syscalls = {
        31338: hashlib.md5,
        31339: hashlib.sha1,
        31340: hashlib.sha256,
    }

    if syscall in digest_syscalls:
        syscall_hash(mu, digest_syscalls[syscall], rdi, rsi, rbx)
    elif syscall == 31337:
        syscall_allocate_mem(mu)
    elif syscall == 31341:
        syscall_base(mu, rdi, rsi, rbx)
    elif syscall == 60:
        syscall_exit(mu)
    else:
        raise NotImplementedError(f"Syscall {syscall} not implemented")

# Register hook_code as a code hook so it can spot SYSCALL instructions
# and dispatch them to the handlers above.
mu.hook_add(UC_HOOK_CODE, hook_code)
# Start emulation at BASE; the end address is the first byte past the
# shellcode (the exit syscall handler may also stop the emulator earlier).
mu.emu_start(BASE, BASE + len(SHELLCODE))

Execution

$ python3 emulation.py
[...]
[0x4001725] Syscall: 31340 (rdi: 322371584 rsi: 322392292 rbx: 4)
=> Hash: 7673d1c5b102f0f77df43309d94c59a3fadaca301238297efe6b850491130d12
[0x4001732] Syscall: 31339 (rdi: 322371584 rsi: 322392296 rbx: 4)
=> Hash: dca2e65c9dd98fbf08b969ee05158b563d73c806
[0x400173f] Syscall: 31338 (rdi: 322371584 rsi: 322392300 rbx: 4)
=> Hash: aaafc7b0a4ea52e791cf29404891293b
[0x400174c] Syscall: 31339 (rdi: 322371584 rsi: 322392304 rbx: 4)
=> Hash: dca2e65c9dd98fbf08b969ee05158b563d73c806
[0x400176e] Syscall: 31341 (rdi: 322392064 rsi: 1024 rbx: 1)
=> Base: 3KLmXKqvx7DcouZc...
[0x4001777] Syscall: 60 (rdi: 322392064 rsi: 1024 rbx: 1)
=> Exit

References