hack.lu CTF - Challenge 12 WriteUp

Mon 29 November 2010 by jean

Here is the missing Hack.lu CTF write-up for the "seamonster" challenge. It was a Windows reverse engineering challenge, with a nice anti-debugging trick.

The challenge objective is to give "Ring3" the correct password to keep our ship afloat and get the gold ! Let's have a look at the main function :

char Buf[32];   //holds the users input

int wmain(int argc, wchar_t* argv[])
{
    DWORD pid_1, dwProcessId;
    PROCESS_INFORMATION ProcessInformation;
    STARTUPINFOW StartupInfo;
    WCHAR commandLineParams[16];

    if ( argc == 1 )    //first launch
    {
        print_ascii_message();
        pid_1 = GetCurrentProcessId();
        wsprintfW(commandLineParams, L"%d %d", 1, pid_1);

        std::basic_string<WCHAR> s;
        s += argv[0];
        s += L" ";
        s += commandLineParams;

        memset(&StartupInfo, 0, sizeof(StartupInfo));

        //seamonster.exe 1 <PID_1>
        CreateProcessW(argv[0], (LPWSTR) s.c_str(), 0, 0, 0,
                DEBUG_ONLY_THIS_PROCESS, 0, 0, &StartupInfo, &ProcessInformation);
        LFSR1_runloop();    //sub_401F80()  (noreturn)
    }
    if ( argc == 3 )    //second and third processes
    {
        if ( argv[1][0] == '1' )
        {
            wsprintfW(commandLineParams, L"%d %s", 2, argv[2]);

            std::basic_string<WCHAR> s;
            s += argv[0];
            s += L" ";
            s += commandLineParams;

            memset(&StartupInfo, 0, sizeof(StartupInfo));

            //seamonster.exe 2 <PID_1>
            CreateProcessW(argv[0], (LPWSTR)s.c_str(), 0, 0, 0,
                        DEBUG_ONLY_THIS_PROCESS, 0, 0, &StartupInfo, &ProcessInformation);
            LFSR2_runloop();    //sub_401FC0() (noreturn)
        }
        printf(">>> ");
        gets_s(Buf, 32);    //get user input

        swscanf(argv[2], L"%d", &dwProcessId);   //get <PID_1> from command line
        if ( DebugActiveProcess(dwProcessId) == 1 ) //attach 3rd process to 1st
            process3_main();
    }
    return 0;
}

The anti-debug technique used is interesting : 3 processes are spawned and debug each other in a circular fashion (1->2->3->1), hence preventing another ring3 debugger to attach. Furthermore, each process manages a 128 bit linear feedback shift register (LFSR), and the feedback bits are exchanged through debug interrupts : the debugger process injects the bit value in the eax register of the debuggee using SetThreadContext. Therefore, removing the debugging code makes the binary non functional. The keystream generated by the LFSRs is used by the third process to decrypt two functions, which we will call stage1 and stage2. The following C code shows the `` process3_main`` function :

void process3_main()    //0x402080
{
    //init LFSR function pointers
    ptr_f1 = &LFSR3_f1_output;
    ptr_f2 = &LFSR3_f2_feedback;
    ptr_f3 = &LFSR3_f3_update;
    ptr_setState = &LFSR3_setState;
    ptr_xor_string = &LFSR3_xor_string;
    LFSR3_newState = "FluxFingers";

    //wait debug attach to process 1
    WaitForOneDebugEvent();

    //tell parent processes to reset LFSR state
    __asm{
        push eax
        mov eax,0
        int 3
        pop eax
    }
    //wait for the interrupt to loop back to us
    while ( WaitForDebugAndSetEAX() != 1 );

    //init LFSR3 state with LFSR3_newState
    (*ptr_setState)();

    //drop first 50 keystream bytes
    for(i=0; i < 50; i++)
    {
        LFSR3_getByte();
    }

    //decrypt first encrypted function
    for(i=0; i < 384; i++)      //loc_4020F0
    {
        *(&stage1 + i) ^= LFSR3_getByte();
    }
    stage1();
}

Instead of reversing the LFSR algorithm, we went the easy way and patched the binary to dump the keystream by adding some calls to printf. This solution is probably not the most elegant but allowed us to quickly solve this challenge. For instance in the previous function, we simply patched the second loop to display each byte of the key by jumping to an existing call to printf and setting the format string to "%02x". The following "bindiff" shows the idea :

loc_4020F0:
 call    sub_402000     ; LFSR3_getByte ()
 mov     ecx, offset loc_402430  ; stage1
-xor     [ecx+esi], al
-add     ecx, esi
+and     eax, 0FFh
+call    sub_4021D0
 inc     esi
 cmp     esi, 384
 jl      short loc_4020F0
 call    loc_402430     ; stage1()

...

sub_4021D0:
-push    0
+push    eax
+nop
-push    offset aRing3T_t_h_s_0 ; "Ring3 T.T.H.S.M.> Mrrrmmmmmpffff!\nRing3"...
+push    offset a02x     ; "%02x"
 call    ds:printf
 add     esp, 8
 retn

Once we run the patched binary, the keystream is displayed and we can then decrypt the stage1 function and disassemble it to get the following code :

void stage1()
{
    struct MD5_CTX md5_context;
    char stage2key[214];
    char* heapBuffer;
    void (*stage2ptr)();

    MD5_Init(&md5_context); //inlined
    //hash the whole .text section except for the encrypted stage 2 at 0x4025B0
    MD5_Update(0x15B0, &md5_context, 0x401000); //first parameter is data size
    MD5_Update(0x97A, &md5_context, 0x402686);
    MD5_Final(&md5_context);

    //trigger LFSR state reset in parent processes
    __asm{
        push eax
        mov eax,0
        int 3
        pop eax
    }
    //wait for the interrupt to loop back to us
    while ( WaitForDebugAndSetEAX() != 1 );

    //use integrity check output to reinitialize LFSR
    LFSR3_newState = &md5_context->digest;
    (*ptr_setState)();


    //drop first 60 bytes of keystream
    for(i=0; i < 60; i++)
    {
        LFSR3_getByte();
    }

    HANDLE heap = HeapCreate(HEAP_CREATE_ENABLE_EXECUTE, 0, 0);

    srand((int) heap);
    int randomIndex = rand() % 1000;

    for(i=0; i < 214; i++)
    {
        //we'll put our second "printf patch" here to get the key
        stage2key[i] = LFSR3_getByte();
    }

    for(i=0 ; i < 1000; i++)
    {
        heapBuffer = HeapAlloc(heap, 0, 214);
        if ( i == randomIndex )
        {
            //copy encrypted stage 2 to heap memory
            memcpy(heapBuffer, 0x4025B0, 214);
            stage2ptr = heapBuffer;
        }

        for(i=0; i < 107; i+=2)
        {
            heapBuffer[i] ^= stage2key[i];
            heapBuffer[i+1] ^= stage2key[i+1];
        }
    }
    //call decrypted stage2 on heap
    *(stage2ptr)(stage2ptr);
}

This function allocates 1000 buffers and run the decryption code on each, but only one of them (randomly chosen) will be initialized with the encrypted stage2. If we do the same as before and patch the loop which gets the keystream, the decrypted code ends up being incorrect. In fact, the MD5 digest is used to reinitialize the LFSR, so I had to patch the MD5_final function to always return the right value (140e351638717e04e73ff8ec5e2a81db). Once this is done we can decrypt and reverse the last stage :

void stage2(void* stage2heapBuffer)
{
    DWORD stage2_opcodes[3];
    char good_passphrase[32];

    stage2_opcodes[0] = ((DWORD*)stage2heapBuffer)[0];
    stage2_opcodes[1] = ((DWORD*)stage2heapBuffer)[1];
    stage2_opcodes[2] = ((DWORD*)stage2heapBuffer)[2];

    //reset LFSR state with first stage2 bytes
    LFSR3_newState = &stage2_opcodes[0];
    (*ptr_setState)();

    memcpy(good_passphrase, encrypted_passphrase, 32);
    //xor encrypted passphrase with keystream
    (*ptr_xor_string)(good_passphrase, 31);

    //insert printf here :)
    int passOk = !strcmp(Buf, good_passphrase);

    memset(good_passphrase, 0, 32);

    //erase current function and jump to display_win/fail (ROP)
    //memset(stage2heapBuffer, 0, 214);

    __asm{
        push 0
        push 214
        push 0
        push stage2heapBuffer
    }
    if ( passOk )
        __asm{ push offset display_win}
    else
        __asm{ push offset display_fail}

    __asm{
        push offset memset
        retn
    }
}

The LFSR state is reset again, this time using the first bytes of the stage2 function, and then the correct passphrase is decrypted (it is stored in the .data section) and compared with the user's input. One last "printf patch" at the right spot and we finally get the right answer : "Ring3?! Lol! Joanna did Ring-1!".

After the CTF was over I took some time to fully reverse the LFSR algorithm, and the following Python script reproduces the seamonster functionality to decrypt the two stages and the passphrase.

import hashlib, struct

def xor_string(data_string, key_int):
    res = ""
    for i in xrange(len(data_string)):
        res += chr( ord(data_string[i]) ^ key_int[i % len(key_int)])
    return res

def getbit(x, pos):
    return (x >> pos) & 1

def insertHighBit(x, b):
    return (x >> 1) | (b << 127)

class LFSR_process1:
    #0x401A20
    def __init__(self):
        self.state = 0xBBCCDDEEFF0011223344000000000000

    #0x401BC0
    def f1_output(self):
        return  #not used

    #0x401AC0
    def f2_feedback(self):
        return (getbit(self.state,108) | getbit(self.state,44)) \
            ^ (getbit(self.state,123) | getbit(self.state,59)) \
            ^ (getbit(self.state,109) | getbit(self.state,45)) \
            & (getbit(self.state,110) | getbit(self.state,46))

    #0x401CA0
    def f3_update(self, feedback):
        newbit = feedback ^ (getbit(self.state,51) | getbit(self.state,114))
        self.state = insertHighBit(self.state, newbit)

class LFSR_process2:
    #0x401A50
    def __init__(self):
        self.state = 0x000000000000000000000000000e0000

    #0x401BF0
    def f1_output(self):
        return    #not used

    #0x401B10
    def f2_feedback(self):
        return (getbit(self.state,81) | getbit(self.state,17)) \
            ^ (getbit(self.state,126) | getbit(self.state,62)) \
            ^ (getbit(self.state,82) | getbit(self.state,18)) \
            & (getbit(self.state,83) | getbit(self.state,19))

    #0x401D10
    def f3_update(self, feedback):
        newbit = feedback ^ (getbit(self.state,42) | getbit(self.state,105))
        self.state = insertHighBit(self.state, newbit)

class LFSR_process3:
    def __init__(self,p1, p2):
        self.state = 0
        self.p1 = p1
        self.p2 = p2

    #0x401960
    def setState(self, str):
        str = str[:10]  #only use 10 bytes
        self.state = 0
        self.state |= struct.unpack(">L", str[:4])[0] << 96
        self.state |= struct.unpack(">L", str[4:8])[0] << 64
        self.state |= struct.unpack(">H", str[8:10])[0] << 48

    #0x401B90
    def f1_output(self):
        return (getbit(self.state, 99) | getbit(self.state,35)) \
             ^ (getbit(self.state,126) | getbit(self.state,62))

    #0x401A70
    def f2_feedback(self):
        return (getbit(self.state,99) | getbit(self.state,35)) \
            ^ (getbit(self.state,126) | getbit(self.state,62)) \
            ^ (getbit(self.state,100) | getbit(self.state,36)) \
            & (getbit(self.state,101) | getbit(self.state,37))

    #0x401C30
    def f3_update(self, feedback):
        newbit = feedback ^ (getbit(self.state,60) | getbit(self.state,123))
        self.state = insertHighBit(self.state, newbit)

    #0x402000
    def getByte(self):
        x = 0
        for i in xrange(8):
            x |= self.f1_output() << (7-i)
            feedback = self.p2.f2_feedback()
            self.p2.f3_update(self.p1.f2_feedback())
            self.p1.f3_update(self.f2_feedback())
            self.f3_update(feedback)
        return x

    def getBytes(self, n):
        return [self.getByte() for i in xrange(n)]

#sha1=7A46DC5D6C2A52824937B86A94528FFEF33CBDDC
f=open("seamonster.exe","rb")
binary=f.read()
f.close()

#0x402430
encrypted_stage1 = binary[0x1830:0x1830+384]
#0x4025b0
encrypted_stage2 = binary[0x19b0:0x19b0+214]
#0x403144
encrypted_password = binary[0x2544:0x2544+31]

lfsr3 = LFSR_process3(LFSR_process1(), LFSR_process2())
lfsr3.setState("FluxFingers")

#Drop first 50 bytes of keystream
drop = lfsr3.getBytes(50)

stage1key = lfsr3.getBytes(384)
stage1 = xor_string(encrypted_stage1, stage1key)

binary_with_stage1 = binary.replace(encrypted_stage1, stage1)

#stage1 operations

md5sum = hashlib.md5()
md5sum.update(binary_with_stage1[0x400:0x400+0x15b0])
md5sum.update(binary_with_stage1[0x1A86:0x1A86+0x97A])

print "MD5 integrity check :", md5sum.hexdigest()
#reset all states (int3 eax=1)
lfsr3 = LFSR_process3(LFSR_process1(), LFSR_process2())
#use md5 for process3 state
lfsr3.setState(md5sum.digest())

#Drop first 60 bytes of keystream
lfsr3.getBytes(60)

stage2key = lfsr3.getBytes(214)
stage2 = xor_string(encrypted_stage2, stage2key)

#stage2 operations
#reset LFSR3 state with decrypted stage2 instructions
lfsr3.setState(stage2[:10])

passkey = lfsr3.getBytes(len(encrypted_password))

#Ring3?! Lol! Joanna did Ring-1!
print "Decrypted password :", xor_string(encrypted_password, passkey)

That's it for the seamonster, thanks again to FluxFingers for this great CTF.