Log in

View Full Version : Fighting Oreans' VM (code virtualizer flavour)


_g_
August 19th, 2008, 10:00
If you don't know what code virtualizer is, or how it works, you should read this first:
http://rapidshare.com/files/16968098/Inside_Code_Virtualizer.rar
(Inside Code Virtualizer by scherzo)

Now, as you probably already know from the paper by scherzo, one possible way to recover virtualized code is to identify each mutated handler (find the corresponding non-mutated version). After this is done, we can trace virtual opcodes and "decompile" them to VM instructions. Having "clean" decompiled output, we can translate it to x86 assembly. I consider the last step to be a simple "find and replace" job with flex/yacc.

The problem is, oreans' vm engine can be a bitch. Consider this piece of code:

Code:
push ebx
mov ebx 0F06h
inc ebx
shr ebx 15h
push ecx
mov ecx 6156h
xor ebx ecx
pop ecx
add ebx 4114h
shl ebx 7
push ecx
mov ecx 51351Ch
xor ebx ecx
pop ecx
add ebx edi
mov eax [ebx]
pop ebx
push 67E0h
mov [esp] edx
mov edx 1
and eax edx
mov edx [esp]
push edx
mov edx esp
add edx 4
add edx 4
xchg edx [esp]
pop esp
or eax eax
push eax
mov eax 3B02h
not eax
push ecx
mov ecx 0FFFFC3FFh
sub eax ecx
pop ecx
and [edi+1Ch] eax
mov eax [esp]
add esp 4
push 3328h
mov [esp] ebx
mov ebx [esp]
push ecx
mov ecx esp
add ecx 4
add ecx 4
xchg ecx [esp]
pop esp

Can you tell what it does? Me neither, so let's try to deobfuscate this crap.

It turns out that simple strategies like constant folding (http://en.wikipedia.org/wiki/Constant_folding), dead code elimination (http://en.wikipedia.org/wiki/Dead_code), peephole optimisation (find and replace :P) plus some stack cleaning suffice to recover obfuscated code:

Code:

NEW = 47, OLD = 0, -47 lines removed
################################ NEXT ROUND ###########################
################################ original
push ebx
mov ebx 00000f06
inc ebx
shr ebx 00000015
push ecx
mov ecx 00006156
xor ebx ecx
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
mov ecx 0051351c
xor ebx ecx
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
mov edx 00000001
and eax edx
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax 00003b02
not eax
push ecx
mov ecx ffffc3ff
sub eax ecx
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after constant propagation and folding
push ebx
mov ebx 00000000
push ecx
mov ecx 00006156
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
mov ecx 0051351c
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
mov edx 00000001
and eax 00000001
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax ffffc4fd
push ecx
mov ecx ffffc3ff
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after dead code elimination
push ebx
mov ebx 00000000
push ecx
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
and eax 00000001
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax ffffc4fd
push ecx
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after peephole optimisation
push ebx
mov ebx 00000000
push ecx
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push edx
and eax 00000001
pop edx
or eax eax
push eax
mov eax ffffc4fd
push ecx
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
pop eax
push ebx
pop ebx
################################ after stack cleaning
mov ebx 00000000
xor ebx 00006156
add ebx 00004114
shl ebx 00000007
xor ebx 0051351c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax ffffc4fd
sub eax ffffc3ff
and [edi 0000001c ] eax
NEW = 11, OLD = 47, 36 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 00000000
xor ebx 00006156
add ebx 00004114
shl ebx 00000007
xor ebx 0051351c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax ffffc4fd
sub eax ffffc3ff
and [edi 0000001c ] eax
################################ after constant propagation and folding
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax 000000fe
and [edi 0000001c ] 000000fe
################################ after dead code elimination
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after peephole optimisation
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after stack cleaning
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
NEW = 5, OLD = 11, 6 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after constant propagation and folding
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after dead code elimination
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after peephole optimisation
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after stack cleaning
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe

Well almost . Above trash is the verbose output of my little "cleaner" tool. Cleaner is usable, it'll give nice results for most of included code samples. In handlers.clean folder (see link at bottom) there are nonmutated versions of CV handlers. After deobfuscation, few heuristics can be applied to match deobfuscated and clean versions: edit distance / rare instruction matching (for example rol, ror, rcr are rare and show up only in one handler).

The problem is, I got bored with all of this, so if anyone would like to help, I will be more than happy

Here is the code:
http://www.orange-bat.com/oreans.rar

compile with make, will work without problems under cygwin. it should work under linux. to use rip_handlers.py you will need idapython.

There are some bugs in my code, beware .

Arcane
August 19th, 2008, 10:44
cool stuff mate , ive been conceptualizing a similar concept..but havent gotten anything done yet gj! and props

Sab
August 19th, 2008, 21:13
thx. very welcome to see new code here, especially on fun things like this.

_g_
August 20th, 2008, 11:28
huh man, I don't know how to comment on this.

check out the wikipedia articles, they were online months before rolf published his work on vmprotect (which is very good ). do you see the analogy?

if you are accusing me of plagiarism, then yes, i am guilty -- as guilty as anyone who studies computer science.

btw. i also use simple techniques like addition and multiplication in my code. i guess it's not ok with you also?

ps. check timestamps on some files in the archive. you can believe it or not, but i was working on this for quite some time.

Nico
August 20th, 2008, 11:35
ok, let me reword it.

I was talking about the use of those techniques to attack VM/Code Virtualizer and i was saying that it would have been cool to give references at the end of your blog, to Rolf Rolles, as he was the first (to my knowledge), applying those techniques on this topic.
In computer science, people usually give references to previous work/publications.

I don't want to start a flaming fest, what you did is cool, but i think the lack of references, especially when they are very recent, weren't cool toward Rolf.

DeepBlueSea
August 20th, 2008, 12:12
Let's say i made a 3D graphics engine, and i used this technique [1] for instance. Now, do i have to reference every engine that uses this technique?

No Offence, but i knew Rolfs article and it was cool stuff.
And after reading this article here, at no point i thought that Rolfs work has to be especially referenced here.

It is quite obvious that during analysis of a VM that complicates and obfuscates instructions, that you will somehow have to find a way to counter and optimize the code for analysis purposes.
I am pretty sure that, if Rolfs article didn't exist, he would come to the same conclusion as to optimizing and simplifying the code. And he referenced Wiki-Articles to common and known techniques for this.

Rolf didn't reference anything. Nonetheless nobody would come to the idea that he "invented" optimization techniques.

And this is no science-paper, where more references are made than usually necessary (imo). This is just an interesting blog-entry, where someone shares knowledge. So he is actually GIVING us something.

[1] http://en.wikipedia.org/wiki/Portal_rendering

_g_
August 20th, 2008, 12:54
@Nico:
yes, Rolf was the first to use these techniques in order to recover virtualized code. his work is very good, that's true. but keep in mind these techniques are common knowledge to any CS student on a decent university. there is no magic. in addition, these techniques were *designed* to simplify code. this is not a new application.
i was not "inspired" by anybody in this matter (as you write in your first post, and then you "reword" it into something completely different). again, check timestamps.

for your information, i started coding all of this long time ago, when i was analysing game guard's driver. it's protected with oreans' code virtualizer. i wanted to post clean driver, with recovered code, but lost motivation and that's why i posted what i've written so far.

if anyone wants to collaborate on this, i can share more details on x86->vm code translation.

for readers interested in Rolf's work on the same subject (recovering virtualized code):

http://www.openrce.org/blog/view/1238/VMProtect,_Part_0:__Basics
http://www.openrce.org/blog/view/1239/Part_1:__Bytecode_and_IR
http://www.openrce.org/blog/view/1240/Part_2:__Introduction_to_Optimization
http://www.openrce.org/blog/view/1241/Part_3:__Optimizing_and_Compiling

Nico
August 20th, 2008, 13:44
I deleted my first post, i did not want to start a discussion like that.
Apologies to _g_, english isn't my first language, i might sound a bit bad at times.

Rolf is a very good (real life) friend of mine, and i am just being protective with my lil brother ;-)

Everyone else, cheers, life goes on, and DeepBlue, choose better and related examples next time

forgot
August 20th, 2008, 21:53
applying some compiler techniques to deobfuscate/decompile is not a new idea (dcc, hexray...decompyle).
virtualization is somehow compilation, even decompilation is compilation, IMO.
but Rolf and g public/shared the actual works , honor belongs to both of them.
sometime it's bullshit to discuss the COPYRIGHT of a discovery, like Newton and Leibnitz

rendari
September 18th, 2008, 12:18
Hmm, an interesting idea, but what I find more efficient is to take into account that each instruction in the Themida VM, once deobfuscated, is extremely simple. Therefore, if you deobfuscate the VM once you can look at the instructions & write a piece of code that will feed certain input into an unknown, and then scan its output. From the output it can try and deduce what instruction we are talking about. For example, the Themida add handler looks like this:

POP EAX
ADD DWORD PTR[ESP], EAX
PUSHFD

feed into it something like
[esp] = 111
[esp+4]=222

if output =
[esp] = (flags register)
[esp+4] = 333
EAX = 111

Then you know you have the add instruction. Your code marks this handler as an ADD handler and goes on to the next one.

Of course, this technique wouldn't work for every Themida instruction, since some of them are simply too weird. Furthermore, you will have to manually deobfuscate the LODs routines executed at the end of each instruction, which calculate which instruction you should jump to next. But that's simple stuff (20-30 min.) if you're used to deobfuscating Themida. And don't forget about the LODs BYTE, WORD, and DWORD routines when Themida reads and decodes data at [esi].

Another thing you can do is simply scan for the handlers that are unique. For example the POP DWORD PTR[EDX] only appears in one handler,

SUB [ESP], EAX
PUSHFD

only appears in one handler... so you can also automatically ID handlers that way too. At the end of the day there are lots of possibilities. I imagine with enough time and willpower some cracker out there might even code an automated or 80% automated generic rebuilder for the Themida VM. Or maybe I'm just too enthusiastic about the idea and am talking out of my ass.

_g_
September 18th, 2008, 15:53
you don't have to deobfuscate handlers to get clean implementations, you can just rip clean handlers from some test app you protected using weakest Themida options (lowest vm complexity etc). but even having these clean handlers, you can't just use "context signatures" for each of them.

the problem is, if vm instruction takes parameters using lods(b|w|d) (about 20% of them do), you would have to encrypt these parameters first, and then feed them to the handler to watch context modifications. for lodsb/lodsw you can bruteforce, but not for lodsd (10% of all handlers). ofc you can identify them by hand, but i assume that spoils the whole idea. so one way or another, you have to code deobfuscator for 100% automation.

you mixed control flow recovery with handler identification. simply following the VM handlers execution would explore only one code path, thus revealing only part of real code. recovering all paths requires backtracking at branches plus keeping track of changing decryption key (ebx) for VM opcodes. i don't know if that takes 20-30 min.

context signatures aren't better or worse than code deobfuscation, these two methods are incomparable: there exist situations when first is better than second and vice versa.

rendari
November 1st, 2008, 17:17
Heya,

just a quick update to the thread. I found that you can automate recovery of most handlers for mathematical/logical operations (the "Normal handlers" as scherzo classifies them) simply by looking at the last bytes of the handler and deducing from there. For example, the CMP handler ends with:

Code:

007B6BC8 3BC8 CMP ECX,EAX
007B6BCA 9C PUSHFD


And if we look at the unprotected VM we know that the CMP handler unobfuscated is:

Code:

007B6BC6 58 POP EAX
007B6BC7 59 POP ECX
007B6BC8 3BC8 CMP ECX,EAX
007B6BCA 9C PUSHFD


So replace all the junk obfuscation in the handler that comes before CMP ECX, EAX with
Code:

007B6BC6 58 POP EAX
007B6BC7 59 POP ECX


...and boom, deobfuscated handler. So I sat down for 5 hours and coded a little utility that first cleans up the JMPs/multibranch jumps inserted by the Themida VM as obfuscation, and then does pattern searching for known ends of handlers, and as soon as it finds the pattern it is looking for it cleans up the gunk at the beginning of the handler and replaces it with the deobfuscated code that I ripped from an unprotected ver of Themida VM. The method took a lot of tweaking and a lot of time, but I've tested it on a target now and it appears to run. I replaced the pointers to the obfuscated handlers with pointers to my own unobfuscated handlers, and the unpackme ran, so I assume I am doing the right thing here Now I am looking into how to deobfuscate the LODS handlers. As you say, there is a problem due to the parameters being encrypted/decrypted. However, I'm planning to go around this by ripping the decryption routine for each handler and transcribing them in another place in memory. Then I do a call to the decryption after the LODS, and then let it continue with the rest of the handler code, which is unencrypted. Here, lemme show you an example.

Obfuscated:
Code:

00772D4B Main LODS BYTE PTR DS:[ESI] EAX=00000065, ESI=008F0F82
00772D4C Main PUSH EBX
00772D4D Main MOV BL,0B6 EBX=073369B6
00772D4F Main PUSH EDX
00772D50 Main MOV DL,63 EDX=00772A63
00772D52 Main AND DL,62 EDX=00772A62
00772D55 Main SUB DL,1A EDX=00772A48
00772D58 Main SUB AL,DL EAX=0000001D
00772D5A Main POP EDX EDX=00772A3C
00772D5B Main ADD AL,0B5 EAX=000000D2
00772D5D Main SUB AL,BL EAX=0000001C
00772D5F Main PUSH DX
00772D61 Main MOV DL,0B5 EDX=00772AB5
00772D63 Main JMP new_dump.0078130D
0078130D Main SUB AL,DL EAX=00000067
0078130F Main POP DX EDX=00772A3C
00781311 Main ADD AL,48 EAX=000000AF
00781313 Main POP EBX EBX=073369F1
00781314 Main SUB AL,36 EAX=00000079
00781316 Main SUB AL,BL EAX=00000088
00781318 Main ADD AL,36 EAX=000000BE
0078131A Main PUSH ECX
0078131B Main SUB ESP,4
0078131E Main MOV DWORD PTR SS:[ESP],EAX
00781321 Main MOV AH,98 EAX=000098BE
00781323 Main MOV CH,AH ECX=00009801
00781325 Main JMP new_dump.0078720E
0078720E Main MOV EAX,DWORD PTR SS:[ESP] EAX=000000BE
00787211 Main PUSH EDI
00787212 Main MOV EDI,ESP EDI=0012FF4C
00787214 Main ADD EDI,4 EDI=0012FF50
0078721A Main ADD EDI,4 EDI=0012FF54
0078721D Main XCHG DWORD PTR SS:[ESP],EDI EDI=00772A20
00787220 Main POP ESP
00787221 Main PUSH BX
00787223 Main MOV BH,66 EBX=073366F1
00787225 Main ADD CH,BH ECX=0000FE01
00787227 Main JMP new_dump.0078071F
0078071F Main POP BX EBX=073369F1
00780721 Main XCHG CH,AL EAX=000000FE, ECX=0000BE01
00780723 Main JMP new_dump.00778B88
00778B88 Main NOT AL EAX=00000001
00778B8A Main XOR CH,AL ECX=0000BF01
00778B8C Main XOR AL,CH EAX=000000BE
00778B8E Main XOR CH,AL ECX=00000101
00778B90 Main JMP new_dump.00786B03
00786B03 Main XOR CH,0BF ECX=0000BE01
00786B06 Main PUSH DX
00786B08 Main MOV DL,0C1 EDX=00772AC1
00786B0A Main AND CH,DL ECX=00008001
00786B0C Main POP DX EDX=00772A3C
00786B0E Main AND CH,58 ECX=00000001
00786B11 Main PUSH EDX
00786B12 Main MOV DL,0BA EDX=00772ABA
00786B14 Main SHL DL,4 EDX=00772AA0
00786B17 Main NOT DL EDX=00772A5F
00786B19 Main JMP new_dump.0077D858
0077D858 Main INC DL EDX=00772A60
0077D85A Main SHR DL,1 EDX=00772A30
0077D85C Main ADD DL,86 EDX=00772AB6
0077D85F Main ADD CH,DL ECX=0000B601
0077D861 Main POP EDX EDX=00772A3C
0077D862 Main JMP new_dump.00785C97
00785C97 Main PUSH BX
00785C99 Main MOV BH,0C5 EBX=0733C5F1
00785C9B Main SUB AL,BH EAX=000000F9
00785C9D Main POP BX EBX=073369F1
00785C9F Main ADD AL,CH EAX=000000AF
00785CA1 Main JMP new_dump.0077E44F
0077E44F Main ADD AL,0C5 EAX=00000074
0077E451 Main POP ECX ECX=00000001
0077E452 Main PUSH ECX
0077E453 Main MOV ECX,ESP ECX=0012FF54
0077E455 Main JMP new_dump.00777569
00777569 Main ADD ECX,4 ECX=0012FF58
0077756F Main SUB ECX,4 ECX=0012FF54
00777572 Main XCHG DWORD PTR SS:[ESP],ECX ECX=00000001
00777575 Main MOV ESP,DWORD PTR SS:[ESP]
00777578 Main MOV DWORD PTR SS:[ESP],ECX
0077757B Main MOV CH,0C ECX=00000C01
0077757D Main NOT CH ECX=0000F301
0077757F Main JMP new_dump.0078807A
0078807A Main DEC CH ECX=0000F201
0078807C Main OR CH,1A ECX=0000FA01
0078807F Main JMP new_dump.0078408E
0078408E Main JLE new_dump.0077BBFD
0077BBFD Main INC CH ECX=0000FB01
0077BBFF Main JNS new_dump.00777AA4
0077BC05 Main XOR CH,52 ECX=0000A901
0077BC08 Main SHL CH,7 ECX=00008001
0077BC0B Main SUB ESP,4
0077BC0E Main MOV DWORD PTR SS:[ESP],EDX
0077BC11 Main MOV DL,0A0 EDX=00772AA0
0077BC13 Main JMP new_dump.00781FDC
00781FDC Main SUB CH,DL ECX=0000E001
00781FDE Main JMP new_dump.0077F4A4
0077F4A4 Main POP EDX EDX=00772A3C
0077F4A5 Main ADD AL,0F3 EAX=00000067
0077F4A7 Main PUSH EDX
0077F4A8 Main JMP new_dump.0077656E
0077656E Main MOV DL,0E5 EDX=00772AE5
00776570 Main JMP new_dump.0077B7B4
0077B7B4 Main DEC DL EDX=00772AE4
0077B7B6 Main NEG DL EDX=00772A1C
0077B7B8 Main JMP new_dump.00785CB4
00785CB4 Main SUB DL,0A9 EDX=00772A73
00785CB7 Main ADD DL,0C8 EDX=00772A3B
00785CBA Main JMP new_dump.00787B12
00787B12 Main SUB AL,96 EAX=000000D1
00787B14 Main SUB AL,DL EAX=00000096
00787B16 Main JMP new_dump.00784939
00784939 Main ADD AL,96 EAX=0000002C
0078493B Main POP EDX EDX=00772A3C
0078493C Main SUB AL,CH EAX=0000004C
0078493E Main PUSH ECX
0078493F Main JMP new_dump.00780288
00780288 Main MOV CH,94 ECX=00009401
0078028A Main JMP new_dump.0077FDA7
0077FDA7 Main PUSH EAX
0077FDA8 Main MOV AH,0E1 EAX=0000E14C
0077FDAA Main AND AH,46 EAX=0000404C
0077FDAD Main SHL AH,6 EAX=0000004C
0077FDB0 Main JMP new_dump.0078474C
0078474C Main XOR AH,59 EAX=0000594C
0078474F Main SUB CH,AH ECX=00003B01
00784751 Main POP EAX EAX=0000004C
00784752 Main ADD AL,CH EAX=00000087
00784754 Main JMP new_dump.0077EB57
0077EB57 Main POP ECX ECX=0000E001
0077EB58 Main PUSH ECX
0077EB59 Main PUSH EBX
0077EB5A Main MOV BL,0F3 EBX=073369F3
0077EB5C Main MOV CL,BL ECX=0000E0F3
0077EB5E Main PUSH DWORD PTR SS:[ESP]
0077EB61 Main POP EBX EBX=073369F1
0077EB62 Main ADD ESP,4
0077EB68 Main JMP new_dump.0077C968
0077C968 Main ADD AL,0BB EAX=00000042
0077C96A Main SUB AL,CL EAX=0000004F
0077C96C Main PUSH CX
0077C96E Main MOV CH,0BB ECX=0000BBF3
0077C970 Main SUB AL,CH EAX=00000094
0077C972 Main POP CX ECX=0000E0F3
0077C974 Main MOV ECX,DWORD PTR SS:[ESP] ECX=0000E001
0077C977 Main ADD ESP,4
0077C97D Main POP ECX ECX=00000001
0077C97E Main PUSH ECX
0077C97F Main PUSH EBX
0077C980 Main MOV BL,54 EBX=07336954
0077C982 Main PUSH EAX
0077C983 Main PUSH EBX
0077C984 Main MOV BL,1D EBX=0733691D
0077C986 Main NOT BL EBX=073369E2
0077C988 Main SUB BL,74 EBX=0733696E
0077C98B Main JL new_dump.00780FA8
00780FA8 Main PUSH ECX
00780FA9 Main MOV CL,1C ECX=0000001C
00780FAB Main SHL CL,7 ECX=00000000
00780FAE Main JMP new_dump.00781A12
00781A12 Main ADD CL,0E9 ECX=000000E9
00781A15 Main SUB CL,2B ECX=000000BE
00781A18 Main SUB CL,7 ECX=000000B7
00781A1B Main SUB BL,CL EBX=073369B7
00781A1D Main POP ECX ECX=00000001
00781A1E Main MOV AH,BL EAX=0000B794
00781A20 Main MOV EBX,DWORD PTR SS:[ESP] EBX=07336954
00781A23 Main ADD ESP,4
00781A26 Main XOR AH,7D EAX=0000CA94
00781A29 Main PUSH EDX
00781A2A Main JMP new_dump.00773CAE
00773CAE Main MOV DH,AH EDX=0077CA3C
00773CB0 Main MOV CL,DH ECX=000000CA
00773CB2 Main POP EDX EDX=00772A3C
00773CB3 Main MOV EAX,DWORD PTR SS:[ESP] EAX=00000094
00773CB6 Main ADD ESP,4
00773CB9 Main JMP new_dump.0077A8CD
0077A8CD Main XOR CL,BL ECX=0000009E
0077A8CF Main JMP new_dump.007784D6
007784D6 Main MOV EBX,DWORD PTR SS:[ESP] EBX=073369F1
007784D9 Main ADD ESP,4
007784DC Main JMP new_dump.0078486C
0078486C Main PUSH EDX
0078486D Main MOV DH,80 EDX=0077803C
0078486F Main INC DH EDX=0077813C
00784871 Main JL new_dump.0077EFB4
0077EFB4 Main PUSH BX
0077EFB6 Main MOV BL,0B6 EBX=073369B6
0077EFB8 Main OR DH,BL EDX=0077B73C
0077EFBA Main POP BX EBX=073369F1
0077EFBC Main SHL DH,3 EDX=0077B83C
0077EFBF Main JNZ new_dump.007845B4
007845B4 Main INC DH EDX=0077B93C
007845B6 Main PUSH EBX
007845B7 Main JMP new_dump.0077DAC4
0077DAC4 Main MOV BL,2E EBX=0733692E
0077DAC6 Main XOR DH,BL EDX=0077973C
0077DAC8 Main JMP new_dump.007852D1
007852D1 Main POP EBX EBX=073369F1
007852D2 Main ADD AL,DH EAX=0000002B
007852D4 Main POP EDX EDX=00772A3C
007852D5 Main ADD AL,0F3 EAX=0000001E
007852D7 Main JMP new_dump.007735D8
007735D8 Main ADD AL,CL EAX=000000BC
007735DA Main SUB AL,0F3 EAX=000000C9
007735DC Main PUSH EBX
007735DD Main PUSH ECX
007735DE Main MOV CL,0BA ECX=000000BA
007735E0 Main JMP new_dump.007852F2
007852F2 Main INC CL ECX=000000BB
007852F4 Main NEG CL ECX=00000045
007852F6 Main JB new_dump.0077DFC7
0077DFC7 Main SUB CL,1 ECX=00000044
0077DFCA Main SUB CL,0AD ECX=00000097
0077DFCD Main MOV BL,CL EBX=07336997
0077DFCF Main POP ECX ECX=0000009E
0077DFD0 Main SUB AL,BL EAX=00000032
0077DFD2 Main POP EBX EBX=073369F1
0077DFD3 Main POP ECX ECX=00000001
0077DFD4 Main PUSH EDX
0077DFD5 Main PUSH ECX
0077DFD6 Main JMP new_dump.00774745
00774745 Main PUSH EBX
00774746 Main PUSH EDX
00774747 Main JMP new_dump.0077DA77
0077DA77 Main PUSH EAX
0077DA78 Main MOV AL,10 EAX=00000010
0077DA7A Main MOV DL,AL EDX=00772A10
0077DA7C Main POP EAX EAX=00000032
0077DA7D Main MOV BL,DL EBX=07336910
0077DA7F Main POP EDX EDX=00772A3C
0077DA80 Main PUSH EBX
0077DA81 Main MOV BL,0DB EBX=073369DB
0077DA83 Main MOV CL,62 ECX=00000062
0077DA85 Main XOR CL,BL ECX=000000B9
0077DA87 Main POP EBX EBX=07336910
0077DA88 Main XOR CL,BL ECX=000000A9
0077DA8A Main MOV EBX,DWORD PTR SS:[ESP] EBX=073369F1
0077DA8D Main JMP new_dump.0077D78C
0077D78C Main ADD ESP,4
0077D78F Main JMP new_dump.0077E286
0077E286 Main PUSH EAX
0077E287 Main MOV AH,CL EAX=0000A932
0077E289 Main PUSH ECX
0077E28A Main JMP new_dump.00779F80
00779F80 Main MOV CL,AH
00779F82 Main MOV DH,CL EDX=0077A93C
00779F84 Main JMP new_dump.00777BA3
00777BA3 Main POP ECX
00777BA4 Main POP EAX EAX=00000032
00777BA5 Main POP ECX ECX=00000001
00777BA6 Main ADD BL,34 EBX=07336925
00777BA9 Main PUSH ECX
00777BAA Main MOV CH,0DE ECX=0000DE01
00777BAC Main JMP new_dump.00774773
00774773 Main ADD CH,30 ECX=00000E01
00774776 Main NEG CH ECX=0000F201
00774778 Main JMP new_dump.0077E3EE
0077E3EE Main PUSH BX
0077E3F0 Main MOV BL,0C EBX=0733690C
0077E3F2 Main JMP new_dump.0077DAEC
0077DAEC Main XOR CH,BL ECX=0000FE01
0077DAEE Main POP BX EBX=07336925
0077DAF0 Main SUB BL,3F EBX=073369E6
0077DAF3 Main ADD BL,0BB EBX=073369A1
0077DAF6 Main ADD BL,CH EBX=0733699F
0077DAF8 Main SUB BL,0BB EBX=073369E4
0077DAFB Main ADD BL,3F EBX=07336923
0077DAFE Main JMP new_dump.00785CD1
00785CD1 Main POP ECX ECX=00000001
00785CD2 Main SUB BL,DH EBX=0733697A
00785CD4 Main JMP new_dump.007859F3
007859F3 Main PUSH EAX
007859F4 Main MOV AL,0BB EAX=000000BB
007859F6 Main ADD AL,43 EAX=000000FE
007859F8 Main SUB BL,AL EBX=0733697C
007859FA Main POP EAX EAX=00000032
007859FB Main JMP new_dump.0078506F
0078506F Main SUB BL,34 EBX=07336948
00785072 Main PUSH DWORD PTR SS:[ESP]
00785075 Main JMP new_dump.00778F6F
00778F6F Main POP EDX EDX=00772A3C
00778F70 Main ADD ESP,4
00778F73 Main SUB BL,AL EBX=07336916
00778F75 Main PUSH ECX
00778F76 Main JMP new_dump.00786A63
00786A63 Main MOV CL,1
00786A65 Main SUB CL,68 ECX=00000099
00786A68 Main AND CL,5A ECX=00000018
00786A6B Main XOR CL,0BA ECX=000000A2
00786A6E Main INC CL ECX=000000A3
00786A70 Main SUB CL,0FA ECX=000000A9
00786A73 Main JMP new_dump.0077ACB7
0077ACB7 Main ADD BL,CL EBX=073369BF
0077ACB9 Main POP ECX ECX=00000001
0077ACBA Main MOVZX EAX,AL
0077ACBD Main JMP DWORD PTR DS:[EDI+EAX*4]


So then, rip out 00772D4C - 0077ACB9 and put it into another location in memory, say 01A00000. Append a C3 ( ret ) to the end of the ripped routine at 01A00000, and then replace 00772D4C - 0077ACB9 with NOPS. Finally, under the LODS assemble a CALL 01A00000. That way, the routine should look like this after processing:

Code:

00772D4B Main LODS BYTE PTR DS:[ESI] EAX=00000065, ESI=008F0F82
0077DFD0 Main CALL 01A00000
<nops>
0077ACBA Main MOVZX EAX,AL
0077ACBD Main JMP DWORD PTR DS:[EDI+EAX*4]


And that looks a lot cleaner, no?

This is what I plan on doing. Haven't done the actual code yet, no time since I just moved into college dorms and am now busy exploring the differences in between male and female anatomy But please comment, I am still looking for better way to do handle the LODS deobfuscation...

-rendari

_g_
November 4th, 2008, 06:03
can you post your code?

sorry, but i don't see how moving code from one place to another helps? can you explain what do we get by doing this?

rendari
November 4th, 2008, 12:46
Ok, I can see that I have confused you. Sorry. And yet, I still see where you are coming from. I mean, I'm not doing any deobfuscation at all, I'm just copy pasting! Nothing glorious there. But my objective is to first make the LODS handlers simpler to read and easier to process for me and my program. This might not be deobfuscation, but its enough for me. After I have processed all the LODS handlers in this fashion, so that they are 6-7 lines except for the conditional JMP handlers, I will proceed to install hooks in the VM'd routine and let it run. The hooks that I installed will "compile" the handlers into normal code, so that instead of having a VM Loop, the handlers will execute one under another like normal code. You will then get a routine that is functionally equivalent to the VM Loop, except it is executed normally instead of a VM loop. It will look like this:

Code:

02804B86 E8 04FCFFFF CALL handlers.0280478F ; call processor routine, which decides which handler is next, and mutates ebx
02804B8B 90 NOP
02804B8C AC LODS BYTE PTR DS:[ESI]
02804B8D E8 6EB42BFF CALL handlers.01AC0000
02804B92 0FB6C0 MOVZX EAX,AL
02804B95 8D0487 LEA EAX,DWORD PTR DS:[EDI+EAX*4]
02804B98 68 2F320000 PUSH 322F
02804B9D 890424 MOV DWORD PTR SS:[ESP],EAX
02804BA0 90 NOP
02804BA1 E8 E9FBFFFF CALL handlers.0280478F
02804BA6 90 NOP
02804BA7 5A POP EDX
02804BA8 90 NOP
02804BA9 E8 E1FBFFFF CALL handlers.0280478F
02804BAE 90 NOP
02804BAF 8F02 POP DWORD PTR DS:[EDX]
02804BB1 90 NOP
02804BB2 E8 D8FBFFFF CALL handlers.0280478F
02804BB7 90 NOP


Here is some "compiled" code. It's functionally equivalent to executing it inside the VM loop, except it's executed normally, just like I said. Now I can take this code and further process it, i.e decrypt all the parameters to the LODS routines through static analysis. After that is done, we can start analyzing the code with pattern matching and start restoring it back to its x86 equivalent I suppose.

What I said is kind of confusing. I'm afraid I don't know how to clearly express myself since I have no comp sci education and tend to make up or redefine terms as I go along. Still, I hope the general premise is clear.

_g_
November 4th, 2008, 16:14
ok, i think i understand your method.

i see 2 problems:
1. branching support
2. "decrypt all the parameters to the LODS routines through static analysis"

1)
with multiple (and nested) if/else/calls tracing can get nasty.

2)
how do you see this static analysis?

rendari
November 4th, 2008, 16:25
Quote:

1. branching support


Multibranch technology I have already taken care of using Ollyscript. Like I said, I have already deobfuscated all the logical/mathematical handlers, and cleared multibranch obfuscation in all handlers. I then replaced the pointers to the old handlers with pointers to my new deobfuscated ones, and the target ran fine. Therefore, I think it safe to assume that I am on the right track.

Quote:

1)
with multiple if/else/calls tracing can get nasty.


Indeed, you are correct. Still, I think I can modify my code when the time comes to take this into account. I already have a couple of ideas kicking around in my head. But for now I am focusing on reconstructing simpler chunks of the VM which have only a couple of conditional JMPS. I can go to reconstructing loops/if statements later

Quote:

2)
how do you see this static analysis?


After I have "compiled" the code, it should be a trivial matter to code a tracer of sorts that doesn't actually execute the code, but only simulates changes to EAX/EBX/ESI. I will then tell it to "trace" up to each LODS, let it read from [ESI], let it decrypt EAX using the decrypt routine ripped to another chunk in memory and called by a call after each LODS, and overwrite the LODS/CALL with a MOV EAX, decryptedParameter. After the "tracer" is done, the code will no longer depend upon EBX/ESI, since all the parameters are decrypted. Of course, there might be some problems in this part... but I'll deal with them when I come to them. For now I'm just focusing on cleaning up the LODS handlers and doing the actual "compilation". The point is this "tracer" doing the decryption won't be an actual tracer in the classical sense as it won't be executing the code, just simulating its execution. So I think it falls into static analysis, correct? Dunno.

scherzo
January 13th, 2009, 06:42
Hi all!!!

Very, really very nice discussion here!!! Congratulations to both of you

Wow... it has been a long time since I showed up on any rce board... well... a lot of things have changed in my life... even the country where i live hehe... well these things kept me from doing any RE and also I lost some interest...

Well... this is an RCE blog so let's go! 1 year ago I was still enthusiastic about oreans stuff and I decided to understand Themida VM. And I was so impressed how simple it was... 1 month working on code virtualizer only to understand it and only 1 week on themida (and almost all its protections not only vm...) and I thought about writing something like "Inside Themida"... lucky me or not I chose to write a full themida unpacker...

Well, my target was Themida 1.9.9.0... and let me just remind you that there isn't any unpacked version of it on the net... So, in the project I did the conception of the entire thing and what exactly I did was giving a quite interesting name to the project hehehe, gathering of information from PE header, an exact oep finder (really interesting this part...), ring-0 driver to bypass oreans detection and kinda finished the debugger engine. Also I worked in the vm decoder and the most interesting part is that I wrote code to detect all the themida handlers and it worked perfectly with a lot of examples.... sorry guys, didn't tell you before because well, I'm really far away from my home now... so I stopped the project and forgot it... until now!!!

So I think i'll learn a lot with these things you are doing.. give me just some time to "relearn" (don't know if it exists) the things about themida and to learn all the things you've done. Actually, if I remember well, there is a weak point in that thing of transforming a simple pop eax into 120 lines of code. If you want I can share the old code with you... but i really don't understand it well now... just need some time hehe...

OO and you have no idea how easy it is to analyse themida using themida itself as the target. If you need some info I'll be glad to give you.

Well.... just wrote so many useless things.... but I'm definitely BACK

See you,
Au revoir, à bientôt

scherzo

edit: damn... forgot to say.. happy new year everyone!!!

rendari
January 14th, 2009, 14:11
Nice to see you scherzo. Glad to see you did a lot of work on Themida, personally I found making an ollyscript to automate the Themida unpacking much easier than a full blown unpacker. And writing a ring 0 driver to bypass antidebug? lol, overkill. The hard part of course is automating the restoration of the VM. But, I have something in the works for that too. Progress is slow, but I'm at the point where it's guessing almost all the handlers automatically. Then I just gotta compile the code, remove the junk opcodes, and translate it back into x86... ugh. At this rate that'll take another couple of months, but oh well

Anyways, if you want to discuss Themida further just post here or PM me. It's always fun to talk

And Happy New Year!

-rendari

scherzo
January 14th, 2009, 18:58
Hi rendari!!

Nice you are coding things too!!! I'm having hard times reorganizing old things and trying to upgrade my knowledge of themida vm to latest leaked custom version....

Ooo just found something interesting in a folder.... don't know if it is of interest for you... well it's quite interesting to see the results...

There's four exes... all of them packed with themida 1.9.1.0.
1 - Original packed file with no protections except VM
2 - Same Packed file without the "mutation" engine (That thing of transforming instructions like 'mov' for example using 'push', 'pop'; dead code and so on)
3 - Same Packed file without that crazy thing of inserting fucking idiots random jumps in handlers
4 - Same Packed file without mutation and jumps

Look at the size of the files also...
Ahh.... to produce these files I just did simple inline patching during themida protection procedure.

scherzo

P.S.: Sorry for rapidshare hosting... file too big to upload here

http://rapidshare.com/files/183379171/Example_files.rar
pass: ThemidaVM

rendari
January 14th, 2009, 19:11
Hi scherzo. Thanks for the files, I'll check em out. In the meantime, check your pm for some stuff I sent you.
new_dump_.exe - dump of a themida unpackme
handlers3.exe - dump with handlers deobfuscated, and stored in last section, and Themida VM redirected to these deobfuscated handlers.

To see how I did that, run the included Ollyscript on new_dump_.exe. It's 100% automatic and should produce an exe equivalent of handlers3.exe. Note: the lods handlers are not yet deobfuscated; I'm writing a deobfuscator for them as we speak. I'll go check out the exes you uploaded now. Hope you like what I sent...

-rendari

(Please wait 5 min for upload to finish)

PS The VM Macro uses a RISC vm in the unpackme you sent me? Not sure, but:

Code:

004F9F86 68 E466EC4D PUSH 4DEC66E4
004F9F8B ^ E9 21FDFFFF JMP Original.004F9CB1

004F9CB1 6A 00 PUSH 0
004F9CB3 9C PUSHFD
004F9CB4 60 PUSHAD


Enters VM here:

Code:

002807D0 FFB7 08040000 PUSH DWORD PTR DS:[EDI+408] ; Original.004F9D1F
002807D6 52 PUSH EDX
002807D7 BA 3A730920 MOV EDX,2009733A
002807DC 295424 04 SUB DWORD PTR SS:[ESP+4],EDX
002807E0 E9 3A000000 JMP 0028081F


Looks like RISC :S

I'm studying CISC variants (they're more popular :P )

scherzo
January 16th, 2009, 05:53
Yep, that's RISC-64 what I was studying...
But if you want examples files like that of CISC VM, just tell me the level of protection you want... now I can produce 2.0.5.0 version of these files.

scherzo

rendari
January 16th, 2009, 13:02
I'm good, thanks. Managed to write a script that removes all the junk JMPs

_g_
January 17th, 2009, 15:50
This should shed more light on how CV translates code step by step.

1. Disasm opcodes
2. Clean the disasm + add labels
3. Convert the code to a simple stack machine assembler
4. Encrypt + add some shit here and there

Step 4 is missing.

See long.txt for example.

http://orange-bat.com/code/rev.rar

Lithium
May 24th, 2009, 09:51
Sorry For The Bump

But has anyone here

Changed Themida VM To X86 Code successfully ?

kNiGhT
March 21st, 2010, 12:18
Quote:
[Originally Posted by "_g_"]Here is the code:
http://www.orange-bat.com/oreans.rar


Has somebody the "cleaner" tool from _g_ because his site is down

mak
March 26th, 2010, 14:30
=) http://www.onlinedisk.ru/file/390105/ ("http://www.onlinedisk.ru/file/390105/")

Elester
February 12th, 2011, 04:52
Sorry for answering to old thread, but could please someone reupload the attachment?
The link above is not working for me.
Thanks

mak
February 12th, 2011, 06:09
http://rghost.ru/4312458/private/f83388f3b5612f161784a0efb1b92049 ("http://rghost.ru/4312458/private/f83388f3b5612f161784a0efb1b92049")

deepzero
May 18th, 2012, 14:58
sorry to bother everyone once again with this thread....but i`d love to take a look at that cleaner tool ("oreans.rar"), too.
Does anyone still have it?

mak
May 18th, 2012, 18:01
Quote:
[Originally Posted by deepzero;92565]sorry to bother everyone once again with this thread....but i`d love to take a look at that cleaner tool ("oreans.rar", too.
Does anyone still have it?




http://depositfiles.com/files/2jo7fom9r

deepzero
May 19th, 2012, 03:40
thanks!
i`ll attach it here for future reference....


you dont happen to have "http://orange-bat.com/code/rev.rar"?

mak
May 19th, 2012, 05:13
Quote:
[Originally Posted by deepzero;92567]thanks!
i`ll attach it here or future reference....


you dont happen to have "http://orange-bat.com/code/rev.rar"?


25942593

There is a similar project for VMProtect. The exact same technique is used, however in an extended sense. You probably already know it from tuts4you. Attached are two archives, if anyone needs specific techniques and methods for RE

deepzero
May 19th, 2012, 10:25
great, thanks again.