Code we don’t want to have in our C Code
All register Saving and stack initialization is not ported to our C Code, because the compiler will do it later for us in our function.
Loading the saved register is ignored too.
Examples from our Source Code from part 2.
// Initialize Stack frame .text:0000000000010630 stdu %sp, -0x50(%sp) // Saving Register .text:0000000000010634 std %r31, 0x48(%sp)
Detecting Saving Register
First of all, all saved register are loaded before the blr instructions again… Else saving is senseless.
Second as you see in our example r31 is pushed on the stack without modifing it at all.
How do we start?
Since we always return r3 we can start of returning a 64-bit unsigned value, which get more precisly while we reverse but for not this is enough.
unsigned long long reversingFunction()
{
// It always returns r3 it is from the spec of our CPU
return r3;
}
Finding count of parameters?
In our CPU parameters are saved in r3 – rN and mostly saved in other registers at the start of the function.
If we check our code we will see this.
// Initialize Stack frame .text:0000000000010630 stdu %sp, -0x50(%sp) // Saving Register .text:0000000000010634 std %r31, 0x48(%sp) // stackpointer is copied to r31 we have to know that for the future but it is nothing special .text:0000000000010638 mr %r31, %sp // First parameter is saved in r0 .text:000000000001063C mr %r0, %r3 // Second parameter is saved in r9 .text:0000000000010640 mr %r9, %r4 // Third parameter is saved in r11 .text:0000000000010644 mr %r11, %r5
So what we know for now looks like this
#define uint64_t unsigned long long
uint64_t reversingFunction(uint64_t firstParameter, uint64_t secondParameter, uint64_t thirdParameter)
{
r0 = firstParameter;
r9 = secondParameter;
r11 = thirdParameter;
// It always returns r3 it is from the spec of our CPU
return r3;
}
Guessing and sometimes it is wrong but mostly it is right (If not this is no stone)
I let the first part as it is and won’t repeat copying it here again. Since I try to not jump that much around in the code. It will be for now okay todo so the code here just comes after the top one.
.text:0000000000010648 stw %r0, 0x80(%r31) .text:000000000001064C stw %r9, 0x88(%r31) .text:0000000000010650 stw %r11, 0x90(%r31) .text:0000000000010654 lwz %r0, 0x90(%r31) .text:0000000000010658 stw %r0, 0x38(%r31) .text:000000000001065C lwz %r0, 0x80(%r31) .text:0000000000010660 stw %r0, 0x34(%r31) .text:0000000000010664 lwz %r0, 0x88(%r31) .text:0000000000010668 stw %r0, 0x30(%r31)
Actually they all use word size instead of double word size values. r0, r9 and r11 are used earlier to save the parameter. So we guess it is used unsigned int instead of long longs.
So we change our parameters to unsigned int und define local variables for the stack addresses used. Since they are not used to save register. They are local variables.
What we got is this.
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
r0 = firstParameter;
r9 = secondParameter;
r11 = thirdParameter;
//.text:0000000000010648 stw %r0, 0x80(%r31)
sp80 = r0;
//.text:000000000001064C stw %r9, 0x88(%r31)
sp88 = r9;
//.text:0000000000010650 stw %r11, 0x90(%r31)
sp90 = r11;
//.text:0000000000010654 lwz %r0, 0x90(%r31)
r0 = sp90;
//.text:0000000000010658 stw %r0, 0x38(%r31)
sp38 = r0;
//.text:000000000001065C lwz %r0, 0x80(%r31)
r0 = sp80;
//.text:0000000000010660 stw %r0, 0x34(%r31)
sp34 = r0;
//.text:0000000000010664 lwz %r0, 0x88(%r31)
r0 = sp88;
//.text:0000000000010668 stw %r0, 0x30(%r31)
sp30 = r0;
// It always returns r3 it is from the spec of our CPU
return r3;
}
The CPU can’t access memory and push it in another memory part directly it uses in our case r0 todo that. In C this is no problem. So we minimize the code.
To precisly it reads from stack saves in r0 and saves r0 to a new local variable (stack). This can be done in one line in C. I commented out the two lines that get one all the time.
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
r0 = firstParameter;
r9 = secondParameter;
r11 = thirdParameter;
sp80 = r0;
sp88 = r9;
sp90 = r11;
// r0 = sp90;
// sp38 = r0;
sp38 = sp90;
// r0 = sp80;
// sp34 = r0;
sp34 = sp80;
// r0 = sp88;
// sp30 = r0;
sp30 = sp88;
// It always returns r3 it is from the spec of our CPU
return r3;
}
So weg got now:
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
r0 = firstParameter;
r9 = secondParameter;
r11 = thirdParameter;
sp80 = r0;
sp88 = r9;
sp90 = r11;
sp38 = sp90;
sp34 = sp80;
sp30 = sp88;
// It always returns r3 it is from the spec of our CPU
return r3;
}
We can minify that again (not needed but okay).
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
r0 = firstParameter;
r9 = secondParameter;
r11 = thirdParameter;
sp34 = sp80 = r0;
sp30 = sp88 = r9;
sp38 = sp90 = r11;
// It always returns r3 it is from the spec of our CPU
return r3;
}
Since we are able to remove all register useage we do so.
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
// It always returns r3 it is from the spec of our CPU
return r3;
}
Now we managed to use no register (except return, but we don’t use that). If you look at the code we have extracted all parameter and local variables so far.
I recommend if you see such minimization options do it instant and try to use at least register as possible
so the code gets readable.
Let’s go on!
.text:000000000001066C b loc_1079C
We convert simple in:
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
// It always returns r3 it is from the spec of our CPU
return r3;
}
Now we investigate loc_1079C
.text:000000000001079C lwz %r0, 0x38(%r31) .text:00000000000107A0 cmpwi cr7, %r0, 0 .text:00000000000107A4 bgt cr7, loc_10670
Again it uses r0 just as local register because it can’t compare with a stack value directly we ignore that and use our sp38 variable. r31 is like sp it was assigned at the start we remember that and ignore that it is r31 and substitute it in our head with sp
.
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
//.text:000000000001079C lwz %r0, 0x38(%r31)
//.text:00000000000107A0 cmpwi cr7, %r0, 0
//.text:00000000000107A4 bgt cr7, loc_10670
if(sp38 > 0)
{
loc_10670:
}
loc_107A8:
// It always returns r3 it is from the spec of our CPU
return r3;
}
The Path splits
Since we have to reverse one way first we just take the shorter one here and this will fix the mysteri about the return value
.
.text:00000000000107A8 lwz %r0, 0x90(%r31) .text:00000000000107AC extsw %r0, %r0 .text:00000000000107B0 mr %r3, %r0 .text:00000000000107B4 ld %r11, arg_0(%sp) .text:00000000000107B8 ld %r31, -8(%r11) .text:00000000000107BC mr %sp, %r11 .text:00000000000107C0 blr
Looks like this:
#define uint64_t unsigned long long
#define uint32_t unsigned int
uint64_t reversingFunction(uint32_t firstParameter, uint32_t secondParameter, uint32_t thirdParameter)
{
uint32_t sp80, sp88, sp90, sp38, sp34, sp30;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
}
loc_107A8:
// load sp90 in r0
//.text:00000000000107A8 lwz %r0, 0x90(%r31)
// this is a sign that r0 is a signed integer because it extense the signed value from 32 bit to the full 64 bit register so r0 is a signed 32 bit integer value
//.text:00000000000107AC extsw %r0, %r0
// Assign our return value. Since r0 is a signed int return value is a signed int.
// I would guess sp90 is a signed int now too so we change that
//.text:00000000000107B0 mr %r3, %r0
// Loading the old stack pointer in r11 we ignore that (we can IGNORE THAT, just remember that it was done).
//.text:00000000000107B4 ld %r11, arg_0(%sp)
// Loading the saved register. Let me explain this in detail (/// = Explain this in detail)
/// The top line were like this
/// newSP = oldSP - 0x50;
/// newSP + 0x48 = r31
/// The oldSP is loaded so it is like
/// oldSP = newSP + 0x50
/// oldSP - 0x08 = newSP + 0x48
/// oldSP - 0x08 = (oldSP - 0x50) + 0x48
/// They are equal so it definitly loads the old saved r31 back in r31
//.text:00000000000107B8 ld %r31, -8(%r11)
// Restoring old stack (we can IGNORE THAT)
//.text:00000000000107BC mr %sp, %r11
//.text:00000000000107C0 blr
return r3;
}
Okay we change the commented lines now we know sp90 is a signed int. I conclude all related to sp90 is signed too. SP90 get returned so a signed int as return and we know the return value…
Looks like this (we got quite far already):
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint32_t firstParameter, uint32_t secondParameter, int thirdParameter)
{
uint32_t sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
}
loc_107A8:
return sp90;
}
Okay now loc_10670:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint32_t firstParameter, uint32_t secondParameter, int thirdParameter)
{
uint32_t sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
// An simple if on sp38 again
//.text:0000000000010670 lwz %r0, 0x38(%r31)
//.text:0000000000010674 cmpwi cr7, %r0, 7
//.text:0000000000010678 ble cr7, loc_106BC
if(sp38 <= 7)
{
loc_106BC:
}
loc_1067C:
}
loc_107A8:
return sp90;
}
I will choose the if routes now so we got a basic structure of the code already. I will go on all of them in one hit now. Too shorten things it’s always the same.
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint32_t firstParameter, uint32_t secondParameter, int thirdParameter)
{
uint32_t sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
// An simple if on sp38 again
//.text:00000000000106BC lwz %r0, 0x38(%r31)
//.text:00000000000106C0 cmpwi cr7, %r0, 3
//.text:00000000000106C4 ble cr7, loc_10708
if(sp38 <= 3)
{
loc_10708:
// An simple if on sp38 again
//.text:0000000000010708 lwz %r0, 0x38(%r31)
//.text:000000000001070C cmpwi cr7, %r0, 1
//.text:0000000000010710 ble cr7, loc_10754
if(sp38 <= 1)
{
loc_10754:
// An simple if on sp38 again
//.text:0000000000010754 lwz %r0, 0x38(%r31)
//.text:0000000000010758 cmpwi cr7, %r0, 0
//.text:000000000001075C ble cr7, loc_1079C
if(sp38 <= 0)
{
goto loc_1079C; // this time we don't assigne a Label because we already have that one reversed at the top
}
loc_10760:
}
loc_10714:
}
loc_106C8:
}
loc_1067C:
}
loc_107A8:
return sp90;
}
Okay let’s look what we got here:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint32_t firstParameter, uint32_t secondParameter, int thirdParameter)
{
uint32_t sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
loc_10760:
}
loc_10714:
}
loc_106C8:
}
loc_1067C:
}
loc_107A8:
return sp90;
}
Block loc_1067C
I will convert one block first. I choosed the highest one in the IDA graph that was missing.
All other parts are nearly as the same of that block after explained this one in detail I go through the rest faster. I know it since I know the real code too. It’s one of mine anyway, but normaly you would all do one by one. I just want to shorten things but you will understand what I mean soon.
Let’s start with one code block.
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint32_t firstParameter, uint32_t secondParameter, int thirdParameter)
{
uint32_t sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
loc_10760:
}
loc_10714:
}
loc_106C8:
}
loc_1067C:
// Okay we save sp34 in r11 and sp30 in r0 lets mind that in our brain no need to write that down for now
//.text:000000000001067C lwz %r11, 0x34(%r31)
//.text:0000000000010680 lwz %r0, 0x30(%r31)
// Since we just saved an int no need to write nulls to the top part of sp30...
// Let me explain r0 is sp30 so this looks in c like this I don't write this down in real code just to explain
/// r9 = (r0 << 0) & 0x00000000FFFFFFFF;
/// so just saves the int value of r0 (sp30) in r9 we keep that in mind
// so sp30 is casted in an int not interesting as long as we don't know what is done with the integer value
//.text:0000000000010684 rldicl %r9, %r0, 0,32
// Okay we decided just the line above is a cast of sp30 as int saved in r9
// which is used as pointer to load a double word so it is in uint64_t pointer here
// Our CPU uses 32 bit addresses mostly so we ignore the int conversion here.
// we could write this
// r0 = *((uint64_t*)(int)sp30);
// but the int cast is done from compiler anyway so we ignore it
// Look what we got:
//.text:0000000000010688 ld %r0, 0(%r9)
r0 = *((uint64_t*)sp30);
// Same as above just with sp34 now
//.text:000000000001068C rldicl %r9, %r11, 0,32
// instead of load it writes the double word to the adress of r9 (sp34)
//.text:0000000000010690 std %r0, 0(%r9)
*((uint64_t*)sp34) = r0;
// load our pointer sp34 to r9
//.text:0000000000010694 lwz %r9, 0x34(%r31)
// increase the pointer by 8 and save the result in r0
//.text:0000000000010698 addi %r0, %r9, 8
// store the by 8 increased pointer back to sp34
//.text:000000000001069C stw %r0, 0x34(%r31)
// We do it like this for example
sp34 = ((uint64_t*)sp34) + 1;
// Same as above just with sp30
//.text:00000000000106A0 lwz %r9, 0x30(%r31)
//.text:00000000000106A4 addi %r0, %r9, 8
//.text:00000000000106A8 stw %r0, 0x30(%r31)
sp30 = ((uint64_t*)sp30) + 1;
// oh sp38 gets decreased by 8
// same as above with sp38 just -8 instead of +8
// we know already that this is not a pointer because never is data loaded from it
// you may have a clue now what this is
//.text:00000000000106AC lwz %r9, 0x38(%r31)
//.text:00000000000106B0 addi %r0, %r9, -8
//.text:00000000000106B4 stw %r0, 0x38(%r31)
sp38 -= 8;
// simple go back to the top
//.text:00000000000106B8 b loc_1079C
goto loc_1079C;
}
loc_107A8:
return sp90;
}
NOTE: Since we know now that sp34 is a pointer to some address we can assign (uint64_t*) to all sp34 and sp30 assoziated variables now
We got something like this now:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(uint64_t* firstParameter, uint64_t* secondParameter, int thirdParameter)
{
uint64_t* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
loc_10760:
}
loc_10714:
}
loc_106C8:
}
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
loc_107A8:
return sp90;
}
The other 3 Blocks
Now the other 3 parts we go fast through since they are nearly the same as the loc_1067C block. Normaly you would investigate in detail, but I shorten here.
#define uint64_t unsigned long long
#define uint32_t unsigned int
// I changed it to void* because you will see the other blocks use the pointers as other pointers than uint64_t*
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_10760:
// look at Block loc_1067C it's the same just byte is used instead of double word
//.text:0000000000010760 lwz %r11, 0x34(%r31)
//.text:0000000000010764 lwz %r0, 0x30(%r31)
//.text:0000000000010768 rldicl %r9, %r0, 0,32
//.text:000000000001076C lbz %r0, 0(%r9)
//.text:0000000000010770 rldicl %r9, %r11, 0,32
//.text:0000000000010774 stb %r0, 0(%r9)
*((char*)sp34) = *((char*)sp30);
// look at Block loc_1067C it's the same just byte is used instead of double word
//.text:0000000000010778 lwz %r9, 0x34(%r31)
//.text:000000000001077C addi %r0, %r9, 1
//.text:0000000000010780 stw %r0, 0x34(%r31)
sp34 = ((char*) sp34) + 1;
// look at Block loc_1067C it's the same just byte is used instead of double word
//.text:0000000000010784 lwz %r9, 0x30(%r31)
//.text:0000000000010788 addi %r0, %r9, 1
//.text:000000000001078C stw %r0, 0x30(%r31)
sp30 = ((char*) sp30) + 1;
// look at Block loc_1067C it's the same just byte is used instead of double word
//.text:0000000000010790 lwz %r9, 0x38(%r31)
//.text:0000000000010794 addi %r0, %r9, -1
//.text:0000000000010798 stw %r0, 0x38(%r31)
sp38 -= 1;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
// IMPROTANT!!!: To stay at the code flow you see in text line this block is just above loc_1079C
// In graph view of IDA we couldn't see that but in text view the code will continue after the execution at
// loc_1079C
// How to solve this
// 1. create a Label above loc_1079C and jump to that label with
// like this
goto loc_10760;
}
loc_10714:
// look at Block loc_1067C it's the same just half word is used instead of double word
//.text:0000000000010714 lwz %r11, 0x34(%r31)
//.text:0000000000010718 lwz %r0, 0x30(%r31)
//.text:000000000001071C rldicl %r9, %r0, 0,32
//.text:0000000000010720 lhz %r0, 0(%r9)
//.text:0000000000010724 rldicl %r9, %r11, 0,32
//.text:0000000000010728 sth %r0, 0(%r9)
*((unsigned short*)sp34) = *((unsigned short*)sp30);
// look at Block loc_1067C it's the same just half word is used instead of double word
//.text:000000000001072C lwz %r9, 0x34(%r31)
//.text:0000000000010730 addi %r0, %r9, 2
//.text:0000000000010734 stw %r0, 0x34(%r31)
sp34 = ((unsigned short*)sp34) + 1;
// look at Block loc_1067C it's the same just half word is used instead of double word
//.text:0000000000010738 lwz %r9, 0x30(%r31)
//.text:000000000001073C addi %r0, %r9, 2
//.text:0000000000010740 stw %r0, 0x30(%r31)
sp30 = ((unsigned short*)sp30) + 1;
// look at Block loc_1067C it's the same just half word is used instead of double word
//.text:0000000000010744 lwz %r9, 0x38(%r31)
//.text:0000000000010748 addi %r0, %r9, -2
//.text:000000000001074C stw %r0, 0x38(%r31)
sp38 -= 2;
//.text:0000000000010750 b loc_1079C
goto loc_1079C;
}
loc_106C8:
// look at Block loc_1067C it's the same just word is used instead of double word
//.text:00000000000106C8 lwz %r11, 0x34(%r31)
//.text:00000000000106CC lwz %r0, 0x30(%r31)
//.text:00000000000106D0 rldicl %r9, %r0, 0,32
//.text:00000000000106D4 lwz %r0, 0(%r9)
//.text:00000000000106D8 rldicl %r9, %r11, 0,32
//.text:00000000000106DC stw %r0, 0(%r9)
// Explained in Block loc_1067C just this time word instead of double word
*((uint32_t*)sp34) = *((uint32_t*)sp30);
// look at Block loc_1067C it's the same just word is used instead of double word
//.text:00000000000106E0 lwz %r9, 0x34(%r31)
//.text:00000000000106E4 addi %r0, %r9, 4
//.text:00000000000106E8 stw %r0, 0x34(%r31)
sp34 = ((uint32_t*)sp34) + 1;
// look at Block loc_1067C it's the same just word is used instead of double word
//.text:00000000000106EC lwz %r9, 0x30(%r31)
//.text:00000000000106F0 addi %r0, %r9, 4
//.text:00000000000106F4 stw %r0, 0x30(%r31)
sp30 = ((uint32_t*)sp30) + 1;
// look at Block loc_1067C it's the same just word is used instead of double word
//.text:00000000000106F8 lwz %r9, 0x38(%r31)
//.text:00000000000106FC addi %r0, %r9, -4
//.text:0000000000010700 stw %r0, 0x38(%r31)
sp38 -= 4;
//.text:0000000000010704 b loc_1079C
goto loc_1079C;
}
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
loc_107A8:
return sp90;
}
Okay we are done and it looks like this:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
goto loc_10760;
}
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
goto loc_1079C;
}
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
goto loc_1079C;
}
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
loc_107A8:
return sp90;
}
Okay now two things we can do. Replace spXX with more speaking names by investigating what this code does, but I recommend to sort the code with the next part. Called Code Flow…
Code Flow
We are lucky since the code under every if (if the condition is false) always jumps somewhere different so we can inject else there for sure without damaging the code flow.
This will look like this:
NOTE: Only inject else clauses if you are sure it don’t disturbe the code flow.
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
if(sp38 <= 7)
{
loc_106BC:
if(sp38 <= 3)
{
loc_10708:
if(sp38 <= 1)
{
loc_10754:
if(sp38 <= 0)
{
goto loc_1079C;
}
else
{
goto loc_10760;
}
}
else
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
goto loc_1079C;
}
}
else
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
goto loc_1079C;
}
}
else
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
}
loc_107A8:
return sp90;
}
Sometimes switching the blocks if code block with else code block AND inverting the condition check will make a more common structure.
I guess you will directly see when you see this code transformation now:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
loc_1079C:
if(sp38 > 0)
{
loc_10670:
//if(sp38 <= 7)
if(sp38 > 7)
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
else
{
loc_106BC:
//if(sp38 <= 3)
if(sp38 > 3)
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
goto loc_1079C;
}
else
{
loc_10708:
//if(sp38 <= 1)
if(sp > 1)
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
goto loc_1079C;
}
else
{
loc_10754:
if(sp38 > 0)
{
goto loc_10760;
}
else
{
goto loc_1079C;
}
}
}
}
}
loc_107A8:
return sp90;
}
Okay we remove all none called labels and restructure the else { if { } } structures to else ifs:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
if(sp38 > 7)
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
else if(sp38 > 3)
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
goto loc_1079C;
}
else if(sp > 1)
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
goto loc_1079C;
}
else if(sp38 > 0)
{
// Since this is the only place the loc_10760 Code is called and executed we copy it here
// goto loc_10760;
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
// AND we preserve the code flow with a goto to the code executed after loc_10760
goto loc_1079C;
}
else
{
goto loc_1079C;
}
}
return sp90;
}
Nearly finish…
We got this now looks quite nice already:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
if(sp38 > 7)
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
goto loc_1079C;
}
else if(sp38 > 3)
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
goto loc_1079C;
}
else if(sp > 1)
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
goto loc_1079C;
}
else if(sp38 > 0)
{
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
goto loc_1079C;
}
else
{
goto loc_1079C;
}
}
return sp90;
}
No matter what if or else if is executed all end with a jump to loc_1079C so we can just put it out of the ifs
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
goto loc_1079C;
loc_1079C:
if(sp38 > 0)
{
if(sp38 > 7)
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
}
else if(sp38 > 3)
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
}
else if(sp > 1)
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
}
else if(sp38 > 0)
{
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
}
goto loc_1079C;
}
return sp90;
}
While?
You have to know that, it’s hard to explain if you go into an if and the condition is true.
If that if body code jumps back to the if this is by definition a while clause.
Think about it makes totaly sense
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
// loc_1079C:
// if(sp38 > 0)
while(sp38 > 0)
{
if(sp38 > 7)
{
loc_1067C:
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
}
else if(sp38 > 3)
{
loc_106C8:
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
}
else if(sp > 1)
{
loc_10714:
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
}
else if(sp38 > 0)
{
loc_10760:
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
}
// goto loc_1079C;
}
return sp90;
}
Now we got this:
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp80, sp88, sp34, sp30;
int sp90, sp38;
sp34 = sp80 = firstParameter;
sp30 = sp88 = secondParameter;
sp38 = sp90 = thirdParameter;
while(sp38 > 0)
{
if(sp38 > 7)
{
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
}
else if(sp38 > 3)
{
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
}
else if(sp > 1)
{
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
}
else if(sp38 > 0)
{
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
}
}
return sp90;
}
Rename Local Variables and Remove unused Local variables
First we remove unused variables. In our case sp88, sp80.
sp90 get assigned and returned but never modified so we can use the third Parameter for it.
#define uint64_t unsigned long long
#define uint32_t unsigned int
int reversingFunction(void* firstParameter, void* secondParameter, int thirdParameter)
{
void* sp34, sp30;
int sp90, sp38;
sp34 = firstParameter;
sp30 = secondParameter;
sp38 = thirdParameter;
while(sp38 > 0)
{
if(sp38 > 7)
{
*((uint64_t*)sp34) = *((uint64_t*)sp30);
sp34 = ((uint64_t*)sp34) + 1;
sp30 = ((uint64_t*)sp30) + 1;
sp38 -= 8;
}
else if(sp38 > 3)
{
*((uint32_t*)sp34) = *((uint32_t*)sp30);
sp34 = ((uint32_t*)sp34) + 1;
sp30 = ((uint32_t*)sp30) + 1;
sp38 -= 4;
}
else if(sp > 1)
{
*((unsigned short*)sp34) = *((unsigned short*)sp30);
sp34 = ((unsigned short*)sp34) + 1;
sp30 = ((unsigned short*)sp30) + 1;
sp38 -= 2;
}
else if(sp38 > 0)
{
*((char*)sp34) = *((char*)sp30);
sp34 = ((char*) sp34) + 1;
sp30 = ((char*) sp30) + 1;
sp38 -= 1;
}
}
return thirdParameter;
}
Renaming the variables and all other names too…
Now look like this:
#define uint64_t unsigned long long
#define uint32_t unsigned int
#define uint16_t unsigned short
#define uint8_t char
int memcpy(void* dst, void* src, int size)
{
int sizeToCopy = size;
void* dstCopy = dst;
void* srcCopy = src;
while(sizeToCopy > 0)
{
if(sizeToCopy >= 8)
{
*((uint64_t*)dstCopy) = *((uint64_t*)srcCopy);
dstCopy = ((uint64_t*)dstCopy)+1;
srcCopy = ((uint64_t*)srcCopy)+1;
sizeToCopy -= 8;
}
else if(sizeToCopy >= 4)
{
*((uint32_t*)dstCopy) = *((uint32_t*)srcCopy);
dstCopy = ((uint32_t*)dstCopy)+1;
srcCopy = ((uint32_t*)srcCopy)+1;
sizeToCopy -= 4;
}
else if(sizeToCopy >= 2)
{
*((unsigned short*)dstCopy) = *((unsigned short*)srcCopy);
dstCopy = ((uint16_t*)dstCopy)+1;
srcCopy = ((uint16_t*)srcCopy)+1;
sizeToCopy -= 2;
}
else if(sizeToCopy >= 1)
{
*((uint8_t*)dstCopy) = *((uint8_t*)srcCopy);
dstCopy = ((uint8_t*)dstCopy)+1;
srcCopy = ((uint8_t*)srcCopy)+1;
sizeToCopy -= 1;
}
}
return size;
}
Ask me on twitter for questions this is a tough one I know. (@KDSBest on twitter
)
Stay tuned,
KDSBest
What is this part about?
The following Instructions will be shown:
Write to memory
Readfrom memory
Copy register
Branches
Others
And I will explain something about the memory and we will together reverse a big example.
The Stack
The Stack is mostly used for local variables in a function and to save register, so we don’t destroy data from the function we get called from. The stack gets freed after the function ran through. You can read more about stacks on wikipedia but this is enough to know for our purpose.
What brings the future?
Since I can’t manage to finish this part with the example reversing. I will just show you the upper assembler instructions like the one in part 1 and you got homework. At least this weekend I will finish a complete walkthrough through my example code. Which I will show you. Ignore it and wait for me to finish part 3 or try on your own the fun.
This is the code, you can ask me on twitter if you got a clue what it does
. It’s untested but basicly a very short code so it should work
. Sorry for the gardient background my mspaint skills are limited.
I will explain parameter detection and so on in part 3 with the code.
The Instructions
stdu (Store Double Word Update)
This is a very complex instruction, which is mostly used for stack initialization.
The value of the register is saved into the defined location and the new location is saved in the register.
The C code may explain it more.
Parameters
1. A Register, which value will be stored to EA (Adress) and it is the destination register for the calculated EA
2. Value for EA calculation
3. Source Register for EA calculation
EA is calculated value + content of register
Our example
stdu %sp, -0x50(%sp)
In C
// Calculate EA // EAValue is -0x50 in the example unsigned long long EA = EAValue + sp; // Save content of sp to EA in Memory *((unsigned long long*) EA) = sp; // Update sp sp = EA;
std (Store Double Word)
Same as stdu but the 1. Parameter value is untouched (not updated).
Parameters
1. A Register, which value will be stored to EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
std %r31, 0x48(%sp)
In C
// Calculate EA // EAValue is 0x48 in the example unsigned long long EA = EAValue + sp; // Save content of r31 to EA in Memory *((unsigned long long*) EA) = r31;
stw (Store Word)
Same as std but just with a 4 byte value instead of the 8 byte value
Parameters
1. A Register, which value will be stored to EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
stw %r0, 0x34(%sp)
In C
unsigned long long EA = EAValue + sp; *((unsigned int*) EA) = r0;
sth (Store Half Word)
Like std and stw just 2 byte value. I will shorten this.
Our example
sth %r0, 0x34(%sp)
In C
unsigned long long EA = EAValue + sp; *((unsigned short*) EA) = r0;
stb (Store Byte)
You will get it by now…
Our example
stb %r10, 0x34(%r20)
In C
unsigned long long EA = EAValue + r20; *((char*) EA) = r10;
ld (Load Double Word)
The opposite to std it loads the value to the register instead of writing the value from the register to the memory
Parameters
1. A Register, which will get the value from EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
ld %r11, 0x00(%sp)
In C
unsigned long long EA = EAValue + sp; r11 = *((unsigned long long*) EA);
lwz (Load Word Zero)
The opposite to stw it loads the value to the register instead of writing the value from the register to the memory.
The none used part of the register in this case the upper 32 bits are filled with 0.
Parameters
1. A Register, which will get the value from EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
lwz %r11, 0x10(%r0)
In C
// EAValue is in the example 0x10 to clearify this again unsigned long long EA = EAValue + r0; r11 = 0; r11 += *((unsigned int*) EA);
lhz (Load Half Word Zero)
The opposite to stw it loads the value to the register instead of writing the value from the register to the memory.
The none used part of the register in this case the upper 48 bits are filled with 0.
Parameters
1. A Register, which will get the value from EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
lhz %r11, 0x10(%r0)
In C
// EAValue is in the example 0x10 to clearify this again unsigned long long EA = EAValue + r0; r11 = 0; r11 += *((unsigned short*) EA);
lbz (Load Byte Zero)
The opposite to stw it loads the value to the register instead of writing the value from the register to the memory.
The none used part of the register in this case the upper 56 bits are filled with 0.
Parameters
1. A Register, which will get the value from EA (Adress)
2. Value for EA calculation
3. Source Register for EA calculation
Our example
lbz %r11, 0x10(%r0)
In C
// EAValue is in the example 0x10 to clearify this again unsigned long long EA = EAValue + r0; r11 = 0; r11 += *((char*) EA);
Now we get to the new instructions which actually does something new
mr (Move Register)
Copies the content from one register to another register
Parameters
1. Destination Register
2. Source Register
Our example
mr %r3, %r0
In C
r3 = r0;
rldicl (Rotate Left Immediate Double Word then Clear Left)
A complex one again…
The Register gets shifted in the Left direction by n bits and anded with a mask. Better look at the C Code.
Parameters
1. Destination Register
2. Source Register
3. Value, how many bits get rotated
4. A bit mask which will be used with and on the shifted Source Value.
Our example
rldicl %r9, %r11, 2, 32
In C
r9 = r11 << 2; // since our test Value is 32 the lower 32 bits are 1s of the mask. // 64-n higher bits gets cleared in our example 64-32 = 32 higher bits are 0 of the mask r9 &= 0b0000000000000000000000000000000011111111111111111111111111111111;
Another example
rldicl %r9, %r11, 8, 16
In C
r9 = r11 << 8; // since our test Value is 16 the lower 16 bits are 1s of the mask. // 64-n higher bits gets cleared in our example 64-16 = 48 higher bits are 0 of the mask r9 &= 0b0000000000000000000000000000000000000000000000001111111111111111;
addi (Add Immediate)
Adds a value to the content of the source register and saves the calculated value to the destination register.
Parameters
1. Destination Register
2. Source Register
3. Value
Our example
addi %r0, %r9, 8
In C
r0 = r9 + 8;
extsw (Extend Sign Word)
Copies the lower 32 bits of the source register to the destination register.
The higher 32 bits of the destination register will be set to the sign bit of the source register
Parameters
1. Destination Register
2. Source Register
Our example
extsw %r0, %r1
In C
r0 = r1 if(r1 & 0b10000000000000000000000000000000 != 0) r0 |= 0b1111111111111111111111111111111100000000000000000000000000000000; else r0 &= 0b0000000000000000000000000000000011111111111111111111111111111111;
b (Branch)
Simply jump to the code at the EA (IDA calculates this for you).
Parameters
1. EA
Our example
b loc_1079C
In C
There is no good way to 100% implement this is in C it is very let’s say context base.
//at the right code of course the label for the branch //loc_1079C: goto loc_1079C;
blr (Branch Link Register)
Branches to the Link Register… In a function this is the return;
.
The value of the return is stored in our CPU in the r3 register.
Our example
blr;
In C
return;
Another example
mr %r3, %r31 blr
In C
return r31;
cmpwi (Compare Word Immediate)
Parameters
1. Compare Register (Holds the compare results, Greater Than, Lower Than, Equal, and so on)
2. Register which content gets compared
3. Value which gets compared to the register content
Our example
cmpwi %cr7, %r0, 7
In C
There is no good way to 100% implement this is in C it is very let’s say context base. We will implement in C this with the given branch to give a proper example look at the bottom 2 ones.
bgt (Branch Greater Than)
Jumps to the code if the compare register has the greater than flag of the last comparison else code execution goes on with the code below the instruction.
Parameters
1. Compare Register (Holds the compare results, Greater Than, Lower Than, Equal, and so on)
2. EA of the code which gets executed if the compare is greater than else the code execution continues just with the next command below this one
Our example
cmpwi %cr7, %r0, 7 bgt %cr7, loc_1234
In C
if(r0 > 7)
{
// loc_1234's code is here
}
ble (Branch Lower or Equal)
Jumps to the code if the compare register has the lower than or equal flag of the last comparison else code execution goes on with the code below the instruction.
Parameters
1. Compare Register (Holds the compare results, Greater Than, Lower Than, Equal, and so on)
2. EA of the code which gets executed if the compare is lower than or equal else the code execution continues just with the next command below this one
Our example
cmpwi %cr7, %r0, 7 ble %cr7, loc_1234
In C
if(r0 <= 7)
{
// loc_1234's code is here
}
continue in part 3…
I will show complexer combinations of instructions in the example walkthrough. Hope this is helpful since it is just information, which is not shown in a complexer example for now.
Wait for part 3 all I can say and hope ^^.
Stay tuned,
KDSBest
Let me make one thing clear. You have to know how to develope Software in a language C/C++, C#, Java or anything like that, before you will fully understand this. If u know how to develope software or if you are just interested how Reverse Engineering works for personal interests go on.
What is Reverse Engineering?
If you create source code like this:
#include <stdio.h>
int main()
{
printf("TEST\n");
return 1;
}
The compiler u use will generate assembler instruction formed in an exe file or similar to execute it on your CPU. The CPU instructions are assembler instructions (in hex format instead of human readable called Opcode).
Reconstructing source code from an assembler listing is called reverse engineering. It is used in many ways.
Finding Exploits, developing Shellcodes, Hacking Consoles and understanding other software are just some scenarios where this is used.
How do I learn todo this process? Books?
As always start reading about some of your tools u will use. Basicly I recommend IDA (Interactive Debugger). It is by far the best Disassembler in this world. If you want to learn ppc reverse engineering you can read this series. You are free to link to this site.
What CPU do you show your examples?
Since the theory works on alot assembler languages (yeah there are different ones), I will still explain that I will show it on the example PPC 64-bit CPU like in the Cell Broadband Engine.
How do we simplify the understanding first?
In our C code we start using registers of the CPU as global variables defined like this:
register unsigned long long r3 __asm("r3");
How do I start?
I always start with translating every assembler instruction into C code right away, because I’m much familiar reading obfuscated C code instead of a wall of assembler code. I will cover examples and the first instruction conversions I find most in code… While converting the instructions to C you will notice some things right away, later. Then feel free to optimize the code right away. A simple example is this C code:
r3 = 0x12340000003DCBA9;
This simple assignement works in 4 steps:
lis %r3, 0x1234 rldicr %r3, %r3, 32 oris %r3, %r3, 0x3D ori %r3, %r3, 0xCBA9
Going Deeper
First I provide u the first direct translations to C code… The bottom format will be used the whole series
lis instruction (Load Immediate Shifted)
This instruction is used to load a value to the bits 16 – 31 (0 is the lowest bit, 63 is the highest bit).
Parameters
1. register, which will be set (The first register is always the destination register)
2. Value, which will be the value (The value only has a limited size. Because of the size limit setting all 64-bit on an register is that complicated…)
Our example
lis %r3, 0x1234
In C
r3 = 0x12340000;
Pretty simple heh? Next instructions…
rldicr (Rotate Left Double Word Immediate then Clear Right)
This instruction rotats bitwise to the left direction and fills the right bits with 0.
Simple Example
You got r3 = 2; (2 is in binary 10). If u rotate left with 2 bits the new value would be 8 (8 is in binary 1000).
Parameters
1. Register, like always the destination register
2. Register, the value which will be used for the rotation (src and destination don’t have to match, I will give an example later)
3. Value, the bits that get rotated/shifted
Other Example
r4 has the value 2. r5 should get the value 8. The following code should do the trick. r4 will stay with the value 2 and is not modified.
rldicr %r5, %r4, 2
Our Example
rldicr %r3, %r3, 32
In C
r3 = r3 << 32;
Since source and destination register match the shorter way.
r3 <<= 32;
ori (OR Immediate)
A simple or operation on the lower bits 0-15.
Parameters
1. Register, like always the destination register
2. Register, the value which will be used for the or operation (src and destination don’t have to match)
3. Value which will be used for the or operation
Our Example
ori %r3, %r3, 0xCBA9
In C
r3 = r3 | 0xCBA9;
Since source and destination register match the shorter way.
r3 |= 0xCBA9;
Note: I showed ori before oris because understanding ori is easier and they are basicly the same.
oris (OR Immediate Shifted)
A simple or operation on the bits 16-31.
Parameters
1. Register, like always the destination register
2. Register, the value which will be used for the or operation (src and destination don’t have to match)
3. Value which will be used for the or operation (just add 4 “0000″ to the hex value of an ori and an oris is like an ori, the C code will show it)
Our Example
oris %r3, %r3, 0x3D
In C
r3 = r3 | 0x3D0000;
Since source and destination register match the shorter way.
r3 |= 0x3D0000;
The Example:
The way we got it.
lis %r3, 0x1234 rldicr %r3, %r3, 32 oris %r3, %r3, 0x3D ori %r3, %r3, 0xCBA9
Now just copy the C translation and fill in the right values and registers u will got this.
In C
r3 = 0x12340000; r3 <<= 32; r3 |= 0x3D0000; r3 |= 0xCBA9;
Simplify the code:
r3 = 0x12340000; r3 <<= 32;
Since the left shift only adds 4 bytes of 0 bits to the right side of the value the result will be like this:
r3 = 0x1234000000000000;
Next part…
r3 = 0x1234000000000000; r3 |= 0x3D0000;
Since u know we OR just with zeros u can simply put the value in there.
r3 = 0x12340000003D0000;
For the next OR it is the same.
r3 = 0x12340000003D0000; r3 |= 0xCBA9;
Which will become…
r3 = 0x12340000003DCBA9;
Last word and the next part
First of all, all instructions can be simply replaced with one liners of C code or two liners. It’s just like that and understanding C code or similar is a normal programming language.
The big problem while reversing is to gather start information and this will come with experience.
Those for lines were a common example, but like in software engineering small examples are easy todo. The big picture is the troublesome. We need to start somewhere or am I wrong. The next part will be showing some stack operations. So if you want to prepare yourself a bit, learn what a stack is and how local function variables are stored in it.
Stay tuned,
KDSBest
IMPORTANT: If u got any question I will always answer them on twitter (the fastest way to get intouch with me)… I am a nice guy don’t fear me
. There are no dumb questions. I try to answer them all…
https://twitter.com/KDSBest
I changed the image size of the XNA Pictures, because they were way too big.
The game progress is slow because the whole team is playing swtor alot
. We still managed to change some things. For example we have a background effect build in and we changed alot on the survival mode. Now not just all enemies spawns at once, they spawn time after time.
The team will discuss some things about what we implement next and what we will implement till release.
As soon as I spoke to the team I will get back to you.
Just wanted to inform you the project is fuckin’ far from dead
.
Stay tuned
KDSBest
We have added a new Mode “Survival Mode” (I guess I don’t have to talk much about it):
And we added a Shield, which can be activated to save you from bad positions.
We just have to create a Multiplayer and “Super”/”Special” Weapons. After that we just have to create the Levels and Graphics, because the features are nearly all finish.
Stay tuned
KDSBest
My friend (Pizzabroetchen) and me are creating a 2D Scroll Shooter XNA Game. The Manga figure talking is made by my sister and me. Currently we implemented a PoC of most of the features and soon we only have to merge and make everything more beautiful, because we are just using dummy pictures at the moment.
To show you what we are workin’ on and that I am still alive I will post some pictues. Remember these are just PoC and not the end result, because we are missing alot of images and so on.
I don’t want to spoiler too much so screens for you:
Pizzabroetchen and me started alot of Game Engines, Games, PoC and so on, but this we want to finish. Just some smaller features are missing than we are creating Content (Levels and Bosses and so on, interfaces are already there). We need to add some more effects and images alot of images. Since we got no xna creators club membership at the moment, the gamertag is always GamerTag at the Highscore, we can’t read the real GamerTag without that membership. Soon we will get the membership to test on Xbox360 too.
Stay tuned
KDSBest
My Server got hacked and I had to reinstall everything.
The Blog is up and running again, but I didn’t restore the comments.
I really want to thank Web-Developer Alo, because he helped me out with a notebook and internet to restore everything as fast as possible, because i had no computer at home that time. His Blog is at www.web-developer-alo.com.
Stay tuned
KDSBest

What i have done so far?
I read through books and bachelor/master thesises till i felt comfortable with everything. I even implemented a 3D impulsed based Engine. The 3D engine is based on Game Physics Engine Development by Ian Millington. The engine has problems if you want to extend the engine with joints and resting contacts (you don’t have to understand that it will be explained in the next parts).
The engine i build is based on Farseer Physics…
I can’t find a developer friendly physic engine theory explanation. So i read through alot math thingys till i got a basic understanding. Now i read physic engine codes and reverse them to build “my own” engine. This process is documented here. Once in a week or even more frequently if a special topic needs distinct explanation.
We build the basics and extend the engine till we got a full rigid body engine.
In the game loop before the rendering we do a world simulation with the given elapsed time. The world simulation has the following structure (based on Farseer):
* Things will be added later, because it’s just performance boost or features which will prevent the basic understanding.
What shapes do we support?
I try to seperate the Collision and Physic Engine so you could add your shapes. The first shape i use are Boxes. I don’t want spheres because:
What’s next?
The first 3 points of the Physic Engine Loop: Cheap Rendering, Management of Body Adding and Removing, Integrating Gravity and Damping, Integrate new Position/Rotation and Collision Detection
Cheap Rendering
We just draw boxes with orientation and position… Easy
Management of Body Adding and Removing
Short code to make it thread safer
Integrating Gravity and Damping
Very short code just basic formula
Integrate new Position/Rotation
Just basic formula
Collision Detection
This will be the main topic. Since we are in 2D Space not that hard ^^

What can we learn from the market?
There are alot open source engines and some free to use propertary engines. Read and lern
.
Engines i recommend to take a look at:
You can study the code, but i didn’t do that. I bought a book, that teaches me the missing knowledge. I used Farseer and PhysX in alot projects of mine so i knew the documentation pretty well. I recommend before writting any bigger “engine” look at the documentation of other engines. This helps you design interfaces and so on.
Parts of the Physic Engine?
Collision Detection
This topic is as big as the physic engine itself. The engine i created has an interface for collision data. It is possible for you to create your own collision detection part and use that interface to feed the physic engine part.
What will we implement?
The Bounding Spheres keep the polygon-polygon intersection tests low!
Facts Collision Detection Part of mine
It is slow
The algorithms can be improved alot.
Improvement example 1:
You can implement Bounding AABB (Axis-Aligned Bounding Box) or better OBB (Oriented Bounding Box) instead of Bounding Spheres. They fit in the polygon far more accurate than Boundins Spheres. The collision detection part is not my passion todo so i kept it simple.
Improvement example 2:
You can implement the Sweep Line Algorithm to remove sensless checking of all Polygon Edges… In the next parts i explain the algorithms more in detail so you might get a better understanding what i am talking about.
Not every collision gets detected
Let’s pretend we have 2 Polygons. We call them PolygonA and PolygonB. If PolygonB is completly in PolygonA so the edges doesn’t touch then we can’t detect the collision. It is not that important for me because this should never happen
. You can easily detect this. Just sum the angles from every vector from a point in PolygonA… If they are 360 degree the point is inside PolygonB. If that is true for all points of PolygonA the Polygon is completly in PolygonB. In 3D spaces more complicated scenarious can fail. I use this engine for 2D things so its ok for me.
Physic simulation
I implemented a Rigid Body Physic Engine. Forces and Impulses are used to simulate the physic. I recommend to look at a Particle Physic Engine first. Particle Physic Engines only simulate position changes and not Rotations. You can extend this engine with Rotation later. This is the way i did it. Force Generators are used to simulate Springs and so on. I am not completly finished with the engine yet (The reason why here is no source). Joints are missing for example. Sleep is missing too at the moment. Sleep improves the performance of the engine alot.
Want to learn for the next Series
Here is a list of things you can take a look at. You don’t have to learn. I try to explain everything in detail but that makes it easier to understand. If you find a mistake or some improvements. Just give me feedback
:
Next Series Part will start with the collision detection.

Hi,
your phone must be rooted else you don’t have the permission todo such “evil” things.
Why write your own and don’t use the apps out there?
I only found 1 App that is able todo this. (GameCIH) It is a pretty nice app, has more features than mine has, because i just wrote a PoC. Someone else can pick up my work say thanks to me give me a little credit and implement all the features. Should be easy with my code. There are big problems with GameCIH i explain them now.
Problems of GameCIH
GameCIH Not supported in Lite Version
This happens if the memory you want to manipulate is not accessable through pread on “/proc/<pid>/mem”. I have to say a nice restriction for a lite version. You can test it but mostly you want to modify memory in this region. My app has a restriction too. I don’t search the whole memory it is easy patchable in the source. I made it for performance issues. The code is far from perfect just use it as PoC like i titled it
. Maybe only god knows i bring a clean version with more features than GameCIH… My time is short…
Now the nerdy part
I try to show only the source i need to. The whole source is compilable as attachment
.
I introduced a Class that make executing code as root possible (NDK crafted executeables) and a root shell code. The whole thing is based on this: http://www.kdsbest.com/?p=192
The java side is basic Android GUI development i don’t discuss this here. RTFM
.
The C Code…
Let’s start with the main function. We support 2 Modes. Search in and Poke to Memory. I know the code is not performance tuned and you will see i match the memory in the GUI which is highly inefficient, but i really didn’t give a fuck. This is just to show you guys the “Behind the Scene” of apps like GameCIH.
Basicly it works like this:
Attach to Process… Process now is a child Process of our Process… Suspend Child Process… Do Shit (Search or Poke)… Resume Child Process… Free Child Process so it is without our parent Process
int main(int argc, char **argv) {
if (argc == 4) {
int pid = atoi(argv[1]);
int number = atoi(argv[2]);
if (strcmp(argv[3], "all") == 0) {
ptrace(PTRACE_ATTACH, pid, NULL, NULL);
wait(NULL);
searchAll(pid, number);
ptrace(PTRACE_CONT, pid, NULL, NULL);
ptrace(PTRACE_DETACH, pid, NULL, NULL);
} else {
ptrace(PTRACE_ATTACH, pid, NULL, NULL);
wait(NULL);
long add;
sscanf(argv[3], "0x%x", &add);
ptrace(PTRACE_POKEDATA, pid, add, number);
printf("Poke at 0x%x (%d): %d", add, add, number);
ptrace(PTRACE_CONT, pid, NULL, NULL);
ptrace(PTRACE_DETACH, pid, NULL, NULL);
}
}
}
The Poke should be clear and easy for everyone to understand. The search with PTRACE_PEEKDATA is highly inefficient cause it just gives you 4 bytes of the memory and you have to traverse it. You do alot function calls and loops which makes this slow as hell. Android will think your app is crashed if you do this. But we are lucky there is a “/proc/<pid>/mem” file which is basicly the remote memory. Higher Regions are not readable through this (i didn’t manage it), so we fallback to the PTRACE_PEEKDATA version. You don’t need to fear we got access to the whole memory “/proc/<pid>/maps” offers.
“/proc/<pid>/maps” shows you which memory regions are allocated
“/proc/<pid>/mem” is the remote memory
So we got 2 remote memory read methods:
The fast one but can fail:
ssize_t readregion(void *buf, pid_t pid, long start, long end, size_t max) {
ssize_t len;
char mem[32];
int fdMEM;
snprintf(mem, sizeof(mem), "/proc/%d/mem", pid);
if ((fdMEM = open(mem, O_RDONLY)) == -1) {
return -1;
}
if (end - start < max)
max = end - start;
len = pread(fdMEM, buf, max, start);
close(fdMEM);
return len;
}
The slow one but never fails:
ssize_t readregionpeek(long*buf, pid_t pid, long start, long end, size_t max) {
if (end - start < max)
max = end - start;
int i, imax;
imax = max / 4;
for (i = 0; i < imax; i++) {
buf[i] = ptrace(PTRACE_PEEKDATA, pid, start + (i * 4), NULL);
}
return max;
}
Got a question?
http://www.twitter.com/KDSBest
Source Download
APK Download