From 8ef018a2cb93dc1121fe0787a6f1a4019b5f9dec Mon Sep 17 00:00:00 2001 From: JC Yang Date: Mon, 21 Dec 2015 18:01:01 +0800 Subject: [PATCH 1/2] Fix possible wrong conditional branch in generated host code by fixing the tcg_liveness_analysis(). Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further info. --- qemu/aarch64.h | 1 + qemu/arm.h | 1 + qemu/header_gen.py | 1 + qemu/m68k.h | 1 + qemu/mips.h | 1 + qemu/mips64.h | 1 + qemu/mips64el.h | 1 + qemu/mipsel.h | 1 + qemu/sparc.h | 1 + qemu/sparc64.h | 1 + qemu/tcg/tcg.c | 24 ++++++++++++++++++++++-- qemu/x86_64.h | 1 + 12 files changed, 33 insertions(+), 2 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index b919a355..fad1d153 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_aarch64 #define tcg_invert_cond tcg_invert_cond_aarch64 #define tcg_la_bb_end tcg_la_bb_end_aarch64 +#define tcg_la_br_end tcg_la_br_end_aarch64 #define tcg_la_func_end tcg_la_func_end_aarch64 #define tcg_liveness_analysis tcg_liveness_analysis_aarch64 #define tcg_malloc tcg_malloc_aarch64 diff --git a/qemu/arm.h b/qemu/arm.h index bf858302..88617c02 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_arm #define tcg_invert_cond tcg_invert_cond_arm #define tcg_la_bb_end tcg_la_bb_end_arm +#define tcg_la_br_end tcg_la_br_end_arm #define tcg_la_func_end tcg_la_func_end_arm #define tcg_liveness_analysis tcg_liveness_analysis_arm #define tcg_malloc tcg_malloc_arm diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 021f8a65..ed74332d 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -2951,6 +2951,7 @@ symbols = ( 'tcg_init', 'tcg_invert_cond', 'tcg_la_bb_end', + 'tcg_la_br_end', 'tcg_la_func_end', 'tcg_liveness_analysis', 'tcg_malloc', diff --git a/qemu/m68k.h b/qemu/m68k.h index ceb5ca47..8314001b 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_m68k #define tcg_invert_cond tcg_invert_cond_m68k #define tcg_la_bb_end tcg_la_bb_end_m68k +#define tcg_la_br_end tcg_la_br_end_m68k #define tcg_la_func_end tcg_la_func_end_m68k #define tcg_liveness_analysis tcg_liveness_analysis_m68k #define tcg_malloc tcg_malloc_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 3c789c04..75269fb8 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_mips #define tcg_invert_cond tcg_invert_cond_mips #define tcg_la_bb_end tcg_la_bb_end_mips +#define tcg_la_br_end tcg_la_br_end_mips #define tcg_la_func_end tcg_la_func_end_mips #define tcg_liveness_analysis tcg_liveness_analysis_mips #define tcg_malloc tcg_malloc_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 8fb5418e..b5c2908e 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_mips64 #define tcg_invert_cond tcg_invert_cond_mips64 #define tcg_la_bb_end tcg_la_bb_end_mips64 +#define tcg_la_br_end tcg_la_br_end_mips64 #define tcg_la_func_end tcg_la_func_end_mips64 #define tcg_liveness_analysis tcg_liveness_analysis_mips64 #define tcg_malloc tcg_malloc_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 8a668325..522f998c 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_mips64el #define tcg_invert_cond tcg_invert_cond_mips64el #define tcg_la_bb_end tcg_la_bb_end_mips64el +#define tcg_la_br_end tcg_la_br_end_mips64el #define tcg_la_func_end tcg_la_func_end_mips64el #define tcg_liveness_analysis tcg_liveness_analysis_mips64el #define tcg_malloc tcg_malloc_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 0def605f..26398a30 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_mipsel #define tcg_invert_cond tcg_invert_cond_mipsel #define tcg_la_bb_end tcg_la_bb_end_mipsel +#define tcg_la_br_end tcg_la_br_end_mipsel #define tcg_la_func_end tcg_la_func_end_mipsel #define tcg_liveness_analysis tcg_liveness_analysis_mipsel #define tcg_malloc tcg_malloc_mipsel diff --git a/qemu/sparc.h b/qemu/sparc.h index 59a835f6..6a5cb27e 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_sparc #define tcg_invert_cond tcg_invert_cond_sparc #define tcg_la_bb_end tcg_la_bb_end_sparc +#define tcg_la_br_end tcg_la_br_end_sparc #define tcg_la_func_end tcg_la_func_end_sparc #define tcg_liveness_analysis tcg_liveness_analysis_sparc #define tcg_malloc tcg_malloc_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 043736b6..e67bbf00 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_sparc64 #define tcg_invert_cond tcg_invert_cond_sparc64 #define tcg_la_bb_end tcg_la_bb_end_sparc64 +#define tcg_la_br_end tcg_la_br_end_sparc64 #define tcg_la_func_end tcg_la_func_end_sparc64 #define tcg_liveness_analysis tcg_liveness_analysis_sparc64 #define tcg_malloc tcg_malloc_sparc64 diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index abdcb9e4..c55878bc 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -1445,6 +1445,18 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, } } +/* + Unicorn: for brcond, we should refresh liveness states for TCG globals +*/ +static inline void tcg_la_br_end(TCGContext *s, uint8_t *mem_temps) +{ + int i; + memset(mem_temps, 1, s->nb_globals); + for(i = s->nb_globals; i < s->nb_temps; i++) { + mem_temps[i] = s->temps[i].temp_local; + } +} + /* Liveness analysis : update the opc_dead_args array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ @@ -1682,12 +1694,20 @@ static void tcg_liveness_analysis(TCGContext *s) } /* if end of basic block, update */ - if (def->flags & TCG_OPF_BB_END && op != INDEX_op_brcond_i32) { + if (def->flags & TCG_OPF_BB_END) { // Unicorn: do not optimize dead temps on brcond, // this causes problem because check_exit_request() inserts // brcond instruction in the middle of the TB, // which incorrectly flags end-of-block - tcg_la_bb_end(s, dead_temps, mem_temps); + if (op != INDEX_op_brcond_i32) + tcg_la_bb_end(s, dead_temps, mem_temps); + // Unicorn: we do not touch dead temps for brcond, + // but we should refresh TCG globals In-Memory states, + // otherwise, important CPU states(especially conditional flags) might be forgotten, + // result in wrongly generated host code that run into wrong branch. + // Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further information + else + tcg_la_br_end(s, mem_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { /* globals should be synced to memory */ memset(mem_temps, 1, s->nb_globals); diff --git a/qemu/x86_64.h b/qemu/x86_64.h index b2656541..57b72a98 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -2945,6 +2945,7 @@ #define tcg_init tcg_init_x86_64 #define tcg_invert_cond tcg_invert_cond_x86_64 #define tcg_la_bb_end tcg_la_bb_end_x86_64 +#define tcg_la_br_end tcg_la_br_end_x86_64 #define tcg_la_func_end tcg_la_func_end_x86_64 #define tcg_liveness_analysis tcg_liveness_analysis_x86_64 #define tcg_malloc tcg_malloc_x86_64 From 5be70eb91b4812b2e55c50c8c0797ba40857bf5c Mon Sep 17 00:00:00 2001 From: JC Yang Date: Tue, 22 Dec 2015 15:25:36 +0800 Subject: [PATCH 2/2] Add test case for issue 287. --- .../tcg_liveness_analysis_bug_issue-287.py | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 tests/regress/tcg_liveness_analysis_bug_issue-287.py diff --git a/tests/regress/tcg_liveness_analysis_bug_issue-287.py b/tests/regress/tcg_liveness_analysis_bug_issue-287.py new file mode 100644 index 00000000..4310a8cb --- /dev/null +++ b/tests/regress/tcg_liveness_analysis_bug_issue-287.py @@ -0,0 +1,134 @@ +from unicorn import * +from unicorn.arm_const import * +import binascii + +MB = 1024 * 1024 +PAGE = 4 * 1024 + +def PrintArmRegisters(uc_emu): + print 'R0 : '+hex(uc_emu.reg_read(UC_ARM_REG_R0)) + print 'R1 : '+hex(uc_emu.reg_read(UC_ARM_REG_R1)) + print 'R2 : '+hex(uc_emu.reg_read(UC_ARM_REG_R2)) + print 'R3 : '+hex(uc_emu.reg_read(UC_ARM_REG_R3)) + print 'R4 : '+hex(uc_emu.reg_read(UC_ARM_REG_R4)) + print 'R5 : '+hex(uc_emu.reg_read(UC_ARM_REG_R5)) + print 'R6 : '+hex(uc_emu.reg_read(UC_ARM_REG_R6)) + print 'R7 : '+hex(uc_emu.reg_read(UC_ARM_REG_R7)) + print 'R8 : '+hex(uc_emu.reg_read(UC_ARM_REG_R8)) + print 'R9 : '+hex(uc_emu.reg_read(UC_ARM_REG_R9)) + print 'R10 : '+hex(uc_emu.reg_read(UC_ARM_REG_R10)) + print 'R11 : '+hex(uc_emu.reg_read(UC_ARM_REG_R11)) + print 'R12 : '+hex(uc_emu.reg_read(UC_ARM_REG_R12)) + print 'SP : '+hex(uc_emu.reg_read(UC_ARM_REG_SP)) + print 'LR : '+hex(uc_emu.reg_read(UC_ARM_REG_LR)) + print 'PC : '+hex(uc_emu.reg_read(UC_ARM_REG_PC)) + flags = uc_emu.reg_read(UC_ARM_REG_CPSR) + print 'carry : '+str(flags >> 29 & 0x1) + print 'overflow : '+str(flags >> 28 & 0x1) + print 'negative : '+str(flags >> 31 & 0x1) + print 'zero : '+str(flags >> 30 & 0x1) + +''' + issue #287 + Initial Register States: R0=3, R1=24, R2=16, R3=0 + ----- code start ----- + CMP R0,R1,LSR#3 + SUBCS R0,R0,R1,LSR#3 # CPU flags got changed in these two instructions, and *REMEMBERED*, now NF == VF == 0 + CMP R0,#1 # CPU flags changed again, now NF == 1, VF == 0, but they are not properly *REMEMBERED* + MOV R1,R1,LSR#4 + SUBGES R2,R2,#4 # according to the result of CMP, we should skip this op + + MOVGE R3,#100 # since changed flags are not *REMEMBERED* in CMP, now NF == VF == 0, which result in wrong branch + # at the end of this code block, should R3 == 0 + ----- code end ------ + + # TCG ops are correct, plain op translation is done correctly, + # but there're In-Memory bits invisible from ops that control the host code generation. + # all these codes are in one TCG translation-block, so wrong things could happen. + # detail explanation is given on the right side. + # remember, both set_label and brcond are point to refresh the dead_temps and mem_temps states in TCG + ----- TCG ops ------ + ld_i32 tmp5,env,$0xfffffffffffffff4 + movi_i32 tmp6,$0x0 + brcond_i32 tmp5,tmp6,ne,$0x0 + mov_i32 tmp5,r1 ------------------------- + movi_i32 tmp6,$0x3 | + shr_i32 tmp5,r1,tmp6 | + mov_i32 tmp6,r0 | + sub_i32 NF,r0,tmp5 | + mov_i32 ZF,NF | + setcond_i32 CF,r0,tmp5,geu | # This part is "CMP R0,R1,LSR#3" + xor_i32 VF,NF,r0 |-----> # and "SUBCS R0,R0,R1,LSR#3" + xor_i32 tmp7,r0,tmp5 | # the last op in this block, set_label get a chance to refresh the TCG globals memory states, + and_i32 VF,VF,tmp7 | # so things get back to normal states + mov_i32 tmp6,NF | # these codes are not affected by the bug. Let's called this Part-D + movi_i32 tmp5,$0x0 | + brcond_i32 CF,tmp5,eq,$0x1 | + mov_i32 tmp5,r1 | + movi_i32 tmp6,$0x3 | + shr_i32 tmp5,r1,tmp6 | + mov_i32 tmp6,r0 | + sub_i32 tmp6,r0,tmp5 | + mov_i32 r0,tmp6 | + set_label $0x1 ------------------------- + movi_i32 tmp5,$0x1 ----------------- # Let's called this Part-C + mov_i32 tmp6,r0 | # NF is used as output operand again! + sub_i32 NF,r0,tmp5 ----------------|-----> # but it is stated as Not-In-Memory, + mov_i32 ZF,NF | # no need to sync it after calculation. + setcond_i32 CF,r0,tmp5,geu | # the generated host code does not write NF + xor_i32 VF,NF,r0 | # back to its memory location, hence forgot. And the CPU flags after this calculation is not changed. + xor_i32 tmp7,r0,tmp5 | # Caution: the following SUBGES's condition check is right, even though the generated host code does not *REMEMBER* NF, it will cache the calculated result and serve SUBGES correctly + and_i32 VF,VF,tmp7 | + mov_i32 tmp6,NF | + mov_i32 tmp5,r1 | # this part is "CMP R0,#1" + movi_i32 tmp6,$0x4 | # and "MOV R1,R1,LSR#4" + shr_i32 tmp5,r1,tmp6 | # and "SUBGES R2,R2,#4" + mov_i32 r1,tmp5 |-----> # This is the part where problem start to arise + xor_i32 tmp5,VF,NF | + movi_i32 tmp6,$0x0 | + brcond_i32 tmp5,tmp6,lt,$0x2 --------|-----> # QEMU will refresh the InMemory bit for TCG globals here, but Unicorn won't + movi_i32 tmp5,$0x4 | + mov_i32 tmp6,r2 | # this is the 1st bug-related op get analyzed. + sub_i32 NF,r2,tmp5 ----------------|-----> # here, NF is an output operand, it's flagged dead + mov_i32 ZF,NF | # and the InMemory bit is clear, tell the previous(above) ops + setcond_i32 CF,r2,tmp5,geu | # if it is used as output operand again, do not sync it + xor_i32 VF,NF,r2 | # so the generated host-code for previous ops will not write it back to Memory + xor_i32 tmp7,r2,tmp5 | # Caution: the CPU flags after this calculation is also right, because the set_label is a point of refresh, make them *REMEMBERED* + and_i32 VF,VF,tmp7 | # Let's call this Part-B + mov_i32 tmp6,NF | + mov_i32 r2,ZF | + set_label $0x2 ----------------- + xor_i32 tmp5,VF,NF ----------------- + movi_i32 tmp6,$0x0 | + brcond_i32 tmp5,tmp6,lt,$0x3 | # Let's call this Part-A + movi_i32 tmp5,$0x64 | # if Part-B is not skipped, this part won't go wrong, because we'll check the CPU flags as the result of Part-B, it's *REMEMBERED* + movi_i32 r3,$0x64 |-----> # but if Part-B is skipped, + set_label $0x3 | # what should we expected? we will check the condition based on the result of Part-D!!! + call wfi,$0x0,$0,env | # because result of Part-C is lost. this is why things go wrong. + set_label $0x0 | + exit_tb $0x7f6401714013 ----------------- + ########### + ----- TCG ends ------ +''' + +TestCode = b'\xa1\x01\x50\xe1\xa1\x01\x40\x20\x01\x00\x50\xe3\x21\x12\xa0\xe1\x04\x20\x52\xa2\x64\x30\xa0\xa3' + +def UseUcToEmulate(): + try: + uc_emu = Uc(UC_ARCH_ARM, UC_MODE_ARM) + #if LoadCode(uc_emu, 2*MB, 0x9004): + uc_emu.mem_map(0, 2*MB) + uc_emu.reg_write(UC_ARM_REG_SP, 0x40000) + uc_emu.reg_write(UC_ARM_REG_R0, 3) + uc_emu.reg_write(UC_ARM_REG_R1, 24) + uc_emu.reg_write(UC_ARM_REG_R2, 16) + uc_emu.mem_write(0, TestCode) + uc_emu.emu_start(0, 24) + PrintArmRegisters(uc_emu) + + except UcError as e: + print("ERROR: %s" % e) + PrintArmRegisters(uc_emu) + + +UseUcToEmulate()