diff --git a/IR/.vscode/launch.json b/IR/.vscode/launch.json new file mode 100644 index 00000000..2f528563 --- /dev/null +++ b/IR/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug", + "program": "${workspaceFolder}/build/read", + "args": ["tests/loop1.o"], + "cwd": "${workspaceFolder}" + } + ] +} diff --git a/IR/CMakeLists.txt b/IR/CMakeLists.txt index fe62baba..04b3eb5b 100644 --- a/IR/CMakeLists.txt +++ b/IR/CMakeLists.txt @@ -15,13 +15,20 @@ add_executable( passes/add_counter_pass.c passes/add_constraint_pass.c passes/cut_bb_pass.c - aux/add_stack_offset.c + passes/end_bb.c aux/live_variable.c aux/prog_check.c aux/eliminate_ssa.c aux/conflict_analysis.c aux/graph_coloring.c aux/explicit_reg.c + aux/coaleasing.c + aux/spill.c + aux/translate.c + aux/stack_alloc.c + aux/normalize.c + aux/fix_bb_succ.c + aux/relocate.c ir_code_gen.c ) diff --git a/IR/Readme.md b/IR/Readme.md index b2a6e5cc..a12aad09 100644 --- a/IR/Readme.md +++ b/IR/Readme.md @@ -8,6 +8,12 @@ To start with, a simple constaint would be "range constraint", meaning a registe One opinion, one benefit of designing the raw constraint from is that our runtime-check system will not depend heavily on the current linux verifier and will be portable to other verifiers. 
+## Roadmap + +- [x] Register spilling +- [x] Caller-saved/callee-saved register +- [ ] Translation + # TODO - More instructions diff --git a/IR/array.c b/IR/array.c index f74bbcd7..5158c71d 100644 --- a/IR/array.c +++ b/IR/array.c @@ -86,3 +86,10 @@ void array_free(struct array *arr) { __free(arr->data); *arr = array_null(); } + +void *array_get_void(struct array *arr, size_t idx) { + if (idx >= arr->num_elem) { + return NULL; + } + return (char *)(arr->data) + arr->elem_size * idx; +} diff --git a/IR/aux/coaleasing.c b/IR/aux/coaleasing.c new file mode 100644 index 00000000..672533f3 --- /dev/null +++ b/IR/aux/coaleasing.c @@ -0,0 +1,30 @@ +#include +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "ir_fun.h" +#include "ir_insn.h" +#include "list.h" + +void coaleasing(struct ir_function *fun) { + struct ir_basic_block **pos; + // For each BB + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *pos2, *tmp; + // For each operation + list_for_each_entry_safe(pos2, tmp, &bb->ir_insn_head, list_ptr) { + struct ir_insn *insn_dst = dst(pos2); + if (pos2->op == IR_INSN_ASSIGN) { + if (pos2->values[0].type == IR_VALUE_INSN) { + struct ir_insn *src = pos2->values[0].data.insn_d; + DBGASSERT(src == dst(src)); + if (insn_cg(src)->alloc_reg == insn_cg(insn_dst)->alloc_reg) { + // Remove + erase_insn_raw(pos2); + } + } + } + } + } +} diff --git a/IR/aux/conflict_analysis.c b/IR/aux/conflict_analysis.c index 3a7727f3..4f3e748c 100644 --- a/IR/aux/conflict_analysis.c +++ b/IR/aux/conflict_analysis.c @@ -1,3 +1,4 @@ +#include #include #include "array.h" #include "bpf_ir.h" @@ -27,35 +28,50 @@ void print_interference_graph(struct ir_function *fun) { struct ir_insn **pos; array_for(pos, fun->cg_info.all_var) { struct ir_insn *insn = *pos; + if (insn->op == IR_INSN_REG) { + CRITICAL("Pre-colored register should not be in all_var"); + } if (!is_final(insn)) { // Not final value, give up CRITICAL("Not Final Value!"); } 
struct ir_insn_cg_extra *extra = insn_cg(insn); if (extra->allocated) { + // Allocated VR printf("%%%zu(", insn->_insn_id); if (extra->spilled) { printf("sp-%zu", extra->spilled * 8); } else { printf("r%u", extra->alloc_reg); } - printf("): "); + printf("):"); } else { - printf("%%%zu: ", insn->_insn_id); + // Pre-colored registers or unallocated VR + print_insn_ptr_base(insn); + printf(":"); } struct ir_insn **pos2; array_for(pos2, insn_cg(insn)->adj) { struct ir_insn *adj_insn = *pos2; - if (!is_final(insn)) { + if (!is_final(adj_insn)) { // Not final value, give up CRITICAL("Not Final Value!"); } - printf("%%%zu, ", adj_insn->_insn_id); + printf(" "); + print_insn_ptr_base(adj_insn); } printf("\n"); } } +void caller_constraint(struct ir_function *fun, struct ir_insn *insn) { + for (__u8 i = BPF_REG_0; i < BPF_REG_6; ++i) { + // R0-R5 are caller saved register + DBGASSERT(fun->cg_info.regs[i] == dst(fun->cg_info.regs[i])); + build_conflict(fun->cg_info.regs[i], insn); + } +} + void conflict_analysis(struct ir_function *fun) { // Basic conflict: // For every x in KILL set, x is conflict with every element in OUT set. @@ -67,12 +83,30 @@ void conflict_analysis(struct ir_function *fun) { struct ir_insn *insn; // For each operation list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { - struct ir_insn **pos2; struct ir_insn_cg_extra *insn_cg = insn->user_data; + if (insn->op == IR_INSN_CALL) { + // Add caller saved register constraints + struct ir_insn **pos2; + array_for(pos2, insn_cg->in) { + DBGASSERT(*pos2 == dst(*pos2)); + struct ir_insn **pos3; + array_for(pos3, insn_cg->out) { + DBGASSERT(*pos3 == dst(*pos3)); + if (*pos2 == *pos3) { + // Live across CALL! 
+ printf("Found a VR live across CALL!\n"); + caller_constraint(fun, *pos2); + } + } + } + } + struct ir_insn **pos2; array_for(pos2, insn_cg->kill) { struct ir_insn *insn_dst = *pos2; DBGASSERT(insn_dst == dst(insn_dst)); - array_push_unique(&fun->cg_info.all_var, &insn_dst); + if (insn_dst->op != IR_INSN_REG) { + array_push_unique(&fun->cg_info.all_var, &insn_dst); + } struct ir_insn **pos3; array_for(pos3, insn_cg->out) { DBGASSERT(*pos3 == dst(*pos3)); diff --git a/IR/aux/explicit_reg.c b/IR/aux/explicit_reg.c index 9ed014bc..b7ce7567 100644 --- a/IR/aux/explicit_reg.c +++ b/IR/aux/explicit_reg.c @@ -22,37 +22,41 @@ void explicit_reg(struct ir_function *fun) { // Final value: v == dst(v) struct ir_basic_block **pos; // Maximum number of functions: MAX_FUNC_ARG - struct array call_insns = INIT_ARRAY(struct ir_insn *); array_for(pos, fun->reachable_bbs) { struct ir_basic_block *bb = *pos; struct ir_insn *insn; list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { if (insn->op == IR_INSN_CALL) { - array_push(&call_insns, &insn); + for (__u8 i = 0; i < insn->value_num; ++i) { + struct ir_value val = insn->values[i]; + struct ir_insn *new_insn = create_assign_insn_cg(insn, val, INSERT_FRONT); + insn_cg(new_insn)->dst = fun->cg_info.regs[i + 1]; + val_remove_user(val, insn); + } + insn->value_num = 0; // Remove all operands + struct ir_insn_cg_extra *extra = insn_cg(insn); + extra->dst = NULL; + if (insn->users.num_elem == 0) { + continue; + } + struct ir_insn *new_insn = + create_assign_insn_cg(insn, ir_value_insn(fun->cg_info.regs[0]), INSERT_BACK); + replace_all_usage(insn, ir_value_insn(new_insn)); + } + + if (insn->op == IR_INSN_RET) { + // ret x + // ==> + // R0 = x + // ret + struct ir_insn *new_insn = + create_assign_insn_cg(insn, insn->values[0], INSERT_FRONT); + val_remove_user(insn->values[0], insn); + insn_cg(new_insn)->dst = fun->cg_info.regs[0]; + insn->value_num = 0; } } } - // Functions that are called - struct ir_insn **pos2; - array_for(pos2, 
call_insns) { - struct ir_insn *insn = *pos2; - for (__u8 i = 0; i < insn->value_num; ++i) { - struct ir_value val = insn->values[i]; - struct ir_insn *new_insn = create_assign_insn_cg(insn, val, INSERT_FRONT); - insn_cg(new_insn)->dst = fun->cg_info.regs[i + 1]; - val_remove_user(val, insn); - } - insn->value_num = 0; // Remove all operands - struct ir_insn_cg_extra *extra = insn_cg(insn); - extra->dst = NULL; - if (insn->users.num_elem == 0) { - continue; - } - struct ir_insn *new_insn = - create_assign_insn_cg(insn, ir_value_insn(fun->cg_info.regs[0]), INSERT_BACK); - replace_all_usage(insn, ir_value_insn(new_insn)); - } - array_free(&call_insns); // Arg for (__u8 i = 0; i < MAX_FUNC_ARG; ++i) { if (fun->function_arg[i]->users.num_elem > 0) { diff --git a/IR/aux/fix_bb_succ.c b/IR/aux/fix_bb_succ.c new file mode 100644 index 00000000..561b7c7f --- /dev/null +++ b/IR/aux/fix_bb_succ.c @@ -0,0 +1,19 @@ +#include "bpf_ir.h" +#include "dbg.h" +#include "ir_bb.h" +#include "ir_fun.h" + +void fix_bb_succ(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->all_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *insn = get_last_insn(bb); + if (insn && insn->op >= IR_INSN_JEQ && insn->op < IR_INSN_PHI) { + // Conditional jmp + struct ir_basic_block **s1 = array_get(&bb->succs, 0, struct ir_basic_block *); + struct ir_basic_block **s2 = array_get(&bb->succs, 1, struct ir_basic_block *); + *s1 = insn->bb1; + *s2 = insn->bb2; + } + } +} diff --git a/IR/aux/graph_coloring.c b/IR/aux/graph_coloring.c index 5ebe51a4..2406ae40 100644 --- a/IR/aux/graph_coloring.c +++ b/IR/aux/graph_coloring.c @@ -23,7 +23,10 @@ void graph_coloring(struct ir_function *fun) { struct ir_insn **pos; array_for(pos, (*all_var)) { // Allocate register for *pos - struct ir_insn *insn = *pos; + struct ir_insn *insn = *pos; + if (insn->op == IR_INSN_REG) { + CRITICAL("Pre-colored register should not be in all_var"); + } struct ir_insn_cg_extra *extra = insn_cg(insn); 
struct ir_insn **pos2; @@ -44,7 +47,7 @@ void graph_coloring(struct ir_function *fun) { for (__u8 i = 0; i < MAX_BPF_REG; i++) { if (!used_reg[i]) { extra->allocated = 1; - printf("Allocate r%u for %zu\n", i, insn->_insn_id); + printf("Allocate r%u for %%%zu\n", i, insn->_insn_id); extra->alloc_reg = i; need_spill = 0; break; diff --git a/IR/aux/live_variable.c b/IR/aux/live_variable.c index d69968d5..b9956a61 100644 --- a/IR/aux/live_variable.c +++ b/IR/aux/live_variable.c @@ -153,22 +153,26 @@ void print_insn_extra(struct ir_insn *insn) { struct ir_insn **pos; array_for(pos, insn_cg->gen) { struct ir_insn *insn = *pos; - printf(" %%%zu", insn->_insn_id); + printf(" "); + print_insn_ptr_base(insn); } printf("\nKill:"); array_for(pos, insn_cg->kill) { struct ir_insn *insn = *pos; - printf(" %%%zu", insn->_insn_id); + printf(" "); + print_insn_ptr_base(insn); } printf("\nIn:"); array_for(pos, insn_cg->in) { struct ir_insn *insn = *pos; - printf(" %%%zu", insn->_insn_id); + printf(" "); + print_insn_ptr_base(insn); } printf("\nOut:"); array_for(pos, insn_cg->out) { struct ir_insn *insn = *pos; - printf(" %%%zu", insn->_insn_id); + printf(" "); + print_insn_ptr_base(insn); } printf("\n-------------\n"); } diff --git a/IR/aux/normalize.c b/IR/aux/normalize.c new file mode 100644 index 00000000..cfec5bd1 --- /dev/null +++ b/IR/aux/normalize.c @@ -0,0 +1,116 @@ +// Normalization + +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "ir_fun.h" +#include "ir_insn.h" + +void normalize(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + struct ir_value *v0 = &insn->values[0]; + struct ir_value *v1 = &insn->values[1]; + enum val_type t0 = insn->value_num >= 1 ? vtype(*v0) : UNDEF; + enum val_type t1 = insn->value_num >= 2 ? 
vtype(*v1) : UNDEF; + enum val_type tdst = vtype_insn(insn); + struct ir_insn *dst_insn = dst(insn); + if (insn->op == IR_INSN_ALLOC) { + // Skip + } else if (insn->op == IR_INSN_STORE) { + // Should be converted to ASSIGN + CRITICAL("Error"); + } else if (insn->op == IR_INSN_LOAD) { + CRITICAL("Error"); + } else if (insn->op == IR_INSN_LOADRAW) { + // OK + } else if (insn->op == IR_INSN_STORERAW) { + // OK + } else if (insn->op >= IR_INSN_ADD && insn->op < IR_INSN_CALL) { + // Binary ALU + if (t0 == STACK && t1 == CONST) { + // reg1 = add stack const + // ==> + // reg1 = stack + // reg1 = add reg1 const + struct ir_insn *new_insn = create_assign_insn_cg(insn, *v0, INSERT_FRONT); + insn_cg(new_insn)->dst = dst_insn; + v0->type = IR_VALUE_INSN; + v0->data.insn_d = dst_insn; + } else if (t0 == REG && t1 == REG) { + // reg1 = add reg2 reg3 + __u8 reg1 = insn_cg(dst_insn)->alloc_reg; + __u8 reg2 = insn_cg(v0->data.insn_d)->alloc_reg; + __u8 reg3 = insn_cg(v1->data.insn_d)->alloc_reg; + if (reg1 != reg2) { + if (reg1 == reg3) { + // Exchange reg2 and reg3 + struct ir_value tmp = *v0; + *v0 = *v1; + *v1 = tmp; + } else { + // reg1 = add reg2 reg3 + // ==> + // reg1 = reg2 + // reg1 = add reg1 reg3 + struct ir_insn *new_insn = + create_assign_insn_cg(insn, *v0, INSERT_FRONT); + DBGASSERT(dst_insn == fun->cg_info.regs[reg1]); + insn_cg(new_insn)->dst = dst_insn; + v0->type = IR_VALUE_INSN; + v0->data.insn_d = dst_insn; + } + } + } else if (t0 == REG && t1 == CONST) { + // reg1 = add reg2 const + // ==> + // reg1 = reg2 + // reg1 = add reg1 const + struct ir_insn *new_insn = create_assign_insn_cg(insn, *v0, INSERT_FRONT); + insn_cg(new_insn)->dst = dst_insn; + v0->type = IR_VALUE_INSN; + v0->data.insn_d = dst_insn; + } else { + CRITICAL("Error"); + } + } else if (insn->op == IR_INSN_ASSIGN) { + // stack = reg + // stack = const + // reg = const + // reg = stack + // reg = reg + if (tdst == STACK) { + DBGASSERT(t0 != STACK); + // Change to STORERAW + insn->op = 
IR_INSN_STORERAW; + insn->addr_val.value = ir_value_stack_ptr(); + insn->addr_val.offset = -insn_cg(dst_insn)->spilled * 8; + insn->vr_type = IR_VR_TYPE_64; + } else { + if (t0 == STACK) { + // Change to LOADRAW + insn->op = IR_INSN_LOADRAW; + insn->addr_val.value = ir_value_stack_ptr(); + insn->addr_val.offset = -insn_cg(v0->data.insn_d)->spilled * 8; + insn->vr_type = IR_VR_TYPE_64; + } + } + } else if (insn->op == IR_INSN_RET) { + // OK + } else if (insn->op == IR_INSN_CALL) { + // OK + } else if (insn->op == IR_INSN_JA) { + // OK + } else if (insn->op >= IR_INSN_JEQ && insn->op < IR_INSN_PHI) { + // jeq reg const/reg + DBGASSERT(t0 == REG && (t1 == REG || t1 == CONST)); + // OK + } else { + CRITICAL("No such instruction"); + } + } + } +} diff --git a/IR/aux/prog_check.c b/IR/aux/prog_check.c index 02ae3e67..676fdc30 100644 --- a/IR/aux/prog_check.c +++ b/IR/aux/prog_check.c @@ -29,6 +29,12 @@ void check_insn_users_use_insn(struct ir_insn *insn) { } } +void check_insn(struct ir_function *fun) { + // Check syntax + // - Store uses alloc + // - `ret` at the end of BB chains +} + void check_insn_operand(struct ir_insn *insn) { struct array operands = get_operands(insn); struct ir_value **val; diff --git a/IR/aux/relocate.c b/IR/aux/relocate.c new file mode 100644 index 00000000..b74e55f7 --- /dev/null +++ b/IR/aux/relocate.c @@ -0,0 +1,8 @@ +// Relocate BB +#include "ir_fun.h" + +void calc_bb_insn_cnt(struct ir_function *fun) {} + +void relocate(struct ir_function *fun) { + calc_bb_insn_cnt(fun); +} diff --git a/IR/aux/spill.c b/IR/aux/spill.c index 0b7ff28f..423b6fbf 100644 --- a/IR/aux/spill.c +++ b/IR/aux/spill.c @@ -1,50 +1,339 @@ +#include +#include +#include +#include "array.h" #include "bpf_ir.h" #include "code_gen.h" #include "dbg.h" -enum val_type { - REG, - CONST, - STACK -}; +#include "ir_insn.h" +#include "ir_helper.h" + +enum val_type vtype_insn(struct ir_insn *insn) { + insn = dst(insn); + if (insn == NULL) { + // Void + return UNDEF; + } + struct 
ir_insn_cg_extra *extra = insn_cg(insn); + if (extra->spilled) { + return STACK; + } else { + return REG; + } +} enum val_type vtype(struct ir_value val) { if (val.type == IR_VALUE_INSN) { - struct ir_insn *insn = dst(val.data.insn_d); - struct ir_insn_cg_extra *extra = insn_cg(insn); - if (extra->spilled) { - return STACK; - } else { - return REG; - } + return vtype_insn(val.data.insn_d); + } else if (val.type == IR_VALUE_CONSTANT) { + return CONST; + } else if (val.type == IR_VALUE_STACK_PTR) { + return REG; } else { + CRITICAL("No such value type for dst"); + } +} + +void load_stack_to_r0(struct ir_function *fun, struct ir_insn *insn, struct ir_value *val) { + struct ir_insn *tmp = create_assign_insn_cg(insn, *val, INSERT_FRONT); + insn_cg(tmp)->dst = fun->cg_info.regs[0]; + + val->type = IR_VALUE_INSN; + val->data.insn_d = fun->cg_info.regs[0]; +} + +void load_const_to_vr(struct ir_insn *insn, struct ir_value *val) { + struct ir_insn *tmp = create_assign_insn_cg(insn, *val, INSERT_FRONT); + + val->type = IR_VALUE_INSN; + val->data.insn_d = tmp; +} + +void add_stack_offset_vr(struct ir_function *fun, size_t num) { + struct ir_insn **pos; + array_for(pos, fun->cg_info.all_var) { + struct ir_insn_cg_extra *extra = insn_cg(*pos); + if (extra->spilled > 0) { + extra->spilled += num; + } + } +} + +void spill_callee(struct ir_function *fun) { + // Spill Callee saved registers if used + __u8 reg_used[MAX_BPF_REG] = {0}; + + struct ir_insn **pos; + array_for(pos, fun->cg_info.all_var) { + struct ir_insn_cg_extra *extra = insn_cg(*pos); + reg_used[extra->alloc_reg] = 1; + } + size_t off = 0; + for (__u8 i = BPF_REG_6; i < BPF_REG_10; ++i) { + if (reg_used[i]) { + off++; + } + } + DBGASSERT(off == fun->cg_info.callee_num); + add_stack_offset_vr(fun, off); + off = 0; + for (__u8 i = BPF_REG_6; i < BPF_REG_10; ++i) { + // All callee saved registers + if (reg_used[i]) { + off++; + // Spill at sp-off + // struct ir_insn *st = create_assign_insn_bb_cg( + // fun->entry, 
ir_value_insn(fun->cg_info.regs[i]), INSERT_FRONT); + struct ir_insn *st = create_insn_base_cg(fun->entry); + insert_at_bb(st, fun->entry, INSERT_FRONT); + st->op = IR_INSN_STORERAW; + st->values[0] = ir_value_insn(fun->cg_info.regs[i]); + st->value_num = 1; + st->vr_type = IR_VR_TYPE_64; + struct ir_value val; + val.type = IR_VALUE_STACK_PTR; + st->addr_val.value = val; + st->addr_val.offset = -off * 8; + struct ir_insn_cg_extra *extra = insn_cg(st); + extra->dst = NULL; + + struct ir_basic_block **pos2; + array_for(pos2, fun->end_bbs) { + struct ir_basic_block *bb = *pos2; + struct ir_insn *ld = create_insn_base_cg(bb); + insert_at_bb(ld, bb, INSERT_BACK_BEFORE_JMP); + ld->op = IR_INSN_LOADRAW; + ld->value_num = 0; + ld->vr_type = IR_VR_TYPE_64; + struct ir_value val; + val.type = IR_VALUE_STACK_PTR; + ld->addr_val.value = val; + ld->addr_val.offset = -off * 8; + + extra = insn_cg(ld); + extra->dst = fun->cg_info.regs[i]; + } + } } } int check_need_spill(struct ir_function *fun) { // Check if all instruction values are OK for translating + int res = 0; struct ir_basic_block **pos; array_for(pos, fun->reachable_bbs) { struct ir_basic_block *bb = *pos; struct ir_insn *insn; list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + struct ir_value *v0 = &insn->values[0]; + struct ir_value *v1 = &insn->values[1]; + enum val_type t0 = insn->value_num >= 1 ? vtype(*v0) : UNDEF; + enum val_type t1 = insn->value_num >= 2 ? 
vtype(*v1) : UNDEF; + enum val_type tdst = vtype_insn(insn); + struct ir_insn_cg_extra *extra = insn_cg(insn); + struct ir_insn *dst_insn = dst(insn); if (insn->op == IR_INSN_ALLOC) { // dst = alloc // Nothing to do } else if (insn->op == IR_INSN_STORE) { - // store v0 v1 - // v0: reg + v1: reg, const, stack - // v0: stack + v1: reg, const + // store v0(dst) v1 + // Eequivalent to `v0 = v1` + // TODO: sized store + // Currently all load & store are 8 bytes + insn->op = IR_INSN_ASSIGN; + DBGASSERT(v0->type == IR_VALUE_INSN); // Should be guaranteed by prog_check + extra->dst = v0->data.insn_d; + insn->value_num = 1; + *v0 = *v1; + res = 1; + } else if (insn->op == IR_INSN_LOAD) { + // stack = load stack + // stack = load reg + // reg = load reg + // reg = load stack + // TODO: sized load + insn->op = IR_INSN_ASSIGN; + DBGASSERT(v0->type == IR_VALUE_INSN); // Should be guaranteed by prog_check + res = 1; + } else if (insn->op == IR_INSN_LOADRAW) { + // reg = loadraw addr ==> OK + // stack = loadraw addr + // ==> + // R0 = loadraw addr + // stack = R0 + if (tdst == STACK) { + extra->dst = fun->cg_info.regs[0]; + struct ir_insn *tmp = create_assign_insn_cg( + insn, ir_value_insn(fun->cg_info.regs[0]), INSERT_BACK); + insn_cg(tmp)->dst = dst_insn; + res = 1; + } + } else if (insn->op == IR_INSN_STORERAW) { + // Built-in store instruction, OK + } else if (insn->op >= IR_INSN_ADD && insn->op < IR_INSN_CALL) { + // Binary ALU + // reg = add reg reg + // reg = add reg const + // There should be NO stack + if (tdst == STACK) { + // stack = add ? ? + // ==> + // R0 = add ? ? 
+ // stack = R0 + extra->dst = fun->cg_info.regs[0]; + struct ir_insn *tmp = create_assign_insn_cg( + insn, ir_value_insn(fun->cg_info.regs[0]), INSERT_BACK); + insn_cg(tmp)->dst = dst_insn; + res = 1; + } else { + if ((t0 != REG && t1 == REG) || (t0 == CONST && t1 == STACK)) { + // reg = add !reg reg + // ==> + // reg = add reg !reg + struct ir_value tmp = *v0; + *v0 = *v1; + *v1 = tmp; + enum val_type ttmp = t0; + t0 = t1; + t1 = ttmp; + // No need to spill here + } + if (t0 == REG) { + // reg = add reg reg ==> OK + // reg = add reg const ==> OK + + // reg1 = add reg2 stack + // ==> + // If reg1 != reg2, + // reg1 = stack + // reg1 = add reg2 reg1 + // Else + // Choose reg3 != reg1, + // reg3 = stack + // reg1 = add reg1 reg3 + if (t1 == STACK) { + __u8 reg1 = insn_cg(dst_insn)->alloc_reg; + __u8 reg2 = insn_cg(v0->data.insn_d)->alloc_reg; + if (reg1 == reg2) { + __u8 reg = reg1 == 0 ? 1 : 0; + struct ir_insn *new_insn = + create_assign_insn_cg(insn, *v1, INSERT_FRONT); + insn_cg(new_insn)->dst = fun->cg_info.regs[reg]; + v1->type = IR_VALUE_INSN; + v1->data.insn_d = fun->cg_info.regs[reg]; + } else { + struct ir_insn *new_insn = + create_assign_insn_cg(insn, *v1, INSERT_FRONT); + insn_cg(new_insn)->dst = fun->cg_info.regs[reg1]; + v1->type = IR_VALUE_INSN; + v1->data.insn_d = fun->cg_info.regs[reg1]; + } + res = 1; + } + } else { + // reg = add const const ==> OK + // reg = add c1 c2 + // ==> + // reg = c1 + // reg = add reg c2 + // OK + // reg = add stack stack + if (t0 == STACK && t1 == STACK) { + // reg1 = add stack1 stack2 + // ==> + // Found reg2 != reg1 + // reg1 = stack1 + // reg1 = add reg1 stack2 + __u8 reg1 = insn_cg(dst_insn)->alloc_reg; + struct ir_insn *new_insn = + create_assign_insn_cg(insn, *v0, INSERT_FRONT); + insn_cg(new_insn)->dst = fun->cg_info.regs[reg1]; + v0->type = IR_VALUE_INSN; + v0->data.insn_d = fun->cg_info.regs[reg1]; + res = 1; + } + // reg = add stack const ==> OK + // ==> + // reg = stack + // reg = add reg const + } + } } else 
if (insn->op == IR_INSN_ASSIGN) { - // dst = - // MOV dst val + // stack = reg + // stack = const + // reg = const + // reg = stack + // reg = reg + if (tdst == STACK && t0 == STACK) { + load_stack_to_r0(fun, insn, v0); + res = 1; + } + // TODO: constant to stack: might need to first load to reg + } else if (insn->op == IR_INSN_RET) { + // ret const/reg + // Done in explicit_reg pass + DBGASSERT(insn->value_num == 0); + } else if (insn->op == IR_INSN_CALL) { + // call() + // Should have no arguments + DBGASSERT(insn->value_num == 0); + } else if (insn->op == IR_INSN_JA) { + // OK + } else if (insn->op >= IR_INSN_JEQ && insn->op < IR_INSN_PHI) { + // jeq reg const/reg + if ((t0 != REG && t1 == REG) || (t0 == CONST && t1 == STACK)) { + struct ir_value tmp = *v0; + *v0 = *v1; + *v1 = tmp; + enum val_type ttmp = t0; + t0 = t1; + t1 = ttmp; + // No need to spill here + } + if (t0 == REG) { + // jeq reg reg ==> OK + // jeq reg const ==> OK + // jeq reg stack + // ==> + // reg2 = stack + // jeq reg reg2 + if (t1 == STACK) { + __u8 reg1 = insn_cg(v0->data.insn_d)->alloc_reg; + __u8 reg2 = reg1 == 0 ? 
1 : 0; + struct ir_insn *new_insn = create_assign_insn_cg(insn, *v1, INSERT_FRONT); + insn_cg(new_insn)->dst = fun->cg_info.regs[reg2]; + v1->type = IR_VALUE_INSN; + v1->data.insn_d = fun->cg_info.regs[reg2]; + res = 1; + } + } else { + // jeq const1 const2 + // ==> + // %tmp = const1 + // jeq %tmp const2 + if (t0 == CONST && t1 == CONST) { + struct ir_insn *new_insn = create_assign_insn_cg(insn, *v0, INSERT_FRONT); + v0->type = IR_VALUE_INSN; + v0->data.insn_d = new_insn; + res = 1; + } + // jeq stack const + if (t0 == STACK && t1 == CONST) { + load_stack_to_r0(fun, insn, v0); + res = 1; + } + // jeq stack stack + if (t0 == STACK && t1 == STACK) { + load_stack_to_r0(fun, insn, v0); + res = 1; + } + } } else { CRITICAL("No such instruction"); } } } - return 0; + return res; } - -void spill(struct ir_function *fun) {} \ No newline at end of file diff --git a/IR/aux/add_stack_offset.c b/IR/aux/stack_alloc.c similarity index 53% rename from IR/aux/add_stack_offset.c rename to IR/aux/stack_alloc.c index 387215e2..42498fb9 100644 --- a/IR/aux/add_stack_offset.c +++ b/IR/aux/stack_alloc.c @@ -1,8 +1,49 @@ -#include "add_stack_offset.h" +// Allocate stack in code generation + +#include +#include "code_gen.h" +#include "ir_fun.h" #include "array.h" #include "bpf_ir.h" #include "ir_insn.h" +void calc_callee_num(struct ir_function *fun) { + __u8 reg_used[MAX_BPF_REG] = {0}; + + struct ir_insn **pos; + array_for(pos, fun->cg_info.all_var) { + struct ir_insn_cg_extra *extra = insn_cg(*pos); + reg_used[extra->alloc_reg] = 1; + } + size_t off = 0; + for (__u8 i = BPF_REG_6; i < BPF_REG_10; ++i) { + if (reg_used[i]) { + off++; + } + } + fun->cg_info.callee_num = off; +} + +void calc_stack_size(struct ir_function *fun) { + // Check callee + size_t off = 0; + off += fun->cg_info.callee_num * 8; + // Check all VR + size_t max = 0; + struct ir_insn **pos; + array_for(pos, fun->cg_info.all_var) { + struct ir_insn_cg_extra *extra = insn_cg(*pos); + if (extra->spilled > 0) { + // 
Spilled! + if (extra->spilled > max) { + max = extra->spilled; + } + } + } + fun->cg_info.stack_offset = -(off + max * 8); + printf("Stack size: %d\n", fun->cg_info.stack_offset); +} + void add_stack_offset(struct ir_function *fun, __s16 offset) { struct array users = fun->sp_users; struct ir_insn **pos; diff --git a/IR/aux/translate.c b/IR/aux/translate.c index 82e6a206..52fa0b28 100644 --- a/IR/aux/translate.c +++ b/IR/aux/translate.c @@ -1,29 +1,114 @@ +#include +#include #include "bpf_ir.h" #include "code_gen.h" #include "dbg.h" -#define IU_REG0 BPF_REG_0 -#define IU_REG1 BPF_REG_1 +struct pre_ir_insn load_reg_to_reg(__u8 dst, __u8 src) { + // MOV dst src + struct pre_ir_insn insn = {0}; // zero-init: fields not assigned below must not be indeterminate + insn.opcode = BPF_MOV | BPF_X | BPF_ALU64; + insn.dst_reg = dst; + insn.src_reg = src; + return insn; +} + +struct pre_ir_insn load_const_to_reg(__u8 dst, struct ir_constant c) { + // MOV dst imm + struct pre_ir_insn insn = {0}; // zero-init: no branch below may match c.type + insn.dst_reg = dst; + if (c.type == IR_CONSTANT_U64) { + insn.it = IMM64; + insn.imm64 = c.data.u64_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU64; + } + if (c.type == IR_CONSTANT_S64) { + insn.it = IMM64; + insn.imm64 = c.data.s64_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU64; + } + if (c.type == IR_CONSTANT_U32) { + insn.it = IMM; + insn.imm = c.data.u32_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU; + } + if (c.type == IR_CONSTANT_S32) { + insn.it = IMM; + insn.imm = c.data.s32_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU; + } + if (c.type == IR_CONSTANT_U16) { + insn.it = IMM; + insn.imm = c.data.u16_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU; + } + if (c.type == IR_CONSTANT_S16) { + insn.it = IMM; + insn.imm = c.data.s16_d; + insn.opcode = BPF_MOV | BPF_K | BPF_ALU; + } + return insn; +} + +struct pre_ir_insn load_addr_to_reg(__u8 dst, struct ir_address_value addr, enum ir_vr_type type) { + // Load dst from the memory location described by addr + struct pre_ir_insn insn = {0}; // zero-init: each branch below sets only some fields + insn.dst_reg = dst; + insn.off = addr.offset; + int size = vr_type_to_size(type); + if (addr.value.type == 
IR_VALUE_STACK_PTR) { + insn.src_reg = BPF_REG_10; + insn.opcode = BPF_LDX | size | BPF_MEM; + } else if (addr.value.type == IR_VALUE_INSN) { + // Must be REG + DBGASSERT(vtype(addr.value) == REG); + // Load reg (addr) to reg + insn.src_reg = insn_cg(addr.value.data.insn_d)->alloc_reg; + insn.opcode = BPF_LDX | size | BPF_MEM; + } else if (addr.value.type == IR_VALUE_CONSTANT) { + // Must be U64 + DBGASSERT(addr.value.data.constant_d.type == IR_CONSTANT_U64); + insn.it = IMM64; + insn.imm64 = addr.value.data.constant_d.data.u64_d; + insn.opcode = BPF_IMM | size | BPF_LD; + } else { + CRITICAL("Error"); + } + return insn; +} void translate(struct ir_function *fun) { - // fun is still in IR form struct ir_basic_block **pos; array_for(pos, fun->reachable_bbs) { struct ir_basic_block *bb = *pos; struct ir_insn *insn; list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { - struct ir_insn_cg_extra *extra = insn_cg(insn); - + struct ir_value v0 = insn->values[0]; + struct ir_value v1 = insn->values[1]; + enum val_type t0 = insn->value_num >= 1 ? vtype(v0) : UNDEF; + enum val_type t1 = insn->value_num >= 2 ? 
vtype(v1) : UNDEF; + enum val_type tdst = vtype_insn(insn); + struct ir_insn_cg_extra *extra = insn_cg(insn); + struct ir_insn *dst_insn = dst(insn); if (insn->op == IR_INSN_ALLOC) { - // dst = alloc // Nothing to do + extra->translated_num = 0; + } else if (insn->op == IR_INSN_STORE) { + CRITICAL("Error"); + } else if (insn->op == IR_INSN_LOAD) { + CRITICAL("Error"); + } else if (insn->op == IR_INSN_LOADRAW) { + DBGASSERT(tdst == REG); + } else if (insn->op == IR_INSN_STORERAW) { + } else if (insn->op >= IR_INSN_ADD && insn->op < IR_INSN_CALL) { } else if (insn->op == IR_INSN_ASSIGN) { - // dst = - // MOV dst val - + } else if (insn->op == IR_INSN_RET) { + } else if (insn->op == IR_INSN_CALL) { + } else if (insn->op == IR_INSN_JA) { + } else if (insn->op >= IR_INSN_JEQ && insn->op < IR_INSN_PHI) { } else { CRITICAL("No such instruction"); } } } -} \ No newline at end of file +} diff --git a/IR/bpf_ir.c b/IR/bpf_ir.c index 39ef5b11..26039329 100644 --- a/IR/bpf_ir.c +++ b/IR/bpf_ir.c @@ -4,7 +4,6 @@ #include #include #include -#include "add_stack_offset.h" #include "ir_helper.h" #include "array.h" #include "code_gen.h" @@ -13,7 +12,6 @@ #include "dbg.h" #include "passes.h" #include "prog_check.h" -#include "reachable_bb.h" #include "read.h" // TODO: Change this to real function @@ -201,12 +199,14 @@ struct bb_info gen_bb(struct bpf_insn *insns, size_t len) { new_insn.src_reg = insn.src_reg; new_insn.dst_reg = insn.dst_reg; new_insn.imm = insn.imm; + new_insn.it = IMM; new_insn.imm64 = 0; new_insn.off = insn.off; new_insn.pos = pos; if (pos + 1 < real_bb->end_pos && insns[pos + 1].code == 0) { __u64 imml = (__u64)insn.imm & 0xFFFFFFFF; new_insn.imm64 = ((__s64)(insns[pos + 1].imm) << 32) | imml; + new_insn.it = IMM64; pos++; } real_bb->pre_insns[bb_pos] = new_insn; @@ -409,31 +409,31 @@ struct ir_insn *create_insn_front(struct ir_basic_block *bb) { return insn; } -enum ir_vr_type to_ir_ld_s(__u8 size) { +enum ir_vr_type to_ir_ld_u(__u8 size) { switch (size) { case 
BPF_W: - return IR_VR_TYPE_S32; + return IR_VR_TYPE_32; case BPF_H: - return IR_VR_TYPE_S16; + return IR_VR_TYPE_16; case BPF_B: - return IR_VR_TYPE_S8; + return IR_VR_TYPE_8; case BPF_DW: - return IR_VR_TYPE_S64; + return IR_VR_TYPE_64; default: CRITICAL("Error"); } } -enum ir_vr_type to_ir_ld_u(__u8 size) { - switch (size) { - case BPF_W: - return IR_VR_TYPE_U32; - case BPF_H: - return IR_VR_TYPE_U16; - case BPF_B: - return IR_VR_TYPE_U8; - case BPF_DW: - return IR_VR_TYPE_U64; +int vr_type_to_size(enum ir_vr_type type) { + switch (type) { + case IR_VR_TYPE_32: + return BPF_W; + case IR_VR_TYPE_16: + return BPF_H; + case IR_VR_TYPE_8: + return BPF_B; + case IR_VR_TYPE_64: + return BPF_DW; default: CRITICAL("Error"); } @@ -443,6 +443,10 @@ struct ir_value ir_value_insn(struct ir_insn *insn) { return (struct ir_value){.type = IR_VALUE_INSN, .data.insn_d = insn}; } +struct ir_value ir_value_stack_ptr() { + return (struct ir_value){.type = IR_VALUE_STACK_PTR}; +} + // User uses val void add_user(struct ssa_transform_env *env, struct ir_insn *user, struct ir_value val) { if (val.type == IR_VALUE_INSN) { @@ -817,6 +821,7 @@ void free_function(struct ir_function *fun) { } array_free(&fun->all_bbs); array_free(&fun->reachable_bbs); + array_free(&fun->end_bbs); array_free(&fun->cg_info.all_var); } @@ -830,6 +835,7 @@ struct ir_function gen_function(struct ssa_transform_env *env) { } fun.all_bbs = INIT_ARRAY(struct ir_basic_block *); fun.reachable_bbs = INIT_ARRAY(struct ir_basic_block *); + fun.end_bbs = INIT_ARRAY(struct ir_basic_block *); fun.cg_info.all_var = INIT_ARRAY(struct ir_insn *); for (size_t i = 0; i < MAX_BPF_REG; ++i) { struct array *currentDef = &env->currentDef[i]; @@ -884,6 +890,7 @@ void run(struct bpf_insn *insns, size_t len) { init_ir_bbs(&env); transform_bb(&env, info.entry); struct ir_function fun = gen_function(&env); + fix_bb_succ(&fun); // Drop env print_ir_prog(&fun); printf("--------------------\n"); diff --git a/IR/format.sh b/IR/format.sh 
index 2849228d..e9ec00b4 100755 --- a/IR/format.sh +++ b/IR/format.sh @@ -1,3 +1,12 @@ #!/bin/bash -find . -iname '*.h' -o -iname '*.c' | xargs clang-format -i +files=$(find . -iname '*.h' -o -iname '*.c' -not -path "./build/*") + +for file in $files; do + echo "Formatting $file" + clang-format -i $file & +done + +for job in `jobs -p`; do + wait $job +done diff --git a/IR/include/add_constraint_pass.h b/IR/include/add_constraint_pass.h deleted file mode 100644 index 06b79668..00000000 --- a/IR/include/add_constraint_pass.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __BPF_IR_ADD_CONSTRAINT_PASS_H__ -#define __BPF_IR_ADD_CONSTRAINT_PASS_H__ - -#include "ir_fun.h" - -void add_constraint(struct ir_function *fun); - -#endif diff --git a/IR/include/add_counter_pass.h b/IR/include/add_counter_pass.h deleted file mode 100644 index e37f669e..00000000 --- a/IR/include/add_counter_pass.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __BPF_IR_ADD_COUNTER_PASS_H__ -#define __BPF_IR_ADD_COUNTER_PASS_H__ - -#include "ir_fun.h" - -void add_counter(struct ir_function *fun); - -#endif diff --git a/IR/include/add_stack_offset.h b/IR/include/add_stack_offset.h deleted file mode 100644 index 82305924..00000000 --- a/IR/include/add_stack_offset.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __BPF_IR_ADD_STACK_OFFSET_H__ -#define __BPF_IR_ADD_STACK_OFFSET_H__ - -#include "ir_fun.h" - -// Add stack offset to all stack access -void add_stack_offset(struct ir_function *fun, __s16 offset); - -#endif diff --git a/IR/include/array.h b/IR/include/array.h index 81cd5987..2c7ee86a 100644 --- a/IR/include/array.h +++ b/IR/include/array.h @@ -17,6 +17,8 @@ void array_push_unique(struct array *arr, void *data); void array_free(struct array *); struct array array_null(); void array_erase(struct array *arr, size_t idx); +void *array_get_void(struct array *arr, size_t idx); +#define array_get(arr, idx, type) ((type *)array_get_void(arr, idx)) void *__malloc(size_t size); void __free(void *ptr); void array_clear(struct 
array *arr); diff --git a/IR/include/bpf_ir.h b/IR/include/bpf_ir.h index 462144e3..f9dd4cf2 100644 --- a/IR/include/bpf_ir.h +++ b/IR/include/bpf_ir.h @@ -8,6 +8,11 @@ #define MAX_FUNC_ARG 5 +enum imm_type { + IMM, + IMM64 +}; + /** Pre-IR instructions, similar to `bpf_insn` */ @@ -17,8 +22,10 @@ struct pre_ir_insn { __u8 dst_reg; __u8 src_reg; __s16 off; - __s32 imm; - __s64 imm64; // signed immediate constant for 64-bit immediate + + enum imm_type it; + __s32 imm; + __s64 imm64; // Immediate constant for 64-bit immediate size_t pos; // Original position }; @@ -69,6 +76,8 @@ struct ir_value { struct ir_value ir_value_insn(struct ir_insn *); +struct ir_value ir_value_stack_ptr(); + /** Value plus an offset */ @@ -90,15 +99,10 @@ struct phi_value { Virtual Register Type */ enum ir_vr_type { - IR_VR_TYPE_U8, - IR_VR_TYPE_U16, - IR_VR_TYPE_U32, - IR_VR_TYPE_U64, - IR_VR_TYPE_S8, - IR_VR_TYPE_S16, - IR_VR_TYPE_S32, - IR_VR_TYPE_S64, - IR_VR_TYPE_PTR, + IR_VR_TYPE_8, + IR_VR_TYPE_16, + IR_VR_TYPE_32, + IR_VR_TYPE_64, }; enum ir_insn_type { @@ -107,7 +111,7 @@ enum ir_insn_type { IR_INSN_LOAD, IR_INSN_STORERAW, IR_INSN_LOADRAW, - IR_INSN_FUNCTIONARG, // The function argument store + IR_INSN_FUNCTIONARG, // The function argument store, not an actual instruction // ALU IR_INSN_ADD, IR_INSN_SUB, @@ -161,6 +165,7 @@ enum ir_insn_type { | REG Note. must be the next basic block. 
+ ASSIGN dst cannot be callee-saved registers */ struct ir_insn { struct ir_value values[MAX_FUNC_ARG]; @@ -322,4 +327,6 @@ __u8 ir_value_equal(struct ir_value a, struct ir_value b); struct ir_basic_block *init_ir_bb_raw(); +int vr_type_to_size(enum ir_vr_type type); + #endif diff --git a/IR/include/code_gen.h b/IR/include/code_gen.h index af03a544..4ad2e1aa 100644 --- a/IR/include/code_gen.h +++ b/IR/include/code_gen.h @@ -1,6 +1,7 @@ #ifndef __BPF_IR_CODE_GEN_H__ #define __BPF_IR_CODE_GEN_H__ +#include #include "bpf_ir.h" #include "ir_fun.h" @@ -8,7 +9,7 @@ void code_gen(struct ir_function *fun); // Extra information needed for code gen struct ir_bb_cg_extra { - // Liveness analysis + size_t insn_cnt; }; struct ir_insn_cg_extra { @@ -25,9 +26,14 @@ struct ir_insn_cg_extra { // Array of struct ir_insn* struct array adj; - // Translated pre_ir_insn: array of struct pre_ir_insn - struct array translated; + // Translated pre_ir_insn + struct pre_ir_insn translated[2]; + + // Translated number + __u8 translated_num; + // Whether the VR is allocated with a real register + // If it's a pre-colored register, it's also 1 __u8 allocated; // When allocating register, whether dst will be spilled @@ -36,11 +42,18 @@ struct ir_insn_cg_extra { // etc. 
size_t spilled; - // Valid if spilled == 0 + // Valid if spilled == 0 && allocated == 1 // Valid number: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 __u8 alloc_reg; }; +enum val_type { + UNDEF, + REG, + CONST, + STACK, +}; + struct ir_insn_cg_extra *insn_cg(struct ir_insn *insn); struct ir_insn_cg_extra *init_insn_cg(struct ir_insn *insn); @@ -63,4 +76,29 @@ void graph_coloring(struct ir_function *fun); void explicit_reg(struct ir_function *fun); +void coaleasing(struct ir_function *fun); + +enum val_type vtype(struct ir_value val); + +int check_need_spill(struct ir_function *fun); + +void translate(struct ir_function *fun); + +void spill_callee(struct ir_function *fun); + +enum val_type vtype_insn(struct ir_insn *insn); + +enum val_type vtype(struct ir_value val); + +void calc_callee_num(struct ir_function *fun); + +void calc_stack_size(struct ir_function *fun); + +// Add stack offset to all stack access +void add_stack_offset(struct ir_function *fun, __s16 offset); + +void normalize(struct ir_function *fun); + +void relocate(struct ir_function *fun); + #endif diff --git a/IR/include/cut_bb_pass.h b/IR/include/cut_bb_pass.h deleted file mode 100644 index 8a05cb63..00000000 --- a/IR/include/cut_bb_pass.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __BPF_IR_CUT_BB_PASS_H__ -#define __BPF_IR_CUT_BB_PASS_H__ - -#include "ir_fun.h" - -void cut_bb(struct ir_function *fun); - -#endif diff --git a/IR/include/dbg.h b/IR/include/dbg.h index ecd5a83f..c726bfc9 100644 --- a/IR/include/dbg.h +++ b/IR/include/dbg.h @@ -11,6 +11,9 @@ exit(1); \ } -#define DBGASSERT(cond) assert(cond) +#define DBGASSERT(cond) \ + if (!(cond)) { \ + CRITICAL("Assertion failed"); \ + } #endif diff --git a/IR/include/ir_fun.h b/IR/include/ir_fun.h index ccb0983d..825c263d 100644 --- a/IR/include/ir_fun.h +++ b/IR/include/ir_fun.h @@ -10,12 +10,16 @@ struct code_gen_info { // BPF Register Virtual Instruction (used as dst) struct ir_insn *regs[MAX_BPF_REG]; + + size_t callee_num; + + __s16 stack_offset; }; struct 
ir_function { size_t arg_num; - // Array of struct pre_ir_basic_block *, no entrance information anymore + // Array of struct ir_basic_block * struct array all_bbs; // The entry block @@ -24,6 +28,9 @@ struct ir_function { // Store any information about the function struct array reachable_bbs; + // BBs who has no successors + struct array end_bbs; + // Stack pointer (r10) users. Should be readonly. No more manual stack access should be allowed. struct array sp_users; @@ -42,4 +49,6 @@ struct ir_function gen_function(struct ssa_transform_env *env); void free_function(struct ir_function *fun); +void fix_bb_succ(struct ir_function *fun); + #endif diff --git a/IR/include/ir_helper.h b/IR/include/ir_helper.h index a91cf6e4..96f8450c 100644 --- a/IR/include/ir_helper.h +++ b/IR/include/ir_helper.h @@ -42,4 +42,6 @@ void print_raw_ir_insn(struct ir_insn *insn); void print_raw_ir_bb(struct ir_basic_block *bb); +void print_insn_ptr_base(struct ir_insn *insn); + #endif diff --git a/IR/include/ir_insn.h b/IR/include/ir_insn.h index 21eea069..eccdac74 100644 --- a/IR/include/ir_insn.h +++ b/IR/include/ir_insn.h @@ -2,6 +2,7 @@ #define __BPF_IR_INSN_H__ #include "bpf_ir.h" +#include "list.h" enum insert_position { INSERT_BACK, @@ -20,6 +21,10 @@ void replace_all_usage_except(struct ir_insn *insn, struct ir_value rep, struct void erase_insn(struct ir_insn *insn); +// Erase an instruction without checking the users +// Used in code gen +void erase_insn_raw(struct ir_insn *insn); + int is_void(struct ir_insn *insn); int is_jmp(struct ir_insn *insn); @@ -98,4 +103,12 @@ struct ir_insn *create_assign_insn_bb_cg(struct ir_basic_block *bb, struct ir_va void replace_operand(struct ir_insn *insn, struct ir_value v1, struct ir_value v2); +struct ir_insn *create_insn_base_cg(struct ir_basic_block *bb); + +struct ir_insn *create_insn_base(struct ir_basic_block *bb); + +void insert_at(struct ir_insn *new_insn, struct ir_insn *insn, enum insert_position pos); + +void insert_at_bb(struct 
ir_insn *new_insn, struct ir_basic_block *bb, enum insert_position pos); + #endif diff --git a/IR/include/passes.h b/IR/include/passes.h index 6656db88..8f06674d 100644 --- a/IR/include/passes.h +++ b/IR/include/passes.h @@ -1,19 +1,25 @@ #ifndef __BPF_IR_PASSES_H__ #define __BPF_IR_PASSES_H__ -#include "add_constraint_pass.h" -#include "code_gen.h" -#include "cut_bb_pass.h" -#include "phi_pass.h" -#include "add_counter_pass.h" #include "ir_fun.h" +void remove_trivial_phi(struct ir_function *fun); + +void cut_bb(struct ir_function *fun); + +void add_counter(struct ir_function *fun); + +void add_constraint(struct ir_function *fun); + +void gen_reachable_bbs(struct ir_function *); + +void gen_end_bbs(struct ir_function *fun); + /** All function passes. */ static void (*passes[])(struct ir_function *fun) = { - remove_trivial_phi, - // liveness_analysis, + remove_trivial_phi, gen_end_bbs // add_constraint, // add_counter, }; diff --git a/IR/include/phi_pass.h b/IR/include/phi_pass.h deleted file mode 100644 index 6776dda3..00000000 --- a/IR/include/phi_pass.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __BPF_IR_PHI_PASS_H__ -#define __BPF_IR_PHI_PASS_H__ - -#include "ir_fun.h" - -void remove_trivial_phi(struct ir_function *fun); - -#endif diff --git a/IR/include/reachable_bb.h b/IR/include/reachable_bb.h deleted file mode 100644 index edd0cca0..00000000 --- a/IR/include/reachable_bb.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __BPF_IR_REACHABLE_BB_H__ -#define __BPF_IR_REACHABLE_BB_H__ - -#include "ir_fun.h" - -void gen_reachable_bbs(struct ir_function *); - -#endif diff --git a/IR/ir_code_gen.c b/IR/ir_code_gen.c index 542520b0..9c35416a 100644 --- a/IR/ir_code_gen.c +++ b/IR/ir_code_gen.c @@ -1,3 +1,4 @@ +#include #include #include "array.h" #include "bpf_ir.h" @@ -5,6 +6,7 @@ #include "dbg.h" #include "ir_insn.h" #include "list.h" +#include "passes.h" #include "prog_check.h" #include "ir_helper.h" @@ -16,16 +18,15 @@ struct ir_insn_cg_extra *init_insn_cg(struct ir_insn 
*insn) { } else { extra->dst = insn; } - extra->adj = INIT_ARRAY(struct ir_insn *); - extra->allocated = 0; - extra->spilled = 0; - extra->alloc_reg = 0; - extra->translated = INIT_ARRAY(struct pre_ir_insn); - extra->gen = INIT_ARRAY(struct ir_insn *); - extra->kill = INIT_ARRAY(struct ir_insn *); - extra->in = INIT_ARRAY(struct ir_insn *); - extra->out = INIT_ARRAY(struct ir_insn *); - insn->user_data = extra; + extra->adj = INIT_ARRAY(struct ir_insn *); + extra->allocated = 0; + extra->spilled = 0; + extra->alloc_reg = 0; + extra->gen = INIT_ARRAY(struct ir_insn *); + extra->kill = INIT_ARRAY(struct ir_insn *); + extra->in = INIT_ARRAY(struct ir_insn *); + extra->out = INIT_ARRAY(struct ir_insn *); + insn->user_data = extra; return extra; } @@ -43,8 +44,9 @@ void init_cg(struct ir_function *fun) { } } - for (__u8 i = 0; i < MAX_FUNC_ARG; ++i) { - fun->cg_info.regs[i] = __malloc(sizeof(struct ir_insn)); + for (__u8 i = 0; i < MAX_BPF_REG; ++i) { + fun->cg_info.regs[i] = __malloc(sizeof(struct ir_insn)); + // Those should be read-only struct ir_insn *insn = fun->cg_info.regs[i]; insn->op = IR_INSN_REG; insn->parent_bb = NULL; @@ -53,13 +55,15 @@ void init_cg(struct ir_function *fun) { struct ir_insn_cg_extra *extra = init_insn_cg(insn); extra->alloc_reg = i; extra->dst = insn; + // Pre-colored registers are allocated + extra->allocated = 1; + extra->spilled = 0; } } void free_insn_cg(struct ir_insn *insn) { struct ir_insn_cg_extra *extra = insn_cg(insn); array_free(&extra->adj); - array_free(&extra->translated); array_free(&extra->gen); array_free(&extra->kill); array_free(&extra->in); @@ -81,7 +85,7 @@ void free_cg_res(struct ir_function *fun) { } } - for (__u8 i = 0; i < MAX_FUNC_ARG; ++i) { + for (__u8 i = 0; i < MAX_BPF_REG; ++i) { struct ir_insn *insn = fun->cg_info.regs[i]; array_free(&insn->users); free_insn_cg(insn); @@ -89,6 +93,36 @@ void free_cg_res(struct ir_function *fun) { } } +void clean_insn_cg(struct ir_insn *insn) { + struct ir_insn_cg_extra 
*extra = insn_cg(insn); + array_clear(&extra->adj); + array_clear(&extra->gen); + array_clear(&extra->kill); + array_clear(&extra->in); + array_clear(&extra->out); +} + +void clean_cg(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + clean_insn_cg(insn); + struct ir_insn_cg_extra *extra = insn_cg(insn); + extra->allocated = 0; + extra->spilled = 0; + extra->alloc_reg = 0; + } + } + + for (__u8 i = 0; i < MAX_BPF_REG; ++i) { + struct ir_insn *insn = fun->cg_info.regs[i]; + clean_insn_cg(insn); + } + array_clear(&fun->cg_info.all_var); +} + struct ir_insn_cg_extra *insn_cg(struct ir_insn *insn) { return insn->user_data; } @@ -97,11 +131,25 @@ struct ir_insn *dst(struct ir_insn *insn) { return insn_cg(insn)->dst; } -void print_ir_prog_cg(struct ir_function *fun) { +void print_ir_prog_pre_cg(struct ir_function *fun) { printf("-----------------\n"); print_ir_prog_advanced(fun, NULL, NULL, NULL); } +void print_ir_prog_cg_dst(struct ir_function *fun) { + printf("-----------------\n"); + print_ir_prog_advanced(fun, NULL, NULL, print_ir_dst); +} + +void print_ir_prog_cg_alloc(struct ir_function *fun) { + printf("-----------------\n"); + print_ir_prog_advanced(fun, NULL, NULL, print_ir_alloc); +} + +void synthesize(struct ir_function *fun) { + // The last step, synthesizes the program +} + void code_gen(struct ir_function *fun) { // Preparation @@ -110,36 +158,72 @@ void code_gen(struct ir_function *fun) { // Step 2: Eliminate SSA to_cssa(fun); check_users(fun); - print_ir_prog_cg(fun); + print_ir_prog_pre_cg(fun); // Init CG, start real code generation init_cg(fun); explicit_reg(fun); // Still in SSA form, users are available - print_ir_prog_cg(fun); - printf("-----------------\n"); - print_ir_prog_advanced(fun, NULL, NULL, print_ir_dst); + print_ir_prog_pre_cg(fun); + print_ir_prog_cg_dst(fun); // SSA 
Destruction // users not available from now on - // remove_phi(fun); + remove_phi(fun); + print_ir_prog_cg_dst(fun); + + int need_spill = 1; + + while (need_spill) { + // Step 3: Liveness Analysis + liveness_analysis(fun); + + // Step 4: Conflict Analysis + conflict_analysis(fun); + print_interference_graph(fun); + printf("-------------\n"); + + // Step 5: Graph coloring + graph_coloring(fun); + coaleasing(fun); + print_interference_graph(fun); + print_ir_prog_cg_alloc(fun); + + // Step 6: Check if need to spill and spill + need_spill = check_need_spill(fun); + if (need_spill) { + // Still need to spill + printf("Need to spill...\n"); + clean_cg(fun); + } + } + + // Register allocation finished (All registers are fixed) + printf("Register allocation finished\n"); + print_ir_prog_cg_alloc(fun); + + // Step 7: Calculate stack size + calc_callee_num(fun); + calc_stack_size(fun); + + // Step 8: Shift raw stack operations + add_stack_offset(fun, fun->cg_info.stack_offset); + print_ir_prog_cg_alloc(fun); - // // Step 3: Liveness Analysis - // liveness_analysis(fun); + // Step 9: Spill callee saved registers + spill_callee(fun); + print_ir_prog_cg_alloc(fun); - // // Step 4: Conflict Analysis - // conflict_analysis(fun); - // print_interference_graph(fun); - // printf("-------------\n"); + // Step 10: Normalize + normalize(fun); + print_ir_prog_cg_alloc(fun); - // // Step 5: Graph coloring - // graph_coloring(fun); - // print_interference_graph(fun); - // print_ir_prog_advanced(fun, NULL, NULL, print_ir_alloc); + // Step 11: Direct Translation + // translate(fun); - // Register allocation finished + relocate(fun); - // Step 6: Direct Translation + synthesize(fun); // Free CG resources free_cg_res(fun); diff --git a/IR/ir_helper.c b/IR/ir_helper.c index 5171d79d..a615e4f5 100644 --- a/IR/ir_helper.c +++ b/IR/ir_helper.c @@ -153,33 +153,18 @@ void print_address_value(struct ir_address_value v) { void print_vr_type(enum ir_vr_type t) { switch (t) { - case IR_VR_TYPE_U8: + 
case IR_VR_TYPE_8: printf("u8"); break; - case IR_VR_TYPE_U64: + case IR_VR_TYPE_64: printf("u64"); break; - case IR_VR_TYPE_U16: + case IR_VR_TYPE_16: printf("u16"); break; - case IR_VR_TYPE_U32: + case IR_VR_TYPE_32: printf("u32"); break; - case IR_VR_TYPE_S8: - printf("s8"); - break; - case IR_VR_TYPE_S16: - printf("s16"); - break; - case IR_VR_TYPE_S32: - printf("s32"); - break; - case IR_VR_TYPE_S64: - printf("s64"); - break; - case IR_VR_TYPE_PTR: - printf("ptr"); - break; default: CRITICAL("Unknown VR type"); } @@ -266,7 +251,9 @@ void print_ir_insn_full(struct ir_insn *insn, void (*print_ir)(struct ir_insn *) break; case IR_INSN_RET: printf("ret "); - print_ir_value_full(insn->values[0], print_ir); + if (insn->value_num > 0) { + print_ir_value_full(insn->values[0], print_ir); + } break; case IR_INSN_JA: printf("ja "); @@ -453,9 +440,19 @@ void tag_ir(struct ir_function *fun) { clean_env(fun); } +void print_bb_succ(struct ir_basic_block *bb) { + printf("succs: "); + struct ir_basic_block **next; + array_for(next, bb->succs) { + print_bb_ptr(*next); + printf(" "); + } + printf("\n\n"); +} + void print_ir_prog(struct ir_function *fun) { tag_ir(fun); - print_ir_bb(fun->entry, NULL, NULL, NULL); + print_ir_bb(fun->entry, print_bb_succ, NULL, NULL); clean_tag(fun); } diff --git a/IR/ir_insn.c b/IR/ir_insn.c index 32df4cf0..1ea880bc 100644 --- a/IR/ir_insn.c +++ b/IR/ir_insn.c @@ -100,6 +100,11 @@ __u8 is_last_insn(struct ir_insn *insn) { return insn->parent_bb->ir_insn_head.prev == &insn->list_ptr; } +void erase_insn_raw(struct ir_insn *insn) { + list_del(&insn->list_ptr); + __free(insn); +} + void erase_insn(struct ir_insn *insn) { // TODO: remove users struct array operands = get_operands(insn); diff --git a/IR/passes/add_constraint_pass.c b/IR/passes/add_constraint_pass.c index 08d3dea9..646fe258 100644 --- a/IR/passes/add_constraint_pass.c +++ b/IR/passes/add_constraint_pass.c @@ -1,4 +1,3 @@ -#include "add_constraint_pass.h" #include "array.h" #include 
"bpf_ir.h" #include "constraint.h" diff --git a/IR/passes/add_counter_pass.c b/IR/passes/add_counter_pass.c index 49396156..2a0453f9 100644 --- a/IR/passes/add_counter_pass.c +++ b/IR/passes/add_counter_pass.c @@ -4,7 +4,7 @@ void add_counter(struct ir_function *fun) { struct ir_basic_block *entry = fun->entry; - struct ir_insn *alloc_insn = create_alloc_insn_bb(entry, IR_VR_TYPE_U64, INSERT_FRONT); + struct ir_insn *alloc_insn = create_alloc_insn_bb(entry, IR_VR_TYPE_64, INSERT_FRONT); struct ir_value val; val.type = IR_VALUE_CONSTANT; val.data.constant_d.type = IR_CONSTANT_U64; @@ -26,7 +26,7 @@ void add_counter(struct ir_function *fun) { } val.type = IR_VALUE_INSN; val.data.insn_d = alloc_insn; - struct ir_insn *load_insn = create_load_insn(last, IR_VR_TYPE_U64, val, INSERT_FRONT); + struct ir_insn *load_insn = create_load_insn(last, IR_VR_TYPE_64, val, INSERT_FRONT); struct ir_value val1; val1.type = IR_VALUE_CONSTANT; val1.data.constant_d.type = IR_CONSTANT_U64; diff --git a/IR/passes/cut_bb_pass.c b/IR/passes/cut_bb_pass.c index 63e61786..de91eb67 100644 --- a/IR/passes/cut_bb_pass.c +++ b/IR/passes/cut_bb_pass.c @@ -1,7 +1,7 @@ -#include "cut_bb_pass.h" #include "array.h" #include "bpf_ir.h" #include "dbg.h" +#include "passes.h" #include "list.h" void cut_bb(struct ir_function *fun) { diff --git a/IR/passes/end_bb.c b/IR/passes/end_bb.c new file mode 100644 index 00000000..ae737daa --- /dev/null +++ b/IR/passes/end_bb.c @@ -0,0 +1,12 @@ +#include "array.h" +#include "ir_fun.h" + +void gen_end_bbs(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + if (bb->succs.num_elem == 0) { + array_push(&fun->end_bbs, &bb); + } + } +} diff --git a/IR/passes/phi_pass.c b/IR/passes/phi_pass.c index b5f1eaf6..70ccf2f2 100644 --- a/IR/passes/phi_pass.c +++ b/IR/passes/phi_pass.c @@ -1,10 +1,10 @@ -#include "phi_pass.h" #include #include "array.h" #include "bpf_ir.h" #include "dbg.h" #include 
"ir_insn.h" #include "list.h" +#include "passes.h" void try_remove_trivial_phi(struct ir_insn *phi) { if (phi->op != IR_INSN_PHI) { diff --git a/IR/passes/reachable_bb.c b/IR/passes/reachable_bb.c index 32416c38..65fa20e5 100644 --- a/IR/passes/reachable_bb.c +++ b/IR/passes/reachable_bb.c @@ -1,6 +1,6 @@ - #include "array.h" -#include "reachable_bb.h" +#include "dbg.h" +#include "passes.h" void add_reach(struct ir_function *fun, struct ir_basic_block *bb) { if (bb->_visited) { @@ -10,13 +10,21 @@ void add_reach(struct ir_function *fun, struct ir_basic_block *bb) { array_push(&fun->reachable_bbs, &bb); struct ir_basic_block **succ; + __u8 i = 0; array_for(succ, bb->succs) { + if (i == 0) { + i = 1; + // Check if visited + if ((*succ)->_visited) { + CRITICAL("Loop BB detected"); + } + } add_reach(fun, *succ); } } void gen_reachable_bbs(struct ir_function *fun) { array_free(&fun->reachable_bbs); - fun->reachable_bbs = array_init(sizeof(struct ir_basic_block *)); + fun->reachable_bbs = INIT_ARRAY(struct ir_basic_block *); add_reach(fun, fun->entry); } diff --git a/docs/IR.md b/docs/IR.md index eae582af..d7ba1a2b 100644 --- a/docs/IR.md +++ b/docs/IR.md @@ -189,3 +189,19 @@ enum insert_position { INSERT_FRONT, }; ``` + +# BPF ISA + +BPF has 10 general purpose registers and a read-only frame pointer register, all of which are 64-bits wide. + +The BPF calling convention is defined as: + +- R0: return value from function calls, and exit value for BPF programs +- R1 - R5: arguments for function calls +- R6 - R9: callee saved registers that function calls will preserve +- R10: read-only frame pointer to access stack + +R0 - R5 are scratch registers and BPF programs needs to spill/fill them if necessary across calls. + +The BPF program needs to store the return value into register R0 before doing an EXIT. +