diff --git a/IR/CMakeLists.txt b/IR/CMakeLists.txt index 8fc5001d..fac908ff 100644 --- a/IR/CMakeLists.txt +++ b/IR/CMakeLists.txt @@ -10,11 +10,18 @@ add_executable( ir_helper.c ir_bb.c ir_insn.c - phi_pass.c - reachable_bb.c - add_counter_pass.c - add_stack_offset.c - add_constraint_pass.c + passes/phi_pass.c + passes/reachable_bb.c + passes/add_counter_pass.c + passes/add_constraint_pass.c + passes/cut_bb_pass.c + aux/add_stack_offset.c + aux/live_variable.c + aux/prog_check.c + aux/eliminate_ssa.c + aux/conflict_analysis.c + aux/graph_coloring.c + ir_code_gen.c ) add_executable(probe probe.c read.c array.c) diff --git a/IR/array.c b/IR/array.c index 5997589d..f74bbcd7 100644 --- a/IR/array.c +++ b/IR/array.c @@ -65,6 +65,23 @@ void array_erase(struct array *arr, size_t idx) { arr->num_elem--; } +void array_clear(struct array *arr) { + __free(arr->data); + arr->data = __malloc(arr->elem_size * 4); + arr->max_elem = 4; + arr->num_elem = 0; +} + +struct array array_clone(struct array *arr) { + struct array res; + res.num_elem = arr->num_elem; + res.max_elem = arr->max_elem; + res.elem_size = arr->elem_size; + res.data = __malloc(arr->max_elem * arr->elem_size); + memcpy(res.data, arr->data, arr->num_elem * arr->elem_size); + return res; +} + void array_free(struct array *arr) { __free(arr->data); *arr = array_null(); diff --git a/IR/add_stack_offset.c b/IR/aux/add_stack_offset.c similarity index 95% rename from IR/add_stack_offset.c rename to IR/aux/add_stack_offset.c index 8cdc9eeb..387215e2 100644 --- a/IR/add_stack_offset.c +++ b/IR/aux/add_stack_offset.c @@ -13,7 +13,7 @@ void add_stack_offset(struct ir_function *fun, __s16 offset) { insn->addr_val.offset += offset; continue; } - struct array value_uses = find_value_uses(insn); + struct array value_uses = get_operands(insn); struct ir_value **pos2; array_for(pos2, value_uses) { struct ir_value *val = *pos2; diff --git a/IR/aux/conflict_analysis.c b/IR/aux/conflict_analysis.c new file mode 100644 index 
00000000..0f25638d --- /dev/null +++ b/IR/aux/conflict_analysis.c @@ -0,0 +1,82 @@ +#include +#include "array.h" +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "ir_helper.h" + +int is_final(struct ir_insn *v1) { + return v1 == dst(v1); +} + +void build_conflict(struct ir_insn *v1, struct ir_insn *v2) { + if (!is_final(v1) || !is_final(v2)) { + CRITICAL("Can only build conflict on final values"); + } + if (v1 == v2) { + return; + } + array_push_unique(&insn_cg(v1)->adj, &v2); + array_push_unique(&insn_cg(v2)->adj, &v1); +} + +void print_interference_graph(struct ir_function *fun) { + // Tag the IR to have the actual number to print + tag_ir(fun); + struct ir_insn **pos; + array_for(pos, fun->cg_info.all_var) { + struct ir_insn *insn = *pos; + if (!is_final(insn)) { + // Not final value, give up + CRITICAL("Not Final Value!"); + } + struct ir_insn_cg_extra *extra = insn_cg(insn); + if (extra->allocated) { + printf("%%%zu(", insn->_insn_id); + if (extra->spilled) { + printf("sp-%zu", extra->spilled * 8); + } else { + printf("r%u", extra->alloc_reg); + } + printf("): "); + } else { + printf("%%%zu: ", insn->_insn_id); + } + struct ir_insn **pos2; + array_for(pos2, insn_cg(insn)->adj) { + struct ir_insn *adj_insn = *pos2; + if (!is_final(adj_insn)) { + // Neighbour (not the vertex itself) is not a final value, give up + CRITICAL("Not Final Value!"); + } + printf("%%%zu, ", adj_insn->_insn_id); + } + printf("\n"); + } +} + +void conflict_analysis(struct ir_function *fun) { + // Basic conflict: + // For every x in KILL set, x is conflict with every element in OUT set. 
+ + struct ir_basic_block **pos; + // For each BB + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_bb_cg_extra *bb_cg = bb->user_data; + struct ir_insn **pos2; + array_for(pos2, bb_cg->out) { + struct ir_insn *insn_dst = dst(*pos2); + // Add the variable to the "all variable set" + array_push_unique(&fun->cg_info.all_var, &insn_dst); + } + array_for(pos2, bb_cg->kill) { + struct ir_insn *insn_dst = dst(*pos2); + array_push_unique(&fun->cg_info.all_var, &insn_dst); + struct ir_insn **pos3; + array_for(pos3, bb_cg->out) { + build_conflict(insn_dst, dst(*pos3)); + } + } + } +} diff --git a/IR/aux/eliminate_ssa.c b/IR/aux/eliminate_ssa.c new file mode 100644 index 00000000..bdfcdba1 --- /dev/null +++ b/IR/aux/eliminate_ssa.c @@ -0,0 +1,90 @@ +#include "code_gen.h" +#include "array.h" +#include "bpf_ir.h" +#include "dbg.h" +#include "ir_fun.h" +#include "ir_insn.h" + +// Convert from TSSA to CSSA +// Using "Method I" in paper "Translating Out of Static Single Assignment Form" +void to_cssa(struct ir_function *fun) { + struct array phi_insns = INIT_ARRAY(struct ir_insn *); + + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + if (insn->op == IR_INSN_PHI) { + array_push(&phi_insns, &insn); + } else { + break; + } + } + } + + struct ir_insn **pos2; + array_for(pos2, phi_insns) { + struct ir_insn *insn = *pos2; + // Create the moved PHI insn + struct ir_insn *new_phi = create_phi_insn(insn, INSERT_FRONT); + struct phi_value *pos3; + array_for(pos3, insn->phi) { + struct ir_insn *new_insn = + create_assign_insn_bb(pos3->bb, pos3->value, INSERT_BACK_BEFORE_JMP); + // Remove use + val_remove_user(pos3->value, insn); + phi_add_operand(new_phi, pos3->bb, ir_value_insn(new_insn)); + } + + array_free(&insn->phi); + insn->op = IR_INSN_ASSIGN; + struct ir_value val = ir_value_insn(new_phi); + insn->values[0] 
= val; + insn->value_num = 1; + val_add_user(val, insn); + } + + array_free(&phi_insns); +} + +// Remove PHI insn +void remove_phi(struct ir_function *fun) { + // dst information ready + struct array phi_insns = INIT_ARRAY(struct ir_insn *); + + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + if (insn->op == IR_INSN_PHI) { + array_push(&phi_insns, &insn); + } else { + break; + } + } + } + + struct ir_insn **pos2; + array_for(pos2, phi_insns) { + struct ir_insn *insn = *pos2; + struct ir_insn *repr = NULL; + struct phi_value *pos3; + array_for(pos3, insn->phi) { + if (!repr) { + repr = pos3->value.data.insn_d; + } else { + insn_cg(pos3->value.data.insn_d)->dst = repr; + } + } + if (!repr) { + CRITICAL("Empty Phi not removed!"); + } + + replace_all_usage(insn, ir_value_insn(repr)); + erase_insn(insn); + } + + array_free(&phi_insns); +} diff --git a/IR/aux/graph_coloring.c b/IR/aux/graph_coloring.c new file mode 100644 index 00000000..e81c420d --- /dev/null +++ b/IR/aux/graph_coloring.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include "array.h" +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "ir_helper.h" + +int compare_insn(const void *a, const void *b) { + struct ir_insn *ap = *(struct ir_insn **)a; + struct ir_insn *bp = *(struct ir_insn **)b; + return (ap->_insn_id > bp->_insn_id) - (ap->_insn_id < bp->_insn_id); // three-way result: qsort requires <0, 0 or >0 +} + +void graph_coloring(struct ir_function *fun) { + // Using the Chaitin's Algorithm + // Using the simple dominance heuristic (Simple traversal of BB) + tag_ir(fun); + struct array *all_var = &fun->cg_info.all_var; + qsort(all_var->data, all_var->num_elem, all_var->elem_size, &compare_insn); + // all_var is now PEO + struct ir_insn **pos; + array_for(pos, (*all_var)) { + // Allocate register for *pos + struct ir_insn *insn = *pos; + struct ir_insn_cg_extra *extra = insn_cg(insn); + struct ir_insn **pos2; 
+ + int used_reg[__MAX_BPF_REG] = {0}; + struct array used_spill = INIT_ARRAY(size_t); + array_for(pos2, extra->adj) { + struct ir_insn *insn2 = *pos2; // Adj instruction + struct ir_insn_cg_extra *extra2 = insn_cg(insn2); + if (extra2->allocated) { + if (extra2->spilled) { + array_push_unique(&used_spill, &extra2->spilled); + } else { + used_reg[extra2->alloc_reg] = 1; + } + } + } + __u8 need_spill = 1; + for (__u8 i = 0; i < __MAX_BPF_REG; i++) { + if (!used_reg[i]) { + extra->allocated = 1; + printf("Allocate r%u for %zu\n", i, insn->_insn_id); + extra->alloc_reg = i; + need_spill = 0; + break; + } + } + if (need_spill) { + size_t sp = 1; + while (1) { + __u8 found = 1; + size_t *pos3; + array_for(pos3, used_spill) { + if (*pos3 == sp) { + sp++; + found = 0; + break; + } + } + if (found) { + extra->allocated = 1; + extra->spilled = sp; + break; + } + } + } + array_free(&used_spill); + } +} diff --git a/IR/aux/live_variable.c b/IR/aux/live_variable.c new file mode 100644 index 00000000..f73f731c --- /dev/null +++ b/IR/aux/live_variable.c @@ -0,0 +1,161 @@ +// Live variable analysis +#include +#include +#include "array.h" +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "ir_fun.h" +#include "ir_insn.h" +#include "list.h" +#include "ir_helper.h" + +void array_erase_elem(struct array *arr, struct ir_insn *insn) { + // Remove insn from arr + for (size_t i = 0; i < arr->num_elem; ++i) { + struct ir_insn *pos = ((struct ir_insn **)(arr->data))[i]; + if (pos == insn) { + array_erase(arr, i); + return; + } + } +} + +void gen_kill(struct ir_function *fun) { + struct ir_basic_block **pos; + // For each BB + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_bb_cg_extra *bb_cg = bb->user_data; + struct ir_insn *pos2; + // For each operation in reverse + list_for_each_entry_reverse(pos2, &bb->ir_insn_head, list_ptr) { + struct ir_insn *insn_dst = dst(pos2); + if (!is_void(pos2)) { + array_erase_elem(&bb_cg->gen, 
insn_dst); + array_push_unique(&bb_cg->kill, &insn_dst); + } + struct array value_uses = get_operands(pos2); + struct ir_value **pos3; + array_for(pos3, value_uses) { + struct ir_value *val = *pos3; + if (val->type == IR_VALUE_INSN) { + struct ir_insn *insn = dst(val->data.insn_d); + array_push_unique(&bb_cg->gen, &insn); + array_erase_elem(&bb_cg->kill, insn); + } + } + } + } +} + +int array_contains(struct array *arr, struct ir_insn *insn) { + struct ir_insn **pos; + array_for(pos, (*arr)) { + if (*pos == insn) { + return 1; + } + } + return 0; +} + +struct array array_delta(struct array *a, struct array *b) { + struct array res = INIT_ARRAY(struct ir_insn *); + struct ir_insn **pos; + array_for(pos, (*a)) { + struct ir_insn *insn = *pos; + if (!array_contains(b, insn)) { + array_push(&res, &insn); + } + } + return res; +} + +void merge_array(struct array *a, struct array *b) { + struct ir_insn **pos; + array_for(pos, (*b)) { + struct ir_insn *insn = *pos; + array_push_unique(a, &insn); + } +} + +int equal_set(struct array *a, struct array *b) { + if (a->num_elem != b->num_elem) { + return 0; + } + struct ir_insn **pos; + array_for(pos, (*a)) { + struct ir_insn *insn = *pos; + if (!array_contains(b, insn)) { + return 0; + } + } + return 1; +} + +void in_out(struct ir_function *fun) { + int change = 1; + // For each BB + while (change) { + change = 0; + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_bb_cg_extra *bb_cg = bb->user_data; + struct array old_in = bb_cg->in; + struct ir_basic_block **pos2; + array_clear(&bb_cg->out); + array_for(pos2, bb->succs) { + struct ir_bb_cg_extra *bb_cg2 = (*pos2)->user_data; + merge_array(&bb_cg->out, &bb_cg2->in); + } + struct array out_kill_delta = array_delta(&bb_cg->out, &bb_cg->kill); + bb_cg->in = array_clone(&bb_cg->gen); + merge_array(&bb_cg->in, &out_kill_delta); + // Check for change + if (!equal_set(&bb_cg->in, &old_in)) { + change = 1; + } + // 
Collect garbage + array_free(&out_kill_delta); + array_free(&old_in); + } + } +} + +void print_bb_extra(struct ir_basic_block *bb) { + struct ir_bb_cg_extra *bb_cg = bb->user_data; + if (bb->user_data == NULL) { + CRITICAL("NULL user data"); + } + printf("--\nGen:"); + struct ir_insn **pos; + array_for(pos, bb_cg->gen) { + struct ir_insn *insn = *pos; + printf(" %%%zu", insn->_insn_id); + } + printf("\nKill:"); + array_for(pos, bb_cg->kill) { + struct ir_insn *insn = *pos; + printf(" %%%zu", insn->_insn_id); + } + printf("\nIn:"); + array_for(pos, bb_cg->in) { + struct ir_insn *insn = *pos; + printf(" %%%zu", insn->_insn_id); + } + printf("\nOut:"); + array_for(pos, bb_cg->out) { + struct ir_insn *insn = *pos; + printf(" %%%zu", insn->_insn_id); + } + printf("\n-------------\n"); +} + +void liveness_analysis(struct ir_function *fun) { + // TODO: Encode Calling convention into GEN KILL + gen_kill(fun); + in_out(fun); + printf("--------------\n"); + print_ir_prog_advanced(fun, print_bb_extra, print_ir_dst); +} diff --git a/IR/aux/prog_check.c b/IR/aux/prog_check.c new file mode 100644 index 00000000..87925ede --- /dev/null +++ b/IR/aux/prog_check.c @@ -0,0 +1,30 @@ +#include "prog_check.h" +#include "array.h" +#include "bpf_ir.h" +#include "dbg.h" +#include "list.h" + +// Check if the PHI nodes are at the beginning of the BB +void check_phi(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + int all_phi = 1; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + if (insn->op == IR_INSN_PHI) { + if (!all_phi) { + // Error! 
+ CRITICAL("Phi node not at the beginning of a BB"); + } + } else { + all_phi = 0; + } + } + } +} + +// Check that the program is valid and able to be compiled +void prog_check(struct ir_function *fun) { + check_phi(fun); +} diff --git a/IR/bpf_ir.c b/IR/bpf_ir.c index d92f6440..93781e17 100644 --- a/IR/bpf_ir.c +++ b/IR/bpf_ir.c @@ -5,10 +5,14 @@ #include #include #include "add_stack_offset.h" +#include "ir_helper.h" #include "array.h" +#include "code_gen.h" +#include "ir_fun.h" #include "list.h" #include "dbg.h" #include "passes.h" +#include "reachable_bb.h" #include "read.h" // TODO: Change this to real function @@ -79,16 +83,23 @@ void init_entrance_info(struct array *bb_entrances, size_t entrance_pos) { array_push(bb_entrances, &new_bb); } +struct ir_basic_block *init_ir_bb_raw() { + struct ir_basic_block *new_bb = __malloc(sizeof(struct ir_basic_block)); + INIT_LIST_HEAD(&new_bb->ir_insn_head); + new_bb->user_data = NULL; + new_bb->preds = INIT_ARRAY(struct ir_basic_block *); + new_bb->succs = INIT_ARRAY(struct ir_basic_block *); + new_bb->users = INIT_ARRAY(struct ir_insn *); + return new_bb; +} + void init_ir_bb(struct pre_ir_basic_block *bb) { - bb->ir_bb = __malloc(sizeof(struct ir_basic_block)); + bb->ir_bb = init_ir_bb_raw(); bb->ir_bb->_visited = 0; bb->ir_bb->user_data = bb; for (__u8 i = 0; i < MAX_BPF_REG; ++i) { bb->incompletePhis[i] = NULL; } - INIT_LIST_HEAD(&bb->ir_bb->ir_insn_head); - bb->ir_bb->preds = array_init(sizeof(struct ir_basic_block *)); - bb->ir_bb->succs = array_init(sizeof(struct ir_basic_block *)); } struct bb_info gen_bb(struct bpf_insn *insns, size_t len) { @@ -314,6 +325,7 @@ struct ir_insn *add_phi_operands(struct ssa_transform_env *env, __u8 reg, struct phi.bb = pred; phi.value = read_variable(env, reg, (struct pre_ir_basic_block *)pred->user_data); add_user(env, insn, phi.value); + array_push(&pred->users, &insn); array_push(&insn->phi, &phi); } return insn; @@ -326,7 +338,7 @@ struct ir_value read_variable_recursive(struct 
ssa_transform_env *env, __u8 reg, // Incomplete CFG struct ir_insn *new_insn = create_insn_front(bb->ir_bb); new_insn->op = IR_INSN_PHI; - new_insn->phi = array_init(sizeof(struct phi_value)); + new_insn->phi = INIT_ARRAY(struct phi_value); bb->incompletePhis[reg] = new_insn; val.type = IR_VALUE_INSN; val.data.insn_d = new_insn; @@ -335,7 +347,7 @@ struct ir_value read_variable_recursive(struct ssa_transform_env *env, __u8 reg, } else { struct ir_insn *new_insn = create_insn_front(bb->ir_bb); new_insn->op = IR_INSN_PHI; - new_insn->phi = array_init(sizeof(struct phi_value)); + new_insn->phi = INIT_ARRAY(struct phi_value); val.type = IR_VALUE_INSN; val.data.insn_d = new_insn; write_variable(env, reg, bb, val); @@ -370,7 +382,7 @@ struct ir_value read_variable(struct ssa_transform_env *env, __u8 reg, struct ir_insn *create_insn() { struct ir_insn *insn = __malloc(sizeof(struct ir_insn)); - insn->users = array_init(sizeof(struct ir_insn *)); + insn->users = INIT_ARRAY(struct ir_insn *); return insn; } @@ -418,6 +430,10 @@ enum ir_vr_type to_ir_ld_u(__u8 size) { } } +struct ir_value ir_value_insn(struct ir_insn *insn) { + return (struct ir_value){.type = IR_VALUE_INSN, .data.insn_d = insn}; +} + // User uses val void add_user(struct ssa_transform_env *env, struct ir_insn *user, struct ir_value val) { if (val.type == IR_VALUE_INSN) { @@ -634,6 +650,7 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) new_insn->op = IR_INSN_JA; size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); } else if (BPF_OP(code) == BPF_EXIT) { // Exit struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -652,6 +669,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + 
array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == BPF_JLT) { // PC += offset if dst < src struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -664,6 +683,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == BPF_JLE) { // PC += offset if dst <= src struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -676,6 +697,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == BPF_JGT) { // PC += offset if dst > src struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -688,6 +711,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == BPF_JGE) { // PC += offset if dst >= src struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -700,6 +725,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == 
BPF_JNE) { // PC += offset if dst != src struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -712,6 +739,8 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) size_t pos = insn.pos + insn.off + 1; new_insn->bb1 = get_ir_bb_from_position(env, insn.pos + 1); new_insn->bb2 = get_ir_bb_from_position(env, pos); + array_push(&new_insn->bb1->users, &new_insn); + array_push(&new_insn->bb2->users, &new_insn); } else if (BPF_OP(code) == BPF_CALL) { // imm is the function id struct ir_insn *new_insn = create_insn_back(bb->ir_bb); @@ -719,18 +748,16 @@ void transform_bb(struct ssa_transform_env *env, struct pre_ir_basic_block *bb) new_insn->fid = insn.imm; if (insn.imm < 0) { printf("Not supported function call\n"); - new_insn->f_arg_num = 0; new_insn->value_num = 0; } else { - new_insn->f_arg_num = helper_func_arg_num[insn.imm]; - if (new_insn->f_arg_num > MAX_FUNC_ARG) { + new_insn->value_num = helper_func_arg_num[insn.imm]; + if (new_insn->value_num > MAX_FUNC_ARG) { CRITICAL("Too many arguments"); } - for (size_t j = 0; j < new_insn->f_arg_num; ++j) { + for (size_t j = 0; j < new_insn->value_num; ++j) { new_insn->values[j] = read_variable(env, BPF_REG_1 + j, bb); add_user(env, new_insn, new_insn->values[j]); } - new_insn->value_num = new_insn->f_arg_num; } struct ir_value new_val; @@ -762,6 +789,7 @@ void free_function(struct ir_function *fun) { array_free(&bb->preds); array_free(&bb->succs); + array_free(&bb->users); // Free the instructions struct ir_insn *pos, *n; list_for_each_entry_safe(pos, n, &bb->ir_insn_head, list_ptr) { @@ -778,10 +806,12 @@ void free_function(struct ir_function *fun) { struct ir_function gen_function(struct ssa_transform_env *env) { struct ir_function fun; - fun.arg_num = 1; - fun.entry = env->info.entry->ir_bb; - fun.sp_users = env->sp_users; - fun.all_bbs = array_init(sizeof(struct ir_basic_block *)); + fun.arg_num = 1; + fun.entry = env->info.entry->ir_bb; + fun.sp_users = env->sp_users; + fun.all_bbs = 
INIT_ARRAY(struct ir_basic_block *); + fun.reachable_bbs = INIT_ARRAY(struct ir_basic_block *); + fun.cg_info.all_var = INIT_ARRAY(struct ir_insn *); for (size_t i = 0; i < MAX_BPF_REG; ++i) { struct array *currentDef = &env->currentDef[i]; array_free(currentDef); @@ -819,8 +849,11 @@ __u8 ir_value_equal(struct ir_value a, struct ir_value b) { void run_passes(struct ir_function *fun) { for (size_t i = 0; i < sizeof(passes) / sizeof(passes[0]); ++i) { - clean_env(fun); + clean_env_all(fun); + gen_reachable_bbs(fun); passes[i](fun); + printf("--------------------\n"); + print_ir_prog(fun); } } @@ -841,13 +874,16 @@ void run(struct bpf_insn *insns, size_t len) { run_passes(&fun); // End IR manipulation - printf("--------------------\n"); - print_ir_prog(&fun); + // printf("--------------------\n"); + // print_ir_prog(&fun); // Test - add_stack_offset(&fun, -8); + // add_stack_offset(&fun, -8); + // printf("--------------------\n"); + // print_ir_prog(&fun); + printf("--------------------\n"); - print_ir_prog(&fun); + code_gen(&fun); // Free the memory free_function(&fun); diff --git a/IR/include/array.h b/IR/include/array.h index f56dc939..81cd5987 100644 --- a/IR/include/array.h +++ b/IR/include/array.h @@ -19,6 +19,8 @@ struct array array_null(); void array_erase(struct array *arr, size_t idx); void *__malloc(size_t size); void __free(void *ptr); +void array_clear(struct array *arr); +struct array array_clone(struct array *arr); #define array_for(pos, arr) \ for (pos = ((typeof(pos))(arr.data)); pos < (typeof(pos))(arr.data) + arr.num_elem; pos++) diff --git a/IR/include/bpf_ir.h b/IR/include/bpf_ir.h index e736315e..fdcee64c 100644 --- a/IR/include/bpf_ir.h +++ b/IR/include/bpf_ir.h @@ -56,7 +56,7 @@ enum ir_value_type { }; /** - VALUE = CONSTANT | INSN | FUNCTIONARG | STACK_PTR + VALUE = CONSTANT | INSN | FUNCTIONARG "r1 = constant" pattern will use `CONSTANT` which will not be added to BB. 
*/ @@ -69,6 +69,8 @@ struct ir_value { enum ir_value_type type; }; +struct ir_value ir_value_insn(struct ir_insn *); + /** Value plus an offset */ @@ -125,13 +127,15 @@ enum ir_insn_type { IR_INSN_JLE, IR_INSN_JNE, // PHI - IR_INSN_PHI + IR_INSN_PHI, + // Code-gen instructions + IR_INSN_ASSIGN }; /** INSN = ALLOC - | STORE , + | STORE , | LOAD | STORERAW , | LOADRAW @@ -140,7 +144,7 @@ enum ir_insn_type { | MUL , | LSH , | MOD , - | CALL + | CALL | RET | JA | JEQ , , , @@ -150,14 +154,16 @@ enum ir_insn_type { | JLE , , , | JNE , , , | PHI - + (For code gen usage) + | ASSIGN + Note. must be the next basic block. */ struct ir_insn { struct ir_value values[MAX_FUNC_ARG]; __u8 value_num; - // Used in ALLOC instructions + // Used in ALLOC and instructions enum ir_vr_type vr_type; // Used in RAW instructions @@ -170,8 +176,8 @@ struct ir_insn { // Array of phi_value struct array phi; - __s32 fid; - __u32 f_arg_num; + __s32 fid; + // __u32 f_arg_num; enum ir_insn_type op; // Linked list @@ -241,6 +247,9 @@ struct ir_basic_block { __u8 _visited; size_t _id; void *user_data; + + // Array of struct ir_insn * + struct array users; }; /** @@ -304,14 +313,8 @@ struct ir_insn *create_insn_front(struct ir_basic_block *bb); void add_user(struct ssa_transform_env *env, struct ir_insn *user, struct ir_value val); -void print_ir_insn(struct ir_insn *); - -void print_ir_value(struct ir_value v); - -void print_raw_ir_insn(struct ir_insn *insn); - -void print_raw_ir_bb(struct ir_basic_block *bb); - __u8 ir_value_equal(struct ir_value a, struct ir_value b); +struct ir_basic_block *init_ir_bb_raw(); + #endif diff --git a/IR/include/code_gen.h b/IR/include/code_gen.h new file mode 100644 index 00000000..34d478ab --- /dev/null +++ b/IR/include/code_gen.h @@ -0,0 +1,57 @@ +#ifndef __BPF_IR_CODE_GEN_H__ +#define __BPF_IR_CODE_GEN_H__ + +#include "bpf_ir.h" +#include "ir_fun.h" + +void code_gen(struct ir_function *fun); + +// Extra information needed for code gen +struct ir_bb_cg_extra { + // 
Liveness analysis + struct array in; + struct array out; + struct array gen; + struct array kill; +}; + +struct ir_insn_cg_extra { + // Destination (Not in SSA form anymore) + struct ir_insn *dst; + + // Adj list in interference graph + // Array of struct ir_insn* + struct array adj; + + __u8 allocated; + + // When allocating register, whether dst will be spilled + // 0: Not spilled + // 1: Spilled on stack position 1 + // etc. + size_t spilled; + + // Valid if spilled == 0 + // Valid number: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + __u8 alloc_reg; +}; + +struct ir_insn_cg_extra *insn_cg(struct ir_insn *insn); + +struct ir_insn *dst(struct ir_insn *insn); + +void to_cssa(struct ir_function *fun); + +void remove_phi(struct ir_function *fun); + +void print_ir_prog_cg(struct ir_function *fun); + +void liveness_analysis(struct ir_function *fun); + +void conflict_analysis(struct ir_function *fun); + +void print_interference_graph(struct ir_function *fun); + +void graph_coloring(struct ir_function *fun); + +#endif diff --git a/IR/include/cut_bb_pass.h b/IR/include/cut_bb_pass.h new file mode 100644 index 00000000..8a05cb63 --- /dev/null +++ b/IR/include/cut_bb_pass.h @@ -0,0 +1,8 @@ +#ifndef __BPF_IR_CUT_BB_PASS_H__ +#define __BPF_IR_CUT_BB_PASS_H__ + +#include "ir_fun.h" + +void cut_bb(struct ir_function *fun); + +#endif diff --git a/IR/include/ir_fun.h b/IR/include/ir_fun.h index f0434b3f..9ade35de 100644 --- a/IR/include/ir_fun.h +++ b/IR/include/ir_fun.h @@ -3,6 +3,12 @@ #include "bpf_ir.h" +struct code_gen_info { + // All vertex in interference graph + // Array of struct ir_insn* + struct array all_var; +}; + struct ir_function { size_t arg_num; @@ -20,16 +26,14 @@ struct ir_function { // Array of struct ir_constraint. Value constraints. 
struct array value_constraints; + + struct code_gen_info cg_info; }; -// Helper functions +// Constructor and Destructor struct ir_function gen_function(struct ssa_transform_env *env); -void clean_env(struct ir_function *); - -void clean_id(struct ir_function *); - -void print_ir_prog(struct ir_function *); +void free_function(struct ir_function *fun); #endif diff --git a/IR/include/ir_helper.h b/IR/include/ir_helper.h new file mode 100644 index 00000000..5e7b435c --- /dev/null +++ b/IR/include/ir_helper.h @@ -0,0 +1,45 @@ +#ifndef __BPF_IR_HELPER_H__ +#define __BPF_IR_HELPER_H__ + +#include "ir_fun.h" + +void clean_env_all(struct ir_function *fun); + +void print_ir_prog(struct ir_function *); + +void print_ir_prog_advanced(struct ir_function *fun, void (*post_fun)(struct ir_basic_block *), + void (*print_ir)(struct ir_insn *)); + +void print_ir_dst(struct ir_insn *insn); + +void print_ir_alloc(struct ir_insn *insn); + +void clean_env(struct ir_function *); + +void clean_env_all(struct ir_function *fun); + +// Tag the instruction and BB +void tag_ir(struct ir_function *fun); + +// Remove all tag information +void clean_tag(struct ir_function *); + +void print_constant(struct ir_constant d); + +void print_address_value(struct ir_address_value v); + +void print_vr_type(enum ir_vr_type t); + +void print_phi(struct array *phi); + +void assign_id(struct ir_basic_block *bb, size_t *cnt, size_t *bb_cnt); + +void print_ir_insn(struct ir_insn *); + +void print_ir_value(struct ir_value v); + +void print_raw_ir_insn(struct ir_insn *insn); + +void print_raw_ir_bb(struct ir_basic_block *bb); + +#endif diff --git a/IR/include/ir_insn.h b/IR/include/ir_insn.h index f2750aed..d1924f1b 100644 --- a/IR/include/ir_insn.h +++ b/IR/include/ir_insn.h @@ -6,15 +6,27 @@ enum insert_position { INSERT_BACK, INSERT_FRONT, + // BB-specific + INSERT_BACK_BEFORE_JMP, + INSERT_FRONT_AFTER_PHI }; // Return an array of struct ir_value* -struct array find_value_uses(struct ir_insn *insn); +struct 
array get_operands(struct ir_insn *insn); + +void replace_all_usage(struct ir_insn *insn, struct ir_value rep); void erase_insn(struct ir_insn *insn); +int is_void(struct ir_insn *insn); + +int is_jmp(struct ir_insn *insn); + struct ir_insn *prev_insn(struct ir_insn *insn); +struct ir_insn *create_alloc_insn(struct ir_insn *insn, enum ir_vr_type type, + enum insert_position pos); + struct ir_insn *create_alloc_insn_bb(struct ir_basic_block *bb, enum ir_vr_type type, enum insert_position pos); @@ -58,4 +70,20 @@ struct ir_insn *create_ret_insn(struct ir_insn *insn, struct ir_value val, struct ir_insn *create_ret_insn_bb(struct ir_basic_block *bb, struct ir_value val, enum insert_position pos); +struct ir_insn *create_assign_insn(struct ir_insn *insn, struct ir_value val, + enum insert_position pos); + +struct ir_insn *create_assign_insn_bb(struct ir_basic_block *bb, struct ir_value val, + enum insert_position pos); + +struct ir_insn *create_phi_insn(struct ir_insn *insn, enum insert_position pos); + +struct ir_insn *create_phi_insn_bb(struct ir_basic_block *bb, enum insert_position pos); + +void phi_add_operand(struct ir_insn *insn, struct ir_basic_block *bb, struct ir_value val); + +void val_add_user(struct ir_value val, struct ir_insn *user); + +void val_remove_user(struct ir_value val, struct ir_insn *user); + #endif diff --git a/IR/include/passes.h b/IR/include/passes.h index 5c0a38a8..6656db88 100644 --- a/IR/include/passes.h +++ b/IR/include/passes.h @@ -2,8 +2,9 @@ #define __BPF_IR_PASSES_H__ #include "add_constraint_pass.h" +#include "code_gen.h" +#include "cut_bb_pass.h" #include "phi_pass.h" -#include "reachable_bb.h" #include "add_counter_pass.h" #include "ir_fun.h" @@ -11,7 +12,8 @@ All function passes. 
*/ static void (*passes[])(struct ir_function *fun) = { - gen_reachable_bbs, remove_trivial_phi, + remove_trivial_phi, + // liveness_analysis, // add_constraint, // add_counter, }; diff --git a/IR/include/prog_check.h b/IR/include/prog_check.h new file mode 100644 index 00000000..07fe631e --- /dev/null +++ b/IR/include/prog_check.h @@ -0,0 +1,8 @@ +#ifndef __BPF_IR_PROG_CHECK_H__ +#define __BPF_IR_PROG_CHECK_H__ + +#include "ir_fun.h" + +void prog_check(struct ir_function *fun); + +#endif diff --git a/IR/ir_bb.c b/IR/ir_bb.c index 78f0c867..f1d4615f 100644 --- a/IR/ir_bb.c +++ b/IR/ir_bb.c @@ -12,12 +12,8 @@ size_t bb_len(struct ir_basic_block *bb) { } struct ir_basic_block *create_bb(struct ir_function *fun) { - struct ir_basic_block *new_bb = __malloc(sizeof(struct ir_basic_block)); - INIT_LIST_HEAD(&new_bb->ir_insn_head); + struct ir_basic_block *new_bb = init_ir_bb_raw(); array_push(&fun->all_bbs, &new_bb); - new_bb->user_data = NULL; - new_bb->preds = array_init(sizeof(struct ir_insn *)); - new_bb->succs = array_init(sizeof(struct ir_insn *)); return new_bb; } diff --git a/IR/ir_code_gen.c b/IR/ir_code_gen.c new file mode 100644 index 00000000..38029765 --- /dev/null +++ b/IR/ir_code_gen.c @@ -0,0 +1,105 @@ +#include +#include "array.h" +#include "bpf_ir.h" +#include "code_gen.h" +#include "dbg.h" +#include "list.h" +#include "prog_check.h" +#include "ir_helper.h" + +void init_cg(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_bb_cg_extra *bb_cg = __malloc(sizeof(struct ir_bb_cg_extra)); + bb_cg->gen = INIT_ARRAY(struct ir_insn *); + bb_cg->kill = INIT_ARRAY(struct ir_insn *); + bb_cg->in = INIT_ARRAY(struct ir_insn *); + bb_cg->out = INIT_ARRAY(struct ir_insn *); + + bb->user_data = bb_cg; + + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + struct ir_insn_cg_extra *insn_cg = __malloc(sizeof(struct ir_insn_cg_extra)); + // When 
init, the destination is itself + insn_cg->dst = insn; + insn_cg->adj = INIT_ARRAY(struct ir_insn *); + insn_cg->allocated = 0; + insn_cg->spilled = 0; + insn_cg->alloc_reg = 0; + insn->user_data = insn_cg; + } + } +} + +void free_cg_res(struct ir_function *fun) { + struct ir_basic_block **pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + struct ir_bb_cg_extra *bb_cg = bb->user_data; + array_free(&bb_cg->gen); + array_free(&bb_cg->kill); + array_free(&bb_cg->in); + array_free(&bb_cg->out); + __free(bb->user_data); + bb->user_data = NULL; + struct ir_insn *insn; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + struct ir_insn_cg_extra *insn_cg = insn->user_data; + array_free(&insn_cg->adj); + __free(insn_cg); + insn->user_data = NULL; + } + } +} + +struct ir_insn_cg_extra *insn_cg(struct ir_insn *insn) { + return insn->user_data; +} + +struct ir_insn *dst(struct ir_insn *insn) { + return insn_cg(insn)->dst; +} + +void print_ir_prog_cg(struct ir_function *fun) { + printf("-----------------\n"); + print_ir_prog_advanced(fun, NULL, NULL); +} + +void code_gen(struct ir_function *fun) { + // Preparation + + // Step 1: Check program + prog_check(fun); + // Step 2: Eliminate SSA + to_cssa(fun); + + // Init CG, start real code generation + // No "users" available after this step + init_cg(fun); + + remove_phi(fun); + + print_ir_prog_cg(fun); + + // Step 3: Liveness Analysis + liveness_analysis(fun); + + // Step 4: Conflict Analysis + conflict_analysis(fun); + print_interference_graph(fun); + printf("-------------\n"); + + // Step 5: Graph coloring + graph_coloring(fun); + print_interference_graph(fun); + print_ir_prog_advanced(fun, NULL, print_ir_alloc); + + // Register allocation finished + + // Step 6: Direct Translation + + // Free CG resources + free_cg_res(fun); +} diff --git a/IR/ir_helper.c b/IR/ir_helper.c index 91c0a5ab..95a4a9d7 100644 --- a/IR/ir_helper.c +++ b/IR/ir_helper.c @@ -2,12 +2,14 @@ #include #include "array.h" 
#include "bpf_ir.h" +#include "code_gen.h" #include "dbg.h" +#include "ir_insn.h" #include "list.h" #include "ir_fun.h" /// Reset visited flag -void clean_env(struct ir_function *fun) { +void clean_env_all(struct ir_function *fun) { for (size_t i = 0; i < fun->all_bbs.num_elem; ++i) { struct ir_basic_block *bb = ((struct ir_basic_block **)(fun->all_bbs.data))[i]; bb->_visited = 0; @@ -21,8 +23,20 @@ void clean_env(struct ir_function *fun) { } } +void clean_env(struct ir_function *fun) { + for (size_t i = 0; i < fun->all_bbs.num_elem; ++i) { + struct ir_basic_block *bb = ((struct ir_basic_block **)(fun->all_bbs.data))[i]; + bb->_visited = 0; + struct list_head *p = NULL; + list_for_each(p, &bb->ir_insn_head) { + struct ir_insn *insn = list_entry(p, struct ir_insn, list_ptr); + insn->_visited = 0; + } + } +} + /// Reset instruction/BB ID -void clean_id(struct ir_function *fun) { +void clean_tag(struct ir_function *fun) { for (size_t i = 0; i < fun->all_bbs.num_elem; ++i) { struct ir_basic_block *ir_bb = ((struct ir_basic_block **)(fun->all_bbs.data))[i]; ir_bb->_id = -1; @@ -71,12 +85,16 @@ void print_constant(struct ir_constant d) { } } -void print_insn_ptr(struct ir_insn *insn) { - if (insn->_insn_id == SIZE_MAX) { - printf("%p", insn); - return; +void print_insn_ptr(struct ir_insn *insn, void (*print_ir)(struct ir_insn *)) { + if (print_ir) { + print_ir(insn); + } else { + if (insn->_insn_id == SIZE_MAX) { + printf("%p", insn); + return; + } + printf("%%%zu", insn->_insn_id); } - printf("%%%zu", insn->_insn_id); } void print_bb_ptr(struct ir_basic_block *insn) { @@ -87,10 +105,10 @@ void print_bb_ptr(struct ir_basic_block *insn) { printf("b%zu", insn->_id); } -void print_ir_value(struct ir_value v) { +void print_ir_value_full(struct ir_value v, void (*print_ir)(struct ir_insn *)) { switch (v.type) { case IR_VALUE_INSN: - print_insn_ptr(v.data.insn_d); + print_insn_ptr(v.data.insn_d, print_ir); break; case IR_VALUE_STACK_PTR: printf("SP"); @@ -109,13 +127,21 @@ 
void print_ir_value(struct ir_value v) { } } -void print_address_value(struct ir_address_value v) { - print_ir_value(v.value); +void print_ir_value(struct ir_value v) { + print_ir_value_full(v, 0); +} + +void print_address_value_full(struct ir_address_value v, void (*print_ir)(struct ir_insn *)) { + print_ir_value_full(v.value, print_ir); if (v.offset != 0) { printf("+%d", v.offset); } } +void print_address_value(struct ir_address_value v) { + print_address_value_full(v, 0); +} + void print_vr_type(enum ir_vr_type t) { switch (t) { case IR_VR_TYPE_U8: @@ -150,21 +176,25 @@ void print_vr_type(enum ir_vr_type t) { } } -void print_phi(struct array *phi) { +void print_phi_full(struct array *phi, void (*print_ir)(struct ir_insn *)) { for (size_t i = 0; i < phi->num_elem; ++i) { struct phi_value v = ((struct phi_value *)(phi->data))[i]; printf(" <"); print_bb_ptr(v.bb); printf(" -> "); - print_ir_value(v.value); + print_ir_value_full(v.value, print_ir); printf(">"); } } +void print_phi(struct array *phi) { + print_phi_full(phi, 0); +} + /** Print the IR insn */ -void print_ir_insn(struct ir_insn *insn) { +void print_ir_insn_full(struct ir_insn *insn, void (*print_ir)(struct ir_insn *)) { switch (insn->op) { case IR_INSN_ALLOC: printf("alloc "); @@ -172,62 +202,62 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_STORE: printf("store "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); break; case IR_INSN_LOAD: printf("load "); print_vr_type(insn->vr_type); printf(", "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); break; case IR_INSN_LOADRAW: printf("loadraw "); print_vr_type(insn->vr_type); printf(" "); - print_address_value(insn->addr_val); + print_address_value_full(insn->addr_val, print_ir); break; case IR_INSN_STORERAW: printf("storeraw "); print_vr_type(insn->vr_type); printf(" 
"); - print_address_value(insn->addr_val); + print_address_value_full(insn->addr_val, print_ir); printf(" "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); break; case IR_INSN_ADD: printf("add "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); break; case IR_INSN_SUB: printf("sub "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); break; case IR_INSN_MUL: printf("mul "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); break; case IR_INSN_CALL: printf("call __built_in_func_%d(", insn->fid); - if (insn->f_arg_num >= 1) { - print_ir_value(insn->values[0]); + if (insn->value_num >= 1) { + print_ir_value_full(insn->values[0], print_ir); } - for (size_t i = 1; i < insn->f_arg_num; ++i) { + for (size_t i = 1; i < insn->value_num; ++i) { printf(", "); - print_ir_value(insn->values[i]); + print_ir_value_full(insn->values[i], print_ir); } printf(")"); break; case IR_INSN_RET: printf("ret "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); break; case IR_INSN_JA: printf("ja "); @@ -235,9 +265,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JEQ: printf("jeq "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -245,9 +275,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JGT: printf("jgt "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); 
printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -255,9 +285,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JGE: printf("jge "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -265,9 +295,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JLT: printf("jlt "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -275,9 +305,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JLE: printf("jle "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -285,9 +315,9 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_JNE: printf("jne "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); printf(", "); print_bb_ptr(insn->bb1); printf("/"); @@ -295,32 +325,49 @@ void print_ir_insn(struct ir_insn *insn) { break; case IR_INSN_PHI: printf("phi"); - print_phi(&insn->phi); + print_phi_full(&insn->phi, print_ir); break; case IR_INSN_LSH: printf("lsh "); - print_ir_value(insn->values[0]); + print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); break; case IR_INSN_MOD: printf("mod "); - print_ir_value(insn->values[0]); + 
print_ir_value_full(insn->values[0], print_ir); printf(", "); - print_ir_value(insn->values[1]); + print_ir_value_full(insn->values[1], print_ir); + break; + case IR_INSN_ASSIGN: + print_ir_value_full(insn->values[0], print_ir); break; default: CRITICAL("Unknown IR insn"); } } -void print_raw_ir_insn(struct ir_insn *insn) { - printf("%p = ", insn); - print_ir_insn(insn); +void print_ir_insn(struct ir_insn *insn) { + print_ir_insn_full(insn, 0); +} + +void print_raw_ir_insn_full(struct ir_insn *insn, void (*print_ir)(struct ir_insn *)) { + if (print_ir) { + print_ir(insn); + } else { + printf("%p", insn); + } + printf(" = "); + print_ir_insn_full(insn, print_ir); printf("\n"); } -void print_ir_bb(struct ir_basic_block *bb) { +void print_raw_ir_insn(struct ir_insn *insn) { + print_raw_ir_insn_full(insn, 0); +} + +void print_ir_bb(struct ir_basic_block *bb, void (*post_fun)(struct ir_basic_block *), + void (*print_ir)(struct ir_insn *)) { if (bb->_visited) { return; } @@ -329,26 +376,44 @@ void print_ir_bb(struct ir_basic_block *bb) { struct list_head *p = NULL; list_for_each(p, &bb->ir_insn_head) { struct ir_insn *insn = list_entry(p, struct ir_insn, list_ptr); - printf(" %%%zu = ", insn->_insn_id); - print_ir_insn(insn); + if (is_void(insn)) { + printf(" "); + } else { + printf(" "); + if (print_ir) { + print_ir(insn); + } else { + printf("%%%zu", insn->_insn_id); + } + printf(" = "); + } + + print_ir_insn_full(insn, print_ir); printf("\n"); } + if (post_fun) { + post_fun(bb); + } for (size_t i = 0; i < bb->succs.num_elem; ++i) { struct ir_basic_block *next = ((struct ir_basic_block **)(bb->succs.data))[i]; - print_ir_bb(next); + print_ir_bb(next, post_fun, print_ir); } } -void print_raw_ir_bb(struct ir_basic_block *bb) { +void print_raw_ir_bb_full(struct ir_basic_block *bb, void (*print_ir)(struct ir_insn *)) { printf("b%p:\n", bb); struct list_head *p = NULL; list_for_each(p, &bb->ir_insn_head) { struct ir_insn *insn = list_entry(p, struct ir_insn, list_ptr); 
printf(" "); - print_raw_ir_insn(insn); + print_raw_ir_insn_full(insn, print_ir); } } +void print_raw_ir_bb(struct ir_basic_block *bb) { + print_raw_ir_bb_full(bb, 0); +} + void assign_id(struct ir_basic_block *bb, size_t *cnt, size_t *bb_cnt) { if (bb->_visited) { return; @@ -358,7 +423,9 @@ void assign_id(struct ir_basic_block *bb, size_t *cnt, size_t *bb_cnt) { struct list_head *p = NULL; list_for_each(p, &bb->ir_insn_head) { struct ir_insn *insn = list_entry(p, struct ir_insn, list_ptr); - insn->_insn_id = (*cnt)++; + if (!is_void(insn)) { + insn->_insn_id = (*cnt)++; + } } struct ir_basic_block **next; array_for(next, bb->succs) { @@ -366,12 +433,46 @@ void assign_id(struct ir_basic_block *bb, size_t *cnt, size_t *bb_cnt) { } } -void print_ir_prog(struct ir_function *fun) { +void tag_ir(struct ir_function *fun) { size_t cnt = 0; size_t bb_cnt = 0; clean_env(fun); assign_id(fun->entry, &cnt, &bb_cnt); clean_env(fun); - print_ir_bb(fun->entry); - clean_id(fun); +} + +void print_ir_prog(struct ir_function *fun) { + tag_ir(fun); + print_ir_bb(fun->entry, NULL, 0); + clean_tag(fun); +} + +void print_ir_dst(struct ir_insn *insn) { + insn = dst(insn); + if (insn->_insn_id == SIZE_MAX) { + printf("%p", insn); + return; + } + printf("%%%zu", insn->_insn_id); +} + +void print_ir_alloc(struct ir_insn *insn) { + insn = dst(insn); + struct ir_insn_cg_extra *extra = insn_cg(insn); + if (extra->allocated) { + if (extra->spilled) { + printf("sp-%zu", extra->spilled * 8); + } else { + printf("r%u", extra->alloc_reg); + } + } else { + CRITICAL("Not allocated"); + } +} + +void print_ir_prog_advanced(struct ir_function *fun, void (*post_fun)(struct ir_basic_block *), + void (*print_ir)(struct ir_insn *)) { + tag_ir(fun); + print_ir_bb(fun->entry, post_fun, print_ir); + clean_tag(fun); } diff --git a/IR/ir_insn.c b/IR/ir_insn.c index 6c7c3801..7de70807 100644 --- a/IR/ir_insn.c +++ b/IR/ir_insn.c @@ -3,16 +3,34 @@ #include "array.h" #include "bpf_ir.h" #include "dbg.h" +#include 
"ir_bb.h" #include "list.h" struct ir_insn *create_insn_base(struct ir_basic_block *bb) { struct ir_insn *new_insn = __malloc(sizeof(struct ir_insn)); new_insn->parent_bb = bb; + new_insn->users = INIT_ARRAY(struct ir_insn *); + new_insn->value_num = 0; return new_insn; } -struct array find_value_uses(struct ir_insn *insn) { - struct array uses = INIT_ARRAY(struct ir_value *); +void replace_all_usage(struct ir_insn *insn, struct ir_value rep) { + struct ir_insn **pos; + array_for(pos, insn->users) { + struct ir_insn *user = *pos; + struct array operands = get_operands(user); + struct ir_value **pos2; + array_for(pos2, operands) { + if ((*pos2)->type == IR_VALUE_INSN && (*pos2)->data.insn_d == insn) { + // Match, replace + **pos2 = rep; + } + } + } +} + +struct array get_operands(struct ir_insn *insn) { + struct array uses = INIT_ARRAY(struct ir_value *); struct ir_value *pos; for (__u8 j = 0; j < insn->value_num; ++j) { @@ -41,16 +59,49 @@ void erase_insn(struct ir_insn *insn) { void insert_at(struct ir_insn *new_insn, struct ir_insn *insn, enum insert_position pos) { if (pos == INSERT_BACK) { list_add(&new_insn->list_ptr, &insn->list_ptr); - } else { + } else if (pos == INSERT_FRONT) { list_add_tail(&new_insn->list_ptr, &insn->list_ptr); + } else { + CRITICAL("Insert position not available for insn"); } } void insert_at_bb(struct ir_insn *new_insn, struct ir_basic_block *bb, enum insert_position pos) { if (pos == INSERT_BACK) { list_add_tail(&new_insn->list_ptr, &bb->ir_insn_head); - } else { + } else if (pos == INSERT_FRONT) { list_add(&new_insn->list_ptr, &bb->ir_insn_head); + } else if (pos == INSERT_BACK_BEFORE_JMP) { + // 1. If no JMP instruction, directly insert at the back + // 2. 
If there is a JMP at the end, insert before it + struct ir_insn *last_insn = get_last_insn(bb); + if (last_insn) { + if (is_jmp(last_insn)) { + // Insert before this insn + list_add_tail(&new_insn->list_ptr, &last_insn->list_ptr); + } else { + // Insert at the back + list_add_tail(&new_insn->list_ptr, &bb->ir_insn_head); + } + } else { + // Empty + list_add_tail(&new_insn->list_ptr, &bb->ir_insn_head); + } + } else if (pos == INSERT_FRONT_AFTER_PHI) { + // Insert after all PHIs + struct ir_insn *insn = NULL; + list_for_each_entry(insn, &bb->ir_insn_head, list_ptr) { + if (insn->op != IR_INSN_PHI) { + break; + } + } + if (insn) { + // Insert before insn + list_add_tail(&new_insn->list_ptr, &insn->list_ptr); + } else { + // No insn + list_add(&new_insn->list_ptr, &bb->ir_insn_head); + } } } @@ -75,6 +126,21 @@ struct ir_insn *create_alloc_insn_bb(struct ir_basic_block *bb, enum ir_vr_type return new_insn; } +void val_remove_user(struct ir_value val, struct ir_insn *user) { + if (val.type != IR_VALUE_INSN) { + return; + } + struct array *arr = &val.data.insn_d->users; + for (size_t i = 0; i < arr->num_elem; ++i) { + struct ir_insn *pos = ((struct ir_insn **)(arr->data))[i]; + if (pos == user) { + array_erase(arr, i); + return; + } + } + printf("Warning: User not found in the users\n"); +} + void val_add_user(struct ir_value val, struct ir_insn *user) { if (val.type != IR_VALUE_INSN) { return; @@ -86,11 +152,10 @@ struct ir_insn *create_store_insn_base(struct ir_basic_block *bb, struct ir_insn struct ir_value val) { struct ir_insn *new_insn = create_insn_base(bb); new_insn->op = IR_INSN_STORE; - struct ir_value nv; - nv.type = IR_VALUE_INSN; - nv.data.insn_d = insn; - new_insn->values[0] = nv; - new_insn->values[1] = val; + struct ir_value nv = ir_value_insn(insn); + new_insn->values[0] = nv; + new_insn->values[1] = val; + new_insn->value_num = 2; val_add_user(nv, new_insn); return new_insn; } @@ -116,6 +181,7 @@ struct ir_insn *create_load_insn_base(struct 
ir_basic_block *bb, enum ir_vr_type new_insn->vr_type = ty; new_insn->values[0] = val; val_add_user(val, new_insn); + new_insn->value_num = 1; return new_insn; } @@ -141,6 +207,7 @@ struct ir_insn *create_bin_insn_base(struct ir_basic_block *bb, struct ir_value new_insn->values[1] = val2; val_add_user(val1, new_insn); val_add_user(val2, new_insn); + new_insn->value_num = 2; return new_insn; } @@ -171,6 +238,7 @@ struct ir_insn *create_ja_insn_base(struct ir_basic_block *bb, struct ir_basic_b struct ir_insn *new_insn = create_insn_base(bb); new_insn->op = IR_INSN_JA; new_insn->bb1 = to_bb; + array_push(&to_bb->users, &new_insn); return new_insn; } @@ -199,6 +267,9 @@ struct ir_insn *create_jbin_insn_base(struct ir_basic_block *bb, struct ir_value new_insn->bb2 = to_bb2; val_add_user(val1, new_insn); val_add_user(val2, new_insn); + array_push(&to_bb1->users, &new_insn); + array_push(&to_bb2->users, &new_insn); + new_insn->value_num = 2; return new_insn; } @@ -224,6 +295,7 @@ struct ir_insn *create_ret_insn_base(struct ir_basic_block *bb, struct ir_value struct ir_insn *new_insn = create_insn_base(bb); new_insn->op = IR_INSN_RET; new_insn->values[0] = val; + new_insn->value_num = 1; val_add_user(val, new_insn); return new_insn; } @@ -241,3 +313,62 @@ struct ir_insn *create_ret_insn_bb(struct ir_basic_block *bb, struct ir_value va insert_at_bb(new_insn, bb, pos); return new_insn; } + +int is_jmp(struct ir_insn *insn) { + return (insn->op >= IR_INSN_JA && insn->op <= IR_INSN_JNE) || insn->op == IR_INSN_RET; +} + +int is_void(struct ir_insn *insn) { + return is_jmp(insn) || insn->op == IR_INSN_STORERAW || insn->op == IR_INSN_STORE; +} + +struct ir_insn *create_assign_insn_base(struct ir_basic_block *bb, struct ir_value val) { + struct ir_insn *new_insn = create_insn_base(bb); + new_insn->op = IR_INSN_ASSIGN; + new_insn->values[0] = val; + new_insn->value_num = 1; + val_add_user(val, new_insn); + return new_insn; +} + +struct ir_insn *create_assign_insn(struct ir_insn 
*insn, struct ir_value val, + enum insert_position pos) { + struct ir_insn *new_insn = create_assign_insn_base(insn->parent_bb, val); + insert_at(new_insn, insn, pos); + return new_insn; +} + +struct ir_insn *create_assign_insn_bb(struct ir_basic_block *bb, struct ir_value val, + enum insert_position pos) { + struct ir_insn *new_insn = create_assign_insn_base(bb, val); + insert_at_bb(new_insn, bb, pos); + return new_insn; +} + +struct ir_insn *create_phi_insn_base(struct ir_basic_block *bb) { + struct ir_insn *new_insn = create_insn_base(bb); + new_insn->op = IR_INSN_PHI; + new_insn->phi = INIT_ARRAY(struct phi_value); + return new_insn; +} + +struct ir_insn *create_phi_insn(struct ir_insn *insn, enum insert_position pos) { + struct ir_insn *new_insn = create_phi_insn_base(insn->parent_bb); + insert_at(new_insn, insn, pos); + return new_insn; +} + +struct ir_insn *create_phi_insn_bb(struct ir_basic_block *bb, enum insert_position pos) { + struct ir_insn *new_insn = create_phi_insn_base(bb); + insert_at_bb(new_insn, bb, pos); + return new_insn; +} + +void phi_add_operand(struct ir_insn *insn, struct ir_basic_block *bb, struct ir_value val) { + // Make sure that bb is a pred of insn parent BB + struct phi_value pv; + pv.value = val; + pv.bb = bb; + array_push(&insn->phi, &pv); + val_add_user(val, insn); +} diff --git a/IR/add_constraint_pass.c b/IR/passes/add_constraint_pass.c similarity index 100% rename from IR/add_constraint_pass.c rename to IR/passes/add_constraint_pass.c diff --git a/IR/add_counter_pass.c b/IR/passes/add_counter_pass.c similarity index 100% rename from IR/add_counter_pass.c rename to IR/passes/add_counter_pass.c diff --git a/IR/passes/cut_bb_pass.c b/IR/passes/cut_bb_pass.c new file mode 100644 index 00000000..63e61786 --- /dev/null +++ b/IR/passes/cut_bb_pass.c @@ -0,0 +1,51 @@ +#include "cut_bb_pass.h" +#include "array.h" +#include "bpf_ir.h" +#include "dbg.h" +#include "list.h" + +void cut_bb(struct ir_function *fun) { + struct ir_basic_block 
**pos; + array_for(pos, fun->reachable_bbs) { + struct ir_basic_block *bb = *pos; + if (list_empty(&bb->ir_insn_head)) { + // Empty BB, try removing! + if (bb->succs.num_elem == 0) { + CRITICAL("Empty BB with no successors"); + } + if (bb->succs.num_elem > 1) { + CRITICAL("Empty BB with > 1 successors"); + } + struct ir_basic_block **pos2; + struct ir_basic_block *next = ((struct ir_basic_block **)(bb->succs.data))[0]; + array_for(pos2, bb->preds) { + struct ir_basic_block *pred = *pos2; + struct ir_basic_block **pos3; + array_for(pos3, pred->succs) { + struct ir_basic_block *succ = *pos3; + if (succ == bb) { + *pos3 = next; + } + } + } + struct ir_insn **pos4; + array_for(pos4, bb->users) { + struct ir_insn *user = *pos4; + if (user->bb1 == bb) { + user->bb1 = next; + } + if (user->bb2 == bb) { + user->bb2 = next; + } + if (user->op == IR_INSN_PHI) { + struct phi_value *pos5; + array_for(pos5, user->phi) { + if (pos5->bb == bb) { + pos5->bb = next; + } + } + } + } + } + } +} diff --git a/IR/phi_pass.c b/IR/passes/phi_pass.c similarity index 96% rename from IR/phi_pass.c rename to IR/passes/phi_pass.c index 0bfa7061..e13bb674 100644 --- a/IR/phi_pass.c +++ b/IR/passes/phi_pass.c @@ -41,7 +41,7 @@ void try_remove_trivial_phi(struct ir_insn *phi) { continue; } - struct array value_uses = find_value_uses(user); + struct array value_uses = get_operands(user); struct ir_value **pos2; array_for(pos2, value_uses) { if (ir_value_equal(**pos2, phi_val)) { diff --git a/IR/reachable_bb.c b/IR/passes/reachable_bb.c similarity index 92% rename from IR/reachable_bb.c rename to IR/passes/reachable_bb.c index 74de78c3..32416c38 100644 --- a/IR/reachable_bb.c +++ b/IR/passes/reachable_bb.c @@ -16,6 +16,7 @@ void add_reach(struct ir_function *fun, struct ir_basic_block *bb) { } void gen_reachable_bbs(struct ir_function *fun) { + array_free(&fun->reachable_bbs); fun->reachable_bbs = array_init(sizeof(struct ir_basic_block *)); add_reach(fun, fun->entry); } diff --git a/docs/IR.md 
b/docs/IR.md index 4c2024c4..eae582af 100644 --- a/docs/IR.md +++ b/docs/IR.md @@ -1,52 +1,191 @@ -# bpf IR spec +# bpf IR Specification (v0.1) -There are several steps to transform. +## `ir_insn` Structure -## Stack access validation & Map to virtual (stack) registers +```c +struct ir_insn { + struct ir_value values[MAX_FUNC_ARG]; + __u8 value_num; -Verify stack access. + // Used in ALLOC instructions + enum ir_vr_type vr_type; -Memory access: not allowed to have `r10` + a non-constant address. + // Used in RAW instructions + struct ir_address_value addr_val; -Stack address: `0x123s` means `r10 - 0x123`. + // Used in JMP instructions + struct ir_basic_block *bb1; + struct ir_basic_block *bb2; -`allocP`: allocate a register at a given position. + // Array of phi_value + struct array phi; + __s32 fid; + __u32 f_arg_num; + enum ir_insn_type op; + + // Linked list + struct list_head list_ptr; + + // Parent BB + struct ir_basic_block *parent_bb; + + // Array of struct ir_insn * + // Users + struct array users; + + // Might be useful? + // Too difficult, need BTF + // enum ir_vr_type type; + + // Used when generating the real code + size_t _insn_id; + void *user_data; + __u8 _visited; +}; ``` -r1 = 0x6968 -*(u16 *)(r10 - 0x4) = r1 -r1 = 0x0 -*(u8 *)(r10 - 0x2) = r1 -r1 = r10 -r1 += -0x4 -r2 = 0x3 -call 0x6 + +There are currently 20 instructions supported. + +## IR Instructions + +General syntax notation for documenting the instructions: + +`INSN <FIELD_1> <FIELD_2>...` + +`FIELD_1` is a field name in the `ir_insn` struct. + +For example, the following are valid syntax notations: + +`alloc <vr_type>` + +`abort` + +`ja <bb1>` + +### `alloc` + +Syntax: `alloc <vr_type>`. + +Allocate space on the stack or in a register (decided by the code gen). + +Example: + ``` +%1 = alloc IR_VR_TYPE_U32 +store %1 200 +``` + +### `store` + +Syntax: `store <values[0]> <values[1]>` + +Requirement: `values[0]` is an `alloc` instruction. + +Store the value `values[1]` at the address `values[0]`.
+ +### `load` + +Syntax: `load <vr_type> <values[0]>` + +Requirement: `values[0]` is an `alloc` instruction. + +Load a value of size `vr_type` from `values[0]`. + +### `storeraw` + +Syntax: `storeraw <vr_type> <addr_val> <values[0]>` + +Store the value `values[0]`, with size `vr_type`, at the manually specified address `addr_val`. + +### `loadraw` + +Syntax: `loadraw <vr_type> <addr_val>` -==> +Load a value of size `vr_type` from the address `addr_val`. + +### ALU Binary Instructions + +This includes `add`, `sub`, etc. + +Syntax: `INSN <values[0]> <values[1]>` + +Compute the binary operation `INSN` on `values[0]` and `values[1]`. + +### `call` + +Syntax: `call <fid> <values[0]> <values[1]>...` + +Call the eBPF helper function `fid` with arguments `values[0]`... + +### `ret` + +Syntax: `ret <values[0]>` + +Exit the program with exit code `values[0]`. + +### `ja` + +Syntax: `ja <bb1>` + +Jump to basic block `bb1`. + +### Conditional Jump Instructions + +Syntax: `INSN <values[0]> <values[1]> <bb1> <bb2>` + +Perform a conditional jump based on comparing `values[0]` and `values[1]`. -Form BBs. Get the graph. +`bb1` is the basic block that follows this one and is taken when not jumping; `bb2` is the basic block to jump to. -We need the pred/succ information of BB. +Requirement: `bb1` must be next to this basic block. -## Local value numbering +### `phi` -## Global Value Numbering +Syntax: `phi <phi[0]> <phi[1]>...` -## Incomplete CFGs +Phi instruction. `phi` is an array of `phi_value`. Each `phi_value` is a `(ir_value, ir_basic_block*)` pair. -## Remove dead code (Optimization) +## BasicBlock + +The basic block structure is `struct ir_basic_block*`. + +The instructions in the basic block are stored in `ir_insn_head`. It is a doubly linked list. + +The predecessors and successors are stored in `preds` and `succs`. They are arrays of `struct ir_basic_block *`. + +Users can add custom data in the `user_data` field. Make sure to free the user data after using it. + +## How to build IR + +### Create a new instruction + +Use functions in `ir_insn`. + +It's possible to create an instruction after/before an existing instruction or at the back/front of a basic block.
+ +For example, to create an `alloc` instruction, there are two functions: + +```c + +struct ir_insn *create_alloc_insn(struct ir_insn *insn, enum ir_vr_type type, + enum insert_position pos); + +struct ir_insn *create_alloc_insn_bb(struct ir_basic_block *bb, enum ir_vr_type type, + enum insert_position pos); +``` + +`insn` is the instruction that you want to insert after/before. `type` is the specific data needed for this instruction. `pos` is the relative position to insert. There are two options: + +```c +enum insert_position { + INSERT_BACK, + INSERT_FRONT, +}; +```