diff options
author | Stephen Dolan <mu@netsoc.tcd.ie> | 2012-09-18 17:44:43 +0100 |
---|---|---|
committer | Stephen Dolan <mu@netsoc.tcd.ie> | 2012-09-18 17:44:43 +0100 |
commit | a4eea165bbab6d13f89b59707e835d58b7014a66 (patch) | |
tree | b99ee5dde8540f8dbe5de3d87b99e04ac4dd2673 /compile.c | |
parent | 25cbab056b1f73e96b636c88779a92400d92dc15 (diff) |
Move everything around - delete old Haskell code, clean up build.
Diffstat (limited to 'compile.c')
-rw-r--r-- | compile.c | 613 |
1 files changed, 613 insertions, 0 deletions
diff --git a/compile.c b/compile.c new file mode 100644 index 00000000..719a2706 --- /dev/null +++ b/compile.c @@ -0,0 +1,613 @@ +#include <assert.h> +#include <string.h> +#include <stdlib.h> +#include "opcode.h" +#include "compile.h" +#include "locfile.h" + +struct inst { + struct inst* next; + struct inst* prev; + + opcode op; + + union { + uint16_t intval; + struct inst* target; + jv constant; + struct cfunction* cfunc; + } imm; + + location source; + + // Binding + // An instruction requiring binding (for parameters/variables) + // is in one of three states: + // bound_by = NULL - Unbound free variable + // bound_by = self - This instruction binds a variable + // bound_by = other - Uses variable bound by other instruction + // The immediate field is generally not meaningful until instructions + // are bound, and even then only for instructions which bind. + struct inst* bound_by; + char* symbol; + block subfn; + + // This instruction is compiled as part of which function? + // (only used during block_compile) + struct bytecode* compiled; + + int bytecode_pos; // position just after this insn +}; + +static inst* inst_new(opcode op) { + inst* i = malloc(sizeof(inst)); + i->next = i->prev = 0; + i->op = op; + i->bytecode_pos = -1; + i->bound_by = 0; + i->symbol = 0; + i->subfn = gen_noop(); + i->source = UNKNOWN_LOCATION; + return i; +} + +static void inst_free(struct inst* i) { + free(i->symbol); + if (opcode_describe(i->op)->flags & OP_HAS_BLOCK) { + block_free(i->subfn); + } + if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) { + jv_free(i->imm.constant); + } + free(i); +} + +static block inst_block(inst* i) { + block b = {i,i}; + return b; +} + +static int block_is_single(block b) { + return b.first && b.first == b.last; +} + +static inst* block_take(block* b) { + if (b->first == 0) return 0; + inst* i = b->first; + if (i->next) { + i->next->prev = 0; + b->first = i->next; + i->next = 0; + } else { + b->first = 0; + b->last = 0; + } + return i; +} + +block gen_location(location loc, block b) { + for (inst* i = b.first; i; i = i->next) { + if (i->source.start == UNKNOWN_LOCATION.start && + i->source.end == UNKNOWN_LOCATION.end) { + i->source = loc; + } + } + return b; +} + +block gen_noop() { + block b = {0,0}; + return b; +} + +block gen_op_simple(opcode op) { + assert(opcode_describe(op)->length == 1); + return inst_block(inst_new(op)); +} + + +block gen_op_const(opcode op, jv constant) { + assert(opcode_describe(op)->flags & OP_HAS_CONSTANT); + inst* i = inst_new(op); + i->imm.constant = constant; + return inst_block(i); +} + +block gen_op_target(opcode op, block target) { + assert(opcode_describe(op)->flags & OP_HAS_BRANCH); + assert(target.last); + inst* i = inst_new(op); + i->imm.target = target.last; + return inst_block(i); +} + +block gen_op_targetlater(opcode op) { + assert(opcode_describe(op)->flags & OP_HAS_BRANCH); + inst* i = inst_new(op); + i->imm.target = 0; + return inst_block(i); +} +void inst_set_target(block b, block target) { + assert(block_is_single(b)); + assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH); + assert(target.last); + b.first->imm.target = target.last; +} + +block gen_op_var_unbound(opcode op, const char* name) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); + inst* i = inst_new(op); + i->symbol = strdup(name); + return inst_block(i); +} + +block gen_op_var_bound(opcode op, block binder) { + assert(block_is_single(binder)); + block b = gen_op_var_unbound(op, binder.first->symbol); + b.first->bound_by = binder.first; + return b; +} + +block gen_op_symbol(opcode op, const char* sym) { + assert(opcode_describe(op)->flags & OP_HAS_SYMBOL); + inst* i = inst_new(op); + i->symbol = strdup(sym); + return inst_block(i); +} + +block gen_op_block_defn(opcode op, const char* name, block block) { + assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO); + assert(opcode_describe(op)->flags & OP_HAS_BLOCK); + inst* i = inst_new(op); + i->subfn = block; + i->symbol = strdup(name); + return inst_block(i); +} + +static void block_bind_subblock(block binder, block body, int bindflags); +block gen_op_block_defn_rec(opcode op, const char* name, block blk) { + block b = gen_op_block_defn(op, name, blk); + block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING); + return b; +} + +block gen_op_block_unbound(opcode op, const char* name) { + assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO); + inst* i = inst_new(op); + i->symbol = strdup(name); + return inst_block(i); +} + +block gen_op_block_bound(opcode op, block binder) { + assert(block_is_single(binder)); + block b = gen_op_block_unbound(op, binder.first->symbol); + b.first->bound_by = binder.first; + return b; +} + +block gen_op_call(opcode op, block arglist) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST); + inst* i = inst_new(op); + block prelude = gen_noop(); + block call = inst_block(i); + int nargs = 0; + inst* curr = 0; + while ((curr = block_take(&arglist))) { + assert(opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO); + block bcurr = inst_block(curr); + switch (curr->op) { + default: assert(0 && "Unknown type of parameter"); break; + case CLOSURE_REF: + block_append(&call, bcurr); + break; + case CLOSURE_CREATE: + block_append(&prelude, bcurr); + block_append(&call, gen_op_block_bound(CLOSURE_REF, bcurr)); + break; + } + nargs++; + } + assert(nargs < 100); //FIXME + i->imm.intval = nargs; + return block_join(prelude, call); +} + +static void inst_join(inst* a, inst* b) { + assert(a && b); + assert(!a->next); + assert(!b->prev); + a->next = b; + b->prev = a; +} + +void block_append(block* b, block b2) { + if (b2.first) { + if (b->last) { + inst_join(b->last, b2.first); + } else { + b->first = b2.first; + } + b->last = b2.last; + } +} + +block block_join(block a, block b) { + block c = a; + block_append(&c, b); + return c; +} + +int block_has_only_binders(block binders, int bindflags) { + bindflags |= OP_HAS_BINDING; + for (inst* curr = binders.first; curr; curr = curr->next) { + if ((opcode_describe(curr->op)->flags & bindflags) != bindflags) { + return 0; + } + } + return 1; +} + +static void block_bind_subblock(block binder, block body, int bindflags) { + assert(block_is_single(binder)); + assert((opcode_describe(binder.first->op)->flags & bindflags) == bindflags); + assert(binder.first->symbol); + assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first); + + binder.first->bound_by = binder.first; + for (inst* i = body.first; i; i = i->next) { + int flags = opcode_describe(i->op)->flags; + if ((flags & bindflags) == bindflags && + i->bound_by == 0 && + !strcmp(i->symbol, binder.first->symbol)) { + // bind this instruction + i->bound_by = binder.first; + } + if (flags & OP_HAS_BLOCK) { + // binding recurses into closures + block_bind_subblock(binder, i->subfn, bindflags); + } + } +} + +block block_bind(block binder, block body, int bindflags) { + assert(block_has_only_binders(binder, bindflags)); + bindflags |= OP_HAS_BINDING; + for (inst* curr = binder.first; curr; curr = curr->next) { + block_bind_subblock(inst_block(curr), body, bindflags); + } + return block_join(binder, body); +} + + +block gen_subexp(block a) { + block c = gen_noop(); + block_append(&c, gen_op_simple(DUP)); + block_append(&c, a); + block_append(&c, gen_op_simple(SWAP)); + return c; +} + +block gen_both(block a, block b) { + block c = gen_noop(); + block jump = gen_op_targetlater(JUMP); + block fork = gen_op_targetlater(FORK); + block_append(&c, fork); + block_append(&c, a); + block_append(&c, jump); + inst_set_target(fork, c); + block_append(&c, b); + inst_set_target(jump, c); + return c; +} + + +block gen_collect(block expr) { + block c = gen_noop(); + block_append(&c, gen_op_simple(DUP)); + block_append(&c, gen_op_const(LOADK, jv_array())); + block array_var = block_bind(gen_op_var_unbound(STOREV, "collect"), + gen_noop(), OP_HAS_VARIABLE); + block_append(&c, array_var); + + block tail = {0}; + block_append(&tail, gen_op_simple(DUP)); + block_append(&tail, gen_op_var_bound(LOADV, array_var)); + block_append(&tail, gen_op_simple(SWAP)); + block_append(&tail, gen_op_simple(APPEND)); + block_append(&tail, gen_op_var_bound(STOREV, array_var)); + block_append(&tail, gen_op_simple(BACKTRACK)); + + block_append(&c, gen_op_target(FORK, tail)); + block_append(&c, expr); + block_append(&c, tail); + + block_append(&c, gen_op_var_bound(LOADV, array_var)); + + return c; +} + +block gen_assign(block expr) { + block c = gen_noop(); + block_append(&c, gen_op_simple(DUP)); + block result_var = block_bind(gen_op_var_unbound(STOREV, "result"), + gen_noop(), OP_HAS_VARIABLE); + block_append(&c, result_var); + + block loop = gen_noop(); + block_append(&loop, gen_op_simple(DUP)); + block_append(&loop, expr); + block_append(&loop, gen_op_var_bound(ASSIGN, result_var)); + block_append(&loop, gen_op_simple(BACKTRACK)); + + block_append(&c, gen_op_target(FORK, loop)); + block_append(&c, loop); + block_append(&c, gen_op_var_bound(LOADV, result_var)); + return c; +} + +block gen_definedor(block a, block b) { + // var found := false + block c = gen_op_simple(DUP); + block_append(&c, gen_op_const(LOADK, jv_false())); + block found_var = block_bind(gen_op_var_unbound(STOREV, "found"), + gen_noop(), OP_HAS_VARIABLE); + block_append(&c, found_var); + + // if found, backtrack. Otherwise execute b + block tail = gen_op_simple(DUP); + block_append(&tail, gen_op_var_bound(LOADV, found_var)); + block backtrack = gen_op_simple(BACKTRACK); + block_append(&tail, gen_op_target(JUMP_F, backtrack)); + block_append(&tail, backtrack); + block_append(&tail, gen_op_simple(POP)); + block_append(&tail, b); + + // try again + block if_notfound = gen_op_simple(BACKTRACK); + + // found := true, produce result + block if_found = gen_op_simple(DUP); + block_append(&if_found, gen_op_const(LOADK, jv_true())); + block_append(&if_found, gen_op_var_bound(STOREV, found_var)); + block_append(&if_found, gen_op_target(JUMP, tail)); + + block_append(&c, gen_op_target(FORK, if_notfound)); + block_append(&c, a); + block_append(&c, gen_op_target(JUMP_F, if_found)); + block_append(&c, if_found); + block_append(&c, if_notfound); + block_append(&c, tail); + + return c; +} + +block gen_condbranch(block iftrue, block iffalse) { + block b = gen_noop(); + block_append(&iftrue, gen_op_target(JUMP, iffalse)); + block_append(&b, gen_op_target(JUMP_F, iftrue)); + block_append(&b, iftrue); + block_append(&b, iffalse); + return b; +} + +block gen_and(block a, block b) { + // a and b = if a then (if b then true else false) else false + block code = gen_op_simple(DUP); + block_append(&code, a); + + block if_a_true = gen_op_simple(POP); + block_append(&if_a_true, b); + block_append(&if_a_true, gen_condbranch(gen_op_const(LOADK, jv_true()), + gen_op_const(LOADK, jv_false()))); + block_append(&code, gen_condbranch(if_a_true, + block_join(gen_op_simple(POP), gen_op_const(LOADK, jv_false())))); + return code; +} + +block gen_or(block a, block b) { + // a or b = if a then true else (if b then true else false) + block code = gen_op_simple(DUP); + block_append(&code, a); + + block if_a_false = gen_op_simple(POP); + block_append(&if_a_false, b); + block_append(&if_a_false, gen_condbranch(gen_op_const(LOADK, jv_true()), + gen_op_const(LOADK, jv_false()))); + block_append(&code, gen_condbranch(block_join(gen_op_simple(POP), gen_op_const(LOADK, jv_true())), + if_a_false)); + return code; +} + +block gen_cond(block cond, block iftrue, block iffalse) { + block b = gen_op_simple(DUP); + block_append(&b, cond); + block_append(&b, gen_condbranch(block_join(gen_op_simple(POP), iftrue), + block_join(gen_op_simple(POP), iffalse))); + return b; +} + +block gen_cbinding(struct symbol_table* t, block code) { + for (int cfunc=0; cfunc<t->ncfunctions; cfunc++) { + inst* i = inst_new(CLOSURE_CREATE_C); + i->imm.cfunc = &t->cfunctions[cfunc]; + i->symbol = strdup(i->imm.cfunc->name); + code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); + } + return code; +} + +static uint16_t nesting_level(struct bytecode* bc, inst* target) { + uint16_t level = 0; + assert(bc && target->compiled); + while (bc && target->compiled != bc) { + level++; + bc = bc->parent; + } + assert(bc && bc == target->compiled); + return level; +} + +static int count_cfunctions(block b) { + int n = 0; + for (inst* i = b.first; i; i = i->next) { + if (i->op == CLOSURE_CREATE_C) n++; + if (opcode_describe(i->op)->flags & OP_HAS_BLOCK) + n += count_cfunctions(i->subfn); + } + return n; +} + +static int compile(struct locfile* locations, struct bytecode* bc, block b) { + int errors = 0; + int pos = 0; + int var_frame_idx = 0; + bc->nsubfunctions = 0; + bc->nclosures = 0; + for (inst* curr = b.first; curr; curr = curr->next) { + if (!curr->next) assert(curr == b.last); + pos += opcode_length(curr->op); + curr->bytecode_pos = pos; + curr->compiled = bc; + + int opflags = opcode_describe(curr->op)->flags; + if (opflags & OP_HAS_BINDING) { + if (!curr->bound_by) { + locfile_locate(locations, curr->source, "error: %s is not defined", curr->symbol); + errors++; + } + } + if ((opflags & OP_HAS_VARIABLE) && + curr->bound_by == curr) { + curr->imm.intval = var_frame_idx++; + } + if (opflags & OP_HAS_BLOCK) { + assert(curr->bound_by == curr); + curr->imm.intval = bc->nsubfunctions++; + } + if (curr->op == CLOSURE_PARAM) { + assert(curr->bound_by == curr); + curr->imm.intval = bc->nclosures++; + } + if (curr->op == CLOSURE_CREATE_C) { + assert(curr->bound_by == curr); + int idx = bc->globals->ncfunctions++; + bc->globals->cfunctions[idx] = *curr->imm.cfunc; + curr->imm.intval = idx; + } + } + if (bc->nsubfunctions) { + bc->subfunctions = malloc(sizeof(struct bytecode*) * bc->nsubfunctions); + for (inst* curr = b.first; curr; curr = curr->next) { + if (!(opcode_describe(curr->op)->flags & OP_HAS_BLOCK)) + continue; + struct bytecode* subfn = malloc(sizeof(struct bytecode)); + bc->subfunctions[curr->imm.intval] = subfn; + subfn->globals = bc->globals; + subfn->parent = bc; + errors += compile(locations, subfn, curr->subfn); + } + } else { + bc->subfunctions = 0; + } + bc->codelen = pos; + uint16_t* code = malloc(sizeof(uint16_t) * bc->codelen); + bc->code = code; + pos = 0; + jv constant_pool = jv_array(); + int maxvar = -1; + if (!errors) for (inst* curr = b.first; curr; curr = curr->next) { + const struct opcode_description* op = opcode_describe(curr->op); + if (op->length == 0) + continue; + uint16_t* opcode_rewrite = &code[pos]; + code[pos++] = curr->op; + int opflags = op->flags; + assert(!(op->flags & OP_IS_CALL_PSEUDO)); + if (opflags & OP_HAS_VARIABLE_LENGTH_ARGLIST) { + int nargs = curr->imm.intval; + assert(nargs > 0); + code[pos++] = (uint16_t)nargs; + int desired_params; + for (int i=0; i<nargs; i++) { + curr = curr->next; + assert(curr && opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO); + code[pos++] = nesting_level(bc, curr->bound_by); + switch (curr->bound_by->op) { + default: assert(0 && "Unknown type of argument"); + case CLOSURE_CREATE: + code[pos++] = curr->bound_by->imm.intval | ARG_NEWCLOSURE; + if (i == 0) { + inst* i = curr->bound_by; + desired_params = i->compiled->subfunctions[i->imm.intval]->nclosures; + } + break; + case CLOSURE_PARAM: + code[pos++] = curr->bound_by->imm.intval; + if (i == 0) desired_params = 0; + break; + case CLOSURE_CREATE_C: + code[pos++] = curr->bound_by->imm.intval; + *opcode_rewrite = bc->globals->cfunctions[curr->bound_by->imm.intval].callop; + if (i == 0) desired_params = 0; + break; + } + } + assert(nargs - 1 == desired_params); + } else if (opflags & OP_HAS_CONSTANT) { + code[pos++] = jv_array_length(jv_copy(constant_pool)); + constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant)); + } else if (opflags & OP_HAS_VARIABLE) { + code[pos++] = nesting_level(bc, curr->bound_by); + uint16_t var = (uint16_t)curr->bound_by->imm.intval; + code[pos++] = var; + if (var > maxvar) maxvar = var; + } else if (opflags & OP_HAS_BRANCH) { + assert(curr->imm.target->bytecode_pos != -1); + assert(curr->imm.target->bytecode_pos > pos); // only forward branches + code[pos] = curr->imm.target->bytecode_pos - (pos + 1); + pos++; + } else if (opflags & OP_HAS_CFUNC) { + assert(curr->symbol); + int found = 0; + for (int i=0; i<bc->globals->ncfunctions; i++) { + if (!strcmp(curr->symbol, bc->globals->cfunctions[i].name)) { + code[pos++] = i; + found = 1; + break; + } + } + assert(found); + } else if (op->length > 1) { + assert(0 && "codegen not implemented for this operation"); + } + } + bc->constants = constant_pool; + bc->nlocals = maxvar + 2; // FIXME: frames of size zero? + return errors; +} + +int block_compile(block b, struct locfile* locations, struct bytecode** out) { + struct bytecode* bc = malloc(sizeof(struct bytecode)); + bc->parent = 0; + bc->globals = malloc(sizeof(struct symbol_table)); + int ncfunc = count_cfunctions(b); + bc->globals->ncfunctions = 0; + bc->globals->cfunctions = malloc(sizeof(struct cfunction) * ncfunc); + int nerrors = compile(locations, bc, b); + assert(bc->globals->ncfunctions == ncfunc); + if (nerrors > 0) { + bytecode_free(bc); + *out = 0; + } else { + *out = bc; + } + return nerrors; +} + +void block_free(block b) { + struct inst* next; + for (struct inst* curr = b.first; curr; curr = next) { + next = curr->next; + inst_free(curr); + } +} |