6 // These are more sensical predicate names in most contexts in this file
// LEFT(tok) is the token's `children` pointer and RIGHT(tok) its `lastChild`
// pointer.  BINARY(tok) holds only when both are non-null and refer to
// different nodes.  BINARY expands `tok` several times, so the argument
// should be free of side effects.
7 #define LEFT(tok) ((tok)->children)
8 #define RIGHT(tok) ((tok)->lastChild)
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
// genExpr, genExprList and newLambda are all called by functions that
// appear before their definitions in this file.
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
// Appends `val` (truncated to unsigned short) to the current code
// generator's bytecode buffer.  When codesz has reached codeAlloced, a
// buffer of twice the capacity is taken from naParseAlloc and the existing
// words are copied into it before the write.
// NOTE(review): the old buffer is not released here; presumably
// naParseAlloc memory is reclaimed together with the parser -- confirm.
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
// Values that do not fit in an unsigned short are truncated by this cast.
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
// Only the signature is visible in this excerpt.  Callers in this file pass
// an opcode as `val` and that opcode's operand as `arg` (for example
// OP_PUSHCONST with a constant index, or OP_JMP with a target address).
28 static void emitImmediate(struct Parser* p, int val, int arg)
// Generates code for the binary operator `op` applied to the children of t.
// Reports "empty subexpression" at t->line when either the first or the
// last child is missing.  The statements that generate the operands and
// emit `op` are not visible in this excerpt.
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
// Appends c to the code generator's constants vector without looking for an
// existing equal entry; i is the index of the newly appended element.
// Indices above 0xffff are rejected with a parse error (bytecode words are
// unsigned short, see emit).  The return statement is not visible here;
// callers use the result as the constant's index.
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
47 i = naVec_size(p->cg->consts) - 1;
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
// Returns the element at index idx of the current code generator's
// constants vector.
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
// Code objects are never shared: they always receive a fresh slot.  Any
// other value is compared against existing constants and the index of a
// match is returned; without a match the value is appended via newConstant.
// The loop header is not visible in this excerpt; i presumably ranges over
// the n constants present on entry.
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
61 if(IS_CODE(c)) return newConstant(p, c);
63 naRef b = naVec_get(p->cg->consts, i);
// Match rules: equal numbers, both nil, or equal per naStrEqual.
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
// Looks sym up in the global symbol table; when the lookup fails, sym is
// stored in the table keyed by itself.  The return statements are not
// visible in this excerpt -- presumably the table's entry is returned on a
// hit and sym itself otherwise; confirm.
71 naRef naInternSymbol(naRef sym)
74 if(naHash_get(globals->symbols, sym, &result))
76 naHash_set(globals->symbols, sym, sym);
// Builds a constant value from token t and returns its index from
// internConstant.  Visible cases: TOK_NIL becomes nil; a string is built
// from t->str/t->strlen (the condition guarding that branch is on a line
// not shown) and additionally interned as a symbol for TOK_SYMBOL; TOK_FUNC
// becomes the code object from newLambda; TOK_LITERAL becomes a number from
// t->num.  Any other token is a parse error.
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
87 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
88 else if(t->type == TOK_LITERAL) c = naNum(t->num);
89 else naParseError(p, "invalid/non-constant constant", t->line);
90 return internConstant(p, c);
// Returns 1 when t has no parent, or when its parent is the top-level node
// or a curly-brace block.  Under a semicolon parent the visible test is
// whether t has no next sibling or the next sibling is TOK_EMPTY; the
// return statements for that case and the fall-through are not visible in
// this excerpt.
93 static int lastExprInBlock(struct Token* t)
95 if(!t->parent) return 1;
96 if(t->parent->type == TOK_TOP || t->parent->type == TOK_LCURL) return 1;
97 if(t->parent->type == TOK_SEMI)
98 if(!t->next || t->next->type == TOK_EMPTY)
103 // Returns true if the node is in "tail context" -- either a child of
104 // a return, the last child of a func block, or else the
105 // last child of an if/elsif/else that is itself in tail context.
// The results of the first two tests below are on lines not shown in this
// excerpt, as is the handling of token types not listed in the switch.
106 static int tailContext(struct Token* t)
108 if(t->parent && t->parent->type == TOK_RETURN)
110 else if(!lastExprInBlock(t))
113 // Walk up the tree. It is ok to see semicolons, else's, elsifs
114 // and curlies. If we reach the top or a func, then we are in
115 // tail context. If we hit an if, then we are in tail context
116 // only if the "if" node is.
117 while((t = t->parent) != 0)
119 case TOK_SEMI: case TOK_LCURL: break;
120 case TOK_ELSE: case TOK_ELSIF: break;
121 case TOK_TOP: case TOK_FUNC: return 1;
122 case TOK_IF: return tailContext(t);
// Emits the code that pushes the scalar value of token t.  A token without
// string data whose number is 1 or 0 uses a dedicated opcode (the opcode
// emitted for 1 is on a line not shown); every other token is pushed with
// OP_PUSHCONST and its constant index.
128 static void genScalarConstant(struct Parser* p, struct Token* t)
130 // These opcodes are for special-case use in other constructs, but
131 // we might as well use them here to save a few bytes in the
132 // instruction stream.
133 if(t->str == 0 && t->num == 1) {
135 } else if(t->str == 0 && t->num == 0) {
136 emit(p, OP_PUSHZERO);
138 int idx = findConstantIndex(p, t);
139 emitImmediate(p, OP_PUSHCONST, idx);
// Emits the setup code for an assignment target and returns an int that the
// assignment case in genExpr emits as the store opcode after the right-hand
// side.  The return statements and any opcodes emitted per branch are on
// lines not shown in this excerpt.  Accepted targets: a parenthesised
// lvalue, a plain symbol, `obj.symbol`, a bracket expression, and a TOK_VAR
// node whose last child is a symbol; anything else is "bad lvalue".
143 static int genLValue(struct Parser* p, struct Token* t)
145 if(t->type == TOK_LPAR) {
146 return genLValue(p, LEFT(t)); // Handle stuff like "(a) = 1"
147 } else if(t->type == TOK_SYMBOL) {
148 genScalarConstant(p, t);
150 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
152 genScalarConstant(p, RIGHT(t));
154 } else if(t->type == TOK_LBRA) {
156 genExpr(p, RIGHT(t));
// NOTE(review): unlike the TOK_DOT branch, RIGHT(t) is dereferenced here
// without a null check -- confirm the parser guarantees a child.
158 } else if(t->type == TOK_VAR && RIGHT(t)->type == TOK_SYMBOL) {
159 genScalarConstant(p, RIGHT(t));
162 naParseError(p, "bad lvalue", t->line);
// Returns the constant index for a default-argument value.  Parentheses are
// unwrapped by recursing on the last child; the innermost token must be
// something findConstantIndex accepts as a constant.
167 static int defArg(struct Parser* p, struct Token* t)
169 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
170 return findConstantIndex(p, t);
// Walks a function's argument-list tree and records it in code object c.
// TOK_COMMA recurses into the first and then the last child; a symbol adds
// a named argument; `sym = value` adds an optional argument with a constant
// default; `sym...` names the rest-argument vector.
173 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
176 if(t->type == TOK_EMPTY) return;
// restArgSym starts out as globals->argRef (set in naCodeGen); once an
// ellipsis argument has replaced it, no further argument is allowed.
177 if(!IDENTICAL(c->restArgSym, globals->argRef))
178 naParseError(p, "remainder must be last", t->line);
179 if(t->type == TOK_ELLIPSIS) {
180 if(LEFT(t)->type != TOK_SYMBOL)
181 naParseError(p, "bad function argument expression", t->line);
182 sym = naStr_fromdata(naNewString(p->context),
183 LEFT(t)->str, LEFT(t)->strlen);
184 c->restArgSym = naInternSymbol(sym);
185 c->needArgVector = 1;
186 } else if(t->type == TOK_ASSIGN) {
187 if(LEFT(t)->type != TOK_SYMBOL)
188 naParseError(p, "bad function argument expression", t->line);
// NOTE(review): no bound check on nOptArgs is visible here, although the
// arrays are allocated with MAX_FUNARGS entries in naCodeGen and nArgs is
// checked below -- confirm.
189 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
190 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
191 } else if(t->type == TOK_SYMBOL) {
// The condition guarding this error is on a line not shown.
193 naParseError(p, "optional arguments must be last", t->line);
194 if(c->nArgs >= MAX_FUNARGS)
195 naParseError(p, "too many named function arguments", t->line);
196 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
197 } else if(t->type == TOK_COMMA) {
198 genArgList(p, c, LEFT(t));
199 genArgList(p, c, RIGHT(t));
201 naParseError(p, "bad function argument expression", t->line);
// Compiles the function token t into a code object via naCodeGen.  The last
// child of t must be a curly-brace block; the argument list is the content
// of a leading parenthesis child, or 0 when the first child is not a
// parenthesis.  Saving/restoring the generator state through cgSave and the
// return of the code object happen on lines not shown in this excerpt.
// NOTE(review): LEFT(t)/RIGHT(t) are dereferenced without null checks.
204 static naRef newLambda(struct Parser* p, struct Token* t)
206 struct CodeGenerator* cgSave;
208 struct Token* arglist;
209 if(RIGHT(t)->type != TOK_LCURL)
210 naParseError(p, "bad function definition", t->line);
212 // Save off the generator state while we do the new one
214 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
215 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
// Compiles the function literal t and emits OP_PUSHCONST for it.  The code
// object is added with newConstant, so it always gets its own constant slot.
220 static void genLambda(struct Parser* p, struct Token* t)
222 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
// Generates code for a comma-separated list and returns an element count.
// When doAppend is non-zero an OP_VAPPEND is emitted after an element.  For
// a TOK_COMMA node the result is 1 plus the count of the last child's
// sub-list.  The code generating each element and the return values of the
// TOK_EMPTY and single-element branches are on lines not shown.
225 static int genList(struct Parser* p, struct Token* t, int doAppend)
227 if(t->type == TOK_COMMA) {
229 if(doAppend) emit(p, OP_VAPPEND);
230 return 1 + genList(p, RIGHT(t), doAppend);
231 } else if(t->type == TOK_EMPTY) {
235 if(doAppend) emit(p, OP_VAPPEND);
// Generates code for one `key : value` element of a hash initializer.  The
// node must be TOK_COLON; a symbol key is pushed as a constant, a literal
// key is evaluated as an expression, and any other key is an error.  The
// value expression follows.  The TOK_EMPTY handling and anything emitted
// after the value are on lines not shown in this excerpt.
240 static void genHashElem(struct Parser* p, struct Token* t)
242 if(t->type == TOK_EMPTY)
244 if(t->type != TOK_COLON)
245 naParseError(p, "bad hash/object initializer", t->line);
246 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
247 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
248 else naParseError(p, "bad hash/object initializer", t->line);
249 genExpr(p, RIGHT(t));
// Generates code for a comma-separated chain of hash elements: the first
// child of each TOK_COMMA node is one element and the last child is the
// rest of the chain.  The body of the non-empty, non-comma branch is on a
// line not shown in this excerpt.
253 static void genHash(struct Parser* p, struct Token* t)
255 if(t->type == TOK_COMMA) {
256 genHashElem(p, LEFT(t));
257 genHash(p, RIGHT(t));
258 } else if(t->type != TOK_EMPTY) {
// Generates a function or method call.  When the callee is a TOK_DOT
// expression, the object expression is generated first and the method is
// looked up with OP_MEMBER.  Arguments are generated with genList without
// appending, and the call opcode is emitted with the argument count as its
// operand.  Declarations, the non-method branch and the condition selecting
// the tail-call opcodes are on lines not shown in this excerpt.
263 static void genFuncall(struct Parser* p, struct Token* t)
267 if(LEFT(t)->type == TOK_DOT) {
268 genExpr(p, LEFT(LEFT(t)));
270 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
275 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
// Swap the plain call opcode for its tail-call counterpart.
277 op = op == OP_FCALL ? OP_FTAIL : OP_MTAIL;
278 emitImmediate(p, op, nargs);
// Initialises the loop record at index loopTop: break and continue targets
// are set to the placeholder 0xffffff (genLoop fills them in) and the
// optional label token is stored.  Advancing loopTop and any bounds check
// are on lines not shown in this excerpt.
281 static void pushLoop(struct Parser* p, struct Token* label)
283 int i = p->cg->loopTop;
284 p->cg->loops[i].breakIP = 0xffffff;
285 p->cg->loops[i].contIP = 0xffffff;
286 p->cg->loops[i].label = label;
// Reports an internal error when loopTop has become negative.  The
// statement that changes loopTop is on a line not shown in this excerpt.
291 static void popLoop(struct Parser* p)
294 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
298 // Emit a jump operation, and return the location of the address in
299 // the bytecode for future fixup in fixJumpTarget
// The 0xffff written below is only a placeholder operand.  Emitting `op`
// and computing the returned location happen on lines not shown here.
300 static int emitJump(struct Parser* p, int op)
305 emit(p, 0xffff); // dummy address
309 // Points a previous jump instruction at the current "end-of-bytecode"
// `spot` is the operand location returned by emitJump; it is overwritten
// with the current value of codesz.
310 static void fixJumpTarget(struct Parser* p, int spot)
312 p->cg->byteCode[spot] = p->cg->codesz;
// Generates short-circuit evaluation for `and` / `or`.  The left operand is
// presumably generated on a line not shown.  For `and` the result is
// inverted first so a single OP_JIFNOT serves both operators.  If the jump
// is not taken, a fixed result is pushed (nil for `and`, one for `or`) and
// control skips to the end; otherwise the right operand is evaluated and
// becomes the result.
315 static void genShortCircuit(struct Parser* p, struct Token* t)
317 int jumpNext, jumpEnd, isAnd = (t->type == TOK_AND);
319 if(isAnd) emit(p, OP_NOT);
320 jumpNext = emitJump(p, OP_JIFNOT);
321 emit(p, isAnd ? OP_PUSHNIL : OP_PUSHONE);
322 jumpEnd = emitJump(p, OP_JMP);
323 fixJumpTarget(p, jumpNext);
324 genExpr(p, RIGHT(t));
325 fixJumpTarget(p, jumpEnd);
// Generates an if/elsif node.  tif's first child is the test and the
// following sibling holds the body; telse is the else/elsif token that
// follows.  Layout: test, OP_JIFNOT to the else part, body, OP_JMP to the
// end, else part.  An elsif is compiled as a nested genIf with its own next
// sibling as the else part.  The check for a missing telse and what is
// emitted in that case are on lines not shown in this excerpt.
329 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
331 int jumpNext, jumpEnd;
332 genExpr(p, tif->children); // the test
333 jumpNext = emitJump(p, OP_JIFNOT);
334 genExprList(p, tif->children->next->children); // the body
335 jumpEnd = emitJump(p, OP_JMP);
336 fixJumpTarget(p, jumpNext);
338 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
339 else genExprList(p, telse->children->children);
343 fixJumpTarget(p, jumpEnd);
// Entry point for an `if` token: its children are the test, the body and,
// as the third child, the first else/elsif token, which is passed on as
// genIf's else part.
346 static void genIfElse(struct Parser* p, struct Token* t)
348 genIf(p, t, t->children->next->next);
// Generates `test ? a : b`.  The last child of t must be a TOK_COLON node
// whose first and last children are the two result expressions; the jump
// layout is the same as for an if/else.
351 static void genQuestion(struct Parser* p, struct Token* t)
353 int jumpNext, jumpEnd;
354 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
355 naParseError(p, "invalid ?: expression", t->line);
356 genExpr(p, LEFT(t)); // the test
357 jumpNext = emitJump(p, OP_JIFNOT);
358 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
359 jumpEnd = emitJump(p, OP_JMP);
360 fixJumpTarget(p, jumpNext);
361 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
362 fixJumpTarget(p, jumpEnd);
// Counts the TOK_SEMI nodes reached by starting at t and repeatedly
// following the last-child link.  Returns 0 when t is null or is not a
// semicolon node.
365 static int countSemis(struct Token* t)
367 if(!t || t->type != TOK_SEMI) return 0;
368 return 1 + countSemis(RIGHT(t));
// Emits the part shared by all loop forms: body, optional update
// expression, the jump back to loopTop and the exit point.  The caller has
// already emitted the loop test and the conditional exit jump whose operand
// location is jumpEnd.  Loop-stack push/pop and any other bookkeeping are
// on lines not shown in this excerpt.
371 static void genLoop(struct Parser* p, struct Token* body,
372 struct Token* update, struct Token* label,
373 int loopTop, int jumpEnd)
375 int cont, jumpOverContinue;
// jumpEnd-1 is the word just before the exit jump's operand, presumably
// the conditional jump opcode itself; `break` re-enters the loop there.
377 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
// Normal flow skips over a JMP placed at contIP; `continue` jumps to that
// JMP, which is pointed at the update code further down.
379 jumpOverContinue = emitJump(p, OP_JMP);
380 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
381 cont = emitJump(p, OP_JMP);
382 fixJumpTarget(p, jumpOverContinue);
384 genExprList(p, body);
386 fixJumpTarget(p, cont);
387 if(update) { genExpr(p, update); emit(p, OP_POP); }
388 emitImmediate(p, OP_JMPLOOP, loopTop);
389 fixJumpTarget(p, jumpEnd);
391 emit(p, OP_PUSHNIL); // Leave something on the stack
// Shared generator for `for` and `while`.  The optional init expression is
// evaluated once and its value popped; loopTop marks where each iteration
// starts; an OP_JIFNOT exit jump is emitted and genLoop emits the rest.
// Generating the test expression is on a line not shown in this excerpt.
394 static void genForWhile(struct Parser* p, struct Token* init,
395 struct Token* test, struct Token* update,
396 struct Token* body, struct Token* label)
398 int loopTop, jumpEnd;
399 if(init) { genExpr(p, init); emit(p, OP_POP); }
401 loopTop = p->cg->codesz;
403 jumpEnd = emitJump(p, OP_JIFNOT);
404 genLoop(p, body, update, label, loopTop, jumpEnd);
// Generates a `while` loop.  The header is the content of t's first child
// and the body the content of its last child.  The semicolon count of the
// header tells whether a label precedes the test; a label must be a symbol
// and too many semicolons is an error.  The statements that split label and
// test and the conditions around the two errors are on lines not shown.
407 static void genWhile(struct Parser* p, struct Token* t)
409 struct Token *test=LEFT(t)->children, *body, *label=0;
410 int semis = countSemis(test);
413 if(!label || label->type != TOK_SYMBOL)
414 naParseError(p, "bad loop label", t->line);
418 naParseError(p, "too many semicolons in while test", t->line);
419 body = LEFT(RIGHT(t));
420 genForWhile(p, 0, test, 0, body, label);
// Generates a `for` loop.  h is the header inside the parentheses.  A
// header whose semicolon count is not 2 is rejected unless it takes the
// labelled form handled by the first branch (its condition is on a line not
// shown), where the first child of h must be a symbol.  test and update are
// the first and last children of h's last child; init is assigned on a line
// not shown.  The body is the content of t's last child.
423 static void genFor(struct Parser* p, struct Token* t)
425 struct Token *init, *test, *body, *update, *label=0;
426 struct Token *h = LEFT(t)->children;
427 int semis = countSemis(h);
429 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
430 naParseError(p, "bad loop label", h->line);
433 } else if(semis != 2) {
434 naParseError(p, "wrong number of terms in for header", t->line);
437 // Parse tree hell :)
439 test = LEFT(RIGHT(h));
440 update = RIGHT(RIGHT(h));
441 body = RIGHT(t)->children;
442 genForWhile(p, init, test, update, body, label);
// Generates a foreach-style loop.  h is the header; a header whose
// semicolon count is not 1 is rejected unless it takes the labelled form
// handled by the first branch (its condition is on a line not shown), where
// the first child of h must be a symbol.  Visible code pushes a zero before
// loopTop, exits the loop through OP_JIFNIL, and obtains from genLValue the
// opcode used to store into the element variable.  The statements assigning
// elem and vec, and those emitting the per-iteration fetch and store, are
// on lines not shown in this excerpt.
445 static void genForEach(struct Parser* p, struct Token* t)
447 int loopTop, jumpEnd, assignOp;
448 struct Token *elem, *body, *vec, *label=0;
449 struct Token *h = LEFT(LEFT(t));
450 int semis = countSemis(h);
452 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
453 naParseError(p, "bad loop label", h->line);
456 } else if (semis != 1) {
457 naParseError(p, "wrong number of terms in foreach header", t->line);
461 body = RIGHT(t)->children;
465 emit(p, OP_PUSHZERO);
466 loopTop = p->cg->codesz;
468 jumpEnd = emitJump(p, OP_JIFNIL);
469 assignOp = genLValue(p, elem);
473 genLoop(p, body, 0, label, loopTop, jumpEnd);
// Compares the text of two tokens: returns 0 when either pointer is null,
// when the lengths differ, or at the first differing byte.  The return for
// a full match is on a line not shown in this excerpt.
// NOTE(review): a->strlen is read in the declaration below, before the
// `!a` check runs, so a null `a` is dereferenced; the read should move
// after the check.
476 static int tokMatch(struct Token* a, struct Token* b)
478 int i, l = a->strlen;
479 if(!a || !b) return 0;
480 if(l != b->strlen) return 0;
481 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
// Generates `break` / `continue`, optionally with a loop label.  `levels`
// is how many loops are left, 1 by default.  With a label (the guarding
// condition is on a line not shown) the label must be a symbol and the loop
// stack is searched with tokMatch; `levels` then becomes the distance from
// the top of the loop stack to the matching loop.  bp/cp are that loop's
// break and continue targets.  What is emitted once per level is on a line
// not shown in this excerpt.
485 static void genBreakContinue(struct Parser* p, struct Token* t)
487 int levels = 1, loop = -1, bp, cp, i;
489 if(RIGHT(t)->type != TOK_SYMBOL)
490 naParseError(p, "bad break/continue label", t->line);
491 for(i=0; i<p->cg->loopTop; i++)
492 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
495 naParseError(p, "no match for break/continue label", t->line);
496 levels = p->cg->loopTop - loop;
498 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
499 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
500 for(i=0; i<levels; i++)
502 if(t->type == TOK_BREAK)
503 emit(p, OP_PUSHNIL); // breakIP is always a JIFNOT/JIFNIL!
504 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
// Appends an (instruction offset, source line) pair to the line table,
// growing the table first when needed.  Both values are stored as unsigned
// short, so larger offsets or line numbers are truncated.
// NOTE(review): nextLineIp advances by two per pair (one per stored short)
// but is compared against nLineIps, which is multiplied by two wherever it
// is used as a size, and the copy loop runs to nextLineIp*2.  The units
// look inconsistent and the copy may read past the end of the old buffer --
// confirm.
507 static void newLineEntry(struct Parser* p, int line)
510 if(p->cg->nextLineIp >= p->cg->nLineIps) {
511 int nsz = p->cg->nLineIps*2 + 1;
512 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
513 for(i=0; i<(p->cg->nextLineIp*2); i++)
514 n[i] = p->cg->lineIps[i];
516 p->cg->nLineIps = nsz;
518 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
519 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
// Generates code for one expression node by dispatching on the token type.
// Most `case` labels and `break` statements are on lines not shown in this
// excerpt, so the statements below appear without their labels.  Unhandled
// token types end in a "parse error".
522 static void genExpr(struct Parser* p, struct Token* t)
// Record a line-table entry whenever the source line changes.
525 if(t->line != p->cg->lastLine)
526 newLineEntry(p, t->line);
527 p->cg->lastLine = t->line;
544 case TOK_BREAK: case TOK_CONTINUE:
545 genBreakContinue(p, t);
548 genExprList(p, LEFT(t));
554 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
555 else genExpr(p, LEFT(t)); // simple parenthesis
559 genBinOp(OP_EXTRACT, p, t); // a[i]
562 genList(p, LEFT(t), 1);
570 i = genLValue(p, LEFT(t));
571 genExpr(p, RIGHT(t));
572 emit(p, i); // use the op appropriate to the lvalue
575 if(RIGHT(t)) genExpr(p, RIGHT(t));
576 else emit(p, OP_PUSHNIL);
// One OP_UNMARK is emitted per loop currently on the loop stack.
577 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
581 genExpr(p, RIGHT(t));
585 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
588 genScalarConstant(p, t);
592 genBinOp(OP_MINUS, p, t); // binary subtraction
593 } else if(RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
// NOTE(review): this negates the literal inside the parse tree itself, so
// generating code for the same node a second time would flip the sign back.
594 RIGHT(t)->num *= -1; // Pre-negate constants
595 genScalarConstant(p, RIGHT(t));
597 genExpr(p, RIGHT(t)); // unary negation
602 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
607 if(RIGHT(t)->type != TOK_SYMBOL)
608 naParseError(p, "object field not symbol", RIGHT(t)->line);
609 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
611 case TOK_EMPTY: case TOK_NIL:
612 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
613 case TOK_AND: case TOK_OR:
614 genShortCircuit(p, t);
616 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
617 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
618 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
619 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
620 case TOK_LT: genBinOp(OP_LT, p, t); break;
621 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
622 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
623 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
624 case TOK_GT: genBinOp(OP_GT, p, t); break;
625 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
627 naParseError(p, "parse error", t->line);
// Generates code for a semicolon-separated statement list.  For a TOK_SEMI
// node, the remaining statements (the last child) are generated only when
// they exist and are not TOK_EMPTY.  Generating the first statement, what
// is emitted between statements, and the non-semicolon case are on lines
// not shown in this excerpt.
631 static void genExprList(struct Parser* p, struct Token* t)
633 if(t->type == TOK_SEMI) {
635 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
637 genExprList(p, RIGHT(t));
// Compiles `block`, with an optional argument list, into a new code object.
// A CodeGenerator local to this call collects bytecode, constants and line
// information while genExprList runs; afterwards the results are copied
// from parser-allocated scratch buffers into arrays obtained from naAlloc
// and stored on the code object.  Several statements are on lines not shown
// in this excerpt: the remaining cg initialisation, installing cg as p->cg,
// the conditions around the argument handling, and the end of the function.
644 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
649 struct CodeGenerator cg;
652 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
653 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
655 cg.consts = naNewVector(p->context);
662 genExprList(p, block);
665 // Now make a code object
666 codeObj = naNewCode(p->context);
667 code = codeObj.ref.ptr.code;
669 // Parse the argument list, if any
// Defaults: no named or optional arguments, and the argument vector is
// needed.  It is switched off below before genArgList runs, and genArgList
// switches it back on for a rest argument.
670 code->restArgSym = globals->argRef;
671 code->nArgs = code->nOptArgs = 0;
672 code->argSyms = code->optArgSyms = code->optArgVals = 0;
673 code->needArgVector = 1;
// Scratch arrays sized for the maximum argument count; they are replaced
// further down by exact-size copies.
675 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
676 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
677 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
678 code->needArgVector = 0;
679 genArgList(p, code, arglist);
682 nsyms = naAlloc(sizeof(int) * code->nArgs);
683 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
684 code->argSyms = nsyms;
685 } else code->argSyms = 0;
687 int i, *nsyms, *nvals;
688 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
689 nvals = naAlloc(sizeof(int) * code->nOptArgs);
690 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
691 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
692 code->optArgSyms = nsyms;
693 code->optArgVals = nvals;
694 } else code->optArgSyms = code->optArgVals = 0;
697 code->codesz = cg.codesz;
698 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
699 for(i=0; i < cg.codesz; i++)
700 code->byteCode[i] = cg.byteCode[i];
701 code->nConstants = naVec_size(cg.consts);
702 code->constants = naAlloc(code->nConstants * sizeof(naRef));
703 code->srcFile = p->srcFile;
704 for(i=0; i<code->nConstants; i++)
705 code->constants[i] = getConstant(p, i);
// NOTE(review): nLines is taken from nextLineIp, while the array is sized
// and copied using nLineIps*2 -- confirm both counts use the same units
// (see newLineEntry).
706 code->nLines = p->cg->nextLineIp;
707 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
708 for(i=0; i<p->cg->nLineIps*2; i++)
709 code->lineIps[i] = p->cg->lineIps[i];