6 // These are more sensical predicate names in most contexts in this file
7 #define LEFT(tok) ((tok)->children)
8 #define RIGHT(tok) ((tok)->lastChild)
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
28 static void emitImmediate(struct Parser* p, int val, int arg)
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
47 i = naVec_size(p->cg->consts) - 1;
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
61 if(IS_CODE(c)) return newConstant(p, c);
63 naRef b = naVec_get(p->cg->consts, i);
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
71 naRef naInternSymbol(naRef sym)
74 if(naHash_get(globals->symbols, sym, &result))
76 naHash_set(globals->symbols, sym, sym);
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
87 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
88 else if(t->type == TOK_LITERAL) c = naNum(t->num);
89 else naParseError(p, "invalid/non-constant constant", t->line);
90 return internConstant(p, c);
93 static int lastExprInBlock(struct Token* t)
95 if(!t->parent) return 1;
96 if(t->parent->type == TOK_TOP || t->parent->type == TOK_LCURL) return 1;
97 if(t->parent->type == TOK_SEMI)
98 if(!t->next || t->next->type == TOK_EMPTY)
103 // Returns true if the node is in "tail context" -- either a child of
104 // a return, the last child of a func block, or else the
105 // last child of an if/elsif/if that is itself in tail context.
106 static int tailContext(struct Token* t)
108 if(t->parent && t->parent->type == TOK_RETURN)
110 else if(!lastExprInBlock(t))
113 // Walk up the tree. It is ok to see semicolons, else's, elsifs
114 // and curlies. If we reach the top or a func, then we are in
115 // tail context. If we hit an if, then we are in tail context
116 // only if the "if" node is.
117 while((t = t->parent) != 0)
119 case TOK_SEMI: case TOK_LCURL: break;
120 case TOK_ELSE: case TOK_ELSIF: break;
121 case TOK_TOP: case TOK_FUNC: return 1;
122 case TOK_IF: return tailContext(t);
128 static int genScalarConstant(struct Parser* p, struct Token* t)
130 // These opcodes are for special-case use in other constructs, but
131 // we might as well use them here to save a few bytes in the
132 // instruction stream.
133 if(t->str == 0 && t->num == 1) {
135 } else if(t->str == 0 && t->num == 0) {
136 emit(p, OP_PUSHZERO);
138 int idx = findConstantIndex(p, t);
139 emitImmediate(p, OP_PUSHCONST, idx);
145 static int genLValue(struct Parser* p, struct Token* t, int* cidx)
147 if(t->type == TOK_LPAR) {
148 return genLValue(p, LEFT(t), cidx); // Handle stuff like "(a) = 1"
149 } else if(t->type == TOK_SYMBOL) {
150 *cidx = genScalarConstant(p, t);
152 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
154 *cidx = genScalarConstant(p, RIGHT(t));
156 } else if(t->type == TOK_LBRA) {
158 genExpr(p, RIGHT(t));
160 } else if(t->type == TOK_VAR && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
161 *cidx = genScalarConstant(p, RIGHT(t));
164 naParseError(p, "bad lvalue", t->line);
169 static void genEqOp(int op, struct Parser* p, struct Token* t)
171 int cidx, setop = genLValue(p, LEFT(t), &cidx);
172 if(setop == OP_SETMEMBER) {
175 emitImmediate(p, OP_MEMBER, cidx);
176 } else if(setop == OP_INSERT) {
179 } else // OP_SETSYM, OP_SETLOCAL
180 emitImmediate(p, OP_LOCAL, cidx);
181 genExpr(p, RIGHT(t));
186 static int defArg(struct Parser* p, struct Token* t)
188 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
189 return findConstantIndex(p, t);
192 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
195 if(t->type == TOK_EMPTY) return;
196 if(!IDENTICAL(c->restArgSym, globals->argRef))
197 naParseError(p, "remainder must be last", t->line);
198 if(t->type == TOK_ELLIPSIS) {
199 if(LEFT(t)->type != TOK_SYMBOL)
200 naParseError(p, "bad function argument expression", t->line);
201 sym = naStr_fromdata(naNewString(p->context),
202 LEFT(t)->str, LEFT(t)->strlen);
203 c->restArgSym = naInternSymbol(sym);
204 c->needArgVector = 1;
205 } else if(t->type == TOK_ASSIGN) {
206 if(LEFT(t)->type != TOK_SYMBOL)
207 naParseError(p, "bad function argument expression", t->line);
208 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
209 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
210 } else if(t->type == TOK_SYMBOL) {
212 naParseError(p, "optional arguments must be last", t->line);
213 if(c->nArgs >= MAX_FUNARGS)
214 naParseError(p, "too many named function arguments", t->line);
215 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
216 } else if(t->type == TOK_COMMA) {
217 genArgList(p, c, LEFT(t));
218 genArgList(p, c, RIGHT(t));
220 naParseError(p, "bad function argument expression", t->line);
223 static naRef newLambda(struct Parser* p, struct Token* t)
225 struct CodeGenerator* cgSave;
227 struct Token* arglist;
228 if(RIGHT(t)->type != TOK_LCURL)
229 naParseError(p, "bad function definition", t->line);
231 // Save off the generator state while we do the new one
233 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
234 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
239 static void genLambda(struct Parser* p, struct Token* t)
241 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
244 static int genList(struct Parser* p, struct Token* t, int doAppend)
246 if(t->type == TOK_COMMA) {
248 if(doAppend) emit(p, OP_VAPPEND);
249 return 1 + genList(p, RIGHT(t), doAppend);
250 } else if(t->type == TOK_EMPTY) {
254 if(doAppend) emit(p, OP_VAPPEND);
259 static void genHashElem(struct Parser* p, struct Token* t)
261 if(t->type == TOK_EMPTY)
263 if(t->type != TOK_COLON)
264 naParseError(p, "bad hash/object initializer", t->line);
265 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
266 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
267 else naParseError(p, "bad hash/object initializer", t->line);
268 genExpr(p, RIGHT(t));
272 static void genHash(struct Parser* p, struct Token* t)
274 if(t->type == TOK_COMMA) {
275 genHashElem(p, LEFT(t));
276 genHash(p, RIGHT(t));
277 } else if(t->type != TOK_EMPTY) {
282 static void genFuncall(struct Parser* p, struct Token* t)
286 if(LEFT(t)->type == TOK_DOT) {
287 genExpr(p, LEFT(LEFT(t)));
289 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
294 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
296 op = op == OP_FCALL ? OP_FTAIL : OP_MTAIL;
297 emitImmediate(p, op, nargs);
300 static void pushLoop(struct Parser* p, struct Token* label)
302 int i = p->cg->loopTop;
303 p->cg->loops[i].breakIP = 0xffffff;
304 p->cg->loops[i].contIP = 0xffffff;
305 p->cg->loops[i].label = label;
310 static void popLoop(struct Parser* p)
313 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
317 // Emit a jump operation, and return the location of the address in
318 // the bytecode for future fixup in fixJumpTarget
319 static int emitJump(struct Parser* p, int op)
324 emit(p, 0xffff); // dummy address
328 // Points a previous jump instruction at the current "end-of-bytecode"
329 static void fixJumpTarget(struct Parser* p, int spot)
331 p->cg->byteCode[spot] = p->cg->codesz;
334 static void genShortCircuit(struct Parser* p, struct Token* t)
336 int jumpNext, jumpEnd, isAnd = (t->type == TOK_AND);
338 if(isAnd) emit(p, OP_NOT);
339 jumpNext = emitJump(p, OP_JIFNOT);
340 emit(p, isAnd ? OP_PUSHNIL : OP_PUSHONE);
341 jumpEnd = emitJump(p, OP_JMP);
342 fixJumpTarget(p, jumpNext);
343 genExpr(p, RIGHT(t));
344 fixJumpTarget(p, jumpEnd);
348 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
350 int jumpNext, jumpEnd;
351 genExpr(p, tif->children); // the test
352 jumpNext = emitJump(p, OP_JIFNOT);
353 genExprList(p, tif->children->next->children); // the body
354 jumpEnd = emitJump(p, OP_JMP);
355 fixJumpTarget(p, jumpNext);
357 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
358 else genExprList(p, telse->children->children);
362 fixJumpTarget(p, jumpEnd);
365 static void genIfElse(struct Parser* p, struct Token* t)
367 genIf(p, t, t->children->next->next);
370 static void genQuestion(struct Parser* p, struct Token* t)
372 int jumpNext, jumpEnd;
373 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
374 naParseError(p, "invalid ?: expression", t->line);
375 genExpr(p, LEFT(t)); // the test
376 jumpNext = emitJump(p, OP_JIFNOT);
377 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
378 jumpEnd = emitJump(p, OP_JMP);
379 fixJumpTarget(p, jumpNext);
380 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
381 fixJumpTarget(p, jumpEnd);
384 static int countSemis(struct Token* t)
386 if(!t || t->type != TOK_SEMI) return 0;
387 return 1 + countSemis(RIGHT(t));
390 static void genLoop(struct Parser* p, struct Token* body,
391 struct Token* update, struct Token* label,
392 int loopTop, int jumpEnd)
394 int cont, jumpOverContinue;
396 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
398 jumpOverContinue = emitJump(p, OP_JMP);
399 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
400 cont = emitJump(p, OP_JMP);
401 fixJumpTarget(p, jumpOverContinue);
403 genExprList(p, body);
405 fixJumpTarget(p, cont);
406 if(update) { genExpr(p, update); emit(p, OP_POP); }
407 emitImmediate(p, OP_JMPLOOP, loopTop);
408 fixJumpTarget(p, jumpEnd);
410 emit(p, OP_PUSHNIL); // Leave something on the stack
413 static void genForWhile(struct Parser* p, struct Token* init,
414 struct Token* test, struct Token* update,
415 struct Token* body, struct Token* label)
417 int loopTop, jumpEnd;
418 if(init) { genExpr(p, init); emit(p, OP_POP); }
420 loopTop = p->cg->codesz;
422 jumpEnd = emitJump(p, OP_JIFNOT);
423 genLoop(p, body, update, label, loopTop, jumpEnd);
426 static void genWhile(struct Parser* p, struct Token* t)
428 struct Token *test=LEFT(t)->children, *body, *label=0;
429 int semis = countSemis(test);
432 if(!label || label->type != TOK_SYMBOL)
433 naParseError(p, "bad loop label", t->line);
437 naParseError(p, "too many semicolons in while test", t->line);
438 body = LEFT(RIGHT(t));
439 genForWhile(p, 0, test, 0, body, label);
442 static void genFor(struct Parser* p, struct Token* t)
444 struct Token *init, *test, *body, *update, *label=0;
445 struct Token *h = LEFT(t)->children;
446 int semis = countSemis(h);
448 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
449 naParseError(p, "bad loop label", h->line);
452 } else if(semis != 2) {
453 naParseError(p, "wrong number of terms in for header", t->line);
456 // Parse tree hell :)
458 test = LEFT(RIGHT(h));
459 update = RIGHT(RIGHT(h));
460 body = RIGHT(t)->children;
461 genForWhile(p, init, test, update, body, label);
464 static void genForEach(struct Parser* p, struct Token* t)
466 int loopTop, jumpEnd, assignOp, dummy;
467 struct Token *elem, *body, *vec, *label=0;
468 struct Token *h = LEFT(LEFT(t));
469 int semis = countSemis(h);
471 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
472 naParseError(p, "bad loop label", h->line);
475 } else if (semis != 1) {
476 naParseError(p, "wrong number of terms in foreach header", t->line);
480 body = RIGHT(t)->children;
484 emit(p, OP_PUSHZERO);
485 loopTop = p->cg->codesz;
486 emit(p, t->type == TOK_FOREACH ? OP_EACH : OP_INDEX);
487 jumpEnd = emitJump(p, OP_JIFNIL);
488 assignOp = genLValue(p, elem, &dummy);
492 genLoop(p, body, 0, label, loopTop, jumpEnd);
495 static int tokMatch(struct Token* a, struct Token* b)
497 int i, l = a->strlen;
498 if(!a || !b) return 0;
499 if(l != b->strlen) return 0;
500 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
504 static void genBreakContinue(struct Parser* p, struct Token* t)
506 int levels = 1, loop = -1, bp, cp, i;
508 if(RIGHT(t)->type != TOK_SYMBOL)
509 naParseError(p, "bad break/continue label", t->line);
510 for(i=0; i<p->cg->loopTop; i++)
511 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
514 naParseError(p, "no match for break/continue label", t->line);
515 levels = p->cg->loopTop - loop;
517 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
518 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
519 for(i=0; i<levels; i++)
521 if(t->type == TOK_BREAK)
522 emit(p, OP_PUSHNIL); // breakIP is always a JIFNOT/JIFNIL!
523 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
526 static void newLineEntry(struct Parser* p, int line)
529 if(p->cg->nextLineIp >= p->cg->nLineIps) {
530 int nsz = p->cg->nLineIps*2 + 1;
531 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
532 for(i=0; i<(p->cg->nextLineIp*2); i++)
533 n[i] = p->cg->lineIps[i];
535 p->cg->nLineIps = nsz;
537 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
538 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
541 static void genExpr(struct Parser* p, struct Token* t)
544 if(t->line != p->cg->lastLine)
545 newLineEntry(p, t->line);
546 p->cg->lastLine = t->line;
564 case TOK_BREAK: case TOK_CONTINUE:
565 genBreakContinue(p, t);
568 genExprList(p, LEFT(t));
574 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
575 else genExpr(p, LEFT(t)); // simple parenthesis
579 genBinOp(OP_EXTRACT, p, t); // a[i]
582 genList(p, LEFT(t), 1);
590 i = genLValue(p, LEFT(t), &dummy);
591 genExpr(p, RIGHT(t));
592 emit(p, i); // use the op appropriate to the lvalue
595 if(RIGHT(t)) genExpr(p, RIGHT(t));
596 else emit(p, OP_PUSHNIL);
597 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
601 genExpr(p, RIGHT(t));
605 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
608 genScalarConstant(p, t);
612 genBinOp(OP_MINUS, p, t); // binary subtraction
613 } else if(RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
614 RIGHT(t)->num *= -1; // Pre-negate constants
615 genScalarConstant(p, RIGHT(t));
617 genExpr(p, RIGHT(t)); // unary negation
622 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
627 if(RIGHT(t)->type != TOK_SYMBOL)
628 naParseError(p, "object field not symbol", RIGHT(t)->line);
629 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
631 case TOK_EMPTY: case TOK_NIL:
632 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
633 case TOK_AND: case TOK_OR:
634 genShortCircuit(p, t);
636 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
637 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
638 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
639 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
640 case TOK_LT: genBinOp(OP_LT, p, t); break;
641 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
642 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
643 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
644 case TOK_GT: genBinOp(OP_GT, p, t); break;
645 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
646 case TOK_PLUSEQ: genEqOp(OP_PLUS, p, t); break;
647 case TOK_MINUSEQ: genEqOp(OP_MINUS, p, t); break;
648 case TOK_MULEQ: genEqOp(OP_MUL, p, t); break;
649 case TOK_DIVEQ: genEqOp(OP_DIV, p, t); break;
650 case TOK_CATEQ: genEqOp(OP_CAT, p, t); break;
652 naParseError(p, "parse error", t->line);
656 static void genExprList(struct Parser* p, struct Token* t)
658 if(t->type == TOK_SEMI) {
660 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
662 genExprList(p, RIGHT(t));
669 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
674 struct CodeGenerator cg;
677 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
678 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
680 cg.consts = naNewVector(p->context);
687 genExprList(p, block);
690 // Now make a code object
691 codeObj = naNewCode(p->context);
692 code = codeObj.ref.ptr.code;
694 // Parse the argument list, if any
695 code->restArgSym = globals->argRef;
696 code->nArgs = code->nOptArgs = 0;
697 code->argSyms = code->optArgSyms = code->optArgVals = 0;
698 code->needArgVector = 1;
700 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
701 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
702 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
703 code->needArgVector = 0;
704 genArgList(p, code, arglist);
707 nsyms = naAlloc(sizeof(int) * code->nArgs);
708 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
709 code->argSyms = nsyms;
710 } else code->argSyms = 0;
712 int i, *nsyms, *nvals;
713 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
714 nvals = naAlloc(sizeof(int) * code->nOptArgs);
715 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
716 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
717 code->optArgSyms = nsyms;
718 code->optArgVals = nvals;
719 } else code->optArgSyms = code->optArgVals = 0;
722 code->codesz = cg.codesz;
723 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
724 for(i=0; i < cg.codesz; i++)
725 code->byteCode[i] = cg.byteCode[i];
726 code->nConstants = naVec_size(cg.consts);
727 code->constants = naAlloc(code->nConstants * sizeof(naRef));
728 code->srcFile = p->srcFile;
729 for(i=0; i<code->nConstants; i++)
730 code->constants[i] = getConstant(p, i);
731 code->nLines = p->cg->nextLineIp;
732 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
733 for(i=0; i<p->cg->nLineIps*2; i++)
734 code->lineIps[i] = p->cg->lineIps[i];