6 // These are more sensical predicate names in most contexts in this file
7 #define LEFT(tok) ((tok)->children)
8 #define RIGHT(tok) ((tok)->lastChild)
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
28 static void emitImmediate(struct Parser* p, int val, int arg)
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
47 i = naVec_size(p->cg->consts) - 1;
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
61 if(IS_CODE(c)) return newConstant(p, c);
63 naRef b = naVec_get(p->cg->consts, i);
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
71 naRef naInternSymbol(naRef sym)
74 if(naHash_get(globals->symbols, sym, &result))
76 naHash_set(globals->symbols, sym, sym);
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 naHash_get(globals->symbols, c, &dummy); // noop, make c immutable
87 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
88 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
89 else if(t->type == TOK_LITERAL) c = naNum(t->num);
90 else naParseError(p, "invalid/non-constant constant", t->line);
91 return internConstant(p, c);
94 static int lastExprInBlock(struct Token* t)
96 if(!t->parent) return 1;
97 if(t->parent->type == TOK_TOP || t->parent->type == TOK_LCURL) return 1;
98 if(t->parent->type == TOK_SEMI)
99 if(!t->next || t->next->type == TOK_EMPTY)
104 // Returns true if the node is in "tail context" -- either a child of
105 // a return, the last child of a func block, or else the
106 // last child of an if/elsif/if that is itself in tail context.
107 static int tailContext(struct Token* t)
109 if(t->parent && t->parent->type == TOK_RETURN)
111 else if(!lastExprInBlock(t))
114 // Walk up the tree. It is ok to see semicolons, else's, elsifs
115 // and curlies. If we reach the top or a func, then we are in
116 // tail context. If we hit an if, then we are in tail context
117 // only if the "if" node is.
118 while((t = t->parent) != 0)
120 case TOK_SEMI: case TOK_LCURL: break;
121 case TOK_ELSE: case TOK_ELSIF: break;
122 case TOK_TOP: case TOK_FUNC: return 1;
123 case TOK_IF: return tailContext(t);
129 static int genScalarConstant(struct Parser* p, struct Token* t)
131 // These opcodes are for special-case use in other constructs, but
132 // we might as well use them here to save a few bytes in the
133 // instruction stream.
134 if(t->str == 0 && t->num == 1) {
136 } else if(t->str == 0 && t->num == 0) {
137 emit(p, OP_PUSHZERO);
139 int idx = findConstantIndex(p, t);
140 emitImmediate(p, OP_PUSHCONST, idx);
146 static int genLValue(struct Parser* p, struct Token* t, int* cidx)
148 if(t->type == TOK_LPAR) {
149 return genLValue(p, LEFT(t), cidx); // Handle stuff like "(a) = 1"
150 } else if(t->type == TOK_SYMBOL) {
151 *cidx = genScalarConstant(p, t);
153 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
155 *cidx = genScalarConstant(p, RIGHT(t));
157 } else if(t->type == TOK_LBRA) {
159 genExpr(p, RIGHT(t));
161 } else if(t->type == TOK_VAR && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
162 *cidx = genScalarConstant(p, RIGHT(t));
165 naParseError(p, "bad lvalue", t->line);
170 static void genEqOp(int op, struct Parser* p, struct Token* t)
172 int cidx, setop = genLValue(p, LEFT(t), &cidx);
173 if(setop == OP_SETMEMBER) {
176 emitImmediate(p, OP_MEMBER, cidx);
177 } else if(setop == OP_INSERT) {
180 } else // OP_SETSYM, OP_SETLOCAL
181 emitImmediate(p, OP_LOCAL, cidx);
182 genExpr(p, RIGHT(t));
187 static int defArg(struct Parser* p, struct Token* t)
189 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
190 return findConstantIndex(p, t);
193 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
196 if(t->type == TOK_EMPTY) return;
197 if(!IDENTICAL(c->restArgSym, globals->argRef))
198 naParseError(p, "remainder must be last", t->line);
199 if(t->type == TOK_ELLIPSIS) {
200 if(LEFT(t)->type != TOK_SYMBOL)
201 naParseError(p, "bad function argument expression", t->line);
202 sym = naStr_fromdata(naNewString(p->context),
203 LEFT(t)->str, LEFT(t)->strlen);
204 c->restArgSym = naInternSymbol(sym);
205 c->needArgVector = 1;
206 } else if(t->type == TOK_ASSIGN) {
207 if(LEFT(t)->type != TOK_SYMBOL)
208 naParseError(p, "bad function argument expression", t->line);
209 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
210 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
211 } else if(t->type == TOK_SYMBOL) {
213 naParseError(p, "optional arguments must be last", t->line);
214 if(c->nArgs >= MAX_FUNARGS)
215 naParseError(p, "too many named function arguments", t->line);
216 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
217 } else if(t->type == TOK_COMMA) {
218 genArgList(p, c, LEFT(t));
219 genArgList(p, c, RIGHT(t));
221 naParseError(p, "bad function argument expression", t->line);
224 static naRef newLambda(struct Parser* p, struct Token* t)
226 struct CodeGenerator* cgSave;
228 struct Token* arglist;
229 if(RIGHT(t)->type != TOK_LCURL)
230 naParseError(p, "bad function definition", t->line);
232 // Save off the generator state while we do the new one
234 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
235 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
240 static void genLambda(struct Parser* p, struct Token* t)
242 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
245 static int genList(struct Parser* p, struct Token* t, int doAppend)
247 if(t->type == TOK_COMMA) {
249 if(doAppend) emit(p, OP_VAPPEND);
250 return 1 + genList(p, RIGHT(t), doAppend);
251 } else if(t->type == TOK_EMPTY) {
255 if(doAppend) emit(p, OP_VAPPEND);
260 static void genHashElem(struct Parser* p, struct Token* t)
262 if(t->type == TOK_EMPTY)
264 if(t->type != TOK_COLON)
265 naParseError(p, "bad hash/object initializer", t->line);
266 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
267 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
268 else naParseError(p, "bad hash/object initializer", t->line);
269 genExpr(p, RIGHT(t));
273 static void genHash(struct Parser* p, struct Token* t)
275 if(t->type == TOK_COMMA) {
276 genHashElem(p, LEFT(t));
277 genHash(p, RIGHT(t));
278 } else if(t->type != TOK_EMPTY) {
283 static void genFuncall(struct Parser* p, struct Token* t)
287 if(LEFT(t)->type == TOK_DOT) {
288 genExpr(p, LEFT(LEFT(t)));
290 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
295 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
297 op = op == OP_FCALL ? OP_FTAIL : OP_MTAIL;
298 emitImmediate(p, op, nargs);
301 static void pushLoop(struct Parser* p, struct Token* label)
303 int i = p->cg->loopTop;
304 p->cg->loops[i].breakIP = 0xffffff;
305 p->cg->loops[i].contIP = 0xffffff;
306 p->cg->loops[i].label = label;
311 static void popLoop(struct Parser* p)
314 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
318 // Emit a jump operation, and return the location of the address in
319 // the bytecode for future fixup in fixJumpTarget
320 static int emitJump(struct Parser* p, int op)
325 emit(p, 0xffff); // dummy address
329 // Points a previous jump instruction at the current "end-of-bytecode"
330 static void fixJumpTarget(struct Parser* p, int spot)
332 p->cg->byteCode[spot] = p->cg->codesz;
335 static void genShortCircuit(struct Parser* p, struct Token* t)
337 int jumpNext, jumpEnd, isAnd = (t->type == TOK_AND);
339 if(isAnd) emit(p, OP_NOT);
340 jumpNext = emitJump(p, OP_JIFNOT);
341 emit(p, isAnd ? OP_PUSHNIL : OP_PUSHONE);
342 jumpEnd = emitJump(p, OP_JMP);
343 fixJumpTarget(p, jumpNext);
344 genExpr(p, RIGHT(t));
345 fixJumpTarget(p, jumpEnd);
349 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
351 int jumpNext, jumpEnd;
352 genExpr(p, tif->children); // the test
353 jumpNext = emitJump(p, OP_JIFNOT);
354 genExprList(p, tif->children->next->children); // the body
355 jumpEnd = emitJump(p, OP_JMP);
356 fixJumpTarget(p, jumpNext);
358 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
359 else genExprList(p, telse->children->children);
363 fixJumpTarget(p, jumpEnd);
366 static void genIfElse(struct Parser* p, struct Token* t)
368 genIf(p, t, t->children->next->next);
371 static void genQuestion(struct Parser* p, struct Token* t)
373 int jumpNext, jumpEnd;
374 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
375 naParseError(p, "invalid ?: expression", t->line);
376 genExpr(p, LEFT(t)); // the test
377 jumpNext = emitJump(p, OP_JIFNOT);
378 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
379 jumpEnd = emitJump(p, OP_JMP);
380 fixJumpTarget(p, jumpNext);
381 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
382 fixJumpTarget(p, jumpEnd);
385 static int countSemis(struct Token* t)
387 if(!t || t->type != TOK_SEMI) return 0;
388 return 1 + countSemis(RIGHT(t));
391 static void genLoop(struct Parser* p, struct Token* body,
392 struct Token* update, struct Token* label,
393 int loopTop, int jumpEnd)
395 int cont, jumpOverContinue;
397 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
399 jumpOverContinue = emitJump(p, OP_JMP);
400 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
401 cont = emitJump(p, OP_JMP);
402 fixJumpTarget(p, jumpOverContinue);
404 genExprList(p, body);
406 fixJumpTarget(p, cont);
407 if(update) { genExpr(p, update); emit(p, OP_POP); }
408 emitImmediate(p, OP_JMPLOOP, loopTop);
409 fixJumpTarget(p, jumpEnd);
411 emit(p, OP_PUSHNIL); // Leave something on the stack
414 static void genForWhile(struct Parser* p, struct Token* init,
415 struct Token* test, struct Token* update,
416 struct Token* body, struct Token* label)
418 int loopTop, jumpEnd;
419 if(init) { genExpr(p, init); emit(p, OP_POP); }
421 loopTop = p->cg->codesz;
423 jumpEnd = emitJump(p, OP_JIFNOT);
424 genLoop(p, body, update, label, loopTop, jumpEnd);
427 static void genWhile(struct Parser* p, struct Token* t)
429 struct Token *test=LEFT(t)->children, *body, *label=0;
430 int semis = countSemis(test);
433 if(!label || label->type != TOK_SYMBOL)
434 naParseError(p, "bad loop label", t->line);
438 naParseError(p, "too many semicolons in while test", t->line);
439 body = LEFT(RIGHT(t));
440 genForWhile(p, 0, test, 0, body, label);
443 static void genFor(struct Parser* p, struct Token* t)
445 struct Token *init, *test, *body, *update, *label=0;
446 struct Token *h = LEFT(t)->children;
447 int semis = countSemis(h);
449 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
450 naParseError(p, "bad loop label", h->line);
453 } else if(semis != 2) {
454 naParseError(p, "wrong number of terms in for header", t->line);
457 // Parse tree hell :)
459 test = LEFT(RIGHT(h));
460 update = RIGHT(RIGHT(h));
461 body = RIGHT(t)->children;
462 genForWhile(p, init, test, update, body, label);
465 static void genForEach(struct Parser* p, struct Token* t)
467 int loopTop, jumpEnd, assignOp, dummy;
468 struct Token *elem, *body, *vec, *label=0;
469 struct Token *h = LEFT(LEFT(t));
470 int semis = countSemis(h);
472 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
473 naParseError(p, "bad loop label", h->line);
476 } else if (semis != 1) {
477 naParseError(p, "wrong number of terms in foreach header", t->line);
481 body = RIGHT(t)->children;
485 emit(p, OP_PUSHZERO);
486 loopTop = p->cg->codesz;
487 emit(p, t->type == TOK_FOREACH ? OP_EACH : OP_INDEX);
488 jumpEnd = emitJump(p, OP_JIFNIL);
489 assignOp = genLValue(p, elem, &dummy);
493 genLoop(p, body, 0, label, loopTop, jumpEnd);
496 static int tokMatch(struct Token* a, struct Token* b)
498 int i, l = a->strlen;
499 if(!a || !b) return 0;
500 if(l != b->strlen) return 0;
501 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
505 static void genBreakContinue(struct Parser* p, struct Token* t)
507 int levels = 1, loop = -1, bp, cp, i;
509 if(RIGHT(t)->type != TOK_SYMBOL)
510 naParseError(p, "bad break/continue label", t->line);
511 for(i=0; i<p->cg->loopTop; i++)
512 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
515 naParseError(p, "no match for break/continue label", t->line);
516 levels = p->cg->loopTop - loop;
518 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
519 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
520 for(i=0; i<levels; i++)
522 if(t->type == TOK_BREAK)
523 emit(p, OP_PUSHNIL); // breakIP is always a JIFNOT/JIFNIL!
524 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
527 static void newLineEntry(struct Parser* p, int line)
530 if(p->cg->nextLineIp >= p->cg->nLineIps) {
531 int nsz = p->cg->nLineIps*2 + 1;
532 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
533 for(i=0; i<(p->cg->nextLineIp*2); i++)
534 n[i] = p->cg->lineIps[i];
536 p->cg->nLineIps = nsz;
538 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
539 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
542 static void genExpr(struct Parser* p, struct Token* t)
545 if(t->line != p->cg->lastLine)
546 newLineEntry(p, t->line);
547 p->cg->lastLine = t->line;
565 case TOK_BREAK: case TOK_CONTINUE:
566 genBreakContinue(p, t);
569 genExprList(p, LEFT(t));
575 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
576 else genExpr(p, LEFT(t)); // simple parenthesis
580 genBinOp(OP_EXTRACT, p, t); // a[i]
583 genList(p, LEFT(t), 1);
591 i = genLValue(p, LEFT(t), &dummy);
592 genExpr(p, RIGHT(t));
593 emit(p, i); // use the op appropriate to the lvalue
596 if(RIGHT(t)) genExpr(p, RIGHT(t));
597 else emit(p, OP_PUSHNIL);
598 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
602 genExpr(p, RIGHT(t));
606 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
609 genScalarConstant(p, t);
613 genBinOp(OP_MINUS, p, t); // binary subtraction
614 } else if(RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
615 RIGHT(t)->num *= -1; // Pre-negate constants
616 genScalarConstant(p, RIGHT(t));
618 genExpr(p, RIGHT(t)); // unary negation
623 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
628 if(RIGHT(t)->type != TOK_SYMBOL)
629 naParseError(p, "object field not symbol", RIGHT(t)->line);
630 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
632 case TOK_EMPTY: case TOK_NIL:
633 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
634 case TOK_AND: case TOK_OR:
635 genShortCircuit(p, t);
637 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
638 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
639 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
640 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
641 case TOK_LT: genBinOp(OP_LT, p, t); break;
642 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
643 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
644 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
645 case TOK_GT: genBinOp(OP_GT, p, t); break;
646 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
647 case TOK_PLUSEQ: genEqOp(OP_PLUS, p, t); break;
648 case TOK_MINUSEQ: genEqOp(OP_MINUS, p, t); break;
649 case TOK_MULEQ: genEqOp(OP_MUL, p, t); break;
650 case TOK_DIVEQ: genEqOp(OP_DIV, p, t); break;
651 case TOK_CATEQ: genEqOp(OP_CAT, p, t); break;
653 naParseError(p, "parse error", t->line);
657 static void genExprList(struct Parser* p, struct Token* t)
659 if(t->type == TOK_SEMI) {
661 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
663 genExprList(p, RIGHT(t));
670 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
675 struct CodeGenerator cg;
678 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
679 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
681 cg.consts = naNewVector(p->context);
688 genExprList(p, block);
691 // Now make a code object
692 codeObj = naNewCode(p->context);
693 code = codeObj.ref.ptr.code;
695 // Parse the argument list, if any
696 code->restArgSym = globals->argRef;
697 code->nArgs = code->nOptArgs = 0;
698 code->argSyms = code->optArgSyms = code->optArgVals = 0;
699 code->needArgVector = 1;
701 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
702 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
703 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
704 code->needArgVector = 0;
705 genArgList(p, code, arglist);
708 nsyms = naAlloc(sizeof(int) * code->nArgs);
709 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
710 code->argSyms = nsyms;
711 } else code->argSyms = 0;
713 int i, *nsyms, *nvals;
714 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
715 nvals = naAlloc(sizeof(int) * code->nOptArgs);
716 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
717 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
718 code->optArgSyms = nsyms;
719 code->optArgVals = nvals;
720 } else code->optArgSyms = code->optArgVals = 0;
723 code->codesz = cg.codesz;
724 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
725 for(i=0; i < cg.codesz; i++)
726 code->byteCode[i] = cg.byteCode[i];
727 code->nConstants = naVec_size(cg.consts);
728 code->constants = naAlloc(code->nConstants * sizeof(naRef));
729 code->srcFile = p->srcFile;
730 for(i=0; i<code->nConstants; i++)
731 code->constants[i] = getConstant(p, i);
732 code->nLines = p->cg->nextLineIp;
733 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
734 for(i=0; i<p->cg->nLineIps*2; i++)
735 code->lineIps[i] = p->cg->lineIps[i];