6 // These are more sensical predicate names in most contexts in this file
// LEFT and RIGHT name a token's `children` and `lastChild` pointers.
7 #define LEFT(tok) ((tok)->children)
8 #define RIGHT(tok) ((tok)->lastChild)
// BINARY holds only when both pointers are non-NULL and refer to different
// tokens. The argument is expanded several times, so pass a side-effect-free
// expression.
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
// These three are called by functions that appear earlier in the file than
// their definitions.
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
// Appends one code word to the bytecode buffer of the current generator
// (p->cg), enlarging the buffer first when it is already full.
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
// Grow: allocate twice the current capacity and copy the old words over.
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
// val is narrowed to unsigned short when it is stored.
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
// Emits an instruction together with an immediate argument; call sites pass
// an opcode as `val` and a constant index, jump address or count as `arg`.
// NOTE(review): the body is not visible in this listing — confirm the exact
// encoding against the full source.
28 static void emitImmediate(struct Parser* p, int val, int arg)
// Code generation for a two-operand operator node t; `op` is the opcode the
// caller selected for it (see the TOK_* -> OP_* mapping in genExpr).
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
// Both operands must exist; otherwise a parse error is raised at t's line.
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
// Appends c to the current generator's constant vector without looking for
// an existing equal entry; i is the position of the newly added element.
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
47 i = naVec_size(p->cg->consts) - 1;
// An index above 0xffff is reported as a parse error.
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
// Returns the element at position idx of the current generator's constant
// vector (p->cg->consts).
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
// Code objects are never shared: they always get a fresh slot through
// newConstant. For any other value the existing constants are examined and
// the index of a matching entry is returned; if none matches, c is added.
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
61 if(IS_CODE(c)) return newConstant(p, c);
// b is the already-stored constant at index i. It matches c when both are
// numbers with equal value, both are nil, or naStrEqual reports equality.
63 naRef b = naVec_get(p->cg->consts, i);
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
// Looks sym up in the global symbol table (globals->symbols). When it is not
// found, sym is stored in the table as both key and value.
// NOTE(review): the return statements are elided in this listing; presumably
// the stored entry is returned on a hit and sym itself on a miss — confirm.
71 naRef naInternSymbol(naRef sym)
74 if(naHash_get(globals->symbols, sym, &result))
76 naHash_set(globals->symbols, sym, sym);
// Converts token t into a constant value and returns the index that
// internConstant assigns to it. A token that is none of the handled kinds
// produces the "invalid/non-constant constant" parse error.
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
// String case (its condition is on an elided line): build a string object
// from the token text; symbols are additionally interned globally.
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 naHash_get(globals->symbols, c, &dummy); // noop, make c immutable
87 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
// Function literals become code objects; numeric literals become numbers.
88 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
89 else if(t->type == TOK_LITERAL) c = naNum(t->num);
90 else naParseError(p, "invalid/non-constant constant", t->line);
91 return internConstant(p, c);
// Emits code that pushes the constant described by token t. Non-string
// tokens (t->str == 0) whose value is 1 or 0 get a dedicated opcode; all
// other tokens are pushed with OP_PUSHCONST and their constant index.
// NOTE(review): the return statement is elided here; callers such as
// genLValue store the result as a constant index — confirm what is returned
// on the special-case paths.
94 static int genScalarConstant(struct Parser* p, struct Token* t)
96 // These opcodes are for special-case use in other constructs, but
97 // we might as well use them here to save a few bytes in the
98 // instruction stream.
99 if(t->str == 0 && t->num == 1) {
101 } else if(t->str == 0 && t->num == 0) {
102 emit(p, OP_PUSHZERO);
104 int idx = findConstantIndex(p, t);
105 emitImmediate(p, OP_PUSHCONST, idx);
// Generates the "target" half of an assignment to t. The int result is used
// by callers as the store opcode (genEqOp compares it with OP_SETMEMBER and
// OP_INSERT; genExpr emits it after the value). For symbol, member and `var`
// targets, *cidx receives the constant index of the symbol name.
// The return statements of the individual branches are elided in this listing.
111 static int genLValue(struct Parser* p, struct Token* t, int* cidx)
// Grouping parentheses (not the suffix/call form) are looked through.
113 if(t->type == TOK_LPAR && t->rule != PREC_SUFFIX) {
114 return genLValue(p, LEFT(t), cidx); // Handle stuff like "(a) = 1"
// Plain symbol target.
115 } else if(t->type == TOK_SYMBOL) {
116 *cidx = genScalarConstant(p, t);
// Member target: `obj.name`, where the right side must be a symbol.
118 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
120 *cidx = genScalarConstant(p, RIGHT(t));
// Subscript target: the index expression is RIGHT(t).
122 } else if(t->type == TOK_LBRA) {
124 genExpr(p, RIGHT(t));
// `var name` declaration target.
126 } else if(t->type == TOK_VAR && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
127 *cidx = genScalarConstant(p, RIGHT(t));
// Anything else cannot be assigned to.
130 naParseError(p, "bad lvalue", t->line);
// Code generation for a compound assignment; genExpr calls this with the
// arithmetic opcode matching the token (e.g. OP_PLUS for TOK_PLUSEQ).
// Visible steps: resolve the lvalue, load its current value in a way that
// depends on the kind of target, then evaluate the right-hand side.
// The lines that apply `op` and perform the store are elided in this listing.
135 static void genEqOp(int op, struct Parser* p, struct Token* t)
137 int cidx, setop = genLValue(p, LEFT(t), &cidx);
// Member target: the current value is read with OP_MEMBER.
138 if(setop == OP_SETMEMBER) {
141 emitImmediate(p, OP_MEMBER, cidx);
// Subscript target (handling elided).
142 } else if(setop == OP_INSERT) {
145 } else // OP_SETSYM, OP_SETLOCAL
146 emitImmediate(p, OP_LOCAL, cidx);
147 genExpr(p, RIGHT(t));
// Returns the constant index for the default value expression t of an
// optional function argument. A TOK_LPAR node is looked through by recursing
// on RIGHT(t); a TOK_MINUS whose operand is a non-string TOK_LITERAL is
// special-cased (see the comment inside); everything else is handed to
// findConstantIndex.
// NOTE(review): the closing line of the block comment below and the
// statement following it are elided in this listing.
152 static int defArg(struct Parser* p, struct Token* t)
154 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
155 if(t->type == TOK_MINUS && RIGHT(t) &&
156 RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str)
158 /* default arguments are constants, but "-1" parses as two
159 * tokens, so we have to subset the expression generator for that
162 return defArg(p, RIGHT(t));
164 return findConstantIndex(p, t);
// Walks the argument-list parse tree t of a function definition and records
// the declared parameters in code object c:
//   - TOK_SYMBOL            -> next slot of c->argSyms
//   - TOK_ASSIGN (a = dflt) -> next slot of c->optArgSyms / c->optArgVals
//   - TOK_ELLIPSIS (a...)   -> c->restArgSym, and c->needArgVector is set
//   - TOK_COMMA             -> recurse into the left, then the right subtree
//   - TOK_EMPTY             -> nothing to do
// Any other token is a "bad function argument expression" parse error.
167 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
170 if(t->type == TOK_EMPTY) return;
// restArgSym differs from globals->argRef only after a `name...` argument
// has been recorded; nothing may follow such an argument.
171 if(!IDENTICAL(c->restArgSym, globals->argRef))
172 naParseError(p, "remainder must be last", t->line);
173 if(t->type == TOK_ELLIPSIS) {
174 if(LEFT(t)->type != TOK_SYMBOL)
175 naParseError(p, "bad function argument expression", t->line);
176 sym = naStr_fromdata(naNewString(p->context),
177 LEFT(t)->str, LEFT(t)->strlen);
178 c->restArgSym = naInternSymbol(sym);
179 c->needArgVector = 1;
// NOTE(review): no check of c->nOptArgs against MAX_FUNARGS is visible before
// the two stores below, unlike the c->nArgs check further down — confirm the
// optArgSyms/optArgVals arrays cannot be overrun.
180 } else if(t->type == TOK_ASSIGN) {
181 if(LEFT(t)->type != TOK_SYMBOL)
182 naParseError(p, "bad function argument expression", t->line);
183 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
184 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
185 } else if(t->type == TOK_SYMBOL) {
187 naParseError(p, "optional arguments must be last", t->line);
188 if(c->nArgs >= MAX_FUNARGS)
189 naParseError(p, "too many named function arguments", t->line);
190 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
191 } else if(t->type == TOK_COMMA) {
192 genArgList(p, c, LEFT(t));
193 genArgList(p, c, RIGHT(t));
195 naParseError(p, "bad function argument expression", t->line);
// Compiles the function literal t into a code object by calling naCodeGen on
// the contents of its body block. RIGHT(t) must be a TOK_LCURL node. When
// LEFT(t) is a TOK_LPAR node its child is passed as the argument list,
// otherwise no argument list (0) is passed.
// NOTE(review): RIGHT(t) and LEFT(t) are dereferenced without a NULL check.
198 static naRef newLambda(struct Parser* p, struct Token* t)
200 struct CodeGenerator* cgSave;
202 struct Token* arglist;
203 if(RIGHT(t)->type != TOK_LCURL)
204 naParseError(p, "bad function definition", t->line);
206 // Save off the generator state while we do the new one
208 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
209 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
// Compiles function literal t and emits OP_PUSHCONST for it. The code object
// is added with newConstant, i.e. without searching for an existing entry.
214 static void genLambda(struct Parser* p, struct Token* t)
216 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
// Generates code for the comma-separated items under t and returns an int
// count; for a TOK_COMMA node the result is 1 plus the result for the
// right-hand remainder. When doAppend is nonzero, OP_VAPPEND is emitted as
// part of handling an item.
// The lines that generate the items and the other returns are elided here.
219 static int genList(struct Parser* p, struct Token* t, int doAppend)
221 if(t->type == TOK_COMMA) {
223 if(doAppend) emit(p, OP_VAPPEND);
224 return 1 + genList(p, RIGHT(t), doAppend);
225 } else if(t->type == TOK_EMPTY) {
229 if(doAppend) emit(p, OP_VAPPEND);
// Generates code for one `key : value` entry of a hash/object initializer.
// t must be a TOK_COLON node (the TOK_EMPTY case is handled on an elided
// line). The key is either a symbol, pushed as a scalar constant, or a
// literal, evaluated with genExpr; the value is an arbitrary expression.
234 static void genHashElem(struct Parser* p, struct Token* t)
236 if(t->type == TOK_EMPTY)
238 if(t->type != TOK_COLON)
239 naParseError(p, "bad hash/object initializer", t->line);
240 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
241 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
242 else naParseError(p, "bad hash/object initializer", t->line);
243 genExpr(p, RIGHT(t));
// Generates code for the entries of a hash/object initializer. A TOK_COMMA
// node contributes its left child as one entry and recurses on the right
// child; a non-empty, non-comma node is handled on elided lines.
247 static void genHash(struct Parser* p, struct Token* t)
249 if(t->type == TOK_COMMA) {
250 genHashElem(p, LEFT(t));
251 genHash(p, RIGHT(t));
252 } else if(t->type != TOK_EMPTY) {
// Generates code for a call expression t. When the callee is a TOK_DOT node
// the object expression is evaluated and the member is then fetched with
// OP_MEMBER. The arguments, if any, are generated by genList without
// appending, and the call opcode `op` is emitted with the argument count.
// The selection of `op` and the non-member path are elided in this listing.
257 static void genFuncall(struct Parser* p, struct Token* t)
261 if(LEFT(t)->type == TOK_DOT) {
262 genExpr(p, LEFT(LEFT(t)));
264 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
269 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
270 emitImmediate(p, op, nargs);
// Initializes the loop record at index p->cg->loopTop: both jump targets are
// set to the placeholder 0xffffff and the (possibly NULL) label token is
// stored. The update of loopTop itself is on elided lines.
273 static void pushLoop(struct Parser* p, struct Token* label)
275 int i = p->cg->loopTop;
276 p->cg->loops[i].breakIP = 0xffffff;
277 p->cg->loops[i].contIP = 0xffffff;
278 p->cg->loops[i].label = label;
// Counterpart of pushLoop. A negative loopTop is reported as an internal
// error ("BUG: ...") with line -1. Other lines of the body are elided.
283 static void popLoop(struct Parser* p)
286 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
290 // Emit a jump operation, and return the location of the address in
291 // the bytecode for future fixup in fixJumpTarget
292 static int emitJump(struct Parser* p, int op)
// The address word is written as 0xffff for now and patched later.
297 emit(p, 0xffff); // dummy address
301 // Points a previous jump instruction at the current "end-of-bytecode"
// `spot` is the bytecode index of the address word to overwrite, as returned
// by emitJump.
302 static void fixJumpTarget(struct Parser* p, int spot)
304 p->cg->byteCode[spot] = p->cg->codesz;
// Code generation for the short-circuit operators (genExpr calls this for
// TOK_AND and TOK_OR). A conditional jump — OP_JIFNOT for TOK_AND, OP_JIFTRUE
// otherwise — skips the evaluation of the right operand; its target is
// patched to the code position after that operand.
// The evaluation of the left operand is on elided lines.
307 static void genShortCircuit(struct Parser* p, struct Token* t)
311 end = emitJump(p, t->type == TOK_AND ? OP_JIFNOT : OP_JIFTRUE);
313 genExpr(p, RIGHT(t));
314 fixJumpTarget(p, end);
// Generates one if/elsif arm (tif) followed by whatever comes after it
// (telse): another TOK_ELSIF arm, handled recursively, or a final else block.
// Layout: test; OP_JIFNOTPOP to the next arm; body; OP_JMP to the end; next
// arm; end. The handling of a missing telse is on elided lines.
318 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
320 int jumpNext, jumpEnd;
321 genExpr(p, tif->children); // the test
322 jumpNext = emitJump(p, OP_JIFNOTPOP);
323 genExprList(p, tif->children->next->children); // the body
324 jumpEnd = emitJump(p, OP_JMP);
325 fixJumpTarget(p, jumpNext);
327 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
328 else genExprList(p, telse->children->children);
332 fixJumpTarget(p, jumpEnd);
// Entry point for an if statement: t is the `if` token and the token two
// `next` links after its first child is passed to genIf as the elsif/else
// continuation.
335 static void genIfElse(struct Parser* p, struct Token* t)
337 genIf(p, t, t->children->next->next);
// Code generation for the ternary `test ? a : b`. RIGHT(t) must be a
// TOK_COLON node whose children are the two result expressions.
// Layout: test; OP_JIFNOTPOP to the else part; "true" expression; OP_JMP to
// the end; "else" expression; end.
340 static void genQuestion(struct Parser* p, struct Token* t)
342 int jumpNext, jumpEnd;
343 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
344 naParseError(p, "invalid ?: expression", t->line);
345 genExpr(p, LEFT(t)); // the test
346 jumpNext = emitJump(p, OP_JIFNOTPOP);
347 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
348 jumpEnd = emitJump(p, OP_JMP);
349 fixJumpTarget(p, jumpNext);
350 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
351 fixJumpTarget(p, jumpEnd);
// Counts the TOK_SEMI nodes reached by starting at t and repeatedly following
// the last-child link; stops at NULL or at the first non-semicolon token.
354 static int countSemis(struct Token* t)
356 if(!t || t->type != TOK_SEMI) return 0;
357 return 1 + countSemis(RIGHT(t));
// Shared tail of all loop forms. loopTop is the bytecode position to jump
// back to for the next iteration; jumpEnd is the address slot (from emitJump)
// of the loop's exit jump. `update` may be NULL.
// Lines that push/pop the loop record are elided in this listing.
360 static void genLoop(struct Parser* p, struct Token* body,
361 struct Token* update, struct Token* label,
362 int loopTop, int jumpEnd)
364 int cont, jumpOverContinue;
// `break` targets the word just before the exit jump's address slot.
366 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
// Trampoline for `continue`: normal flow jumps over it, while a continue
// lands on the second OP_JMP, which is patched to the update code below.
368 jumpOverContinue = emitJump(p, OP_JMP);
369 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
370 cont = emitJump(p, OP_JMP);
371 fixJumpTarget(p, jumpOverContinue);
373 genExprList(p, body);
375 fixJumpTarget(p, cont);
// The update expression's value is discarded.
376 if(update) { genExpr(p, update); emit(p, OP_POP); }
377 emitImmediate(p, OP_JMPLOOP, loopTop);
378 fixJumpTarget(p, jumpEnd);
380 emit(p, OP_PUSHNIL); // Leave something on the stack
// Common code for `for` and `while`. The optional init expression runs once
// and its value is popped; loopTop marks the code position re-entered on each
// iteration; the exit jump is an OP_JIFNOTPOP whose slot is handed to genLoop
// together with body, update and label.
// The generation of the test expression is on an elided line.
383 static void genForWhile(struct Parser* p, struct Token* init,
384 struct Token* test, struct Token* update,
385 struct Token* body, struct Token* label)
387 int loopTop, jumpEnd;
388 if(init) { genExpr(p, init); emit(p, OP_POP); }
390 loopTop = p->cg->codesz;
392 jumpEnd = emitJump(p, OP_JIFNOTPOP);
393 genLoop(p, body, update, label, loopTop, jumpEnd);
// Code generation for `while`. The header is the child of LEFT(t); a
// semicolon in it introduces a loop label, which must be a TOK_SYMBOL, and
// too many semicolons are an error. The loop is generated by genForWhile with
// no init and no update expression.
// The branches that inspect `semis` are partly elided in this listing.
396 static void genWhile(struct Parser* p, struct Token* t)
398 struct Token *test=LEFT(t)->children, *body, *label=0;
399 int semis = countSemis(test);
402 if(!label || label->type != TOK_SYMBOL)
403 naParseError(p, "bad loop label", t->line);
407 naParseError(p, "too many semicolons in while test", t->line);
408 body = LEFT(RIGHT(t));
409 genForWhile(p, 0, test, 0, body, label);
// Code generation for `for`. h is the header tree under LEFT(t); the number
// of semicolons decides whether a leading label is present (it must then be a
// TOK_SYMBOL). Without a label the header must contain exactly two
// semicolons. init, test and update are picked out of the header and passed
// to genForWhile with the body.
// The label branch and the assignment of `init` are on elided lines.
412 static void genFor(struct Parser* p, struct Token* t)
414 struct Token *init, *test, *body, *update, *label=0;
415 struct Token *h = LEFT(t)->children;
416 int semis = countSemis(h);
418 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
419 naParseError(p, "bad loop label", h->line);
422 } else if(semis != 2) {
423 naParseError(p, "wrong number of terms in for header", t->line);
426 // Parse tree hell :)
428 test = LEFT(RIGHT(h));
429 update = RIGHT(RIGHT(h));
430 body = RIGHT(t)->children;
431 genForWhile(p, init, test, update, body, label);
// Code generation for the vector-iterating loops: OP_EACH is used when t is
// TOK_FOREACH, OP_INDEX otherwise. h is the header tree; an optional leading
// label must be a TOK_SYMBOL, and without a label the header must contain
// exactly one semicolon.
// The extraction of elem/vec, the evaluation of the vector and the store to
// the loop variable are on elided lines.
434 static void genForEach(struct Parser* p, struct Token* t)
436 int loopTop, jumpEnd, assignOp, dummy;
437 struct Token *elem, *body, *vec, *label=0;
438 struct Token *h = LEFT(LEFT(t));
439 int semis = countSemis(h);
441 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
442 naParseError(p, "bad loop label", h->line);
445 } else if (semis != 1) {
446 naParseError(p, "wrong number of terms in foreach header", t->line);
450 body = RIGHT(t)->children;
// A zero is pushed before the loop; per the final comment below, the stack
// holds "the vector and index" during iteration.
453 emit(p, OP_PUSHZERO);
455 loopTop = p->cg->codesz;
456 emit(p, t->type == TOK_FOREACH ? OP_EACH : OP_INDEX);
// OP_JIFEND is this loop's exit jump.
457 jumpEnd = emitJump(p, OP_JIFEND);
458 assignOp = genLValue(p, elem, &dummy);
462 genLoop(p, body, 0, label, loopTop, jumpEnd);
463 emit(p, OP_POP); // Pull off the vector and index
467 static int tokMatch(struct Token* a, struct Token* b)
469 int i, l = a->strlen;
470 if(!a || !b) return 0;
471 if(l != b->strlen) return 0;
472 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
// Code generation for `break` and `continue`, optionally with a label in
// RIGHT(t). With a label, the loop records are searched for a matching label
// token and `levels` becomes the number of loops to leave, counted from the
// innermost one; otherwise levels stays 1.
// Parts of the label search are on elided lines.
476 static void genBreakContinue(struct Parser* p, struct Token* t)
478 int levels = 1, loop = -1, bp, cp, i;
480 if(RIGHT(t)->type != TOK_SYMBOL)
481 naParseError(p, "bad break/continue label", t->line);
482 for(i=0; i<p->cg->loopTop; i++)
483 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
486 naParseError(p, "no match for break/continue label", t->line);
487 levels = p->cg->loopTop - loop;
// Jump targets recorded for the selected loop.
489 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
490 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
// One opcode per loop level: OP_BREAK2 for all but the last, then OP_BREAK.
491 for(i=0; i<levels; i++)
492 emit(p, (i<levels-1) ? OP_BREAK2 : OP_BREAK);
493 if(t->type == TOK_BREAK)
494 emit(p, OP_PUSHEND); // breakIP is always a JIFNOTPOP/JIFEND!
495 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
// Appends a (bytecode position, source line) pair to the generator's lineIps
// table, growing the table first when needed. Both values are stored as
// unsigned short.
498 static void newLineEntry(struct Parser* p, int line)
501 if(p->cg->nextLineIp >= p->cg->nLineIps) {
// Grow to 2*n+1 pairs; each pair occupies two unsigned shorts.
502 int nsz = p->cg->nLineIps*2 + 1;
503 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
// NOTE(review): nextLineIp is advanced once per stored value (twice per
// pair, see below), yet the copy bound here is nextLineIp*2 — verify this
// cannot read past the end of the old buffer.
504 for(i=0; i<(p->cg->nextLineIp*2); i++)
505 n[i] = p->cg->lineIps[i];
507 p->cg->nLineIps = nsz;
509 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
510 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
// Central expression generator: records line information for t and then
// dispatches on the token type to the specialised gen* routines.
// Most `case` labels of the switch are elided in this listing, so the
// fragments below appear without the label they belong to.
513 static void genExpr(struct Parser* p, struct Token* t)
516 if(!t) naParseError(p, "parse error", -1); // throw line -1...
517 p->errLine = t->line; // ...to use this one instead
// Add a line-table entry whenever the source line changes.
518 if(t->line != p->cg->lastLine)
519 newLineEntry(p, t->line);
520 p->cg->lastLine = t->line;
538 case TOK_BREAK: case TOK_CONTINUE:
539 genBreakContinue(p, t);
542 genExprList(p, LEFT(t));
548 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
549 else genExpr(p, LEFT(t)); // simple parenthesis
553 genBinOp(OP_EXTRACT, p, t); // a[i]
556 genList(p, LEFT(t), 1);
// Assignment: target first, then the value, then the store opcode that
// genLValue selected.
564 i = genLValue(p, LEFT(t), &dummy);
565 genExpr(p, RIGHT(t));
566 emit(p, i); // use the op appropriate to the lvalue
// The value is RIGHT(t) or nil; afterwards one OP_UNMARK is emitted per
// active loop.
569 if(RIGHT(t)) genExpr(p, RIGHT(t));
570 else emit(p, OP_PUSHNIL);
571 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
575 genExpr(p, RIGHT(t));
579 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
582 genScalarConstant(p, t);
586 genBinOp(OP_MINUS, p, t); // binary subtraction
587 } else if(RIGHT(t) && RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
588 RIGHT(t)->num *= -1; // Pre-negate constants
589 genScalarConstant(p, RIGHT(t));
591 genExpr(p, RIGHT(t)); // unary negation
596 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
// NOTE(review): when RIGHT(t) is NULL the condition below is true and the
// error call then reads RIGHT(t)->line through that NULL pointer; t->line
// would be a safe alternative.
601 if(!RIGHT(t) || RIGHT(t)->type != TOK_SYMBOL)
602 naParseError(p, "object field not symbol", RIGHT(t)->line);
603 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
605 case TOK_EMPTY: case TOK_NIL:
606 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
607 case TOK_AND: case TOK_OR:
608 genShortCircuit(p, t);
// Plain binary operators map one-to-one onto an opcode.
610 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
611 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
612 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
613 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
614 case TOK_LT: genBinOp(OP_LT, p, t); break;
615 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
616 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
617 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
618 case TOK_GT: genBinOp(OP_GT, p, t); break;
619 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
// Compound assignments reuse the arithmetic opcode of their operator.
620 case TOK_PLUSEQ: genEqOp(OP_PLUS, p, t); break;
621 case TOK_MINUSEQ: genEqOp(OP_MINUS, p, t); break;
622 case TOK_MULEQ: genEqOp(OP_MUL, p, t); break;
623 case TOK_DIVEQ: genEqOp(OP_DIV, p, t); break;
624 case TOK_CATEQ: genEqOp(OP_CAT, p, t); break;
626 naParseError(p, "parse error", t->line);
// Generates code for a sequence of expressions joined by TOK_SEMI nodes. For
// a semicolon node, the remainder in RIGHT(t) is processed recursively unless
// it is missing or TOK_EMPTY.
// The handling of the left operand and of non-semicolon tokens is elided.
630 static void genExprList(struct Parser* p, struct Token* t)
632 if(t && t->type == TOK_SEMI) {
634 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
636 genExprList(p, RIGHT(t));
// Compiles the statement tree `block` into a new code object, using
// `arglist` (when present) to describe the function's parameters.
// A local CodeGenerator is set up, the body is generated into its
// parse-allocated buffers, and the results are then copied into naAlloc'ed
// arrays owned by the code object.
// Several lines, including the installation of `cg` in the parser and the
// final return, are elided in this listing.
643 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
648 struct CodeGenerator cg;
651 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
652 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
654 cg.consts = naNewVector(p->context);
661 genExprList(p, block);
664 // Now make a code object
665 codeObj = naNewCode(p->context);
666 code = PTR(codeObj).code;
668 // Parse the argument list, if any
// Defaults: rest-argument symbol is globals->argRef, no named or optional
// arguments, and needArgVector set.
669 code->restArgSym = globals->argRef;
670 code->nArgs = code->nOptArgs = 0;
671 code->argSyms = code->optArgSyms = code->optArgVals = 0;
672 code->needArgVector = 1;
// Temporary MAX_FUNARGS-sized tables that genArgList fills in.
674 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
675 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
676 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
677 code->needArgVector = 0;
678 genArgList(p, code, arglist);
// Replace the temporary tables by exactly-sized naAlloc copies (or by 0).
681 nsyms = naAlloc(sizeof(int) * code->nArgs);
682 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
683 code->argSyms = nsyms;
684 } else code->argSyms = 0;
686 int i, *nsyms, *nvals;
687 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
688 nvals = naAlloc(sizeof(int) * code->nOptArgs);
689 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
690 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
691 code->optArgSyms = nsyms;
692 code->optArgVals = nvals;
693 } else code->optArgSyms = code->optArgVals = 0;
// Copy bytecode, constants and the line table into the code object.
696 code->codesz = cg.codesz;
697 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
698 for(i=0; i < cg.codesz; i++)
699 code->byteCode[i] = cg.byteCode[i];
700 code->nConstants = naVec_size(cg.consts);
701 code->constants = naAlloc(code->nConstants * sizeof(naRef));
702 code->srcFile = p->srcFile;
703 for(i=0; i<code->nConstants; i++)
704 code->constants[i] = getConstant(p, i);
// NOTE(review): nLines is taken from nextLineIp (values in use), while the
// allocation and copy below are sized by nLineIps*2 (table capacity), so
// slots that were never written may be copied — confirm this is intended.
705 code->nLines = p->cg->nextLineIp;
706 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
707 for(i=0; i<p->cg->nLineIps*2; i++)
708 code->lineIps[i] = p->cg->lineIps[i];