// Shorthand accessors for a token's parse-tree children:
//   LEFT(tok)   expands to tok->children (the head of the child list)
//   RIGHT(tok)  expands to tok->lastChild
//   BINARY(tok) is true only when both exist and are two different nodes
// BINARY evaluates its argument several times, so do not pass it an
// expression with side effects.
6 // These are more sensical predicate names in most contexts in this file
7 #define LEFT(tok) ((tok)->children)
8 #define RIGHT(tok) ((tok)->lastChild)
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
// Each of these is called by code that appears earlier in the file than
// its definition.
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
// NOTE(review): only the signature is visible in this view. Judging from
// call sites such as emitImmediate(p, OP_PUSHCONST, idx), `val` is the
// opcode and `arg` its immediate operand -- confirm against the body.
28 static void emitImmediate(struct Parser* p, int val, int arg)
// Generates code for a two-operand operator node.
// op: opcode of the operation
// t:  operator token; its LEFT and RIGHT children are the operands
// A missing operand on either side is reported as an "empty subexpression"
// parse error at the token's line.
// NOTE(review): the code that generates the operands and emits op is not
// visible in this view.
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
// Appends c to the current generator's constants vector (no duplicate
// check) and computes its index as the vector size minus one.
// An index above 0xffff is reported as a parse error with line 0; note that
// the check runs after the append.
// NOTE(review): the return statement is not visible in this view; callers
// use the result as the index of the new constant.
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
47 i = naVec_size(p->cg->consts) - 1;
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
// Code objects are never shared: they always get a fresh slot through
// newConstant(). Otherwise an existing entry b is reused when it equals c:
// two numbers with the same value, nil with nil, or a naStrEqual() match.
// When nothing matches, c is added as a new constant.
// NOTE(review): the loop header that drives `i` is not visible in this
// view; presumably it scans the n constants present on entry.
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
61 if(IS_CODE(c)) return newConstant(p, c);
63 naRef b = naVec_get(p->cg->consts, i);
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
// Looks sym up in the global symbol table (globals->symbols) and, on the
// path that reaches naHash_set(), stores sym there as both key and value.
// NOTE(review): the return statements are not visible in this view;
// presumably the table's existing entry is returned on a hit and sym
// itself after insertion -- confirm.
71 naRef naInternSymbol(naRef sym)
74 if(naHash_get(globals->symbols, sym, &result))
76 naHash_set(globals->symbols, sym, sym);
// Builds the constant value described by token t and returns its index in
// the current constant table, as produced by internConstant().
//   TOK_NIL      -> nil
//   string data  -> a new string made from t->str / t->strlen; when the
//                   token is a TOK_SYMBOL it is also passed through
//                   naInternSymbol()
//   TOK_FUNC     -> the code object returned by newLambda()
//   TOK_LITERAL  -> the number t->num
// Any other token is reported as "invalid/non-constant constant".
// NOTE(review): the condition that guards the string branch is not visible
// in this view.
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 naHash_get(globals->symbols, c, &dummy); // noop, make c immutable
87 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
88 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
89 else if(t->type == TOK_LITERAL) c = naNum(t->num);
90 else naParseError(p, "invalid/non-constant constant", t->line);
91 return internConstant(p, c);
// Emits code that pushes the scalar constant held by token t.
// Tokens without string data (t->str == 0) whose number is exactly 1 or 0
// take a dedicated-opcode path (OP_PUSHZERO for 0); everything else is
// pushed with OP_PUSHCONST and an index from findConstantIndex().
// NOTE(review): the statement for the num == 1 case and the return
// statements are not visible in this view. genLValue stores this
// function's result as a constant index.
94 static int genScalarConstant(struct Parser* p, struct Token* t)
96 // These opcodes are for special-case use in other constructs, but
97 // we might as well use them here to save a few bytes in the
98 // instruction stream.
99 if(t->str == 0 && t->num == 1) {
101 } else if(t->str == 0 && t->num == 0) {
102 emit(p, OP_PUSHZERO);
104 int idx = findConstantIndex(p, t);
105 emitImmediate(p, OP_PUSHCONST, idx);
// Generates code for the target of an assignment and returns the opcode
// the caller must emit to perform the store (genExpr emits the result
// after the value; genEqOp compares it against OP_SETMEMBER and OP_INSERT).
// p:    parser
// t:    the lvalue expression; a null t is a "bad lvalue" error
// cidx: out-parameter receiving the constant index of the symbol or member
//       name where one applies; the TOK_LBRA branch does not write it as
//       far as this view shows
// Accepted shapes: a parenthesised lvalue (not a PREC_SUFFIX paren), a
// plain symbol, a TOK_DOT with a symbol on the right, a TOK_LBRA node, and
// a TOK_VAR with a symbol on the right. Anything else is "bad lvalue".
// NOTE(review): the return statements and some emitted code of the
// individual branches are not visible in this view.
111 static int genLValue(struct Parser* p, struct Token* t, int* cidx)
113 if(!t) naParseError(p, "bad lvalue", -1);
114 if(t->type == TOK_LPAR && t->rule != PREC_SUFFIX) {
115 return genLValue(p, LEFT(t), cidx); // Handle stuff like "(a) = 1"
116 } else if(t->type == TOK_SYMBOL) {
117 *cidx = genScalarConstant(p, t);
119 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
121 *cidx = genScalarConstant(p, RIGHT(t));
123 } else if(t->type == TOK_LBRA) {
125 genExpr(p, RIGHT(t));
127 } else if(t->type == TOK_VAR && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
128 *cidx = genScalarConstant(p, RIGHT(t));
131 naParseError(p, "bad lvalue", t->line);
// Generates code for a compound assignment (the TOK_PLUSEQ, TOK_MINUSEQ,
// TOK_MULEQ, TOK_DIVEQ and TOK_CATEQ cases of genExpr).
// op: the opcode of the underlying operation (OP_PLUS, OP_MINUS, ...)
// t:  operator token; LEFT(t) is the target, RIGHT(t) the operand
// The target is set up through genLValue(). Depending on the store opcode
// it returns, the target's current value is fetched with OP_MEMBER (for
// OP_SETMEMBER) or OP_LOCAL (plain symbols) before the right operand is
// generated.
// NOTE(review): the OP_INSERT branch body and the final emission of op and
// of the store opcode are not visible in this view.
136 static void genEqOp(int op, struct Parser* p, struct Token* t)
138 int cidx, setop = genLValue(p, LEFT(t), &cidx);
139 if(setop == OP_SETMEMBER) {
142 emitImmediate(p, OP_MEMBER, cidx);
143 } else if(setop == OP_INSERT) {
146 } else // OP_SETSYM, OP_SETLOCAL
147 emitImmediate(p, OP_LOCAL, cidx);
148 genExpr(p, RIGHT(t));
// Returns the constant-table index for the default value of an optional
// function argument (see the TOK_ASSIGN case of genArgList).
// A parenthesised value is unwrapped by recursing on its right child; the
// general case defers to findConstantIndex().
// NOTE(review): the block comment opened below has no visible terminator
// in this view, and the negative-literal branch is only partly visible.
153 static int defArg(struct Parser* p, struct Token* t)
155 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
// A minus token whose right child is a numeric (non-string) literal:
156 if(t->type == TOK_MINUS && RIGHT(t) &&
157 RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str)
159 /* default arguments are constants, but "-1" parses as two
160 * tokens, so we have to subset the expression generator for that
163 return defArg(p, RIGHT(t));
165 return findConstantIndex(p, t);
168 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
171 if(t->type == TOK_EMPTY) return;
172 if(!IDENTICAL(c->restArgSym, globals->argRef))
173 naParseError(p, "remainder must be last", t->line);
174 if(t->type == TOK_ELLIPSIS) {
175 if(LEFT(t)->type != TOK_SYMBOL)
176 naParseError(p, "bad function argument expression", t->line);
177 sym = naStr_fromdata(naNewString(p->context),
178 LEFT(t)->str, LEFT(t)->strlen);
179 c->restArgSym = naInternSymbol(sym);
180 c->needArgVector = 1;
181 } else if(t->type == TOK_ASSIGN) {
182 if(LEFT(t)->type != TOK_SYMBOL)
183 naParseError(p, "bad function argument expression", t->line);
184 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
185 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
186 } else if(t->type == TOK_SYMBOL) {
188 naParseError(p, "optional arguments must be last", t->line);
189 if(c->nArgs >= MAX_FUNARGS)
190 naParseError(p, "too many named function arguments", t->line);
191 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
192 } else if(t->type == TOK_COMMA) {
193 genArgList(p, c, LEFT(t));
194 genArgList(p, c, RIGHT(t));
196 naParseError(p, "bad function argument expression", t->line);
// Compiles a function literal into a code object by running naCodeGen() on
// its body.
// t: the function token. Its right child must be a TOK_LCURL node, whose
//    left child is the body handed to naCodeGen(). When the left child of
//    t is a TOK_LPAR, that node's left child is passed as the argument
//    list; otherwise the argument list is null.
// NOTE(review): LEFT(t) and RIGHT(t) are dereferenced without a null
// check. The save/restore of the generator state and the return statement
// are not visible in this view.
199 static naRef newLambda(struct Parser* p, struct Token* t)
201 struct CodeGenerator* cgSave;
203 struct Token* arglist;
204 if(RIGHT(t)->type != TOK_LCURL)
205 naParseError(p, "bad function definition", t->line);
207 // Save off the generator state while we do the new one
209 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
210 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
215 static void genLambda(struct Parser* p, struct Token* t)
217 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
// Generates code for a comma-separated expression list and returns a count
// of its elements: the comma case returns 1 plus the count of the list in
// its right child.
// doAppend: when non-zero, OP_VAPPEND is emitted after an element so that
//           it is appended to a vector being built (genExpr calls this
//           with doAppend = 1 for vector literals and genFuncall with 0).
// NOTE(review): the generation of the elements themselves and the return
// values of the TOK_EMPTY and single-element cases are not visible in this
// view.
220 static int genList(struct Parser* p, struct Token* t, int doAppend)
222 if(t->type == TOK_COMMA) {
224 if(doAppend) emit(p, OP_VAPPEND);
225 return 1 + genList(p, RIGHT(t), doAppend);
226 } else if(t->type == TOK_EMPTY) {
230 if(doAppend) emit(p, OP_VAPPEND);
// Generates code for one "key : value" element of a hash initializer.
// t must be a TOK_COLON node; anything else (apart from TOK_EMPTY, which
// has its own branch) is a "bad hash/object initializer" error.
// The key must be a symbol (pushed as a scalar constant) or a literal
// (generated as an expression); the value is an arbitrary expression.
// NOTE(review): the body of the TOK_EMPTY branch and whatever follows the
// value expression are not visible in this view.
235 static void genHashElem(struct Parser* p, struct Token* t)
237 if(t->type == TOK_EMPTY)
239 if(t->type != TOK_COLON)
240 naParseError(p, "bad hash/object initializer", t->line);
241 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
242 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
243 else naParseError(p, "bad hash/object initializer", t->line);
244 genExpr(p, RIGHT(t));
// Generates code for the element list of a hash initializer.
// A TOK_COMMA node has one element on the left and the rest of the list on
// the right, which is handled recursively.
// NOTE(review): the body of the non-empty, non-comma branch is not visible
// in this view; presumably it generates t as a single element.
248 static void genHash(struct Parser* p, struct Token* t)
250 if(t->type == TOK_COMMA) {
251 genHashElem(p, LEFT(t));
252 genHash(p, RIGHT(t));
253 } else if(t->type != TOK_EMPTY) {
// Generates code for a call expression.
// t: the call token; LEFT(t) is the callee and RIGHT(t), when present, the
//    argument list.
// When the callee is a TOK_DOT expression, the object on its left is
// generated first and the member named on its right is then fetched with
// OP_MEMBER. Arguments are generated with genList() (no vector append) and
// their count becomes the immediate operand of the call opcode.
// NOTE(review): the declarations of op and nargs, the choice of opcode and
// the non-member callee path are not visible in this view.
258 static void genFuncall(struct Parser* p, struct Token* t)
262 if(LEFT(t)->type == TOK_DOT) {
263 genExpr(p, LEFT(LEFT(t)));
265 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
270 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
271 emitImmediate(p, op, nargs);
// Initialises the loop record at index loopTop of the generator's loop
// stack: both the break and the continue address start out as 0xffffff
// (genLoop later overwrites them) and the optional label token is
// remembered for labelled break/continue.
// NOTE(review): the statements that follow (presumably advancing loopTop)
// are not visible in this view, and no bound check on the loops array is
// visible either.
274 static void pushLoop(struct Parser* p, struct Token* label)
276 int i = p->cg->loopTop;
277 p->cg->loops[i].breakIP = 0xffffff;
278 p->cg->loops[i].contIP = 0xffffff;
279 p->cg->loops[i].label = label;
// Leaves the innermost loop. A negative loopTop is treated as an internal
// error and reported as "BUG: loop stack underflow".
// NOTE(review): the statement that adjusts loopTop, and anything emitted
// afterwards, is not visible in this view.
284 static void popLoop(struct Parser* p)
287 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
291 // Emit a jump operation, and return the location of the address in
292 // the bytecode for future fixup in fixJumpTarget
// op: the jump opcode. The address word is first written as the
// placeholder 0xffff.
// NOTE(review): the emission of op and the return statement are not
// visible in this view.
293 static int emitJump(struct Parser* p, int op)
298 emit(p, 0xffff); // dummy address
302 // Points a previous jump instruction at the current "end-of-bytecode"
303 static void fixJumpTarget(struct Parser* p, int spot)
305 p->cg->byteCode[spot] = p->cg->codesz;
// Generates code for the short-circuiting TOK_AND / TOK_OR operators.
// A conditional jump (OP_JIFNOT for TOK_AND, OP_JIFTRUE otherwise) skips
// the right operand; its target is patched to the code position after the
// right operand.
// NOTE(review): the generation of the left operand and any stack
// adjustment between the jump and the right operand are not visible in
// this view.
308 static void genShortCircuit(struct Parser* p, struct Token* t)
312 end = emitJump(p, t->type == TOK_AND ? OP_JIFNOT : OP_JIFTRUE);
314 genExpr(p, RIGHT(t));
315 fixJumpTarget(p, end);
// Generates code for one if/elsif clause and whatever follows it.
// tif:   the clause token; its first child is the test and the children of
//        the next sibling of the test form the body
// telse: the clause after this one; a TOK_ELSIF is handled by recursion,
//        anything else is treated as a final else block
// Layout: test, OP_JIFNOTPOP to the next clause, body, OP_JMP to the end,
// then the code for the following clause(s).
// NOTE(review): the handling of a missing telse is not visible in this
// view.
319 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
321 int jumpNext, jumpEnd;
322 genExpr(p, tif->children); // the test
323 jumpNext = emitJump(p, OP_JIFNOTPOP);
324 genExprList(p, tif->children->next->children); // the body
325 jumpEnd = emitJump(p, OP_JMP);
326 fixJumpTarget(p, jumpNext);
328 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
329 else genExprList(p, telse->children->children);
333 fixJumpTarget(p, jumpEnd);
336 static void genIfElse(struct Parser* p, struct Token* t)
338 genIf(p, t, t->children->next->next);
341 static void genQuestion(struct Parser* p, struct Token* t)
343 int jumpNext, jumpEnd;
344 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
345 naParseError(p, "invalid ?: expression", t->line);
346 genExpr(p, LEFT(t)); // the test
347 jumpNext = emitJump(p, OP_JIFNOTPOP);
348 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
349 jumpEnd = emitJump(p, OP_JMP);
350 fixJumpTarget(p, jumpNext);
351 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
352 fixJumpTarget(p, jumpEnd);
355 static int countSemis(struct Token* t)
357 if(!t || t->type != TOK_SEMI) return 0;
358 return 1 + countSemis(RIGHT(t));
// Generates the shared tail of every loop form: the continue trampoline,
// the body, the optional update expression and the jump back to the top.
// body:    statement list of the loop body
// update:  optional expression run after each iteration (its value is
//          popped); may be null
// label:   optional loop label token
// loopTop: bytecode index to jump back to for the next iteration
// jumpEnd: index of the address word of the loop's exit jump, as returned
//          by emitJump; it is patched to point just past the OP_JMPLOOP
// The innermost loop record gets breakIP = jumpEnd - 1 and contIP = the
// position of an OP_JMP that leads to the update code; normal control flow
// jumps over that OP_JMP.
// NOTE(review): several statements (between the visible ones and around
// the body) are not visible in this view.
361 static void genLoop(struct Parser* p, struct Token* body,
362 struct Token* update, struct Token* label,
363 int loopTop, int jumpEnd)
365 int cont, jumpOverContinue;
367 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
369 jumpOverContinue = emitJump(p, OP_JMP);
370 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
371 cont = emitJump(p, OP_JMP);
372 fixJumpTarget(p, jumpOverContinue);
374 genExprList(p, body);
376 fixJumpTarget(p, cont);
377 if(update) { genExpr(p, update); emit(p, OP_POP); }
378 emitImmediate(p, OP_JMPLOOP, loopTop);
379 fixJumpTarget(p, jumpEnd);
381 emit(p, OP_PUSHNIL); // Leave something on the stack
// Common generator for "for" and "while" loops.
// init:   optional initialiser expression (value popped); may be null
// test:   loop condition
// update: optional per-iteration expression, forwarded to genLoop()
// body:   loop body, forwarded to genLoop()
// label:  optional loop label, forwarded to genLoop()
// The loop top is the code position recorded after the initialiser; the
// exit jump is an OP_JIFNOTPOP.
// NOTE(review): the statements between the visible ones (including the
// generation of the test) are not visible in this view.
384 static void genForWhile(struct Parser* p, struct Token* init,
385 struct Token* test, struct Token* update,
386 struct Token* body, struct Token* label)
388 int loopTop, jumpEnd;
389 if(init) { genExpr(p, init); emit(p, OP_POP); }
391 loopTop = p->cg->codesz;
393 jumpEnd = emitJump(p, OP_JIFNOTPOP);
394 genLoop(p, body, update, label, loopTop, jumpEnd);
// Generates code for a while loop by delegating to genForWhile() with no
// initialiser and no update expression.
// The header is the child list of LEFT(t); the body is the left child of
// RIGHT(t). Semicolons in the header are counted to detect an optional
// label, which must be a TOK_SYMBOL; an unsupported count is reported as
// "too many semicolons in while test".
// NOTE(review): the conditions on `semis` and the extraction of the label
// and test are not visible in this view.
397 static void genWhile(struct Parser* p, struct Token* t)
399 struct Token *test=LEFT(t)->children, *body, *label=0;
400 int semis = countSemis(test);
403 if(!label || label->type != TOK_SYMBOL)
404 naParseError(p, "bad loop label", t->line);
408 naParseError(p, "too many semicolons in while test", t->line);
409 body = LEFT(RIGHT(t));
410 genForWhile(p, 0, test, 0, body, label);
// Generates code for a for loop by splitting its header into init, test
// and update parts and delegating to genForWhile().
// The header h is the child list of LEFT(t) and is a chain of semicolon
// nodes. A header with an unexpected number of terms is reported as
// "wrong number of terms in for header"; a label, when present, must be a
// TOK_SYMBOL. The body is the child list of RIGHT(t).
// NOTE(review): the conditions on `semis` and the extraction of the label
// and init parts are not visible in this view.
413 static void genFor(struct Parser* p, struct Token* t)
415 struct Token *init, *test, *body, *update, *label=0;
416 struct Token *h = LEFT(t)->children;
417 int semis = countSemis(h);
419 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
420 naParseError(p, "bad loop label", h->line);
423 } else if(semis != 2) {
424 naParseError(p, "wrong number of terms in for header", t->line);
427 // Parse tree hell :)
429 test = LEFT(RIGHT(h));
430 update = RIGHT(RIGHT(h));
431 body = RIGHT(t)->children;
432 genForWhile(p, init, test, update, body, label);
// Generates code for the foreach-style loops.
// The header h is LEFT(LEFT(t)); a header with an unexpected number of
// terms is reported as "wrong number of terms in foreach header", and a
// label, when present, must be a TOK_SYMBOL. The body is the child list of
// RIGHT(t).
// Each iteration starts with OP_EACH for TOK_FOREACH tokens and OP_INDEX
// otherwise, followed by an OP_JIFEND exit jump; the loop variable is set
// up through genLValue(). After the loop an OP_POP removes the vector and
// index (per the original comment).
// NOTE(review): the extraction of elem/vec/label, the generation of the
// vector expression and the emission of the assignment are not visible in
// this view.
435 static void genForEach(struct Parser* p, struct Token* t)
437 int loopTop, jumpEnd, assignOp, dummy;
438 struct Token *elem, *body, *vec, *label=0;
439 struct Token *h = LEFT(LEFT(t));
440 int semis = countSemis(h);
442 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
443 naParseError(p, "bad loop label", h->line);
446 } else if (semis != 1) {
447 naParseError(p, "wrong number of terms in foreach header", t->line);
451 body = RIGHT(t)->children;
454 emit(p, OP_PUSHZERO);
456 loopTop = p->cg->codesz;
457 emit(p, t->type == TOK_FOREACH ? OP_EACH : OP_INDEX);
458 jumpEnd = emitJump(p, OP_JIFEND);
459 assignOp = genLValue(p, elem, &dummy);
463 genLoop(p, body, 0, label, loopTop, jumpEnd);
464 emit(p, OP_POP); // Pull off the vector and index
468 static int tokMatch(struct Token* a, struct Token* b)
470 int i, l = a->strlen;
471 if(!a || !b) return 0;
472 if(l != b->strlen) return 0;
473 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
// Generates code for break and continue, optionally with a loop label.
// With a label (RIGHT(t), which must be a TOK_SYMBOL) the loop stack is
// searched with tokMatch() and `levels` becomes the number of loops to
// leave, counted from the innermost; without one, levels stays 1.
// One OP_BREAK2 is emitted per extra level and OP_BREAK for the last one.
// For break an OP_PUSHEND is emitted and control jumps to the target
// loop's breakIP; for continue it jumps to the loop's contIP.
// NOTE(review): no check that loopTop > 0 is visible before
// loops[loopTop - levels] is read, so a break/continue outside any loop
// may index the array at -1 -- confirm. The statements that record the
// matching loop index are not visible in this view.
477 static void genBreakContinue(struct Parser* p, struct Token* t)
479 int levels = 1, loop = -1, bp, cp, i;
481 if(RIGHT(t)->type != TOK_SYMBOL)
482 naParseError(p, "bad break/continue label", t->line);
483 for(i=0; i<p->cg->loopTop; i++)
484 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
487 naParseError(p, "no match for break/continue label", t->line);
488 levels = p->cg->loopTop - loop;
490 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
491 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
492 for(i=0; i<levels; i++)
493 emit(p, (i<levels-1) ? OP_BREAK2 : OP_BREAK);
494 if(t->type == TOK_BREAK)
495 emit(p, OP_PUSHEND); // breakIP is always a JIFNOTPOP/JIFEND!
496 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
// Appends an (instruction position, source line) pair to the generator's
// line table. Both values are stored as unsigned short, the current code
// size first and the line number second.
// When the table is full it is reallocated with naParseAlloc(): nsz is
// nLineIps*2 + 1 and the new buffer holds 2*nsz shorts; the existing
// nextLineIp*2 entries are copied over.
// NOTE(review): nextLineIp advances by two per pair while the capacity
// test compares it against nLineIps directly -- confirm the intended
// units. The statement installing the new buffer is not visible in this
// view.
499 static void newLineEntry(struct Parser* p, int line)
502 if(p->cg->nextLineIp >= p->cg->nLineIps) {
503 int nsz = p->cg->nLineIps*2 + 1;
504 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
505 for(i=0; i<(p->cg->nextLineIp*2); i++)
506 n[i] = p->cg->lineIps[i];
508 p->cg->nLineIps = nsz;
510 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
511 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
// Generates code for a single expression node, dispatching on the token
// type to the specialised generators above.
// A null t is reported as a parse error with line -1. Otherwise the
// token's line is stored in p->errLine, and a new line-table entry is
// added whenever the line differs from the last one seen.
// NOTE(review): many case labels and some emitted opcodes of the dispatch
// are not visible in this view; the comments below only describe what the
// visible lines show.
514 static void genExpr(struct Parser* p, struct Token* t)
517 if(!t) naParseError(p, "parse error", -1); // throw line -1...
518 p->errLine = t->line; // ...to use this one instead
519 if(t->line != p->cg->lastLine)
520 newLineEntry(p, t->line);
521 p->cg->lastLine = t->line;
539 case TOK_BREAK: case TOK_CONTINUE:
540 genBreakContinue(p, t);
543 genExprList(p, LEFT(t));
// Parentheses: two distinct children, or no right child at all, mean a
// call; otherwise this is plain grouping.
549 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
550 else genExpr(p, LEFT(t)); // simple parenthesis
554 genBinOp(OP_EXTRACT, p, t); // a[i]
557 genList(p, LEFT(t), 1);
// Assignment: set up the target, generate the value, then emit the store
// opcode that genLValue() selected.
565 i = genLValue(p, LEFT(t), &dummy);
566 genExpr(p, RIGHT(t));
567 emit(p, i); // use the op appropriate to the lvalue
// Return value (nil when absent), then one OP_UNMARK per open loop.
570 if(RIGHT(t)) genExpr(p, RIGHT(t));
571 else emit(p, OP_PUSHNIL);
572 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
576 genExpr(p, RIGHT(t));
580 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
583 genScalarConstant(p, t);
587 genBinOp(OP_MINUS, p, t); // binary subtraction
588 } else if(RIGHT(t) && RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
589 RIGHT(t)->num *= -1; // Pre-negate constants
590 genScalarConstant(p, RIGHT(t));
592 genExpr(p, RIGHT(t)); // unary negation
597 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
// NOTE(review): when RIGHT(t) is null the condition below is true and the
// error call then evaluates RIGHT(t)->line, dereferencing a null pointer;
// t->line would be a safe substitute.
602 if(!RIGHT(t) || RIGHT(t)->type != TOK_SYMBOL)
603 naParseError(p, "object field not symbol", RIGHT(t)->line);
604 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
606 case TOK_EMPTY: case TOK_NIL:
607 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
608 case TOK_AND: case TOK_OR:
609 genShortCircuit(p, t);
// Plain binary operators map one-to-one onto an opcode.
611 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
612 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
613 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
614 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
615 case TOK_LT: genBinOp(OP_LT, p, t); break;
616 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
617 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
618 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
619 case TOK_GT: genBinOp(OP_GT, p, t); break;
620 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
// Compound assignments reuse the opcode of the underlying operator.
621 case TOK_PLUSEQ: genEqOp(OP_PLUS, p, t); break;
622 case TOK_MINUSEQ: genEqOp(OP_MINUS, p, t); break;
623 case TOK_MULEQ: genEqOp(OP_MUL, p, t); break;
624 case TOK_DIVEQ: genEqOp(OP_DIV, p, t); break;
625 case TOK_CATEQ: genEqOp(OP_CAT, p, t); break;
627 naParseError(p, "parse error", t->line);
// Generates code for a statement list.
// A TOK_SEMI node is treated as a list node: when its right child exists
// and is not TOK_EMPTY, the rest of the list is generated recursively.
// NOTE(review): the generation of the left-hand statement, any stack
// adjustment between statements and the non-semicolon case are not
// visible in this view.
631 static void genExprList(struct Parser* p, struct Token* t)
633 if(t && t->type == TOK_SEMI) {
635 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
637 genExprList(p, RIGHT(t));
// Compiles a parsed block into a new code object.
// p:       parser
// block:   statement list of the body, generated with genExprList()
// arglist: formal-argument parse tree for genArgList()
// A local CodeGenerator is set up with an initial bytecode buffer of 1024
// words and a fresh constants vector. After the body is generated, a code
// object is created and filled in: argument tables, bytecode, constants,
// source file and line table are copied into naAlloc() storage.
// NOTE(review): several statements are not visible in this view, among
// them the installation of the local generator into p->cg, the conditions
// guarding the argument-table sections, and the end of the function. The
// comments below only describe what the visible lines show.
644 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
649 struct CodeGenerator cg;
652 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
653 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
655 cg.consts = naNewVector(p->context);
662 genExprList(p, block);
665 // Now make a code object
666 codeObj = naNewCode(p->context);
667 code = PTR(codeObj).code;
669 // Parse the argument list, if any
// Defaults: no named or optional arguments, the rest-argument symbol is
// globals->argRef and the argument vector is needed.
670 code->restArgSym = globals->argRef;
671 code->nArgs = code->nOptArgs = 0;
672 code->argSyms = code->optArgSyms = code->optArgVals = 0;
673 code->needArgVector = 1;
// Scratch tables of MAX_FUNARGS entries each, filled in by genArgList().
675 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
676 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
677 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
678 code->needArgVector = 0;
679 genArgList(p, code, arglist);
// Replace the scratch tables with exactly-sized naAlloc() copies.
682 nsyms = naAlloc(sizeof(int) * code->nArgs);
683 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
684 code->argSyms = nsyms;
685 } else code->argSyms = 0;
687 int i, *nsyms, *nvals;
688 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
689 nvals = naAlloc(sizeof(int) * code->nOptArgs);
690 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
691 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
692 code->optArgSyms = nsyms;
693 code->optArgVals = nvals;
694 } else code->optArgSyms = code->optArgVals = 0;
// Copy the generated bytecode and the constants into the code object.
697 code->codesz = cg.codesz;
698 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
699 for(i=0; i < cg.codesz; i++)
700 code->byteCode[i] = cg.byteCode[i];
701 code->nConstants = naVec_size(cg.consts);
702 code->constants = naAlloc(code->nConstants * sizeof(naRef));
703 code->srcFile = p->srcFile;
704 for(i=0; i<code->nConstants; i++)
705 code->constants[i] = getConstant(p, i);
// NOTE(review): nLines is taken from nextLineIp while the copy below
// covers nLineIps*2 shorts (the table's capacity as sized in
// newLineEntry), so entries beyond those actually written may be copied
// as well -- confirm the intended counts.
706 code->nLines = p->cg->nextLineIp;
707 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
708 for(i=0; i<p->cg->nLineIps*2; i++)
709 code->lineIps[i] = p->cg->lineIps[i];