6 // These are more sensical predicate names in most contexts in this file
// LEFT(tok) expands to the token's `children` member.
7 #define LEFT(tok) ((tok)->children)
// RIGHT(tok) expands to the token's `lastChild` member.
8 #define RIGHT(tok) ((tok)->lastChild)
// BINARY(tok) is true when both members are non-null and refer to
// different tokens, i.e. the node has two distinct operands.
9 #define BINARY(tok) (LEFT(tok) && RIGHT(tok) && LEFT(tok) != RIGHT(tok))
11 // Forward references for recursion
// (genExpr, genExprList and newLambda are called by functions that
// appear before their definitions in this file.)
12 static void genExpr(struct Parser* p, struct Token* t);
13 static void genExprList(struct Parser* p, struct Token* t);
14 static naRef newLambda(struct Parser* p, struct Token* t);
16 static void emit(struct Parser* p, int val)
18 if(p->cg->codesz >= p->cg->codeAlloced) {
19 int i, sz = p->cg->codeAlloced * 2;
20 unsigned short* buf = naParseAlloc(p, sz*sizeof(unsigned short));
21 for(i=0; i<p->cg->codeAlloced; i++) buf[i] = p->cg->byteCode[i];
22 p->cg->byteCode = buf;
23 p->cg->codeAlloced = sz;
25 p->cg->byteCode[p->cg->codesz++] = (unsigned short)val;
// Callers in this file pass an opcode as `val` and that opcode's
// operand as `arg` (for example OP_PUSHCONST with a constant index,
// or OP_JMPLOOP with a bytecode position).
28 static void emitImmediate(struct Parser* p, int val, int arg)
// Code generation for a two-operand operator: `op` is the opcode for
// the operator token `t` (see the TOK_* -> OP_* mapping in genExpr).
34 static void genBinOp(int op, struct Parser* p, struct Token* t)
// Both operands must be present in the parse tree.
36 if(!LEFT(t) || !RIGHT(t))
37 naParseError(p, "empty subexpression", t->line);
// Appends `c` to the constant vector of the current code generator.
43 static int newConstant(struct Parser* p, naRef c)
46 naVec_append(p->cg->consts, c);
// Index of the element that was just appended
47 i = naVec_size(p->cg->consts) - 1;
// Constant indices above 0xffff are rejected with a parse error
48 if(i > 0xffff) naParseError(p, "too many constants in code block", 0);
52 static naRef getConstant(struct Parser* p, int idx)
54 return naVec_get(p->cg->consts, idx);
57 // Interns a scalar (!) constant and returns its index
// An existing constant `b` is reused for `c` when both are numbers with
// equal value, both are nil, or naStrEqual() reports them equal; when
// nothing is reused, `c` is added through newConstant().
58 static int internConstant(struct Parser* p, naRef c)
60 int i, n = naVec_size(p->cg->consts);
// Code objects are never shared: each one gets its own new slot
61 if(IS_CODE(c)) return newConstant(p, c);
63 naRef b = naVec_get(p->cg->consts, i);
64 if(IS_NUM(b) && IS_NUM(c) && b.num == c.num) return i;
65 else if(IS_NIL(b) && IS_NIL(c)) return i;
66 else if(naStrEqual(b, c)) return i;
68 return newConstant(p, c);
// Symbol interning against the globals->symbols hash.
71 naRef naInternSymbol(naRef sym)
// Look for an existing entry; the stored value is written to `result`
74 if(naHash_get(globals->symbols, sym, &result))
// Register the symbol under itself as key
// NOTE(review): presumably only reached when the lookup above missed -- confirm
76 naHash_set(globals->symbols, sym, sym);
// Builds the constant value `c` described by token `t` and returns the
// index that internConstant() assigns to it.  Tokens that match none
// of the handled kinds raise "invalid/non-constant constant".
80 static int findConstantIndex(struct Parser* p, struct Token* t)
83 if(t->type == TOK_NIL) c = naNil();
// String data: create a new string holding t->str / t->strlen
85 c = naStr_fromdata(naNewString(p->context), t->str, t->strlen);
86 naHash_get(globals->symbols, c, &dummy); // noop, make c immutable
// Symbols additionally go through the global symbol table
87 if(t->type == TOK_SYMBOL) c = naInternSymbol(c);
// Function tokens become a code object built by newLambda()
88 } else if(t->type == TOK_FUNC) c = newLambda(p, t);
// Literal tokens without string data become a number from t->num
89 else if(t->type == TOK_LITERAL) c = naNum(t->num);
90 else naParseError(p, "invalid/non-constant constant", t->line);
91 return internConstant(p, c);
// Emits code that pushes the scalar constant described by token `t`.
// The int result is used as a constant index by genLValue().
94 static int genScalarConstant(struct Parser* p, struct Token* t)
96 // These opcodes are for special-case use in other constructs, but
97 // we might as well use them here to save a few bytes in the
98 // instruction stream.
// Tokens without string data (t->str == 0) whose number is 1 or 0
// take the special-case branches below.
99 if(t->str == 0 && t->num == 1) {
101 } else if(t->str == 0 && t->num == 0) {
102 emit(p, OP_PUSHZERO);
// General case: intern the constant and push it by index
104 int idx = findConstantIndex(p, t);
105 emitImmediate(p, OP_PUSHCONST, idx);
// Generates the code needed to address assignment target `t`.
// Callers treat the int result as the opcode that performs the store
// (genEqOp compares it with OP_SETMEMBER and OP_INSERT; the assignment
// case in genExpr emits it directly).  For symbol, member and "var"
// targets, *cidx receives the value returned by genScalarConstant()
// for the target symbol.  Unsupported targets raise "bad lvalue".
111 static int genLValue(struct Parser* p, struct Token* t, int* cidx)
// A parenthesis that is not a suffix (call) form: look inside it
113 if(t->type == TOK_LPAR && t->rule != PREC_SUFFIX) {
114 return genLValue(p, LEFT(t), cidx); // Handle stuff like "(a) = 1"
// Plain symbol
115 } else if(t->type == TOK_SYMBOL) {
116 *cidx = genScalarConstant(p, t);
// Member access whose right-hand side is a symbol
118 } else if(t->type == TOK_DOT && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
120 *cidx = genScalarConstant(p, RIGHT(t));
// Bracket (subscript) target: the index expression is generated here
122 } else if(t->type == TOK_LBRA) {
124 genExpr(p, RIGHT(t));
// "var" declaration followed by a symbol
126 } else if(t->type == TOK_VAR && RIGHT(t) && RIGHT(t)->type == TOK_SYMBOL) {
127 *cidx = genScalarConstant(p, RIGHT(t));
130 naParseError(p, "bad lvalue", t->line);
// Code generation for the compound-assignment tokens (TOK_PLUSEQ,
// TOK_MINUSEQ, TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ in genExpr); `op` is the
// arithmetic/concatenation opcode to combine with the assignment.
135 static void genEqOp(int op, struct Parser* p, struct Token* t)
// Address the target first; setop is the store opcode for it and cidx
// the constant index of the target symbol (when there is one)
137 int cidx, setop = genLValue(p, LEFT(t), &cidx);
// Member target: emits OP_MEMBER with the symbol's constant index
138 if(setop == OP_SETMEMBER) {
141 emitImmediate(p, OP_MEMBER, cidx);
// Subscript target
142 } else if(setop == OP_INSERT) {
145 } else // OP_SETSYM, OP_SETLOCAL
146 emitImmediate(p, OP_LOCAL, cidx);
// Right-hand operand of the compound assignment
147 genExpr(p, RIGHT(t));
152 static int defArg(struct Parser* p, struct Token* t)
154 if(t->type == TOK_LPAR) return defArg(p, RIGHT(t));
155 return findConstantIndex(p, t);
// Walks the argument-list parse tree `t` of a function definition and
// records the declared arguments in code object `c`:
//   - plain symbols are stored in c->argSyms (at most MAX_FUNARGS)
//   - "symbol = default" pairs go to c->optArgSyms / c->optArgVals
//   - a TOK_ELLIPSIS entry sets c->restArgSym and c->needArgVector
// TOK_COMMA nodes are handled by recursing into both children; any
// other token raises "bad function argument expression".
158 static void genArgList(struct Parser* p, struct naCode* c, struct Token* t)
161 if(t->type == TOK_EMPTY) return;
// restArgSym starts out as globals->argRef (set in naCodeGen); once a
// "rest" argument has replaced it, no further argument may follow
162 if(!IDENTICAL(c->restArgSym, globals->argRef))
163 naParseError(p, "remainder must be last", t->line);
164 if(t->type == TOK_ELLIPSIS) {
165 if(LEFT(t)->type != TOK_SYMBOL)
166 naParseError(p, "bad function argument expression", t->line);
167 sym = naStr_fromdata(naNewString(p->context),
168 LEFT(t)->str, LEFT(t)->strlen);
169 c->restArgSym = naInternSymbol(sym);
170 c->needArgVector = 1;
171 } else if(t->type == TOK_ASSIGN) {
172 if(LEFT(t)->type != TOK_SYMBOL)
173 naParseError(p, "bad function argument expression", t->line);
// NOTE(review): unlike the argSyms store below, these two stores are
// not preceded by a MAX_FUNARGS check on c->nOptArgs, although
// naCodeGen sizes both arrays for MAX_FUNARGS ints -- confirm that more
// than MAX_FUNARGS optional arguments cannot overflow them.
174 c->optArgSyms[c->nOptArgs] = findConstantIndex(p, LEFT(t));
175 c->optArgVals[c->nOptArgs++] = defArg(p, RIGHT(t));
176 } else if(t->type == TOK_SYMBOL) {
178 naParseError(p, "optional arguments must be last", t->line);
179 if(c->nArgs >= MAX_FUNARGS)
180 naParseError(p, "too many named function arguments", t->line);
181 c->argSyms[c->nArgs++] = findConstantIndex(p, t);
182 } else if(t->type == TOK_COMMA) {
183 genArgList(p, c, LEFT(t));
184 genArgList(p, c, RIGHT(t));
186 naParseError(p, "bad function argument expression", t->line);
// Builds a code object for function token `t` by running naCodeGen()
// on its body.
189 static naRef newLambda(struct Parser* p, struct Token* t)
191 struct CodeGenerator* cgSave;
193 struct Token* arglist;
// The function body (right child) must be a curly-brace block
194 if(RIGHT(t)->type != TOK_LCURL)
195 naParseError(p, "bad function definition", t->line);
197 // Save off the generator state while we do the new one
// An argument list is present only when the left child is a
// parenthesis token; otherwise arglist is null
199 arglist = LEFT(t)->type == TOK_LPAR ? LEFT(LEFT(t)) : 0;
// Generate the body: the contents of the curly-brace block
200 codeObj = naCodeGen(p, LEFT(RIGHT(t)), arglist);
205 static void genLambda(struct Parser* p, struct Token* t)
207 emitImmediate(p, OP_PUSHCONST, newConstant(p, newLambda(p, t)));
// Generates code for a comma-separated expression list rooted at `t`.
// When doAppend is nonzero an OP_VAPPEND is emitted along with each
// element.  The int result counts elements: each comma node
// contributes one plus the count of its right-hand remainder.
210 static int genList(struct Parser* p, struct Token* t, int doAppend)
212 if(t->type == TOK_COMMA) {
214 if(doAppend) emit(p, OP_VAPPEND);
215 return 1 + genList(p, RIGHT(t), doAppend);
216 } else if(t->type == TOK_EMPTY) {
220 if(doAppend) emit(p, OP_VAPPEND);
// Generates code for one "key : value" entry of a hash initializer.
// Entries that are not TOK_COLON nodes, or whose key is neither a
// symbol nor a literal, raise "bad hash/object initializer".
225 static void genHashElem(struct Parser* p, struct Token* t)
227 if(t->type == TOK_EMPTY)
229 if(t->type != TOK_COLON)
230 naParseError(p, "bad hash/object initializer", t->line);
// Key: a symbol is pushed as a constant, a literal is generated as an
// ordinary expression
231 if(LEFT(t)->type == TOK_SYMBOL) genScalarConstant(p, LEFT(t));
232 else if(LEFT(t)->type == TOK_LITERAL) genExpr(p, LEFT(t));
233 else naParseError(p, "bad hash/object initializer", t->line);
// Value
234 genExpr(p, RIGHT(t));
// Generates code for the entries of a hash initializer.  A comma node
// holds one entry on the left and the remaining entries on the right.
238 static void genHash(struct Parser* p, struct Token* t)
240 if(t->type == TOK_COMMA) {
241 genHashElem(p, LEFT(t));
242 genHash(p, RIGHT(t));
// Anything else that is not the empty token is handled in this branch
243 } else if(t->type != TOK_EMPTY) {
// Generates code for a function invocation.  LEFT(t) is the callee
// expression and RIGHT(t), when present, the argument list.
248 static void genFuncall(struct Parser* p, struct Token* t)
// Callee of the form "object.member": generate the object expression,
// then emit OP_MEMBER with the member symbol's constant index
252 if(LEFT(t)->type == TOK_DOT) {
253 genExpr(p, LEFT(LEFT(t)));
255 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(LEFT(t))));
// Generate the arguments (no OP_VAPPEND) and count them
260 if(RIGHT(t)) nargs = genList(p, RIGHT(t), 0);
// The call opcode takes the argument count as its operand
261 emitImmediate(p, op, nargs);
// Initializes the loop record at index loopTop of the code generator's
// loop table.  `label` is stored as given; genWhile passes a null
// label when the loop header contains no semicolon.
264 static void pushLoop(struct Parser* p, struct Token* label)
266 int i = p->cg->loopTop;
// 0xffffff is the initial value for both targets; genLoop() assigns
// the real break/continue positions afterwards
267 p->cg->loops[i].breakIP = 0xffffff;
268 p->cg->loops[i].contIP = 0xffffff;
269 p->cg->loops[i].label = label;
// Counterpart of pushLoop().
274 static void popLoop(struct Parser* p)
// A negative loopTop would mean more pops than pushes; that is reported
// as an internal error (line -1)
277 if(p->cg->loopTop < 0) naParseError(p, "BUG: loop stack underflow", -1);
281 // Emit a jump operation, and return the location of the address in
282 // the bytecode for future fixup in fixJumpTarget
// `op` is the jump opcode (callers pass OP_JMP, OP_JIFNOT, OP_JIFTRUE,
// OP_JIFNOTPOP or OP_JIFEND).
283 static int emitJump(struct Parser* p, int op)
// Placeholder operand, overwritten later by fixJumpTarget()
288 emit(p, 0xffff); // dummy address
292 // Points a previous jump instruction at the current "end-of-bytecode"
293 static void fixJumpTarget(struct Parser* p, int spot)
295 p->cg->byteCode[spot] = p->cg->codesz;
// Generates code for the short-circuit operators (TOK_AND / TOK_OR).
298 static void genShortCircuit(struct Parser* p, struct Token* t)
// Conditional jump over the right-hand operand: OP_JIFNOT for "and",
// OP_JIFTRUE for "or"
302 end = emitJump(p, t->type == TOK_AND ? OP_JIFNOT : OP_JIFTRUE);
304 genExpr(p, RIGHT(t));
// The jump lands just after the right-hand operand's code
305 fixJumpTarget(p, end);
// Generates code for one "if"/"elsif" clause `tif` and whatever follows
// it in `telse` (another elsif clause or an else clause).
309 static void genIf(struct Parser* p, struct Token* tif, struct Token* telse)
311 int jumpNext, jumpEnd;
312 genExpr(p, tif->children); // the test
// Conditional jump that is pointed at the code following the body
313 jumpNext = emitJump(p, OP_JIFNOTPOP);
314 genExprList(p, tif->children->next->children); // the body
// After the body, jump past everything belonging to telse
315 jumpEnd = emitJump(p, OP_JMP);
316 fixJumpTarget(p, jumpNext);
// An elsif clause is handled recursively together with its successor;
// otherwise the else body is generated
318 if(telse->type == TOK_ELSIF) genIf(p, telse, telse->next);
319 else genExprList(p, telse->children->children);
323 fixJumpTarget(p, jumpEnd);
326 static void genIfElse(struct Parser* p, struct Token* t)
328 genIf(p, t, t->children->next->next);
331 static void genQuestion(struct Parser* p, struct Token* t)
333 int jumpNext, jumpEnd;
334 if(!RIGHT(t) || RIGHT(t)->type != TOK_COLON)
335 naParseError(p, "invalid ?: expression", t->line);
336 genExpr(p, LEFT(t)); // the test
337 jumpNext = emitJump(p, OP_JIFNOTPOP);
338 genExpr(p, LEFT(RIGHT(t))); // the "if true" expr
339 jumpEnd = emitJump(p, OP_JMP);
340 fixJumpTarget(p, jumpNext);
341 genExpr(p, RIGHT(RIGHT(t))); // the "else" expr
342 fixJumpTarget(p, jumpEnd);
345 static int countSemis(struct Token* t)
347 if(!t || t->type != TOK_SEMI) return 0;
348 return 1 + countSemis(RIGHT(t));
// Generates the common tail of a loop: body, optional update
// expression, and the jump back to `loopTop`.  `jumpEnd` is the
// address slot (as returned by emitJump) of the loop's exit jump, which
// is pointed at the end of the loop here.
351 static void genLoop(struct Parser* p, struct Token* body,
352 struct Token* update, struct Token* label,
353 int loopTop, int jumpEnd)
355 int cont, jumpOverContinue;
// "break" jumps to the word just before the exit jump's address slot
// (see genBreakContinue: breakIP is a JIFNOTPOP/JIFEND)
357 p->cg->loops[p->cg->loopTop-1].breakIP = jumpEnd-1;
// Normal entry skips over the jump that serves as "continue" target
359 jumpOverContinue = emitJump(p, OP_JMP);
// "continue" lands on the jump emitted next, which is later pointed at
// the update code below
360 p->cg->loops[p->cg->loopTop-1].contIP = p->cg->codesz;
361 cont = emitJump(p, OP_JMP);
362 fixJumpTarget(p, jumpOverContinue);
364 genExprList(p, body);
366 fixJumpTarget(p, cont);
// The value of the update expression is discarded
367 if(update) { genExpr(p, update); emit(p, OP_POP); }
368 emitImmediate(p, OP_JMPLOOP, loopTop);
369 fixJumpTarget(p, jumpEnd);
371 emit(p, OP_PUSHNIL); // Leave something on the stack
// Shared code generation for "for" and "while" loops.  genWhile()
// passes null for `init` and `update`.
374 static void genForWhile(struct Parser* p, struct Token* init,
375 struct Token* test, struct Token* update,
376 struct Token* body, struct Token* label)
378 int loopTop, jumpEnd;
// The initializer's value is discarded
379 if(init) { genExpr(p, init); emit(p, OP_POP); }
// Bytecode position that OP_JMPLOOP in genLoop() jumps back to
381 loopTop = p->cg->codesz;
// Exit jump; genLoop() points it at the end of the loop
383 jumpEnd = emitJump(p, OP_JIFNOTPOP);
384 genLoop(p, body, update, label, loopTop, jumpEnd);
// Generates code for a "while" loop.  The header is the child of the
// left token; semicolons in it are counted to detect a loop label.
387 static void genWhile(struct Parser* p, struct Token* t)
389 struct Token *test=LEFT(t)->children, *body, *label=0;
390 int semis = countSemis(test);
// A label, when given, must be a symbol
393 if(!label || label->type != TOK_SYMBOL)
394 naParseError(p, "bad loop label", t->line);
398 naParseError(p, "too many semicolons in while test", t->line);
399 body = LEFT(RIGHT(t));
// A while loop has neither initializer nor update expression
400 genForWhile(p, 0, test, 0, body, label);
// Generates code for a "for" loop.  The header `h` is a chain of
// semicolon nodes; a header without a label must contain exactly two
// semicolons.
403 static void genFor(struct Parser* p, struct Token* t)
405 struct Token *init, *test, *body, *update, *label=0;
406 struct Token *h = LEFT(t)->children;
407 int semis = countSemis(h);
// A label, when given, must be a symbol
409 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
410 naParseError(p, "bad loop label", h->line);
413 } else if(semis != 2) {
414 naParseError(p, "wrong number of terms in for header", t->line);
417 // Parse tree hell :)
// The test is the left child of the second semicolon node, the update
// expression its right child
419 test = LEFT(RIGHT(h));
420 update = RIGHT(RIGHT(h));
421 body = RIGHT(t)->children;
422 genForWhile(p, init, test, update, body, label);
// Generates code for the two loop forms handled here: TOK_FOREACH,
// which uses OP_EACH, and the other caller's token type, which uses
// OP_INDEX.  A header without a label must contain exactly one
// semicolon.
425 static void genForEach(struct Parser* p, struct Token* t)
427 int loopTop, jumpEnd, assignOp, dummy;
428 struct Token *elem, *body, *vec, *label=0;
429 struct Token *h = LEFT(LEFT(t));
430 int semis = countSemis(h);
// A label, when given, must be a symbol
432 if(!LEFT(h) || LEFT(h)->type != TOK_SYMBOL)
433 naParseError(p, "bad loop label", h->line);
436 } else if (semis != 1) {
437 naParseError(p, "wrong number of terms in foreach header", t->line);
441 body = RIGHT(t)->children;
// NOTE(review): presumably the initial index, pushed next to the
// vector (see the "vector and index" comment at the end) -- confirm
444 emit(p, OP_PUSHZERO);
// Bytecode position that OP_JMPLOOP in genLoop() jumps back to
446 loopTop = p->cg->codesz;
447 emit(p, t->type == TOK_FOREACH ? OP_EACH : OP_INDEX);
// Exit jump; genLoop() points it at the end of the loop
448 jumpEnd = emitJump(p, OP_JIFEND);
// The loop variable is an lvalue; assignOp is its store opcode
449 assignOp = genLValue(p, elem, &dummy);
// No update expression for these loops
453 genLoop(p, body, 0, label, loopTop, jumpEnd);
454 emit(p, OP_POP); // Pull off the vector and index
458 static int tokMatch(struct Token* a, struct Token* b)
460 int i, l = a->strlen;
461 if(!a || !b) return 0;
462 if(l != b->strlen) return 0;
463 for(i=0; i<l; i++) if(a->str[i] != b->str[i]) return 0;
// Generates code for "break" and "continue", optionally with a loop
// label.  `levels` is the number of enclosing loops to leave (1 when
// no label is resolved), counted from the innermost loop.
467 static void genBreakContinue(struct Parser* p, struct Token* t)
469 int levels = 1, loop = -1, bp, cp, i;
// A label, when given, must be a symbol
471 if(RIGHT(t)->type != TOK_SYMBOL)
472 naParseError(p, "bad break/continue label", t->line);
// Search the loop table for a loop carrying the same label text
473 for(i=0; i<p->cg->loopTop; i++)
474 if(tokMatch(RIGHT(t), p->cg->loops[i].label))
477 naParseError(p, "no match for break/continue label", t->line);
478 levels = p->cg->loopTop - loop;
// Jump targets recorded by genLoop() for the selected loop
480 bp = p->cg->loops[p->cg->loopTop - levels].breakIP;
481 cp = p->cg->loops[p->cg->loopTop - levels].contIP;
// One opcode per loop level: OP_BREAK2 for all but the last, which
// gets OP_BREAK
482 for(i=0; i<levels; i++)
483 emit(p, (i<levels-1) ? OP_BREAK2 : OP_BREAK);
484 if(t->type == TOK_BREAK)
485 emit(p, OP_PUSHEND); // breakIP is always a JIFNOTPOP/JIFEND!
486 emitImmediate(p, OP_JMP, t->type == TOK_BREAK ? bp : cp);
// Appends an entry to the line table of the current code generator.
// Each entry is two unsigned shorts: the current bytecode position
// (codesz) followed by the source line.
489 static void newLineEntry(struct Parser* p, int line)
// Grow the table: the new buffer holds 2*nsz shorts and the shorts
// written so far are copied over.  nextLineIp advances by two per
// entry (see the two stores below).
492 if(p->cg->nextLineIp >= p->cg->nLineIps) {
493 int nsz = p->cg->nLineIps*2 + 1;
494 unsigned short* n = naParseAlloc(p, sizeof(unsigned short)*2*nsz);
495 for(i=0; i<(p->cg->nextLineIp*2); i++)
496 n[i] = p->cg->lineIps[i];
498 p->cg->nLineIps = nsz;
500 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) p->cg->codesz;
501 p->cg->lineIps[p->cg->nextLineIp++] = (unsigned short) line;
// Main expression code generator: dispatches on the type of token `t`
// and emits the bytecode for it, recursing into sub-expressions.
// Token types without a case raise "parse error".
504 static void genExpr(struct Parser* p, struct Token* t)
507 if(!t) naParseError(p, "parse error", -1); // throw line -1...
508 p->errLine = t->line; // ...to use this one instead
// Record a line-table entry whenever the source line changes
509 if(t->line != p->cg->lastLine)
510 newLineEntry(p, t->line);
511 p->cg->lastLine = t->line;
529 case TOK_BREAK: case TOK_CONTINUE:
530 genBreakContinue(p, t);
533 genExprList(p, LEFT(t));
// A parenthesis with two distinct children, or with no right child,
// is treated as a call; otherwise it only groups an expression
539 if(BINARY(t) || !RIGHT(t)) genFuncall(p, t); // function invocation
540 else genExpr(p, LEFT(t)); // simple parenthesis
544 genBinOp(OP_EXTRACT, p, t); // a[i]
// Elements are generated with an OP_VAPPEND each (doAppend = 1)
547 genList(p, LEFT(t), 1);
// Assignment: address the target, evaluate the value, then emit the
// store opcode returned by genLValue()
555 i = genLValue(p, LEFT(t), &dummy);
556 genExpr(p, RIGHT(t));
557 emit(p, i); // use the op appropriate to the lvalue
// Value expression if present, nil otherwise; then one OP_UNMARK per
// loop currently on the loop table
560 if(RIGHT(t)) genExpr(p, RIGHT(t));
561 else emit(p, OP_PUSHNIL);
562 for(i=0; i<p->cg->loopTop; i++) emit(p, OP_UNMARK);
566 genExpr(p, RIGHT(t));
570 emitImmediate(p, OP_LOCAL, findConstantIndex(p, t));
573 genScalarConstant(p, t);
577 genBinOp(OP_MINUS, p, t); // binary subtraction
// Negated numeric literal: the sign is folded into the constant.
// Note that this modifies the token's num field in place.
578 } else if(RIGHT(t) && RIGHT(t)->type == TOK_LITERAL && !RIGHT(t)->str) {
579 RIGHT(t)->num *= -1; // Pre-negate constants
580 genScalarConstant(p, RIGHT(t));
582 genExpr(p, RIGHT(t)); // unary negation
587 genExpr(p, RIGHT(t)); // unary negation (see also TOK_MINUS!)
// NOTE(review): when RIGHT(t) is null the condition below is true and
// the error call then reads RIGHT(t)->line through that null pointer;
// t->line looks like the safe choice -- confirm and fix.
592 if(!RIGHT(t) || RIGHT(t)->type != TOK_SYMBOL)
593 naParseError(p, "object field not symbol", RIGHT(t)->line);
594 emitImmediate(p, OP_MEMBER, findConstantIndex(p, RIGHT(t)));
596 case TOK_EMPTY: case TOK_NIL:
597 emit(p, OP_PUSHNIL); break; // *NOT* a noop!
598 case TOK_AND: case TOK_OR:
599 genShortCircuit(p, t);
// Plain binary operators
601 case TOK_MUL: genBinOp(OP_MUL, p, t); break;
602 case TOK_PLUS: genBinOp(OP_PLUS, p, t); break;
603 case TOK_DIV: genBinOp(OP_DIV, p, t); break;
604 case TOK_CAT: genBinOp(OP_CAT, p, t); break;
605 case TOK_LT: genBinOp(OP_LT, p, t); break;
606 case TOK_LTE: genBinOp(OP_LTE, p, t); break;
607 case TOK_EQ: genBinOp(OP_EQ, p, t); break;
608 case TOK_NEQ: genBinOp(OP_NEQ, p, t); break;
609 case TOK_GT: genBinOp(OP_GT, p, t); break;
610 case TOK_GTE: genBinOp(OP_GTE, p, t); break;
// Compound assignments, paired with the matching binary opcode
611 case TOK_PLUSEQ: genEqOp(OP_PLUS, p, t); break;
612 case TOK_MINUSEQ: genEqOp(OP_MINUS, p, t); break;
613 case TOK_MULEQ: genEqOp(OP_MUL, p, t); break;
614 case TOK_DIVEQ: genEqOp(OP_DIV, p, t); break;
615 case TOK_CATEQ: genEqOp(OP_CAT, p, t); break;
617 naParseError(p, "parse error", t->line);
// Generates code for a sequence of expressions separated by
// semicolons.
621 static void genExprList(struct Parser* p, struct Token* t)
623 if(t && t->type == TOK_SEMI) {
// Continue with the rest of the list only when something other than
// the empty token follows the semicolon
625 if(RIGHT(t) && RIGHT(t)->type != TOK_EMPTY) {
627 genExprList(p, RIGHT(t));
634 naRef naCodeGen(struct Parser* p, struct Token* block, struct Token* arglist)
639 struct CodeGenerator cg;
642 cg.codeAlloced = 1024; // Start fairly big, this is a cheap allocation
643 cg.byteCode = naParseAlloc(p, cg.codeAlloced *sizeof(unsigned short));
645 cg.consts = naNewVector(p->context);
652 genExprList(p, block);
655 // Now make a code object
656 codeObj = naNewCode(p->context);
657 code = PTR(codeObj).code;
659 // Parse the argument list, if any
660 code->restArgSym = globals->argRef;
661 code->nArgs = code->nOptArgs = 0;
662 code->argSyms = code->optArgSyms = code->optArgVals = 0;
663 code->needArgVector = 1;
665 code->argSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
666 code->optArgSyms = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
667 code->optArgVals = naParseAlloc(p, sizeof(int) * MAX_FUNARGS);
668 code->needArgVector = 0;
669 genArgList(p, code, arglist);
672 nsyms = naAlloc(sizeof(int) * code->nArgs);
673 for(i=0; i<code->nArgs; i++) nsyms[i] = code->argSyms[i];
674 code->argSyms = nsyms;
675 } else code->argSyms = 0;
677 int i, *nsyms, *nvals;
678 nsyms = naAlloc(sizeof(int) * code->nOptArgs);
679 nvals = naAlloc(sizeof(int) * code->nOptArgs);
680 for(i=0; i<code->nOptArgs; i++) nsyms[i] = code->optArgSyms[i];
681 for(i=0; i<code->nOptArgs; i++) nvals[i] = code->optArgVals[i];
682 code->optArgSyms = nsyms;
683 code->optArgVals = nvals;
684 } else code->optArgSyms = code->optArgVals = 0;
687 code->codesz = cg.codesz;
688 code->byteCode = naAlloc(cg.codesz * sizeof(unsigned short));
689 for(i=0; i < cg.codesz; i++)
690 code->byteCode[i] = cg.byteCode[i];
691 code->nConstants = naVec_size(cg.consts);
692 code->constants = naAlloc(code->nConstants * sizeof(naRef));
693 code->srcFile = p->srcFile;
694 for(i=0; i<code->nConstants; i++)
695 code->constants[i] = getConstant(p, i);
696 code->nLines = p->cg->nextLineIp;
697 code->lineIps = naAlloc(sizeof(unsigned short)*p->cg->nLineIps*2);
698 for(i=0; i<p->cg->nLineIps*2; i++)
699 code->lineIps[i] = p->cg->lineIps[i];