6 // Static precedence table, from low (loose binding, do first) to high
7 // (tight binding, do last).
8 #define MAX_PREC_TOKS 6
10 int toks[MAX_PREC_TOKS];
13 { { TOK_SEMI, TOK_COMMA }, PREC_REVERSE },
14 { { TOK_ELLIPSIS }, PREC_SUFFIX },
15 { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX },
16 { { TOK_ASSIGN, TOK_PLUSEQ, TOK_MINUSEQ,
17 TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ }, PREC_REVERSE },
18 { { TOK_COLON, TOK_QUESTION }, PREC_REVERSE },
19 { { TOK_VAR }, PREC_PREFIX },
20 { { TOK_OR }, PREC_BINARY },
21 { { TOK_AND }, PREC_BINARY },
22 { { TOK_EQ, TOK_NEQ }, PREC_BINARY },
23 { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE }, PREC_BINARY },
24 { { TOK_PLUS, TOK_MINUS, TOK_CAT }, PREC_BINARY },
25 { { TOK_MUL, TOK_DIV }, PREC_BINARY },
26 { { TOK_MINUS, TOK_NEG, TOK_NOT }, PREC_PREFIX },
27 { { TOK_LPAR, TOK_LBRA }, PREC_SUFFIX },
28 { { TOK_DOT }, PREC_BINARY },
30 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
32 void naParseError(struct Parser* p, char* msg, int line)
34 // Some errors (e.g. code generation of a null pointer) lack a
35 // line number, so we throw -1 and set the line earlier.
36 if(line > 0) p->errLine = line;
38 longjmp(p->jumpHandle, 1);
41 // A "generic" (too obfuscated to describe) parser error
42 static void oops(struct Parser* p, struct Token* t)
44 naParseError(p, "parse error", t->line);
47 void naParseInit(struct Parser* p)
59 p->tree.type = TOK_TOP;
67 p->tree.lastChild = 0;
70 void naParseDestroy(struct Parser* p)
73 for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
75 naFree(p->chunkSizes);
79 void* naParseAlloc(struct Parser* p, int bytes)
83 // Round up to 8 byte chunks for alignment
84 if(bytes & 0x7) bytes = ((bytes>>3) + 1) << 3;
87 if(p->leftInChunk < bytes) {
94 if(sz < bytes) sz = bytes;
95 newChunk = naAlloc(sz);
99 newChunks = naAlloc(p->nChunks * sizeof(void*));
100 for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
101 newChunks[0] = newChunk;
103 p->chunks = newChunks;
105 newChunkSizes = naAlloc(p->nChunks * sizeof(int));
106 for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
107 newChunkSizes[0] = sz;
108 naFree(p->chunkSizes);
109 p->chunkSizes = newChunkSizes;
114 result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
115 p->leftInChunk -= bytes;
116 return (void*)result;
119 // Remove the child from the list where it exists, and insert it at
120 // the end of the parents child list.
121 static void addNewChild(struct Token* p, struct Token* c)
123 if(c->prev) c->prev->next = c->next;
124 if(c->next) c->next->prev = c->prev;
125 if(c == c->parent->children)
126 c->parent->children = c->next;
127 if(c == c->parent->lastChild)
128 c->parent->lastChild = c->prev;
131 c->prev = p->lastChild;
132 if(p->lastChild) p->lastChild->next = c;
133 if(!p->children) p->children = c;
137 // Follows the token list from start (which must be a left brace of
138 // some type), placing all tokens found into start's child list until
139 // it reaches the matching close brace.
140 static void collectBrace(struct Parser* p, struct Token* start)
144 if(start->type == TOK_LPAR) closer = TOK_RPAR;
145 if(start->type == TOK_LBRA) closer = TOK_RBRA;
146 if(start->type == TOK_LCURL) closer = TOK_RCURL;
152 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
155 case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
156 if(t->type != closer)
157 naParseError(p, "mismatched closing brace", t->line);
159 // Drop this node on the floor, stitch up the list and return
160 if(start->parent->lastChild == t)
161 start->parent->lastChild = t->prev;
162 start->next = t->next;
163 if(t->next) t->next->prev = start;
166 // Snip t out of the existing list, and append it to start's
169 addNewChild(start, t);
172 naParseError(p, "unterminated brace", start->line);
175 // Recursively find the contents of all matching brace pairs in the
176 // token list and turn them into children of the left token. The
177 // right token disappears.
178 static void braceMatch(struct Parser* p, struct Token* start)
180 struct Token* t = start;
183 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
186 case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
187 if(start->type != TOK_LBRA)
188 naParseError(p, "stray closing brace", t->line);
195 // Allocate and return an "empty" token as a parsing placeholder.
196 static struct Token* emptyToken(struct Parser* p)
198 struct Token* t = naParseAlloc(p, sizeof(struct Token));
204 t->next = t->prev = t->children = t->lastChild = 0;
209 // Synthesize a curly brace token to wrap token t foward to the end of
210 // "statement". FIXME: unify this with the addNewChild(), which does
211 // very similar stuff.
212 static void embrace(struct Parser* p, struct Token* t)
214 struct Token *b, *end = t;
217 if(end->next->type == TOK_SEMI) {
218 // Slurp up the semi, iff it is followed by an else/elsif,
219 // otherwise leave it in place.
220 if(end->next->next) {
221 if(end->next->next->type == TOK_ELSE) end = end->next;
222 if(end->next->next->type == TOK_ELSIF) end = end->next;
226 if(end->next->type == TOK_COMMA) break;
227 if(end->next->type == TOK_ELSE) break;
228 if(end->next->type == TOK_ELSIF) break;
234 b->parent = t->parent;
239 if(t->prev) t->prev->next = b;
240 else b->parent->children = b;
241 if(end->next) end->next->prev = b;
242 else b->parent->lastChild = b;
245 for(; t; t = t->next)
249 #define NEXT(t) (t ? t->next : 0)
250 #define TYPE(t) (t ? t->type : -1)
252 static void fixBracelessBlocks(struct Parser* p, struct Token* t)
254 // Find the end, and march *backward*
255 while(t && t->next) t = t->next;
256 for(/**/; t; t=t->prev) {
258 case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
259 case TOK_IF: case TOK_ELSIF:
260 if(TYPE(NEXT(t)) == TOK_LPAR && TYPE(NEXT(NEXT(t))) != TOK_LCURL)
261 embrace(p, t->next->next);
264 if(TYPE(NEXT(t)) != TOK_LCURL)
268 if(TYPE(NEXT(t)) == TOK_LPAR) {
269 if(TYPE(NEXT(NEXT(t))) != TOK_LCURL)
270 embrace(p, NEXT(NEXT(t)));
271 } else if(TYPE(NEXT(t)) != TOK_LCURL)
280 // Fixes up parenting for obvious parsing situations, like code blocks
281 // being the child of a func keyword, etc...
282 static void fixBlockStructure(struct Parser* p, struct Token* start)
285 fixBracelessBlocks(p, start);
290 // Slurp an optional paren block containing an arglist, then
291 // fall through to parse the curlies...
292 if(t->next && t->next->type == TOK_LPAR) {
295 fixBlockStructure(p, c);
297 case TOK_ELSE: // and TOK_FUNC!
298 // These guys precede a single curly block
299 if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
302 fixBlockStructure(p, c);
304 case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
305 case TOK_IF: case TOK_ELSIF:
306 // Expect a paren and then a curly
307 if(!t->next || t->next->type != TOK_LPAR) oops(p, t);
310 fixBlockStructure(p, c);
312 if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
315 fixBlockStructure(p, c);
317 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
318 fixBlockStructure(p, t->children);
324 // Another pass to hook up the elsif/else chains.
327 if(t->type == TOK_IF) {
328 while(t->next && t->next->type == TOK_ELSIF)
329 addNewChild(t, t->next);
330 if(t->next && t->next->type == TOK_ELSE)
331 addNewChild(t, t->next);
336 // And a final one to add semicolons. Always add one after
337 // for/foreach/while expressions. Add one after a function lambda
338 // if it immediately follows an assignment, and add one after an
339 // if/elsif/else if it is the first token in an expression list
340 // (i.e has no previous token, or is preceded by a ';' or '{').
341 // This mimicks common usage and avoids a conspicuous difference
342 // between this grammar and more common languages. It can be
343 // "escaped" with extra parenthesis if necessary, e.g.:
344 // a = (func { join(" ", arg) })(1, 2, 3, 4);
351 || t->prev->type == TOK_SEMI
352 || t->prev->type == TOK_LCURL)
355 case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
359 if(t->prev && t->prev->type == TOK_ASSIGN)
363 if(!t->next || t->next->type == TOK_SEMI || t->next->type == TOK_COMMA)
364 addSemi = 0; // don't bother, no need
366 struct Token* semi = emptyToken(p);
367 semi->type = TOK_SEMI;
368 semi->line = t->line;
369 semi->next = t->next;
371 semi->parent = t->parent;
372 if(semi->next) semi->next->prev = semi;
373 else semi->parent->lastChild = semi;
375 t = semi; // don't bother checking the new one
382 // True if the token's type exists in the precedence level.
383 static int tokInLevel(struct Token* tok, int level)
386 for(i=0; i<MAX_PREC_TOKS; i++)
387 if(PRECEDENCE[level].toks[i] == tok->type)
392 static int isBrace(int type)
394 return type == TOK_LPAR || type == TOK_LBRA || type == TOK_LCURL;
397 static int isBlock(int t)
399 return t == TOK_IF || t == TOK_ELSIF || t == TOK_ELSE
400 || t == TOK_FOR || t == TOK_FOREACH || t == TOK_WHILE
401 || t == TOK_FUNC || t == TOK_FORINDEX;
404 static void precChildren(struct Parser* p, struct Token* t);
405 static void precBlock(struct Parser* p, struct Token* t);
407 static struct Token* parsePrecedence(struct Parser* p,
408 struct Token* start, struct Token* end,
412 struct Token *t, *top, *left, *right;
413 struct Token *a, *b, *c, *d; // temporaries
415 // This is an error. No "siblings" are allowed at the bottom level.
416 if(level >= PRECEDENCE_LEVELS && start != end)
419 // Synthesize an empty token if necessary
420 if(end == 0 && start == 0)
421 return emptyToken(p);
423 // Sanify the list. This is OK, since we're recursing into the
424 // list structure; stuff to the left and right has already been
425 // handled somewhere above.
426 if(end == 0) end = start;
427 if(start == 0) start = end;
428 if(start->prev) start->prev->next = 0;
429 if(end->next) end->next->prev = 0;
430 start->prev = end->next = 0;
432 // Single tokens parse as themselves. Recurse into braces, and
433 // parse children of block structure.
435 if(isBrace(start->type)) {
436 precChildren(p, start);
437 } else if(isBlock(start->type)) {
443 // A context-sensitivity: we want to parse ';' and ',' as binary
444 // operators, but want them to be legal at the beginning and end
445 // of a list (unlike, say, '+' where we want a parse error).
446 // Generate empties as necessary.
447 if(start->type == TOK_SEMI || start->type == TOK_COMMA) {
453 if(end->type == TOK_SEMI || end->type == TOK_COMMA) {
460 // Another one: the "." and (postfix) "[]/()" operators should
461 // really be the same precendence level, but the existing
462 // implementation doesn't allow for it. Bump us up a level if we
463 // are parsing for DOT but find a LPAR/LBRA at the end of the
465 if(PRECEDENCE[level].toks[0] == TOK_DOT)
466 if(end->type == TOK_LPAR || end->type == TOK_LBRA)
469 top = left = right = 0;
470 rule = PRECEDENCE[level].rule;
473 if(tokInLevel(start, level) && start->next) {
475 b = start->lastChild;
479 if(a) left = parsePrecedence(p, a, b, 0);
480 right = parsePrecedence(p, c, d, level);
484 if(tokInLevel(end, level) && end->prev) {
490 left = parsePrecedence(p, a, b, level);
491 if(c) right = parsePrecedence(p, c, d, 0);
497 if(tokInLevel(t, level)) {
498 a = t->prev ? start : 0;
501 d = t->next ? end : 0;
503 left = parsePrecedence(p, a, b, level);
504 right = parsePrecedence(p, c, d, level+1);
513 if(tokInLevel(t, level)) {
514 a = t->prev ? start : 0;
517 d = t->next ? end : 0;
519 left = parsePrecedence(p, a, b, level+1);
520 right = parsePrecedence(p, c, d, level);
528 // Found nothing, try the next level
530 return parsePrecedence(p, start, end, level+1);
539 top->children = left;
546 top->lastChild = right;
548 top->next = top->prev = 0;
552 static void precChildren(struct Parser* p, struct Token* t)
554 struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
559 // Run a "block structure" node (if/elsif/else/for/while/foreach)
560 // through the precedence parser. The funny child structure makes
561 // this a little more complicated than it should be.
562 static void precBlock(struct Parser* p, struct Token* block)
564 struct Token* t = block->children;
568 else if(isBlock(t->type))
574 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
575 char* buf, int len, int* errLine)
581 // Protect from garbage collection
582 naTempSave(c, srcFile);
584 // Catch parser errors here.
586 if(setjmp(p.jumpHandle)) {
587 strncpy(c->error, p.err, sizeof(c->error));
588 *errLine = p.errLine;
595 p.firstLine = firstLine;
599 // Lexify, match brace structure, fixup if/for/etc...
601 braceMatch(&p, p.tree.children);
602 fixBlockStructure(&p, p.tree.children);
604 // Recursively run the precedence parser, and fixup the treetop
605 t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
606 t->prev = t->next = 0;
608 p.tree.lastChild = t;
611 codeObj = naCodeGen(&p, &(p.tree), 0);
615 naTempSave(c, codeObj);