5 // Static precedence table, from low (loose binding, do first) to high
6 // (tight binding, do last).
7 enum { PREC_BINARY, PREC_REVERSE, PREC_PREFIX, PREC_SUFFIX };
9 #define MAX_PREC_TOKS 5
11 int toks[MAX_PREC_TOKS];
14 { { TOK_SEMI, TOK_COMMA }, PREC_REVERSE },
15 { { TOK_COLON }, PREC_BINARY },
16 { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX },
17 { { TOK_ASSIGN }, PREC_REVERSE },
18 { { TOK_OR }, PREC_BINARY },
19 { { TOK_AND }, PREC_BINARY },
20 { { TOK_EQ, TOK_NEQ }, PREC_BINARY },
21 { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE }, PREC_BINARY },
22 { { TOK_PLUS, TOK_MINUS, TOK_CAT }, PREC_REVERSE },
23 { { TOK_MUL, TOK_DIV }, PREC_BINARY },
24 { { TOK_MINUS, TOK_NEG, TOK_NOT }, PREC_PREFIX },
25 { { TOK_LPAR, TOK_LBRA }, PREC_SUFFIX },
26 { { TOK_DOT }, PREC_BINARY },
28 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
30 void naParseError(struct Parser* p, char* msg, int line)
34 longjmp(p->jumpHandle, 1);
37 // A "generic" (too obfuscated to describe) parser error
38 static void oops(struct Parser* p, struct Token* t)
40 naParseError(p, "parse error", t->line);
43 void naParseInit(struct Parser* p)
55 p->tree.type = TOK_TOP;
63 p->tree.lastChild = 0;
66 void naParseDestroy(struct Parser* p)
69 for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
71 naFree(p->chunkSizes);
75 void* naParseAlloc(struct Parser* p, int bytes)
79 // Round up to 8 byte chunks for alignment
80 if(bytes & 0x7) bytes = ((bytes>>3) + 1) << 3;
83 if(p->leftInChunk < bytes) {
90 if(sz < bytes) sz = bytes;
91 newChunk = naAlloc(sz);
95 newChunks = naAlloc(p->nChunks * sizeof(void*));
96 for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
97 newChunks[0] = newChunk;
99 p->chunks = newChunks;
101 newChunkSizes = naAlloc(p->nChunks * sizeof(int));
102 for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
103 newChunkSizes[0] = sz;
104 naFree(p->chunkSizes);
105 p->chunkSizes = newChunkSizes;
110 result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
111 p->leftInChunk -= bytes;
112 return (void*)result;
115 // Remove the child from the list where it exists, and insert it at
116 // the end of the parents child list.
117 static void addNewChild(struct Token* p, struct Token* c)
119 if(c->prev) c->prev->next = c->next;
120 if(c->next) c->next->prev = c->prev;
121 if(c == c->parent->children)
122 c->parent->children = c->next;
123 if(c == c->parent->lastChild)
124 c->parent->lastChild = c->prev;
127 c->prev = p->lastChild;
128 if(p->lastChild) p->lastChild->next = c;
129 if(!p->children) p->children = c;
133 // Follows the token list from start (which must be a left brace of
134 // some type), placing all tokens found into start's child list until
135 // it reaches the matching close brace.
136 static void collectBrace(struct Parser* p, struct Token* start)
140 if(start->type == TOK_LPAR) closer = TOK_RPAR;
141 if(start->type == TOK_LBRA) closer = TOK_RBRA;
142 if(start->type == TOK_LCURL) closer = TOK_RCURL;
148 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
151 case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
152 if(t->type != closer)
153 naParseError(p, "mismatched closing brace", t->line);
155 // Drop this node on the floor, stitch up the list and return
156 if(start->parent->lastChild == t)
157 start->parent->lastChild = t->prev;
158 start->next = t->next;
159 if(t->next) t->next->prev = start;
162 // Snip t out of the existing list, and append it to start's
165 addNewChild(start, t);
168 naParseError(p, "unterminated brace", start->line);
171 // Recursively find the contents of all matching brace pairs in the
172 // token list and turn them into children of the left token. The
173 // right token disappears.
174 static void braceMatch(struct Parser* p, struct Token* start)
176 struct Token* t = start;
179 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
182 case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
183 if(start->type != TOK_LBRA)
184 naParseError(p, "stray closing brace", t->line);
191 // Allocate and return an "empty" token as a parsing placeholder.
192 static struct Token* emptyToken(struct Parser* p)
194 struct Token* t = naParseAlloc(p, sizeof(struct Token));
200 t->next = t->prev = t->children = t->lastChild = 0;
205 // Fixes up parenting for obvious parsing situations, like code blocks
206 // being the child of a func keyword, etc...
207 static void fixBlockStructure(struct Parser* p, struct Token* start)
213 case TOK_ELSE: case TOK_FUNC:
214 // These guys precede a single curly block
215 if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
218 fixBlockStructure(p, c);
220 case TOK_FOR: case TOK_FOREACH: case TOK_WHILE:
221 case TOK_IF: case TOK_ELSIF:
222 // Expect a paren and then a curly
223 if(!t->next || t->next->type != TOK_LPAR) oops(p, t);
226 fixBlockStructure(p, c);
228 if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
231 fixBlockStructure(p, c);
233 case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
234 fixBlockStructure(p, t->children);
240 // Another pass to hook up the elsif/else chains.
243 if(t->type == TOK_IF) {
244 while(t->next && t->next->type == TOK_ELSIF)
245 addNewChild(t, t->next);
246 if(t->next && t->next->type == TOK_ELSE)
247 addNewChild(t, t->next);
252 // And a final one to add semicolons. Always add one after
253 // for/foreach/while expressions. Add one after a function lambda
254 // if it immediately follows an assignment, and add one after an
255 // if/elsif/else if it is the first token in an expression list
256 // (i.e has no previous token, or is preceded by a ';' or '{').
257 // This mimicks common usage and avoids a conspicuous difference
258 // between this grammar and more common languages. It can be
259 // "escaped" with extra parenthesis if necessary, e.g.:
260 // a = (func { join(" ", arg) })(1, 2, 3, 4);
267 || t->prev->type == TOK_SEMI
268 || t->prev->type == TOK_LCURL)
271 case TOK_FOR: case TOK_FOREACH: case TOK_WHILE:
275 if(t->prev && t->prev->type == TOK_ASSIGN)
280 struct Token* semi = emptyToken(p);
281 semi->type = TOK_SEMI;
282 semi->line = t->line;
283 semi->next = t->next;
285 semi->parent = t->parent;
286 if(semi->next) semi->next->prev = semi;
288 t = semi; // don't bother checking the new one
295 // True if the token's type exists in the precedence level.
296 static int tokInLevel(struct Token* tok, int level)
299 for(i=0; i<MAX_PREC_TOKS; i++)
300 if(PRECEDENCE[level].toks[i] == tok->type)
305 static int isBrace(int type)
307 return type == TOK_LPAR || type == TOK_LBRA || type == TOK_LCURL;
310 static int isBlock(int t)
312 return t == TOK_IF || t == TOK_ELSIF || t == TOK_ELSE
313 || t == TOK_FOR || t == TOK_FOREACH || t == TOK_WHILE
317 static void precChildren(struct Parser* p, struct Token* t);
318 static void precBlock(struct Parser* p, struct Token* t);
320 static struct Token* parsePrecedence(struct Parser* p,
321 struct Token* start, struct Token* end,
325 struct Token *t, *top, *left, *right;
326 struct Token *a, *b, *c, *d; // temporaries
328 // This is an error. No "siblings" are allowed at the bottom level.
329 if(level >= PRECEDENCE_LEVELS && start != end)
332 // Synthesize an empty token if necessary
333 if(end == 0 && start == 0)
334 return emptyToken(p);
336 // Sanify the list. This is OK, since we're recursing into the
337 // list structure; stuff to the left and right has already been
338 // handled somewhere above.
339 if(end == 0) end = start;
340 if(start == 0) start = end;
341 if(start->prev) start->prev->next = 0;
342 if(end->next) end->next->prev = 0;
343 start->prev = end->next = 0;
345 // Single tokens parse as themselves. Recurse into braces, and
346 // parse children of block structure.
348 if(isBrace(start->type)) {
349 precChildren(p, start);
350 } else if(isBlock(start->type)) {
356 // A context-sensitivity: we want to parse ';' and ',' as binary
357 // operators, but want them to be legal at the beginning and end
358 // of a list (unlike, say, '+' where we want a parse error).
359 // Generate empties as necessary.
360 if(start->type == TOK_SEMI || start->type == TOK_COMMA) {
366 if(end->type == TOK_SEMI || end->type == TOK_COMMA) {
373 // Another one: the "." and (postfix) "[]/()" operators should
374 // really be the same precendence level, but the existing
375 // implementation doesn't allow for it. Bump us up a level if we
376 // are parsing for DOT but find a LPAR/LBRA at the end of the
378 if(PRECEDENCE[level].toks[0] == TOK_DOT)
379 if(end->type == TOK_LPAR || end->type == TOK_LBRA)
382 top = left = right = 0;
383 rule = PRECEDENCE[level].rule;
386 if(tokInLevel(start, level) && start->next) {
388 b = start->lastChild;
392 if(a) left = parsePrecedence(p, a, b, 0);
393 right = parsePrecedence(p, c, d, level);
397 if(tokInLevel(end, level) && end->prev) {
403 left = parsePrecedence(p, a, b, level);
404 if(c) right = parsePrecedence(p, c, d, 0);
410 if(tokInLevel(t, level)) {
411 a = t->prev ? start : 0;
414 d = t->next ? end : 0;
416 left = parsePrecedence(p, a, b, level);
417 right = parsePrecedence(p, c, d, level+1);
426 if(tokInLevel(t, level)) {
427 a = t->prev ? start : 0;
430 d = t->next ? end : 0;
432 left = parsePrecedence(p, a, b, level+1);
433 right = parsePrecedence(p, c, d, level);
441 // Found nothing, try the next level
443 return parsePrecedence(p, start, end, level+1);
450 top->children = left;
457 top->lastChild = right;
459 top->next = top->prev = 0;
463 static void precChildren(struct Parser* p, struct Token* t)
465 struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
470 // Run a "block structure" node (if/elsif/else/for/while/foreach)
471 // through the precedence parser. The funny child structure makes
472 // this a little more complicated than it should be.
473 static void precBlock(struct Parser* p, struct Token* block)
475 struct Token* t = block->children;
479 else if(isBlock(t->type))
485 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
486 char* buf, int len, int* errLine)
492 // Protect from garbage collection
493 naVec_append(c->temps, srcFile);
495 // Catch parser errors here.
497 if(setjmp(p.jumpHandle)) {
499 *errLine = p.errLine;
506 p.firstLine = firstLine;
510 // Lexify, match brace structure, fixup if/for/etc...
512 braceMatch(&p, p.tree.children);
513 fixBlockStructure(&p, p.tree.children);
515 // Recursively run the precedence parser, and fixup the treetop
516 t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
517 t->prev = t->next = 0;
519 p.tree.lastChild = t;
522 codeObj = naCodeGen(&p, &(p.tree));
526 naVec_append(c->temps, codeObj);