simgear/nasal/parse.c

   1 #include <setjmp.h>
   2 #include <string.h>
   3
   4 #include "parse.h"
   5
   6 // Static precedence table, from low (loose binding, do first) to high
   7 // (tight binding, do last).
   8 #define MAX_PREC_TOKS 6
   9 struct precedence {
  10     int toks[MAX_PREC_TOKS];
  11     int rule;
  12 } PRECEDENCE[] = {
  13     { { TOK_SEMI, TOK_COMMA },                 PREC_REVERSE },
  14     { { TOK_ELLIPSIS },                        PREC_SUFFIX  },
  15     { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX  },
  16     { { TOK_ASSIGN, TOK_PLUSEQ, TOK_MINUSEQ,
  17         TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ     }, PREC_REVERSE },
  18     { { TOK_COLON, TOK_QUESTION },             PREC_REVERSE },
  19     { { TOK_VAR },                             PREC_PREFIX  },
  20     { { TOK_OR },                              PREC_BINARY  },
  21     { { TOK_AND },                             PREC_BINARY  },
  22     { { TOK_EQ, TOK_NEQ },                     PREC_BINARY  },
  23     { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE },    PREC_BINARY  },
  24     { { TOK_PLUS, TOK_MINUS, TOK_CAT },        PREC_BINARY  },
  25     { { TOK_MUL, TOK_DIV },                    PREC_BINARY  },
  26     { { TOK_MINUS, TOK_NEG, TOK_NOT },         PREC_PREFIX  },
  27     { { TOK_LPAR, TOK_LBRA },                  PREC_SUFFIX  },
  28     { { TOK_DOT },                             PREC_BINARY  },
  29 };
  30 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
  31
  32 void naParseError(struct Parser* p, char* msg, int line)
  33 {
  34     // Some errors (e.g. code generation of a null pointer) lack a
  35     // line number, so we throw -1 and set the line earlier.
  36     if(line > 0) p->errLine = line;
  37     p->err = msg;
  38     longjmp(p->jumpHandle, 1);
  39 }
  40
  41 // A "generic" (too obfuscated to describe) parser error
  42 static void oops(struct Parser* p, struct Token* t)
  43 {
  44     naParseError(p, "parse error", t->line);
  45 }
  46
  47 void naParseInit(struct Parser* p)
  48 {
  49     p->buf = 0;
  50     p->len = 0;
  51     p->lines = 0;
  52     p->nLines = 0;
  53     p->chunks = 0;
  54     p->chunkSizes = 0;
  55     p->nChunks = 0;
  56     p->leftInChunk = 0;
  57     p->cg = 0;
  58
  59     p->tree.type = TOK_TOP;
  60     p->tree.line = 1;
  61     p->tree.str = 0;
  62     p->tree.strlen = 0;
  63     p->tree.num = 0;
  64     p->tree.next = 0;
  65     p->tree.prev = 0;
  66     p->tree.children = 0;
  67     p->tree.lastChild = 0;
  68 }
  69
  70 void naParseDestroy(struct Parser* p)
  71 {
  72     int i;
  73     for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
  74     naFree(p->chunks);
  75     naFree(p->chunkSizes);
  76     p->buf = 0;
  77 }
  78
  79 void* naParseAlloc(struct Parser* p, int bytes)
  80 {
  81     char* result;
  82
  83     // Round up to 8 byte chunks for alignment
  84     if(bytes & 0x7) bytes = ((bytes>>3) + 1) << 3;
  85
  86     // Need a new chunk?
  87     if(p->leftInChunk < bytes) {
  88         void* newChunk;
  89         void** newChunks;
  90         int* newChunkSizes;
  91         int sz, i;
  92
  93         sz = p->len;
  94         if(sz < bytes) sz = bytes;
  95         newChunk = naAlloc(sz);
  96
  97         p->nChunks++;
  98
  99         newChunks = naAlloc(p->nChunks * sizeof(void*));
 100         for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
 101         newChunks[0] = newChunk;
 102         naFree(p->chunks);
 103         p->chunks = newChunks;
 104
 105         newChunkSizes = naAlloc(p->nChunks * sizeof(int));
 106         for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
 107         newChunkSizes[0] = sz;
 108         naFree(p->chunkSizes);
 109         p->chunkSizes = newChunkSizes;
 110
 111         p->leftInChunk = sz;
 112     }
 113
 114     result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
 115     p->leftInChunk -= bytes;
 116     return (void*)result;
 117 }
 118
 119 // Remove the child from the list where it exists, and insert it at
 120 // the end of the parents child list.
 121 static void addNewChild(struct Token* p, struct Token* c)
 122 {
 123     if(c->prev) c->prev->next = c->next;
 124     if(c->next) c->next->prev = c->prev;
 125     if(c == c->parent->children)
 126         c->parent->children = c->next;
 127     if(c == c->parent->lastChild)
 128         c->parent->lastChild = c->prev;
 129     c->parent = p;
 130     c->next = 0;
 131     c->prev = p->lastChild;
 132     if(p->lastChild) p->lastChild->next = c;
 133     if(!p->children) p->children = c;
 134     p->lastChild = c;
 135 }
 136
 137 // Follows the token list from start (which must be a left brace of
 138 // some type), placing all tokens found into start's child list until
 139 // it reaches the matching close brace.
 140 static void collectBrace(struct Parser* p, struct Token* start)
 141 {
 142     struct Token* t;
 143     int closer = -1;
 144     if(start->type == TOK_LPAR)  closer = TOK_RPAR;
 145     if(start->type == TOK_LBRA)  closer = TOK_RBRA;
 146     if(start->type == TOK_LCURL) closer = TOK_RCURL;
 147
 148     t = start->next;
 149     while(t) {
 150         struct Token* next;
 151         switch(t->type) {
 152         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 153             collectBrace(p, t);
 154             break;
 155         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 156             if(t->type != closer)
 157                 naParseError(p, "mismatched closing brace", t->line);
 158
 159             // Drop this node on the floor, stitch up the list and return
 160             if(start->parent->lastChild == t)
 161                 start->parent->lastChild = t->prev;
 162             start->next = t->next;
 163             if(t->next) t->next->prev = start;
 164             return;
 165         }
 166         // Snip t out of the existing list, and append it to start's
 167         // children.
 168         next = t->next;
 169         addNewChild(start, t);
 170         t = next;
 171     }
 172     naParseError(p, "unterminated brace", start->line);
 173 }
 174
 175 // Recursively find the contents of all matching brace pairs in the
 176 // token list and turn them into children of the left token.  The
 177 // right token disappears.
 178 static void braceMatch(struct Parser* p, struct Token* start)
 179 {
 180     struct Token* t = start;
 181     while(t) {
 182         switch(t->type) {
 183         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 184             collectBrace(p, t);
 185             break;
 186         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 187             if(start->type != TOK_LBRA)
 188                 naParseError(p, "stray closing brace", t->line);
 189             break;
 190         }
 191         t = t->next;
 192     }
 193 }
 194
 195 // Allocate and return an "empty" token as a parsing placeholder.
 196 static struct Token* emptyToken(struct Parser* p)
 197 {
 198     struct Token* t = naParseAlloc(p, sizeof(struct Token));
 199     t->type = TOK_EMPTY;
 200     t->line = -1;
 201     t->strlen = 0;
 202     t->num = 0;
 203     t->str = 0;
 204     t->next = t->prev = t->children = t->lastChild = 0;
 205     t->parent = 0;
 206     return t;
 207 }
 208
 209 // Synthesize a curly brace token to wrap token t foward to the end of
 210 // "statement".  FIXME: unify this with the addNewChild(), which does
 211 // very similar stuff.
 212 static void embrace(struct Parser* p, struct Token* t)
 213 {
 214     struct Token *b, *end = t;
 215     if(!t) return;
 216     while(end->next) {
 217         if(end->next->type == TOK_SEMI) {
 218             // Slurp up the semi, iff it is followed by an else/elsif,
 219             // otherwise leave it in place.
 220             if(end->next->next) {
 221                 if(end->next->next->type == TOK_ELSE)  end = end->next;
 222                 if(end->next->next->type == TOK_ELSIF) end = end->next;
 223             }
 224             break;
 225         }
 226         if(end->next->type == TOK_COMMA) break;
 227         if(end->next->type == TOK_ELSE) break;
 228         if(end->next->type == TOK_ELSIF) break;
 229         end = end->next;
 230     }
 231     b = emptyToken(p);
 232     b->type = TOK_LCURL;
 233     b->line = t->line;
 234     b->parent = t->parent;
 235     b->prev = t->prev;
 236     b->next = end->next;
 237     b->children = t;
 238     b->lastChild = end;
 239     if(t->prev) t->prev->next = b;
 240     else b->parent->children = b;
 241     if(end->next) end->next->prev = b;
 242     else b->parent->lastChild = b;
 243     t->prev = 0;
 244     end->next = 0;
 245     for(; t; t = t->next)
 246         t->parent = b;
 247 }
 248
 249 #define NEXT(t) (t ? t->next : 0)
 250 #define TYPE(t) (t ? t->type : -1)
 251
 252 static void fixBracelessBlocks(struct Parser* p, struct Token* t)
 253 {
 254     // Find the end, and march *backward*
 255     while(t && t->next) t = t->next;
 256     for(/**/; t; t=t->prev) {
 257         switch(t->type) {
 258         case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
 259         case TOK_IF: case TOK_ELSIF:
 260             if(TYPE(NEXT(t)) == TOK_LPAR && TYPE(NEXT(NEXT(t))) != TOK_LCURL)
 261                     embrace(p, t->next->next);
 262             break;
 263         case TOK_ELSE:
 264             if(TYPE(NEXT(t)) != TOK_LCURL)
 265                 embrace(p, t->next);
 266             break;
 267         case TOK_FUNC:
 268             if(TYPE(NEXT(t)) == TOK_LPAR) {
 269                 if(TYPE(NEXT(NEXT(t))) != TOK_LCURL)
 270                     embrace(p, NEXT(NEXT(t)));
 271             } else if(TYPE(NEXT(t)) != TOK_LCURL)
 272                 embrace(p, t->next);
 273             break;
 274         default:
 275             break;
 276         }
 277     }
 278 }
 279
 280 // Fixes up parenting for obvious parsing situations, like code blocks
 281 // being the child of a func keyword, etc...
 282 static void fixBlockStructure(struct Parser* p, struct Token* start)
 283 {
 284     struct Token *t, *c;
 285     fixBracelessBlocks(p, start);
 286     t = start;
 287     while(t) {
 288         switch(t->type) {
 289         case TOK_FUNC:
 290             // Slurp an optional paren block containing an arglist, then
 291             // fall through to parse the curlies...
 292             if(t->next && t->next->type == TOK_LPAR) {
 293                 c = t->next;
 294                 addNewChild(t, c);
 295                 fixBlockStructure(p, c);
 296             }
 297         case TOK_ELSE: // and TOK_FUNC!
 298             // These guys precede a single curly block
 299             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 300             c = t->next;
 301             addNewChild(t, c);
 302             fixBlockStructure(p, c);
 303             break;
 304         case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
 305         case TOK_IF: case TOK_ELSIF:
 306             // Expect a paren and then a curly
 307             if(!t->next || t->next->type != TOK_LPAR) oops(p, t);
 308             c = t->next;
 309             addNewChild(t, c);
 310             fixBlockStructure(p, c);
 311
 312             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 313             c = t->next;
 314             addNewChild(t, c);
 315             fixBlockStructure(p, c);
 316             break;
 317         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 318             fixBlockStructure(p, t->children);
 319             break;
 320         }
 321         t = t->next;
 322     }
 323
 324     // Another pass to hook up the elsif/else chains.
 325     t = start;
 326     while(t) {
 327         if(t->type == TOK_IF) {
 328             while(t->next && t->next->type == TOK_ELSIF)
 329                 addNewChild(t, t->next);
 330             if(t->next && t->next->type == TOK_ELSE)
 331                 addNewChild(t, t->next);
 332         }
 333         t = t->next;
 334     }
 335
 336     // And a final one to add semicolons.  Always add one after
 337     // for/foreach/while expressions.  Add one after a function lambda
 338     // if it immediately follows an assignment, and add one after an
 339     // if/elsif/else if it is the first token in an expression list
 340     // (i.e has no previous token, or is preceded by a ';' or '{').
 341     // This mimicks common usage and avoids a conspicuous difference
 342     // between this grammar and more common languages.  It can be
 343     // "escaped" with extra parenthesis if necessary, e.g.:
 344     //      a = (func { join(" ", arg) })(1, 2, 3, 4);
 345     t = start;
 346     while(t) {
 347         int addSemi = 0;
 348         switch(t->type) {
 349         case TOK_IF:
 350             if(!t->prev
 351                || t->prev->type == TOK_SEMI
 352                || t->prev->type == TOK_LCURL)
 353                 addSemi = 1;
 354             break;
 355         case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
 356             addSemi = 1;
 357             break;
 358         case TOK_FUNC:
 359             if(t->prev && t->prev->type == TOK_ASSIGN)
 360                 addSemi = 1;
 361             break;
 362         }
 363         if(!t->next || t->next->type == TOK_SEMI || t->next->type == TOK_COMMA)
 364             addSemi = 0; // don't bother, no need
 365         if(addSemi) {
 366             struct Token* semi = emptyToken(p);
 367             semi->type = TOK_SEMI;
 368             semi->line = t->line;
 369             semi->next = t->next;
 370             semi->prev = t;
 371             semi->parent = t->parent;
 372             if(semi->next) semi->next->prev = semi;
 373             else semi->parent->lastChild = semi;
 374             t->next = semi;
 375             t = semi; // don't bother checking the new one
 376         }
 377         t = t->next;
 378     }
 379
 380 }
 381
 382 // True if the token's type exists in the precedence level.
 383 static int tokInLevel(struct Token* tok, int level)
 384 {
 385     int i;
 386     for(i=0; i<MAX_PREC_TOKS; i++)
 387         if(PRECEDENCE[level].toks[i] == tok->type)
 388             return 1;
 389     return 0;
 390 }
 391
 392 static int isBrace(int type)
 393 {
 394     return type == TOK_LPAR || type == TOK_LBRA || type == TOK_LCURL;
 395 }
 396
 397 static int isBlock(int t)
 398 {
 399     return t == TOK_IF  || t == TOK_ELSIF   || t == TOK_ELSE
 400         || t == TOK_FOR || t == TOK_FOREACH || t == TOK_WHILE
 401         || t == TOK_FUNC || t == TOK_FORINDEX;
 402 }
 403
 404 static void precChildren(struct Parser* p, struct Token* t);
 405 static void precBlock(struct Parser* p, struct Token* t);
 406
 407 static struct Token* parsePrecedence(struct Parser* p,
 408                                      struct Token* start, struct Token* end,
 409                                      int level)
 410 {
 411     int rule;
 412     struct Token *t, *top, *left, *right;
 413     struct Token *a, *b, *c, *d; // temporaries
 414
 415     // This is an error.  No "siblings" are allowed at the bottom level.
 416     if(level >= PRECEDENCE_LEVELS && start != end)
 417         oops(p, start);
 418
 419     // Synthesize an empty token if necessary
 420     if(end == 0 && start == 0)
 421         return emptyToken(p);
 422
 423     // Sanify the list.  This is OK, since we're recursing into the
 424     // list structure; stuff to the left and right has already been
 425     // handled somewhere above.
 426     if(end == 0) end = start;
 427     if(start == 0) start = end;
 428     if(start->prev) start->prev->next = 0;
 429     if(end->next) end->next->prev = 0;
 430     start->prev = end->next = 0;
 431
 432     // Single tokens parse as themselves.  Recurse into braces, and
 433     // parse children of block structure.
 434     if(start == end) {
 435         if(isBrace(start->type)) {
 436             precChildren(p, start);
 437         } else if(isBlock(start->type)) {
 438             precBlock(p, start);
 439         }
 440         return start;
 441     }
 442
 443     // A context-sensitivity: we want to parse ';' and ',' as binary
 444     // operators, but want them to be legal at the beginning and end
 445     // of a list (unlike, say, '+' where we want a parse error).
 446     // Generate empties as necessary.
 447     if(start->type == TOK_SEMI || start->type == TOK_COMMA) {
 448         t = emptyToken(p);
 449         start->prev = t;
 450         t->next = start;
 451         start = t;
 452     }
 453     if(end->type == TOK_SEMI || end->type == TOK_COMMA) {
 454         t = emptyToken(p);
 455         end->next = t;
 456         t->prev = end;
 457         end = t;
 458     }
 459
 460     // Another one: the "." and (postfix) "[]/()" operators should
 461     // really be the same precendence level, but the existing
 462     // implementation doesn't allow for it.  Bump us up a level if we
 463     // are parsing for DOT but find a LPAR/LBRA at the end of the
 464     // list.
 465     if(PRECEDENCE[level].toks[0] == TOK_DOT)
 466         if(end->type == TOK_LPAR || end->type == TOK_LBRA)
 467             level--;
 468
 469     top = left = right = 0;
 470     rule = PRECEDENCE[level].rule;
 471     switch(rule) {
 472     case PREC_PREFIX:
 473         if(tokInLevel(start, level) && start->next) {
 474             a = start->children;
 475             b = start->lastChild;
 476             c = start->next;
 477             d = end;
 478             top = start;
 479             if(a) left = parsePrecedence(p, a, b, 0);
 480             right = parsePrecedence(p, c, d, level);
 481         }
 482         break;
 483     case PREC_SUFFIX:
 484         if(tokInLevel(end, level) && end->prev) {
 485             a = start;
 486             b = end->prev;
 487             c = end->children;
 488             d = end->lastChild;
 489             top = end;
 490             left = parsePrecedence(p, a, b, level);
 491             if(c) right = parsePrecedence(p, c, d, 0);
 492         }
 493         break;
 494     case PREC_BINARY:
 495         t = end->prev;
 496         while(t->prev) {
 497             if(tokInLevel(t, level)) {
 498                 a = t->prev ? start : 0;
 499                 b = t->prev;
 500                 c = t->next;
 501                 d = t->next ? end : 0;
 502                 top = t;
 503                 left = parsePrecedence(p, a, b, level);
 504                 right = parsePrecedence(p, c, d, level+1);
 505                 break;
 506             }
 507             t = t->prev;
 508         }
 509         break;
 510     case PREC_REVERSE:
 511         t = start->next;
 512         while(t->next) {
 513             if(tokInLevel(t, level)) {
 514                 a = t->prev ? start : 0;
 515                 b = t->prev;
 516                 c = t->next;
 517                 d = t->next ? end : 0;
 518                 top = t;
 519                 left = parsePrecedence(p, a, b, level+1);
 520                 right = parsePrecedence(p, c, d, level);
 521                 break;
 522             }
 523             t = t->next;
 524         }
 525         break;
 526     }
 527
 528     // Found nothing, try the next level
 529     if(!top)
 530         return parsePrecedence(p, start, end, level+1);
 531
 532     top->rule = rule;
 533
 534     if(left) {
 535         left->next = right;
 536         left->prev = 0;
 537         left->parent = top;
 538     }
 539     top->children = left;
 540
 541     if(right) {
 542         right->next = 0;
 543         right->prev = left;
 544         right->parent = top;
 545     }
 546     top->lastChild = right;
 547
 548     top->next = top->prev = 0;
 549     return top;
 550 }
 551
 552 static void precChildren(struct Parser* p, struct Token* t)
 553 {
 554     struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
 555     t->children = top;
 556     t->lastChild = top;
 557 }
 558
 559 // Run a "block structure" node (if/elsif/else/for/while/foreach)
 560 // through the precedence parser.  The funny child structure makes
 561 // this a little more complicated than it should be.
 562 static void precBlock(struct Parser* p, struct Token* block)
 563 {
 564     struct Token* t = block->children;
 565     while(t) {
 566         if(isBrace(t->type))
 567             precChildren(p, t);
 568         else if(isBlock(t->type))
 569             precBlock(p, t);
 570         t = t->next;
 571     }
 572 }
 573
 574 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
 575                   char* buf, int len, int* errLine)
 576 {
 577     naRef codeObj;
 578     struct Token* t;
 579     struct Parser p;
 580
 581     // Protect from garbage collection
 582     naTempSave(c, srcFile);
 583
 584     // Catch parser errors here.
 585     *errLine = 0;
 586     if(setjmp(p.jumpHandle)) {
 587         strncpy(c->error, p.err, sizeof(c->error));
 588         *errLine = p.errLine;
 589         return naNil();
 590     }
 591
 592     naParseInit(&p);
 593     p.context = c;
 594     p.srcFile = srcFile;
 595     p.firstLine = firstLine;
 596     p.buf = buf;
 597     p.len = len;
 598
 599     // Lexify, match brace structure, fixup if/for/etc...
 600     naLex(&p);
 601     braceMatch(&p, p.tree.children);
 602     fixBlockStructure(&p, p.tree.children);
 603
 604     // Recursively run the precedence parser, and fixup the treetop
 605     t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
 606     t->prev = t->next = 0;
 607     p.tree.children = t;
 608     p.tree.lastChild = t;
 609
 610     // Generate code!
 611     codeObj = naCodeGen(&p, &(p.tree), 0);
 612
 613     // Clean up our mess
 614     naParseDestroy(&p);
 615     naTempSave(c, codeObj);
 616
 617     return codeObj;
 618 }