simgear/nasal/parse.c

   1 #include <setjmp.h>
   2
   3 #include "parse.h"
   4
   5 // Static precedence table, from low (loose binding, do first) to high
   6 // (tight binding, do last).
   7 enum { PREC_BINARY, PREC_REVERSE, PREC_PREFIX, PREC_SUFFIX };
   8
   9 #define MAX_PREC_TOKS 6
  10 struct precedence {
  11     int toks[MAX_PREC_TOKS];
  12     int rule;
  13 } PRECEDENCE[] = {
  14     { { TOK_SEMI, TOK_COMMA },                 PREC_REVERSE },
  15     { { TOK_ELLIPSIS },                        PREC_SUFFIX  },
  16     { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX  },
  17     { { TOK_ASSIGN, TOK_PLUSEQ, TOK_MINUSEQ,
  18         TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ     }, PREC_REVERSE },
  19     { { TOK_COLON, TOK_QUESTION },             PREC_REVERSE },
  20     { { TOK_VAR },                             PREC_PREFIX  },
  21     { { TOK_OR },                              PREC_BINARY  },
  22     { { TOK_AND },                             PREC_BINARY  },
  23     { { TOK_EQ, TOK_NEQ },                     PREC_BINARY  },
  24     { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE },    PREC_BINARY  },
  25     { { TOK_PLUS, TOK_MINUS, TOK_CAT },        PREC_BINARY  },
  26     { { TOK_MUL, TOK_DIV },                    PREC_BINARY  },
  27     { { TOK_MINUS, TOK_NEG, TOK_NOT },         PREC_PREFIX  },
  28     { { TOK_LPAR, TOK_LBRA },                  PREC_SUFFIX  },
  29     { { TOK_DOT },                             PREC_BINARY  },
  30 };
  31 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
  32
  33 void naParseError(struct Parser* p, char* msg, int line)
  34 {
  35     p->err = msg;
  36     p->errLine = line;
  37     longjmp(p->jumpHandle, 1);
  38 }
  39
  40 // A "generic" (too obfuscated to describe) parser error
  41 static void oops(struct Parser* p, struct Token* t)
  42 {
  43     naParseError(p, "parse error", t->line);
  44 }
  45
  46 void naParseInit(struct Parser* p)
  47 {
  48     p->buf = 0;
  49     p->len = 0;
  50     p->lines = 0;
  51     p->nLines = 0;
  52     p->chunks = 0;
  53     p->chunkSizes = 0;
  54     p->nChunks = 0;
  55     p->leftInChunk = 0;
  56     p->cg = 0;
  57
  58     p->tree.type = TOK_TOP;
  59     p->tree.line = 1;
  60     p->tree.str = 0;
  61     p->tree.strlen = 0;
  62     p->tree.num = 0;
  63     p->tree.next = 0;
  64     p->tree.prev = 0;
  65     p->tree.children = 0;
  66     p->tree.lastChild = 0;
  67 }
  68
  69 void naParseDestroy(struct Parser* p)
  70 {
  71     int i;
  72     for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
  73     naFree(p->chunks);
  74     naFree(p->chunkSizes);
  75     p->buf = 0;
  76 }
  77
  78 void* naParseAlloc(struct Parser* p, int bytes)
  79 {
  80     char* result;
  81
  82     // Round up to 8 byte chunks for alignment
  83     if(bytes & 0x7) bytes = ((bytes>>3) + 1) << 3;
  84
  85     // Need a new chunk?
  86     if(p->leftInChunk < bytes) {
  87         void* newChunk;
  88         void** newChunks;
  89         int* newChunkSizes;
  90         int sz, i;
  91
  92         sz = p->len;
  93         if(sz < bytes) sz = bytes;
  94         newChunk = naAlloc(sz);
  95
  96         p->nChunks++;
  97
  98         newChunks = naAlloc(p->nChunks * sizeof(void*));
  99         for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
 100         newChunks[0] = newChunk;
 101         naFree(p->chunks);
 102         p->chunks = newChunks;
 103
 104         newChunkSizes = naAlloc(p->nChunks * sizeof(int));
 105         for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
 106         newChunkSizes[0] = sz;
 107         naFree(p->chunkSizes);
 108         p->chunkSizes = newChunkSizes;
 109
 110         p->leftInChunk = sz;
 111     }
 112
 113     result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
 114     p->leftInChunk -= bytes;
 115     return (void*)result;
 116 }
 117
 118 // Remove the child from the list where it exists, and insert it at
 119 // the end of the parents child list.
 120 static void addNewChild(struct Token* p, struct Token* c)
 121 {
 122     if(c->prev) c->prev->next = c->next;
 123     if(c->next) c->next->prev = c->prev;
 124     if(c == c->parent->children)
 125         c->parent->children = c->next;
 126     if(c == c->parent->lastChild)
 127         c->parent->lastChild = c->prev;
 128     c->parent = p;
 129     c->next = 0;
 130     c->prev = p->lastChild;
 131     if(p->lastChild) p->lastChild->next = c;
 132     if(!p->children) p->children = c;
 133     p->lastChild = c;
 134 }
 135
 136 // Follows the token list from start (which must be a left brace of
 137 // some type), placing all tokens found into start's child list until
 138 // it reaches the matching close brace.
 139 static void collectBrace(struct Parser* p, struct Token* start)
 140 {
 141     struct Token* t;
 142     int closer = -1;
 143     if(start->type == TOK_LPAR)  closer = TOK_RPAR;
 144     if(start->type == TOK_LBRA)  closer = TOK_RBRA;
 145     if(start->type == TOK_LCURL) closer = TOK_RCURL;
 146
 147     t = start->next;
 148     while(t) {
 149         struct Token* next;
 150         switch(t->type) {
 151         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 152             collectBrace(p, t);
 153             break;
 154         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 155             if(t->type != closer)
 156                 naParseError(p, "mismatched closing brace", t->line);
 157
 158             // Drop this node on the floor, stitch up the list and return
 159             if(start->parent->lastChild == t)
 160                 start->parent->lastChild = t->prev;
 161             start->next = t->next;
 162             if(t->next) t->next->prev = start;
 163             return;
 164         }
 165         // Snip t out of the existing list, and append it to start's
 166         // children.
 167         next = t->next;
 168         addNewChild(start, t);
 169         t = next;
 170     }
 171     naParseError(p, "unterminated brace", start->line);
 172 }
 173
 174 // Recursively find the contents of all matching brace pairs in the
 175 // token list and turn them into children of the left token.  The
 176 // right token disappears.
 177 static void braceMatch(struct Parser* p, struct Token* start)
 178 {
 179     struct Token* t = start;
 180     while(t) {
 181         switch(t->type) {
 182         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 183             collectBrace(p, t);
 184             break;
 185         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 186             if(start->type != TOK_LBRA)
 187                 naParseError(p, "stray closing brace", t->line);
 188             break;
 189         }
 190         t = t->next;
 191     }
 192 }
 193
 194 // Allocate and return an "empty" token as a parsing placeholder.
 195 static struct Token* emptyToken(struct Parser* p)
 196 {
 197     struct Token* t = naParseAlloc(p, sizeof(struct Token));
 198     t->type = TOK_EMPTY;
 199     t->line = -1;
 200     t->strlen = 0;
 201     t->num = 0;
 202     t->str = 0;
 203     t->next = t->prev = t->children = t->lastChild = 0;
 204     t->parent = 0;
 205     return t;
 206 }
 207
 208 // Fixes up parenting for obvious parsing situations, like code blocks
 209 // being the child of a func keyword, etc...
 210 static void fixBlockStructure(struct Parser* p, struct Token* start)
 211 {
 212     struct Token *t, *c;
 213     t = start;
 214     while(t) {
 215         switch(t->type) {
 216         case TOK_FUNC:
 217             // Slurp an optional paren block containing an arglist, then
 218             // fall through to parse the curlies...
 219             if(t->next && t->next->type == TOK_LPAR) {
 220                 c = t->next;
 221                 addNewChild(t, c);
 222                 fixBlockStructure(p, c);
 223             }
 224         case TOK_ELSE: // and TOK_FUNC!
 225             // These guys precede a single curly block
 226             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 227             c = t->next;
 228             addNewChild(t, c);
 229             fixBlockStructure(p, c);
 230             break;
 231         case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
 232         case TOK_IF: case TOK_ELSIF:
 233             // Expect a paren and then a curly
 234             if(!t->next || t->next->type != TOK_LPAR) oops(p, t);
 235             c = t->next;
 236             addNewChild(t, c);
 237             fixBlockStructure(p, c);
 238
 239             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 240             c = t->next;
 241             addNewChild(t, c);
 242             fixBlockStructure(p, c);
 243             break;
 244         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 245             fixBlockStructure(p, t->children);
 246             break;
 247         }
 248         t = t->next;
 249     }
 250
 251     // Another pass to hook up the elsif/else chains.
 252     t = start;
 253     while(t) {
 254         if(t->type == TOK_IF) {
 255             while(t->next && t->next->type == TOK_ELSIF)
 256                 addNewChild(t, t->next);
 257             if(t->next && t->next->type == TOK_ELSE)
 258                 addNewChild(t, t->next);
 259         }
 260         t = t->next;
 261     }
 262
 263     // And a final one to add semicolons.  Always add one after
 264     // for/foreach/while expressions.  Add one after a function lambda
 265     // if it immediately follows an assignment, and add one after an
 266     // if/elsif/else if it is the first token in an expression list
 267     // (i.e has no previous token, or is preceded by a ';' or '{').
 268     // This mimicks common usage and avoids a conspicuous difference
 269     // between this grammar and more common languages.  It can be
 270     // "escaped" with extra parenthesis if necessary, e.g.:
 271     //      a = (func { join(" ", arg) })(1, 2, 3, 4);
 272     t = start;
 273     while(t) {
 274         int addSemi = 0;
 275         switch(t->type) {
 276         case TOK_IF:
 277             if(!t->prev
 278                || t->prev->type == TOK_SEMI
 279                || t->prev->type == TOK_LCURL)
 280                 addSemi = 1;
 281             break;
 282         case TOK_FOR: case TOK_FOREACH: case TOK_FORINDEX: case TOK_WHILE:
 283             addSemi = 1;
 284             break;
 285         case TOK_FUNC:
 286             if(t->prev && t->prev->type == TOK_ASSIGN)
 287                 addSemi = 1;
 288             break;
 289         }
 290         if(t->next && t->next->type == TOK_SEMI)
 291             addSemi = 0; // don't bother if it's already there!
 292         if(addSemi) {
 293             struct Token* semi = emptyToken(p);
 294             semi->type = TOK_SEMI;
 295             semi->line = t->line;
 296             semi->next = t->next;
 297             semi->prev = t;
 298             semi->parent = t->parent;
 299             if(semi->next) semi->next->prev = semi;
 300             t->next = semi;
 301             t = semi; // don't bother checking the new one
 302         }
 303         t = t->next;
 304     }
 305
 306 }
 307
 308 // True if the token's type exists in the precedence level.
 309 static int tokInLevel(struct Token* tok, int level)
 310 {
 311     int i;
 312     for(i=0; i<MAX_PREC_TOKS; i++)
 313         if(PRECEDENCE[level].toks[i] == tok->type)
 314             return 1;
 315     return 0;
 316 }
 317
 318 static int isBrace(int type)
 319 {
 320     return type == TOK_LPAR || type == TOK_LBRA || type == TOK_LCURL;
 321 }
 322
 323 static int isBlock(int t)
 324 {
 325     return t == TOK_IF  || t == TOK_ELSIF   || t == TOK_ELSE
 326         || t == TOK_FOR || t == TOK_FOREACH || t == TOK_WHILE
 327         || t == TOK_FUNC || t == TOK_FORINDEX;
 328 }
 329
 330 static void precChildren(struct Parser* p, struct Token* t);
 331 static void precBlock(struct Parser* p, struct Token* t);
 332
 333 static struct Token* parsePrecedence(struct Parser* p,
 334                                      struct Token* start, struct Token* end,
 335                                      int level)
 336 {
 337     int rule;
 338     struct Token *t, *top, *left, *right;
 339     struct Token *a, *b, *c, *d; // temporaries
 340
 341     // This is an error.  No "siblings" are allowed at the bottom level.
 342     if(level >= PRECEDENCE_LEVELS && start != end)
 343         oops(p, start);
 344
 345     // Synthesize an empty token if necessary
 346     if(end == 0 && start == 0)
 347         return emptyToken(p);
 348
 349     // Sanify the list.  This is OK, since we're recursing into the
 350     // list structure; stuff to the left and right has already been
 351     // handled somewhere above.
 352     if(end == 0) end = start;
 353     if(start == 0) start = end;
 354     if(start->prev) start->prev->next = 0;
 355     if(end->next) end->next->prev = 0;
 356     start->prev = end->next = 0;
 357
 358     // Single tokens parse as themselves.  Recurse into braces, and
 359     // parse children of block structure.
 360     if(start == end) {
 361         if(isBrace(start->type)) {
 362             precChildren(p, start);
 363         } else if(isBlock(start->type)) {
 364             precBlock(p, start);
 365         }
 366         return start;
 367     }
 368
 369     // A context-sensitivity: we want to parse ';' and ',' as binary
 370     // operators, but want them to be legal at the beginning and end
 371     // of a list (unlike, say, '+' where we want a parse error).
 372     // Generate empties as necessary.
 373     if(start->type == TOK_SEMI || start->type == TOK_COMMA) {
 374         t = emptyToken(p);
 375         start->prev = t;
 376         t->next = start;
 377         start = t;
 378     }
 379     if(end->type == TOK_SEMI || end->type == TOK_COMMA) {
 380         t = emptyToken(p);
 381         end->next = t;
 382         t->prev = end;
 383         end = t;
 384     }
 385
 386     // Another one: the "." and (postfix) "[]/()" operators should
 387     // really be the same precendence level, but the existing
 388     // implementation doesn't allow for it.  Bump us up a level if we
 389     // are parsing for DOT but find a LPAR/LBRA at the end of the
 390     // list.
 391     if(PRECEDENCE[level].toks[0] == TOK_DOT)
 392         if(end->type == TOK_LPAR || end->type == TOK_LBRA)
 393             level--;
 394
 395     top = left = right = 0;
 396     rule = PRECEDENCE[level].rule;
 397     switch(rule) {
 398     case PREC_PREFIX:
 399         if(tokInLevel(start, level) && start->next) {
 400             a = start->children;
 401             b = start->lastChild;
 402             c = start->next;
 403             d = end;
 404             top = start;
 405             if(a) left = parsePrecedence(p, a, b, 0);
 406             right = parsePrecedence(p, c, d, level);
 407         }
 408         break;
 409     case PREC_SUFFIX:
 410         if(tokInLevel(end, level) && end->prev) {
 411             a = start;
 412             b = end->prev;
 413             c = end->children;
 414             d = end->lastChild;
 415             top = end;
 416             left = parsePrecedence(p, a, b, level);
 417             if(c) right = parsePrecedence(p, c, d, 0);
 418         }
 419         break;
 420     case PREC_BINARY:
 421         t = end->prev;
 422         while(t->prev) {
 423             if(tokInLevel(t, level)) {
 424                 a = t->prev ? start : 0;
 425                 b = t->prev;
 426                 c = t->next;
 427                 d = t->next ? end : 0;
 428                 top = t;
 429                 left = parsePrecedence(p, a, b, level);
 430                 right = parsePrecedence(p, c, d, level+1);
 431                 break;
 432             }
 433             t = t->prev;
 434         }
 435         break;
 436     case PREC_REVERSE:
 437         t = start->next;
 438         while(t->next) {
 439             if(tokInLevel(t, level)) {
 440                 a = t->prev ? start : 0;
 441                 b = t->prev;
 442                 c = t->next;
 443                 d = t->next ? end : 0;
 444                 top = t;
 445                 left = parsePrecedence(p, a, b, level+1);
 446                 right = parsePrecedence(p, c, d, level);
 447                 break;
 448             }
 449             t = t->next;
 450         }
 451         break;
 452     }
 453
 454     // Found nothing, try the next level
 455     if(!top)
 456         return parsePrecedence(p, start, end, level+1);
 457
 458     if(left) {
 459         left->next = right;
 460         left->prev = 0;
 461         left->parent = top;
 462     }
 463     top->children = left;
 464
 465     if(right) {
 466         right->next = 0;
 467         right->prev = left;
 468         right->parent = top;
 469     }
 470     top->lastChild = right;
 471
 472     top->next = top->prev = 0;
 473     return top;
 474 }
 475
 476 static void precChildren(struct Parser* p, struct Token* t)
 477 {
 478     struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
 479     t->children = top;
 480     t->lastChild = top;
 481 }
 482
 483 // Run a "block structure" node (if/elsif/else/for/while/foreach)
 484 // through the precedence parser.  The funny child structure makes
 485 // this a little more complicated than it should be.
 486 static void precBlock(struct Parser* p, struct Token* block)
 487 {
 488     struct Token* t = block->children;
 489     while(t) {
 490         if(isBrace(t->type))
 491             precChildren(p, t);
 492         else if(isBlock(t->type))
 493             precBlock(p, t);
 494         t = t->next;
 495     }
 496 }
 497
 498 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
 499                   char* buf, int len, int* errLine)
 500 {
 501     naRef codeObj;
 502     struct Token* t;
 503     struct Parser p;
 504
 505     // Protect from garbage collection
 506     naTempSave(c, srcFile);
 507
 508     // Catch parser errors here.
 509     *errLine = 0;
 510     if(setjmp(p.jumpHandle)) {
 511         c->error = p.err;
 512         *errLine = p.errLine;
 513         return naNil();
 514     }
 515
 516     naParseInit(&p);
 517     p.context = c;
 518     p.srcFile = srcFile;
 519     p.firstLine = firstLine;
 520     p.buf = buf;
 521     p.len = len;
 522
 523     // Lexify, match brace structure, fixup if/for/etc...
 524     naLex(&p);
 525     braceMatch(&p, p.tree.children);
 526     fixBlockStructure(&p, p.tree.children);
 527
 528     // Recursively run the precedence parser, and fixup the treetop
 529     t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
 530     t->prev = t->next = 0;
 531     p.tree.children = t;
 532     p.tree.lastChild = t;
 533
 534     // Generate code!
 535     codeObj = naCodeGen(&p, &(p.tree), 0);
 536
 537     // Clean up our mess
 538     naParseDestroy(&p);
 539     naTempSave(c, codeObj);
 540
 541     return codeObj;
 542 }
 543
 544