simgear/nasal/parse.c

   1 #include <setjmp.h>
   2
   3 #include "parse.h"
   4
   5 // Static precedence table, from low (loose binding, do first) to high
   6 // (tight binding, do last).
   7 enum { PREC_BINARY, PREC_REVERSE, PREC_PREFIX, PREC_SUFFIX };
   8
   9 #define MAX_PREC_TOKS 5
  10 struct precedence {
  11     int toks[MAX_PREC_TOKS];
  12     int rule;
  13 } PRECEDENCE[] = {
  14     { { TOK_SEMI, TOK_COMMA },                 PREC_REVERSE },
  15     { { TOK_COLON },                           PREC_BINARY },
  16     { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX  },
  17     { { TOK_ASSIGN },                          PREC_REVERSE },
  18     { { TOK_OR },                              PREC_BINARY  },
  19     { { TOK_AND },                             PREC_BINARY  },
  20     { { TOK_EQ, TOK_NEQ },                     PREC_BINARY  },
  21     { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE },    PREC_BINARY  },
  22     { { TOK_PLUS, TOK_MINUS, TOK_CAT },        PREC_REVERSE  },
  23     { { TOK_MUL, TOK_DIV },                    PREC_BINARY  },
  24     { { TOK_MINUS, TOK_NEG, TOK_NOT },         PREC_PREFIX  },
  25     { { TOK_LPAR, TOK_LBRA },                  PREC_SUFFIX  },
  26     { { TOK_DOT },                             PREC_BINARY  },
  27 };
  28 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
  29
  30 void naParseError(struct Parser* p, char* msg, int line)
  31 {
  32     p->err = msg;
  33     p->errLine = line;
  34     longjmp(p->jumpHandle, 1);
  35 }
  36
  37 // A "generic" (too obfuscated to describe) parser error
  38 static void oops(struct Parser* p, struct Token* t)
  39 {
  40     naParseError(p, "parse error", t->line);
  41 }
  42
  43 void naParseInit(struct Parser* p)
  44 {
  45     p->buf = 0;
  46     p->len = 0;
  47     p->lines = 0;
  48     p->nLines = 0;
  49     p->chunks = 0;
  50     p->chunkSizes = 0;
  51     p->nChunks = 0;
  52     p->leftInChunk = 0;
  53     p->cg = 0;
  54
  55     p->tree.type = TOK_TOP;
  56     p->tree.line = 1;
  57     p->tree.str = 0;
  58     p->tree.strlen = 0;
  59     p->tree.num = 0;
  60     p->tree.next = 0;
  61     p->tree.prev = 0;
  62     p->tree.children = 0;
  63     p->tree.lastChild = 0;
  64 }
  65
  66 void naParseDestroy(struct Parser* p)
  67 {
  68     int i;
  69     for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
  70     naFree(p->chunks);
  71     naFree(p->chunkSizes);
  72     p->buf = 0;
  73 }
  74
  75 void* naParseAlloc(struct Parser* p, int bytes)
  76 {
  77     char* result;
  78
  79     // Round up to 8 byte chunks for alignment
  80     if(bytes & 0x7) bytes = ((bytes>>3) + 1) << 3;
  81
  82     // Need a new chunk?
  83     if(p->leftInChunk < bytes) {
  84         void* newChunk;
  85         void** newChunks;
  86         int* newChunkSizes;
  87         int sz, i;
  88
  89         sz = p->len;
  90         if(sz < bytes) sz = bytes;
  91         newChunk = naAlloc(sz);
  92
  93         p->nChunks++;
  94
  95         newChunks = naAlloc(p->nChunks * sizeof(void*));
  96         for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
  97         newChunks[0] = newChunk;
  98         naFree(p->chunks);
  99         p->chunks = newChunks;
 100
 101         newChunkSizes = naAlloc(p->nChunks * sizeof(int));
 102         for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
 103         newChunkSizes[0] = sz;
 104         naFree(p->chunkSizes);
 105         p->chunkSizes = newChunkSizes;
 106
 107         p->leftInChunk = sz;
 108     }
 109
 110     result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
 111     p->leftInChunk -= bytes;
 112     return (void*)result;
 113 }
 114
 115 // Remove the child from the list where it exists, and insert it at
 116 // the end of the parents child list.
 117 static void addNewChild(struct Token* p, struct Token* c)
 118 {
 119     if(c->prev) c->prev->next = c->next;
 120     if(c->next) c->next->prev = c->prev;
 121     if(c == c->parent->children)
 122         c->parent->children = c->next;
 123     if(c == c->parent->lastChild)
 124         c->parent->lastChild = c->prev;
 125     c->parent = p;
 126     c->next = 0;
 127     c->prev = p->lastChild;
 128     if(p->lastChild) p->lastChild->next = c;
 129     if(!p->children) p->children = c;
 130     p->lastChild = c;
 131 }
 132
 133 // Follows the token list from start (which must be a left brace of
 134 // some type), placing all tokens found into start's child list until
 135 // it reaches the matching close brace.
 136 static void collectBrace(struct Parser* p, struct Token* start)
 137 {
 138     struct Token* t;
 139     int closer = -1;
 140     if(start->type == TOK_LPAR)  closer = TOK_RPAR;
 141     if(start->type == TOK_LBRA)  closer = TOK_RBRA;
 142     if(start->type == TOK_LCURL) closer = TOK_RCURL;
 143
 144     t = start->next;
 145     while(t) {
 146         struct Token* next;
 147         switch(t->type) {
 148         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 149             collectBrace(p, t);
 150             break;
 151         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 152             if(t->type != closer)
 153                 naParseError(p, "mismatched closing brace", t->line);
 154
 155             // Drop this node on the floor, stitch up the list and return
 156             if(start->parent->lastChild == t)
 157                 start->parent->lastChild = t->prev;
 158             start->next = t->next;
 159             if(t->next) t->next->prev = start;
 160             return;
 161         }
 162         // Snip t out of the existing list, and append it to start's
 163         // children.
 164         next = t->next;
 165         addNewChild(start, t);
 166         t = next;
 167     }
 168     naParseError(p, "unterminated brace", start->line);
 169 }
 170
 171 // Recursively find the contents of all matching brace pairs in the
 172 // token list and turn them into children of the left token.  The
 173 // right token disappears.
 174 static void braceMatch(struct Parser* p, struct Token* start)
 175 {
 176     struct Token* t = start;
 177     while(t) {
 178         switch(t->type) {
 179         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 180             collectBrace(p, t);
 181             break;
 182         case TOK_RPAR: case TOK_RBRA: case TOK_RCURL:
 183             if(start->type != TOK_LBRA)
 184                 naParseError(p, "stray closing brace", t->line);
 185             break;
 186         }
 187         t = t->next;
 188     }
 189 }
 190
 191 // Allocate and return an "empty" token as a parsing placeholder.
 192 static struct Token* emptyToken(struct Parser* p)
 193 {
 194     struct Token* t = naParseAlloc(p, sizeof(struct Token));
 195     t->type = TOK_EMPTY;
 196     t->line = -1;
 197     t->strlen = 0;
 198     t->num = 0;
 199     t->str = 0;
 200     t->next = t->prev = t->children = t->lastChild = 0;
 201     t->parent = 0;
 202     return t;
 203 }
 204
 205 // Fixes up parenting for obvious parsing situations, like code blocks
 206 // being the child of a func keyword, etc...
 207 static void fixBlockStructure(struct Parser* p, struct Token* start)
 208 {
 209     struct Token *t, *c;
 210     t = start;
 211     while(t) {
 212         switch(t->type) {
 213         case TOK_ELSE: case TOK_FUNC:
 214             // These guys precede a single curly block
 215             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 216             c = t->next;
 217             addNewChild(t, c);
 218             fixBlockStructure(p, c);
 219             break;
 220         case TOK_FOR: case TOK_FOREACH: case TOK_WHILE:
 221         case TOK_IF: case TOK_ELSIF:
 222             // Expect a paren and then a curly
 223             if(!t->next || t->next->type != TOK_LPAR) oops(p, t);
 224             c = t->next;
 225             addNewChild(t, c);
 226             fixBlockStructure(p, c);
 227
 228             if(!t->next || t->next->type != TOK_LCURL) oops(p, t);
 229             c = t->next;
 230             addNewChild(t, c);
 231             fixBlockStructure(p, c);
 232             break;
 233         case TOK_LPAR: case TOK_LBRA: case TOK_LCURL:
 234             fixBlockStructure(p, t->children);
 235             break;
 236         }
 237         t = t->next;
 238     }
 239
 240     // Another pass to hook up the elsif/else chains.
 241     t = start;
 242     while(t) {
 243         if(t->type == TOK_IF) {
 244             while(t->next && t->next->type == TOK_ELSIF)
 245                 addNewChild(t, t->next);
 246             if(t->next && t->next->type == TOK_ELSE)
 247                 addNewChild(t, t->next);
 248         }
 249         t = t->next;
 250     }
 251
 252     // And a final one to add semicolons.  Always add one after
 253     // for/foreach/while expressions.  Add one after a function lambda
 254     // if it immediately follows an assignment, and add one after an
 255     // if/elsif/else if it is the first token in an expression list
 256     // (i.e has no previous token, or is preceded by a ';' or '{').
 257     // This mimicks common usage and avoids a conspicuous difference
 258     // between this grammar and more common languages.  It can be
 259     // "escaped" with extra parenthesis if necessary, e.g.:
 260     //      a = (func { join(" ", arg) })(1, 2, 3, 4);
 261     t = start;
 262     while(t) {
 263         int addSemi = 0;
 264         switch(t->type) {
 265         case TOK_IF:
 266             if(!t->prev
 267                || t->prev->type == TOK_SEMI
 268                || t->prev->type == TOK_LCURL)
 269                 addSemi = 1;
 270             break;
 271         case TOK_FOR: case TOK_FOREACH: case TOK_WHILE:
 272             addSemi = 1;
 273             break;
 274         case TOK_FUNC:
 275             if(t->prev && t->prev->type == TOK_ASSIGN)
 276                 addSemi = 1;
 277             break;
 278         }
 279         if(addSemi) {
 280             struct Token* semi = emptyToken(p);
 281             semi->type = TOK_SEMI;
 282             semi->line = t->line;
 283             semi->next = t->next;
 284             semi->prev = t;
 285             semi->parent = t->parent;
 286             if(semi->next) semi->next->prev = semi;
 287             t->next = semi;
 288             t = semi; // don't bother checking the new one
 289         }
 290         t = t->next;
 291     }
 292
 293 }
 294
 295 // True if the token's type exists in the precedence level.
 296 static int tokInLevel(struct Token* tok, int level)
 297 {
 298     int i;
 299     for(i=0; i<MAX_PREC_TOKS; i++)
 300         if(PRECEDENCE[level].toks[i] == tok->type)
 301             return 1;
 302     return 0;
 303 }
 304
 305 static int isBrace(int type)
 306 {
 307     return type == TOK_LPAR || type == TOK_LBRA || type == TOK_LCURL;
 308 }
 309
 310 static int isBlock(int t)
 311 {
 312     return t == TOK_IF  || t == TOK_ELSIF   || t == TOK_ELSE
 313         || t == TOK_FOR || t == TOK_FOREACH || t == TOK_WHILE
 314         || t == TOK_FUNC;
 315 }
 316
 317 static void precChildren(struct Parser* p, struct Token* t);
 318 static void precBlock(struct Parser* p, struct Token* t);
 319
 320 static struct Token* parsePrecedence(struct Parser* p,
 321                                      struct Token* start, struct Token* end,
 322                                      int level)
 323 {
 324     int rule;
 325     struct Token *t, *top, *left, *right;
 326     struct Token *a, *b, *c, *d; // temporaries
 327
 328     // This is an error.  No "siblings" are allowed at the bottom level.
 329     if(level >= PRECEDENCE_LEVELS && start != end)
 330         oops(p, start);
 331
 332     // Synthesize an empty token if necessary
 333     if(end == 0 && start == 0)
 334         return emptyToken(p);
 335
 336     // Sanify the list.  This is OK, since we're recursing into the
 337     // list structure; stuff to the left and right has already been
 338     // handled somewhere above.
 339     if(end == 0) end = start;
 340     if(start == 0) start = end;
 341     if(start->prev) start->prev->next = 0;
 342     if(end->next) end->next->prev = 0;
 343     start->prev = end->next = 0;
 344
 345     // Single tokens parse as themselves.  Recurse into braces, and
 346     // parse children of block structure.
 347     if(start == end) {
 348         if(isBrace(start->type)) {
 349             precChildren(p, start);
 350         } else if(isBlock(start->type)) {
 351             precBlock(p, start);
 352         }
 353         return start;
 354     }
 355
 356     // A context-sensitivity: we want to parse ';' and ',' as binary
 357     // operators, but want them to be legal at the beginning and end
 358     // of a list (unlike, say, '+' where we want a parse error).
 359     // Generate empties as necessary.
 360     if(start->type == TOK_SEMI || start->type == TOK_COMMA) {
 361         t = emptyToken(p);
 362         start->prev = t;
 363         t->next = start;
 364         start = t;
 365     }
 366     if(end->type == TOK_SEMI || end->type == TOK_COMMA) {
 367         t = emptyToken(p);
 368         end->next = t;
 369         t->prev = end;
 370         end = t;
 371     }
 372
 373     // Another one: the "." and (postfix) "[]/()" operators should
 374     // really be the same precendence level, but the existing
 375     // implementation doesn't allow for it.  Bump us up a level if we
 376     // are parsing for DOT but find a LPAR/LBRA at the end of the
 377     // list.
 378     if(PRECEDENCE[level].toks[0] == TOK_DOT)
 379         if(end->type == TOK_LPAR || end->type == TOK_LBRA)
 380             level--;
 381
 382     top = left = right = 0;
 383     rule = PRECEDENCE[level].rule;
 384     switch(rule) {
 385     case PREC_PREFIX:
 386         if(tokInLevel(start, level) && start->next) {
 387             a = start->children;
 388             b = start->lastChild;
 389             c = start->next;
 390             d = end;
 391             top = start;
 392             if(a) left = parsePrecedence(p, a, b, 0);
 393             right = parsePrecedence(p, c, d, level);
 394         }
 395         break;
 396     case PREC_SUFFIX:
 397         if(tokInLevel(end, level) && end->prev) {
 398             a = start;
 399             b = end->prev;
 400             c = end->children;
 401             d = end->lastChild;
 402             top = end;
 403             left = parsePrecedence(p, a, b, level);
 404             if(c) right = parsePrecedence(p, c, d, 0);
 405         }
 406         break;
 407     case PREC_BINARY:
 408         t = end->prev;
 409         while(t->prev) {
 410             if(tokInLevel(t, level)) {
 411                 a = t->prev ? start : 0;
 412                 b = t->prev;
 413                 c = t->next;
 414                 d = t->next ? end : 0;
 415                 top = t;
 416                 left = parsePrecedence(p, a, b, level);
 417                 right = parsePrecedence(p, c, d, level+1);
 418                 break;
 419             }
 420             t = t->prev;
 421         }
 422         break;
 423     case PREC_REVERSE:
 424         t = start->next;
 425         while(t->next) {
 426             if(tokInLevel(t, level)) {
 427                 a = t->prev ? start : 0;
 428                 b = t->prev;
 429                 c = t->next;
 430                 d = t->next ? end : 0;
 431                 top = t;
 432                 left = parsePrecedence(p, a, b, level+1);
 433                 right = parsePrecedence(p, c, d, level);
 434                 break;
 435             }
 436             t = t->next;
 437         }
 438         break;
 439     }
 440
 441     // Found nothing, try the next level
 442     if(!top)
 443         return parsePrecedence(p, start, end, level+1);
 444
 445     if(left) {
 446         left->next = right;
 447         left->prev = 0;
 448         left->parent = top;
 449     }
 450     top->children = left;
 451
 452     if(right) {
 453         right->next = 0;
 454         right->prev = left;
 455         right->parent = top;
 456     }
 457     top->lastChild = right;
 458
 459     top->next = top->prev = 0;
 460     return top;
 461 }
 462
 463 static void precChildren(struct Parser* p, struct Token* t)
 464 {
 465     struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
 466     t->children = top;
 467     t->lastChild = top;
 468 }
 469
 470 // Run a "block structure" node (if/elsif/else/for/while/foreach)
 471 // through the precedence parser.  The funny child structure makes
 472 // this a little more complicated than it should be.
 473 static void precBlock(struct Parser* p, struct Token* block)
 474 {
 475     struct Token* t = block->children;
 476     while(t) {
 477         if(isBrace(t->type))
 478             precChildren(p, t);
 479         else if(isBlock(t->type))
 480             precBlock(p, t);
 481         t = t->next;
 482     }
 483 }
 484
 485 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
 486                   char* buf, int len, int* errLine)
 487 {
 488     naRef codeObj;
 489     struct Token* t;
 490     struct Parser p;
 491
 492     // Protect from garbage collection
 493     naVec_append(c->temps, srcFile);
 494
 495     // Catch parser errors here.
 496     *errLine = 0;
 497     if(setjmp(p.jumpHandle)) {
 498         c->error = p.err;
 499         *errLine = p.errLine;
 500         return naNil();
 501     }
 502
 503     naParseInit(&p);
 504     p.context = c;
 505     p.srcFile = srcFile;
 506     p.firstLine = firstLine;
 507     p.buf = buf;
 508     p.len = len;
 509
 510     // Lexify, match brace structure, fixup if/for/etc...
 511     naLex(&p);
 512     braceMatch(&p, p.tree.children);
 513     fixBlockStructure(&p, p.tree.children);
 514
 515     // Recursively run the precedence parser, and fixup the treetop
 516     t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
 517     t->prev = t->next = 0;
 518     p.tree.children = t;
 519     p.tree.lastChild = t;
 520
 521     // Generate code!
 522     codeObj = naCodeGen(&p, &(p.tree));
 523
 524     // Clean up our mess
 525     naParseDestroy(&p);
 526     naVec_append(c->temps, codeObj);
 527
 528     return codeObj;
 529 }
 530
 531