simgear/nasal/parse.c

   1 #include <setjmp.h>
   2 #include <string.h>
   3
   4 #include "parse.h"
   5
   6 // Static precedence table, from low (loose binding, do first) to high
   7 // (tight binding, do last).
   8 #define MAX_PREC_TOKS 6
   9 static const struct precedence {
  10     int toks[MAX_PREC_TOKS];
  11     int rule;
  12 } PRECEDENCE[] = {
  13     { { TOK_SEMI, TOK_COMMA },                 PREC_REVERSE },
  14     { { TOK_ELLIPSIS },                        PREC_SUFFIX  },
  15     { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX  },
  16     { { TOK_ASSIGN, TOK_PLUSEQ, TOK_MINUSEQ,
  17         TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ     }, PREC_REVERSE },
  18     { { TOK_COLON, TOK_QUESTION },             PREC_REVERSE },
  19     { { TOK_VAR },                             PREC_PREFIX  },
  20     { { TOK_OR },                              PREC_BINARY  },
  21     { { TOK_AND },                             PREC_BINARY  },
  22     { { TOK_EQ, TOK_NEQ },                     PREC_BINARY  },
  23     { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE },    PREC_BINARY  },
  24     { { TOK_PLUS, TOK_MINUS, TOK_CAT },        PREC_BINARY  },
  25     { { TOK_MUL, TOK_DIV },                    PREC_BINARY  },
  26     { { TOK_MINUS, TOK_NEG, TOK_NOT },         PREC_PREFIX  },
  27     { { TOK_LPAR, TOK_LBRA },                  PREC_SUFFIX  },
  28     { { TOK_DOT },                             PREC_BINARY  },
  29 };
  30 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
  31
  32 void naParseError(struct Parser* p, char* msg, int line)
  33 {
  34     if(line > 0) p->errLine = line;
  35     p->err = msg;
  36     longjmp(p->jumpHandle, 1);
  37 }
  38
  39 static void oops(struct Parser* p) { naParseError(p, "parse error", -1); }
  40
  41 void naParseInit(struct Parser* p)
  42 {
  43     memset(p, 0, sizeof(*p));
  44     p->tree.type = TOK_TOP;
  45     p->tree.line = 1;
  46 }
  47
  48 void naParseDestroy(struct Parser* p)
  49 {
  50     int i;
  51     for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
  52     naFree(p->chunks);
  53     naFree(p->chunkSizes);
  54     p->buf = 0;
  55 }
  56
  57 void* naParseAlloc(struct Parser* p, int bytes)
  58 {
  59     char* result;
  60     bytes = (bytes+7) & (~7); // Round up to 8 byte chunks for alignment
  61
  62     if(p->leftInChunk < bytes) {
  63         void* newChunk;
  64         void** newChunks;
  65         int* newChunkSizes;
  66         int sz, i;
  67
  68         sz = p->len;
  69         if(sz < bytes) sz = bytes;
  70         newChunk = naAlloc(sz);
  71
  72         p->nChunks++;
  73
  74         newChunks = naAlloc(p->nChunks * sizeof(void*));
  75         for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
  76         newChunks[0] = newChunk;
  77         naFree(p->chunks);
  78         p->chunks = newChunks;
  79
  80         newChunkSizes = naAlloc(p->nChunks * sizeof(int));
  81         for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
  82         newChunkSizes[0] = sz;
  83         naFree(p->chunkSizes);
  84         p->chunkSizes = newChunkSizes;
  85
  86         p->leftInChunk = sz;
  87     }
  88
  89     result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
  90     p->leftInChunk -= bytes;
  91     return result;
  92 }
  93
  94 static void addChild(struct Token *par, struct Token *ch)
  95 {
  96     if(par->lastChild) {
  97         ch->prev = par->lastChild;
  98         par->lastChild->next = ch;
  99     } else
 100         par->children = ch;
 101     par->lastChild = ch;
 102 }
 103
 104 static int endBrace(int tok)
 105 {
 106     if(tok == TOK_LBRA) return TOK_RBRA;
 107     if(tok == TOK_LPAR) return TOK_RPAR;
 108     if(tok == TOK_LCURL) return TOK_RCURL;
 109     return -1;
 110 }
 111
 112 static int isOpenBrace(int t)
 113 {
 114     return t==TOK_LPAR || t==TOK_LBRA || t==TOK_LCURL;
 115 }
 116
 117 static int isLoopoid(int t)
 118 {
 119     return t==TOK_FOR || t==TOK_FOREACH || t==TOK_WHILE || t==TOK_FORINDEX;
 120 }
 121
 122 static int isBlockoid(int t)
 123 {
 124     return isLoopoid(t)||t==TOK_IF||t==TOK_ELSIF||t==TOK_ELSE||t==TOK_FUNC;
 125 }
 126
 127 /* Yes, a bare else or elsif ends a block; it means we've reached the
 128  * end of the previous if/elsif clause. */
 129 static int isBlockEnd(int t)
 130 {
 131     return t==TOK_RPAR||t==TOK_RBRA||t==TOK_RCURL||t==TOK_ELSIF||t==TOK_ELSE;
 132 }
 133
 134 /* To match C's grammar, "blockoid" expressions sometimes need
 135  * synthesized terminating semicolons to make them act like
 136  * "statements" in C.  Always add one after "loopoid"
 137  * (for/foreach/while) expressions.  Add one after a func if it
 138  * immediately follows an assignment, and add one after an
 139  * if/elsif/else if it is the first token in an expression list */
 140 static int needsSemi(struct Token* t, struct Token* next)
 141 {
 142     if(!next || next->type == TOK_SEMI || isBlockEnd(next->type)) return 0;
 143     if(t->type == TOK_IF)   return !t->prev || t->prev->type == TOK_SEMI;
 144     if(t->type == TOK_FUNC) return t->prev && t->prev->type == TOK_ASSIGN;
 145     if(isLoopoid(t->type))  return 1;
 146     return 0;
 147 }
 148
 149 static struct Token* newToken(struct Parser* p, int type)
 150 {
 151     struct Token* t = naParseAlloc(p, sizeof(struct Token));
 152     memset(t, 0, sizeof(*t));
 153     t->type = type;
 154     t->line = -1;
 155     return t;
 156 }
 157
 158 static struct Token* parseToken(struct Parser* p, struct Token** list);
 159
 160 static void parseBlock(struct Parser* p, struct Token *top,
 161                        int end, struct Token** list)
 162 {
 163     struct Token *t;
 164     while(*list) {
 165         if(isBlockEnd((*list)->type) && (*list)->type != end) break;
 166         if(end == TOK_SEMI && (*list)->type == TOK_COMMA) break;
 167         t = parseToken(p, list);
 168         if(t->type == end) return; /* drop end token on the floor */
 169         addChild(top, t);
 170         if(needsSemi(t, *list))
 171             addChild(top, newToken(p, TOK_SEMI));
 172     }
 173     /* Context dependency: end of block is a parse error UNLESS we're
 174      * looking for a statement terminator (a braceless block) or a -1
 175      * (the top level) */
 176     if(end != TOK_SEMI && end != -1) oops(p);
 177 }
 178
 179 static struct Token* parseToken(struct Parser* p, struct Token** list)
 180 {
 181     struct Token *t = *list;
 182     *list = t->next;
 183     if(t->next) t->next->prev = 0;
 184     t->next = t->prev = 0;
 185     p->errLine = t->line;
 186
 187     if(!t) return 0;
 188     if(isOpenBrace(t->type)) {
 189         parseBlock(p, t, endBrace(t->type), list);
 190     } else if(isBlockoid(t->type)) {
 191         /* Read an optional paren expression */
 192         if(!*list) oops(p);
 193         if((*list)->type == TOK_LPAR)
 194             addChild(t, parseToken(p, list));
 195
 196         /* And the code block, which might be implicit/braceless */
 197         if(!*list) oops(p);
 198         if((*list)->type == TOK_LCURL) {
 199             addChild(t, parseToken(p, list));
 200         } else {
 201             /* Context dependency: if we're reading a braceless block,
 202              * and the first (!) token is itself a "blockoid"
 203              * expression, it is parsed alone, otherwise, read to the
 204              * terminating semicolon. */
 205             struct Token *blk = newToken(p, TOK_LCURL);
 206             if(isBlockoid((*list)->type)) addChild(blk, parseToken(p, list));
 207             else                          parseBlock(p, blk, TOK_SEMI, list);
 208             addChild(t, blk);
 209         }
 210
 211         /* Read the elsif/else chain */
 212         if(t->type == TOK_IF) {
 213             while(*list && ((*list)->type == TOK_ELSIF))
 214                 addChild(t, parseToken(p, list));
 215             if(*list && (*list)->type == TOK_ELSE)
 216                 addChild(t, parseToken(p, list));
 217         }
 218
 219         /* Finally, check for proper usage */
 220         if(t->type != TOK_FUNC) {
 221             if(t->type == TOK_ELSE && t->children->type != TOK_LCURL) oops(p);
 222             if(t->type != TOK_ELSE && t->children->type != TOK_LPAR) oops(p);
 223         }
 224     }
 225     return t;
 226 }
 227
 228 // True if the token's type exists in the precedence level.
 229 static int tokInLevel(struct Token* tok, int level)
 230 {
 231     int i;
 232     for(i=0; i<MAX_PREC_TOKS; i++)
 233         if(PRECEDENCE[level].toks[i] == tok->type)
 234             return 1;
 235     return 0;
 236 }
 237
 238 static struct Token* parsePrecedence(struct Parser* p, struct Token* start,
 239                                      struct Token* end, int level);
 240
 241 static void precChildren(struct Parser* p, struct Token* t)
 242 {
 243     struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
 244     t->children = top;
 245     t->lastChild = top;
 246 }
 247
 248 // Run a "block structure" node (if/elsif/else/for/while/foreach)
 249 // through the precedence parser.  The funny child structure makes
 250 // this a little more complicated than it should be.
 251 static void precBlock(struct Parser* p, struct Token* block)
 252 {
 253     struct Token* t = block->children;
 254     while(t) {
 255         if(isOpenBrace(t->type))
 256             precChildren(p, t);
 257         else if(isBlockoid(t->type))
 258             precBlock(p, t);
 259         t = t->next;
 260     }
 261 }
 262
 263 /* Binary tokens that get empties synthesized if one side is missing */
 264 static int oneSidedBinary(int t)
 265 { return t == TOK_SEMI || t ==  TOK_COMMA || t == TOK_COLON; }
 266
 267 static struct Token* parsePrecedence(struct Parser* p,
 268                                      struct Token* start, struct Token* end,
 269                                      int level)
 270 {
 271     int rule;
 272     struct Token *t, *top, *left, *right;
 273     struct Token *a, *b, *c, *d; // temporaries
 274
 275     // This is an error.  No "siblings" are allowed at the bottom level.
 276     if(level >= PRECEDENCE_LEVELS && start != end)
 277         naParseError(p, "parse error", start->line);
 278
 279     // Synthesize an empty token if necessary
 280     if(end == 0 && start == 0)
 281         return newToken(p, TOK_EMPTY);
 282
 283     // Sanify the list.  This is OK, since we're recursing into the
 284     // list structure; stuff to the left and right has already been
 285     // handled somewhere above.
 286     if(end == 0) end = start;
 287     if(start == 0) start = end;
 288     if(start->prev) start->prev->next = 0;
 289     if(end->next) end->next->prev = 0;
 290     start->prev = end->next = 0;
 291
 292     // Single tokens parse as themselves.  Recurse into braces, and
 293     // parse children of block structure.
 294     if(start == end) {
 295         if     (isOpenBrace(start->type)) precChildren(p, start);
 296         else if(isBlockoid(start->type))  precBlock(p, start);
 297         return start;
 298     }
 299
 300     if(oneSidedBinary(start->type)) {
 301         t = newToken(p, TOK_EMPTY);
 302         start->prev = t;
 303         t->next = start;
 304         start = t;
 305     }
 306     if(oneSidedBinary(end->type)) {
 307         t = newToken(p, TOK_EMPTY);
 308         end->next = t;
 309         t->prev = end;
 310         end = t;
 311     }
 312
 313     // Another one: the "." and (postfix) "[]/()" operators should
 314     // really be the same precendence level, but the existing
 315     // implementation doesn't allow for it.  Bump us up a level if we
 316     // are parsing for DOT but find a LPAR/LBRA at the end of the
 317     // list.
 318     if(PRECEDENCE[level].toks[0] == TOK_DOT)
 319         if(end->type == TOK_LPAR || end->type == TOK_LBRA)
 320             level--;
 321
 322     top = left = right = 0;
 323     rule = PRECEDENCE[level].rule;
 324     switch(rule) {
 325     case PREC_PREFIX:
 326         if(tokInLevel(start, level) && start->next) {
 327             a = start->children;
 328             b = start->lastChild;
 329             c = start->next;
 330             d = end;
 331             top = start;
 332             if(a) left = parsePrecedence(p, a, b, 0);
 333             right = parsePrecedence(p, c, d, level);
 334         }
 335         break;
 336     case PREC_SUFFIX:
 337         if(tokInLevel(end, level) && end->prev) {
 338             a = start;
 339             b = end->prev;
 340             c = end->children;
 341             d = end->lastChild;
 342             top = end;
 343             left = parsePrecedence(p, a, b, level);
 344             if(c) right = parsePrecedence(p, c, d, 0);
 345         }
 346         break;
 347     case PREC_BINARY:
 348         t = end->prev;
 349         while(t->prev) {
 350             if(tokInLevel(t, level)) {
 351                 a = t->prev ? start : 0;
 352                 b = t->prev;
 353                 c = t->next;
 354                 d = t->next ? end : 0;
 355                 top = t;
 356                 left = parsePrecedence(p, a, b, level);
 357                 right = parsePrecedence(p, c, d, level+1);
 358                 break;
 359             }
 360             t = t->prev;
 361         }
 362         break;
 363     case PREC_REVERSE:
 364         t = start->next;
 365         while(t->next) {
 366             if(tokInLevel(t, level)) {
 367                 a = t->prev ? start : 0;
 368                 b = t->prev;
 369                 c = t->next;
 370                 d = t->next ? end : 0;
 371                 top = t;
 372                 left = parsePrecedence(p, a, b, level+1);
 373                 right = parsePrecedence(p, c, d, level);
 374                 break;
 375             }
 376             t = t->next;
 377         }
 378         break;
 379     }
 380
 381     // Found nothing, try the next level
 382     if(!top)
 383         return parsePrecedence(p, start, end, level+1);
 384
 385     top->rule = rule;
 386
 387     if(left) {
 388         left->next = right;
 389         left->prev = 0;
 390     }
 391     top->children = left;
 392
 393     if(right) {
 394         right->next = 0;
 395         right->prev = left;
 396     }
 397     top->lastChild = right;
 398
 399     top->next = top->prev = 0;
 400     return top;
 401 }
 402
 403 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
 404                   char* buf, int len, int* errLine)
 405 {
 406     naRef codeObj;
 407     struct Token* t;
 408     struct Parser p;
 409
 410     // Protect from garbage collection
 411     naTempSave(c, srcFile);
 412
 413     naParseInit(&p);
 414
 415     // Catch parser errors here.
 416     p.errLine = *errLine = 1;
 417     if(setjmp(p.jumpHandle)) {
 418         strncpy(c->error, p.err, sizeof(c->error));
 419         *errLine = p.errLine;
 420         naParseDestroy(&p);
 421         return naNil();
 422     }
 423
 424     p.context = c;
 425     p.srcFile = srcFile;
 426     p.firstLine = firstLine;
 427     p.buf = buf;
 428     p.len = len;
 429
 430     // Lexify, match brace structure, fixup if/for/etc...
 431     naLex(&p);
 432
 433     // Run the block parser, make sure everything was eaten
 434     t = p.tree.children;
 435     p.tree.children = p.tree.lastChild = 0;
 436     parseBlock(&p, &p.tree, -1, &t);
 437     if(t) oops(&p);
 438
 439     // Recursively run the precedence parser, and fixup the treetop
 440     t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
 441     t->prev = t->next = 0;
 442     p.tree.children = t;
 443     p.tree.lastChild = t;
 444
 445     // Generate code
 446     codeObj = naCodeGen(&p, &(p.tree), 0);
 447
 448     // Clean up our mess
 449     naParseDestroy(&p);
 450     naTempSave(c, codeObj);
 451
 452     return codeObj;
 453 }