X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fnasal%2Flex.c;h=89c3c407b28c578181f07c4e460c684ea8fe591d;hb=bcb320b537b6f7e5e3724e8a30d309322171eb43;hp=c8b42b09519526c9b325fc087f2e683c43af632c;hpb=65056bfa72c5fc391c89a829bcd7bb2759284ce5;p=simgear.git diff --git a/simgear/nasal/lex.c b/simgear/nasal/lex.c index c8b42b09..89c3c407 100644 --- a/simgear/nasal/lex.c +++ b/simgear/nasal/lex.c @@ -1,7 +1,7 @@ #include "parse.h" // Static table of recognized lexemes in the language -struct Lexeme { +static const struct Lexeme { char* str; int tok; } LEXEMES[] = { @@ -109,7 +109,20 @@ static int lineEnd(struct Parser* p, int line) static void newToken(struct Parser* p, int pos, int type, char* str, int slen, double num) { - struct Token* tok; + struct Token *tok, *last = p->tree.lastChild; + + /* Adjacent string literals get concatenated */ + if(type == TOK_LITERAL && str) { + if(last && last->type == TOK_LITERAL) { + int i, len1 = last->strlen; + char* str2 = naParseAlloc(p, len1 + slen); + for(i=0; istr[i]; + for(i=0; istr = str2; + last->strlen += slen; + return; + } + } tok = naParseAlloc(p, sizeof(struct Token)); tok->type = type; @@ -117,33 +130,38 @@ static void newToken(struct Parser* p, int pos, int type, tok->str = str; tok->strlen = slen; tok->num = num; - tok->parent = &p->tree; tok->next = 0; - tok->prev = p->tree.lastChild; + tok->prev = last; tok->children = 0; tok->lastChild = 0; // Context sensitivity hack: a "-" following a binary operator of - // higher precedence (MUL and DIV, basically) must be a unary - // negation. Needed to get precedence right in the parser for - // expressiong like "a * -2" - if(type == TOK_MINUS && tok->prev) - if(tok->prev->type == TOK_MUL || tok->prev->type == TOK_DIV) + // equal or higher precedence must be a unary negation. Needed to + // get precedence right in the parser for expressiong like "a * -2" + if(type == TOK_MINUS && tok->prev) { + int pt = tok->prev->type; + if(pt==TOK_PLUS||pt==TOK_MINUS||pt==TOK_CAT||pt==TOK_MUL||pt==TOK_DIV) tok->type = type = TOK_NEG; + } if(!p->tree.children) p->tree.children = tok; if(p->tree.lastChild) p->tree.lastChild->next = tok; p->tree.lastChild = tok; } -// Parse a hex nibble -static int hexc(char c, struct Parser* p, int index) +static int hex(char c) { if(c >= '0' && c <= '9') return c - '0'; if(c >= 'A' && c <= 'F') return c - 'A' + 10; if(c >= 'a' && c <= 'f') return c - 'a' + 10; - error(p, "bad hex constant", index); - return 0; + return -1; +} + +static int hexc(char c, struct Parser* p, int index) +{ + int n = hex(c); + if(n < 0) error(p, "bad hex constant", index); + return n; } // Escape and returns a single backslashed expression in a single @@ -163,6 +181,7 @@ static void sqEscape(char* buf, int len, int index, struct Parser* p, } // Ditto, but more complicated for double quotes. +/* FIXME: need to handle \b (8), \f (12), and \uXXXX for JSON compliance */ static void dqEscape(char* buf, int len, int index, struct Parser* p, char* cOut, int* eatenOut) { @@ -174,6 +193,7 @@ static void dqEscape(char* buf, int len, int index, struct Parser* p, case 'n': *cOut = '\n'; break; case 't': *cOut = '\t'; break; case '\\': *cOut = '\\'; break; + case '`': *cOut = '`'; break; case 'x': if(len < 4) error(p, "unterminated string", index); *cOut = (char)((hexc(buf[2], p, index)<<4) | hexc(buf[3], p, index)); @@ -186,13 +206,20 @@ static void dqEscape(char* buf, int len, int index, struct Parser* p, } } +static void charLiteral(struct Parser* p, int index, char* s, int len) +{ + int n, c; + c = naLexUtf8C(s, len, &n); + if(c < 0 || n != len) error(p, "invalid utf8 character constant", index); + newToken(p, index, TOK_LITERAL, 0, 0, c); +} + // Read in a string literal -static int lexStringLiteral(struct Parser* p, int index, int singleQuote) +static int lexStringLiteral(struct Parser* p, int index, char q) { int i, j, len, iteration; char* out = 0; char* buf = p->buf; - char endMark = singleQuote ? '\'' : '"'; for(iteration = 0; iteration<2; iteration++) { i = index+1; @@ -200,11 +227,10 @@ static int lexStringLiteral(struct Parser* p, int index, int singleQuote) while(i < p->len) { char c = buf[i]; int eaten = 1; - if(c == endMark) - break; + if(c == q) break; if(c == '\\') { - if(singleQuote) sqEscape(buf+i, p->len-i, i, p, &c, &eaten); - else dqEscape(buf+i, p->len-i, i, p, &c, &eaten); + if(q == '\'') sqEscape(buf+i, p->len-i, i, p, &c, &eaten); + else dqEscape(buf+i, p->len-i, i, p, &c, &eaten); } if(iteration == 1) out[j++] = c; i += eaten; @@ -213,27 +239,44 @@ static int lexStringLiteral(struct Parser* p, int index, int singleQuote) // Finished stage one -- allocate the buffer for stage two if(iteration == 0) out = naParseAlloc(p, len); } - newToken(p, index, TOK_LITERAL, out, len, 0); + if(q == '`') charLiteral(p, index, out, len); + else newToken(p, index, TOK_LITERAL, out, len, 0); return i+1; } +static int lexHexLiteral(struct Parser* p, int index) +{ + int nib, i = index; + double d = 0; + while(i < p->len && (nib = hex(p->buf[i])) >= 0) { + d = d*16 + nib; + i++; + } + newToken(p, index, TOK_LITERAL, 0, 0, d); + return i; +} + +#define ISNUM(c) ((c) >= '0' && (c) <= '9') +#define ISHEX(c) (ISNUM(c) || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) +#define NUMSTART(c) (ISNUM(c) || (c) == '+' || (c) == '-') static int lexNumLiteral(struct Parser* p, int index) { int len = p->len, i = index; - unsigned char* buf = p->buf; + unsigned char* buf = (unsigned char*)p->buf; double d; - while(i= '0' && buf[i] <= '9') i++; + if(buf[i] == '0' && i+2= '0' && buf[i] <= '9') i++; + while(i= '0' && buf[i+1] <= '9')) i++; - while(i= '0' && buf[i] <= '9') i++; + if(buf[i] == '-' || buf[i] == '+') i++; + while(ibuf + index, i - index, &d); newToken(p, index, TOK_LITERAL, 0, 0, d); @@ -309,12 +352,13 @@ void naLex(struct Parser* p) case '#': i = lineEnd(p, getLine(p, i)); break; - case '\'': case '"': - i = lexStringLiteral(p, i, (c=='"' ? 0 : 1)); + case '\'': case '"': case '`': + i = lexStringLiteral(p, i, c); break; default: - if(c >= '0' && c <= '9') i = lexNumLiteral(p, i); - else handled = 0; + if(ISNUM(c) || (c == '.' && (i+1)len && ISNUM(p->buf[i+1]))) + i = lexNumLiteral(p, i); + else handled = 0; } // Lexemes and symbols are a little more complicated. Pick @@ -324,7 +368,7 @@ void naLex(struct Parser* p) // symbol (e.g. "orchid"). If neither match, we have a bad // character in the mix. if(!handled) { - int symlen=0, lexlen=0, lexeme; + int symlen=0, lexlen=0, lexeme=-1; lexlen = tryLexemes(p, i, &lexeme); if((c>='A' && c<='Z') || (c>='a' && c<='z') || (c=='_')) symlen = trySymbol(p, i);