#include "parse.h"
// Static table of recognized lexemes in the language
// Each entry pairs the literal source spelling of a lexeme with the
// token type constant it corresponds to.
-struct Lexeme {
+static const struct Lexeme {
// Source text of the lexeme (keyword or operator spelling).
char* str;
// Token type constant (TOK_*) associated with that spelling.
int tok;
} LEXEMES[] = {
{"and", TOK_AND},
{"or", TOK_OR},
{"!", TOK_NOT},
+ {"&", TOK_BIT_AND},
+ {"|", TOK_BIT_OR},
+ {"^", TOK_BIT_XOR},
{"(", TOK_LPAR},
{")", TOK_RPAR},
{"[", TOK_LBRA},
{"*=", TOK_MULEQ},
{"/=", TOK_DIVEQ},
{"~=", TOK_CATEQ},
+ {"&=", TOK_BIT_ANDEQ},
+ {"|=", TOK_BIT_OREQ},
+ {"^=", TOK_BIT_XOREQ},
{"forindex", TOK_FORINDEX},
};
tok->str = str;
tok->strlen = slen;
tok->num = num;
- tok->parent = &p->tree;
tok->next = 0;
tok->prev = last;
tok->children = 0;
tok->lastChild = 0;
-
- // Context sensitivity hack: a "-" following a binary operator of
+ tok->rule = 0;
+
+ // Context sensitivity hack: a "-" or "~" following a binary operator of
// equal or higher precedence must be a unary negation. Needed to
// get precedence right in the parser for expressions like "a * -2"
- if(type == TOK_MINUS && tok->prev) {
+ if((type == TOK_MINUS || type == TOK_CAT) && tok->prev) {
int pt = tok->prev->type;
- if(pt==TOK_PLUS||pt==TOK_MINUS||pt==TOK_CAT||pt==TOK_MUL||pt==TOK_DIV)
- tok->type = type = TOK_NEG;
+ if( pt==TOK_PLUS||pt==TOK_MINUS||pt==TOK_CAT||pt==TOK_MUL||pt==TOK_DIV
+ || pt==TOK_BIT_AND||pt==TOK_BIT_OR||pt==TOK_BIT_XOR )
+ tok->type = type = (type == TOK_MINUS ? TOK_NEG : TOK_BIT_NEG);
}
if(!p->tree.children) p->tree.children = tok;
}
// Ditto, but more complicated for double quotes.
+/* FIXME: need to handle \b (8), \f (12), and \uXXXX for JSON compliance */
static void dqEscape(char* buf, int len, int index, struct Parser* p,
char* cOut, int* eatenOut)
{
return i+1;
}
// Lexes the digits of an integer literal in the given base, starting at
// p->buf[index].  lexNumLiteral calls this with index already advanced
// past the two-character "0x" / "0o" prefix.  Emits one TOK_LITERAL token
// and returns the index of the first character that was not consumed.
-static int lexHexLiteral(struct Parser* p, int index)
+static int lexIntLiteral(struct Parser* p, int index, int base)
{
int nib, i = index;
// The value is accumulated as a double, not as an integer type.
double d = 0;
// Stop at end of buffer, when hex() reports a negative value, or when
// the digit value is not below `base`.
// NOTE(review): hex() is defined elsewhere; presumably it returns the
// digit value 0..15 and a negative number for non-hex characters.
- while(i < p->len && (nib = hex(p->buf[i])) >= 0) {
- d = d*16 + nib;
+ while(i < p->len && (nib = hex(p->buf[i])) >= 0 && nib < base) {
+ d = d * base + nib;
i++;
}
// The token is recorded at `index` (the first digit, as passed in)
// with the accumulated value as its number.
newToken(p, index, TOK_LITERAL, 0, 0, d);
return i;
}
+#define ISNUM(c) ((c) >= '0' && (c) <= '9')
+#define ISHEX(c) (ISNUM(c) || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
+#define NUMSTART(c) (ISNUM(c) || (c) == '+' || (c) == '-')
static int lexNumLiteral(struct Parser* p, int index)
{
int len = p->len, i = index;
unsigned char* buf = (unsigned char*)p->buf;
double d;
- if(i+1<len && buf[i+1] == 'x') return lexHexLiteral(p, index+2);
+ if( buf[i] == '0' && i + 2 < len ) {
+ if( buf[i+1] == 'x' && ISHEX(buf[i+2]) )
+ return lexIntLiteral(p, index+2, 16);
+ if( buf[i+1] == 'o' && ISNUM(buf[i+2]) )
+ return lexIntLiteral(p, index+2, 8);
+ }
- while(i<len && buf[i] >= '0' && buf[i] <= '9') i++;
+ while(i<len && ISNUM(buf[i])) i++;
if(i<len && buf[i] == '.') {
i++;
- while(i<len && buf[i] >= '0' && buf[i] <= '9') i++;
+ while(i<len && ISNUM(buf[i])) i++;
}
- if(i<len && (buf[i] == 'e' || buf[i] == 'E')) {
+ if(i+1<len && (buf[i] == 'e' || buf[i] == 'E') && NUMSTART(buf[i+1])) {
i++;
- if(i<len
- && (buf[i] == '-' || buf[i] == '+')
- && (i+1<len && buf[i+1] >= '0' && buf[i+1] <= '9')) i++;
- while(i<len && buf[i] >= '0' && buf[i] <= '9') i++;
+ if(buf[i] == '-' || buf[i] == '+') i++;
+ while(i<len && ISNUM(buf[i])) i++;
}
naStr_parsenum(p->buf + index, i - index, &d);
newToken(p, index, TOK_LITERAL, 0, 0, d);
return best;
}
-#define ISNUM(c) ((c) >= '0' && (c) <= '9')
void naLex(struct Parser* p)
{
int i = 0;