X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fnasal%2Fhash.c;h=3679a0ca7685134afc0f4e039019c095f14569e4;hb=e1abab393b69d052186d799fd6182369f4877214;hp=966cd95785e7e0b7d3ace0ceda5a0378ce1f6450;hpb=1786692406214447db12b9d5af5364582af23d3b;p=simgear.git diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c index 966cd957..3679a0ca 100644 --- a/simgear/nasal/hash.c +++ b/simgear/nasal/hash.c @@ -1,223 +1,272 @@ +#include #include "nasal.h" #include "data.h" -static void realloc(naRef hash) -{ - struct naHash* h = hash.ref.ptr.hash; - int i, sz, oldsz = h->size; - int oldcols = h->table ? 1 << h->lgalloced : 0; - - // Keep a handle to our original objects - struct HashNode* oldnodes = h->nodes; - struct HashNode** oldtable = h->table; - - // Figure out how big we need to be (start with a minimum size of - // 16 entries) - for(i=3; 1<lgalloced = i+1; - - // Allocate new ones (note that all the records are allocated in a - // single chunk, to avoid zillions of tiny node allocations) - sz = 1<lgalloced; - h->nodes = naAlloc(sz * (sizeof(struct HashNode) + sizeof(void*))); - h->table = (struct HashNode**)(((char*)h->nodes) + sz*sizeof(struct HashNode)); - naBZero(h->table, sz * sizeof(void*)); - h->nextnode = 0; - h->size = 0; - - // Re-insert everything from scratch - for(i=0; ikey, hn->val); - hn = hn->next; +/* A HashRec lives in a single allocated block. The layout is the + * header struct, then a table of 2^lgsz hash entries (key/value + * pairs), then an index table of 2*2^lgsz integers storing index + * values into the entry table. There are two tokens needed for + * "unused" and "used but empty". */ + +#define ENT_EMPTY -1 +#define ENT_DELETED -2 + +typedef struct { naRef key, val; } HashEnt; + +typedef struct HashRec { + int size; /* number of active entries */ + int lgsz; /* base-2 logarithm of the allocated (!) size */ + int next; /* next entry to use */ +} HashRec; + +#define REC(h) (PTR(h).hash->rec) +#define POW2(n) (1<<(n)) +#define NCELLS(hr) (2*POW2((hr)->lgsz)) +#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n) +#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz)) +#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef))) +#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz])) +#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0) + +#define LROT(h,n) (((h)<>((8*sizeof(h))-n))) +static unsigned int mix32(unsigned int h) +{ + h ^= 0x2e63823a; h += LROT(h, 15); h -= LROT(h, 9); + h += LROT(h, 4); h -= LROT(h, 1); h ^= LROT(h, 2); + return h; +} +static unsigned int hash32(const unsigned char* in, int len) +{ + unsigned int h = len, val = 0; + int i, count = 0; + for(i=0; i= - // 2*sizeof(int). - unsigned int* p = (unsigned int*)&(r.num); - return p[0] ^ p[1]; - } else { - // This is Daniel Bernstein's djb2 hash function that I found - // on the web somewhere. It appears to work pretty well. - unsigned int i, hash = 5831; - for(i=0; ilen; i++) - hash = (hash * 33) ^ r.ref.ptr.str->data[i]; - return hash; +static unsigned int refhash(naRef key) +{ + if(IS_STR(key)) { + struct naStr* s = PTR(key).str; + if(s->hashcode) return s->hashcode; + return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key)); + } else { /* must be a number */ + union { double d; unsigned int u[2]; } n; + n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */ + return mix32(mix32(n.u[0]) ^ n.u[1]); } } -// Which column in a given hash does the key correspond to. -static unsigned int hashcolumn(struct naHash* h, naRef key) +static int equal(naRef a, naRef b) { - // Multiply by a big number, and take the top N bits. Note - // assumption that sizeof(unsigned int) == 4. - return (2654435769u * hashcode(key)) >> (32 - h->lgalloced); + if(IS_NUM(a)) return a.num == b.num; + if(PTR(a).obj == PTR(b).obj) return 1; + if(naStr_len(a) != naStr_len(b)) return 0; + return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0; } -struct HashNode* find(struct naHash* h, naRef key) +/* Returns the index of a cell that either contains a matching key, or + * is the empty slot to receive a new insertion. */ +static int findcell(struct HashRec *hr, naRef key, unsigned int hash) { - struct HashNode* hn; - if(h->table == 0) - return 0; - hn = h->table[hashcolumn(h, key)]; - while(hn) { - if(naEqual(key, hn->key)) - return hn; - hn = hn->next; + int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask; + for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask) + if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key)) + break; + return i; +} + +static void hashset(HashRec* hr, naRef key, naRef val) +{ + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) == ENT_EMPTY) { + ent = hr->next++; + if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */ + TAB(hr)[cell] = ent; + hr->size++; + ENTS(hr)[ent].key = key; } - return 0; + ENTS(hr)[ent].val = val; } -void naHash_init(naRef hash) +static int recsize(int lgsz) { - struct naHash* h = hash.ref.ptr.hash; - h->size = 0; - h->lgalloced = 0; - h->table = 0; - h->nodes = 0; + HashRec hr; + hr.lgsz = lgsz; + return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef); } -// Make a temporary string on the stack -static naRef tmpStr(struct naStr* str, char* key) +static HashRec* resize(struct naHash* hash) { - char* p = key; - while(*p) { p++; } - str->len = p - key; - str->data = key; - return naObj(T_STR, (struct naObj*)str); + HashRec *hr = hash->rec, *hr2; + int i, lgsz = 0; + if(hr) { + int oldsz = hr->size; + while(oldsz) { oldsz >>= 1; lgsz++; } + } + hr2 = naAlloc(recsize(lgsz)); + hr2->size = hr2->next = 0; + hr2->lgsz = lgsz; + for(i=0; i<(2*(1<lgsz+1); i++) + if(TAB(hr)[i] >= 0) + hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val); + naGC_swapfree((void*)&hash->rec, hr2); + return hr2; } -naRef naHash_cget(naRef hash, char* key) +int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; } + +int naHash_get(naRef hash, naRef key, naRef* out) { - struct naStr str; - naRef result, key2 = tmpStr(&str, key); - if(naHash_get(hash, key2, &result)) - return result; - return naNil(); + HashRec* hr = REC(hash); + if(hr) { + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) < 0) return 0; + *out = ENTS(hr)[ent].val; + return 1; + } + return 0; } -void naHash_cset(naRef hash, char* key, naRef val) +void naHash_set(naRef hash, naRef key, naRef val) { - struct naStr str; - naRef key2 = tmpStr(&str, key); - naHash_tryset(hash, key2, val); + HashRec* hr = REC(hash); + if(!hr || hr->next >= POW2(hr->lgsz)) + hr = resize(PTR(hash).hash); + hashset(hr, key, val); } -int naHash_get(naRef hash, naRef key, naRef* out) +void naHash_delete(naRef hash, naRef key) { - struct naHash* h = hash.ref.ptr.hash; - struct HashNode* n; - if(!IS_HASH(hash)) return 0; - n = find(h, key); - if(n) { - *out = n->val; - return 1; - } else { - *out = naNil(); - return 0; + HashRec* hr = REC(hash); + if(hr) { + int cell = findcell(hr, key, refhash(key)); + if(TAB(hr)[cell] >= 0) { + TAB(hr)[cell] = ENT_DELETED; + if(--hr->size < POW2(hr->lgsz-1)) + resize(PTR(hash).hash); + } } } -// Simpler version. Don't create a new node if the value isn't there -int naHash_tryset(naRef hash, naRef key, naRef val) +void naHash_keys(naRef dst, naRef hash) { - struct HashNode* n; - if(!IS_HASH(hash)) return 0; - n = find(hash.ref.ptr.hash, key); - if(n) n->val = val; - return n != 0; + int i; + HashRec* hr = REC(hash); + for(i=0; hr && i < NCELLS(hr); i++) + if(TAB(hr)[i] >= 0) + naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key); } -void naHash_set(naRef hash, naRef key, naRef val) +void naiGCMarkHash(naRef hash) { - struct naHash* h = hash.ref.ptr.hash; - unsigned int col; - struct HashNode* n; + int i; + HashRec* hr = REC(hash); + for(i=0; hr && i < NCELLS(hr); i++) + if(TAB(hr)[i] >= 0) { + naiGCMark(ENTS(hr)[TAB(hr)[i]].key); + naiGCMark(ENTS(hr)[TAB(hr)[i]].val); + } +} - if(!IS_HASH(hash)) return; +static void tmpStr(naRef* out, struct naStr* str, const char* key) +{ + str->type = T_STR; + str->hashcode = 0; + str->emblen = -1; + str->data.ref.ptr = (unsigned char*)key; + str->data.ref.len = strlen(key); + SETPTR(*out, str); +} - n = find(h, key); - if(n) { - n->val = val; - return; - } +int naMember_cget(naContext c, naRef obj, const char* field, naRef* out) +{ + naRef key; struct naStr str; + tmpStr(&key, &str, field); + return naMember_get(c, obj, key, out); +} - if(h->size+1 >= 1<lgalloced) - realloc(hash); +naRef naHash_cget(naRef hash, char* key) +{ + struct naStr str; + naRef result, key2; + tmpStr(&key2, &str, key); + return naHash_get(hash, key2, &result) ? result : naNil(); +} - col = hashcolumn(h, key); - n = h->nodes + h->nextnode++; - n->key = key; - n->val = val; - n->next = h->table[col]; - h->table[col] = n; - h->size++; +void naHash_cset(naRef hash, char* key, naRef val) +{ + naRef key2; struct naStr str; + tmpStr(&key2, &str, key); + naiHash_tryset(hash, key2, val); } -// FIXME: this implementation does a realloc() after each delete, and -// is therefore needlessly O(N). (The reason is that this avoids the -// need to keep a free list around for the much more common case of -// adding a new value. Modifying an existing value is O(1), of -// course.) -void naHash_delete(naRef hash, naRef key) +int naiHash_tryset(naRef hash, naRef key, naRef val) { - struct naHash* h = hash.ref.ptr.hash; - int col; - struct HashNode *last=0, *hn; - if(!IS_HASH(hash)) return; - col = hashcolumn(h, key); - hn = h->table[col]; - while(hn) { - if(naEqual(hn->key, key)) { - if(last == 0) h->table[col] = hn->next; - else last->next = hn->next; - h->size--; - realloc(hash); - return; - } - last = hn; - hn = hn->next; + HashRec* hr = REC(hash); + if(hr) { + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; } } + return 0; } -void naHash_keys(naRef dst, naRef hash) +void naiGCHashClean(struct naHash* h) { - struct naHash* h = hash.ref.ptr.hash; - int i; - if(!IS_HASH(hash)) return; - for(i=0; i<(1<lgalloced); i++) { - struct HashNode* hn = h->table[i]; - while(hn) { - naVec_append(dst, hn->key); - hn = hn->next; - } - } + naFree(h->rec); + h->rec = 0; } -int naHash_size(naRef h) +/* Optimized naHash_get for looking up local variables (OP_LOCAL is by + * far the most common opcode and deserves some special case + * optimization). Assumes that the key is an interned symbol + * (i.e. the hash code is precomputed, and we only need to test for + * pointer identity). */ +int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out) { - if(!IS_HASH(h)) return 0; - return h.ref.ptr.hash->size; + HashRec* hr = hash->rec; + if(hr) { + int* tab = TAB(hr); + HashEnt* ents = ENTS(hr); + unsigned int hc = sym->hashcode; + int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask; + for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask) + if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) { + *out = ents[tab[cell]].val; + return 1; + } + } + return 0; } -void naHash_gcclean(struct naHash* h) + +/* As above, a special naHash_set for setting local variables. + * Assumes that the key is interned, and also that it isn't already + * present in the hash. */ +void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val) { - naFree(h->nodes); - h->nodes = 0; - h->size = 0; - h->lgalloced = 0; - h->table = 0; - h->nextnode = 0; + HashRec* hr = hash->rec; + int mask, step, cell, ent; + struct naStr *s = PTR(*sym).str; + if(!hr || hr->next >= POW2(hr->lgsz)) + hr = resize(hash); + mask = POW2(hr->lgsz+1) - 1; + step = (2*s->hashcode+1) & mask; + cell = HBITS(hr, s->hashcode); + while(TAB(hr)[cell] != ENT_EMPTY) + cell = (cell + step) & mask; + ent = hr->next++; + if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */ + TAB(hr)[cell] = ent; + hr->size++; + ENTS(hr)[TAB(hr)[cell]].key = *sym; + ENTS(hr)[TAB(hr)[cell]].val = *val; } +