X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fnasal%2Fhash.c;h=3679a0ca7685134afc0f4e039019c095f14569e4;hb=e1abab393b69d052186d799fd6182369f4877214;hp=55946535fe921fbbfcfea8f630a54df1309bf031;hpb=a807e66d4a40fc722e84a2fb99cb4d7b3d6223e1;p=simgear.git diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c index 55946535..3679a0ca 100644 --- a/simgear/nasal/hash.c +++ b/simgear/nasal/hash.c @@ -1,252 +1,272 @@ +#include #include "nasal.h" #include "data.h" -#define MIN_HASH_SIZE 4 - -#define EQUAL(a, b) (((a).ref.reftag == (b).ref.reftag \ - && (a).ref.ptr.obj == (b).ref.ptr.obj) \ - || naEqual(a, b)) - -#define HASH_MAGIC 2654435769u - -#define INSERT(hh, hkey, hval, hcol) do { \ - unsigned int cc = (hcol), iidx=(hh)->size++; \ - if(iidx < (1<<(hh)->lgalloced)) { \ - struct HashNode* hnn = &(hh)->nodes[iidx]; \ - hnn->key = (hkey); hnn->val = (hval); \ - hnn->next = (hh)->table[cc]; \ - (hh)->table[cc] = hnn; \ - }} while(0) - -// Computes a hash code for a given scalar -static unsigned int hashcode(naRef r) -{ - if(IS_NUM(r)) - { - // Numbers get the number as a hash. Just use the bits and - // xor them together. Note assumption that sizeof(double) >= - // 2*sizeof(int). - unsigned int* p = (unsigned int*)&(r.num); - return p[0] ^ p[1]; - } else if(r.ref.ptr.str->hashcode) { - return r.ref.ptr.str->hashcode; - } else { - // This is Daniel Bernstein's djb2 hash function that I found - // on the web somewhere. It appears to work pretty well. - unsigned int i, hash = 5831; - for(i=0; ilen; i++) - hash = (hash * 33) ^ r.ref.ptr.str->data[i]; - r.ref.ptr.str->hashcode = hash; - return hash; - } -} +/* A HashRec lives in a single allocated block. The layout is the + * header struct, then a table of 2^lgsz hash entries (key/value + * pairs), then an index table of 2*2^lgsz integers storing index + * values into the entry table. There are two tokens needed for + * "unused" and "used but empty". */ -// Which column in a given hash does the key correspond to. -static unsigned int hashcolumn(struct HashRec* h, naRef key) -{ - // Multiply by a big number, and take the top N bits. Note - // assumption that sizeof(unsigned int) == 4. - return (HASH_MAGIC * hashcode(key)) >> (32 - h->lgalloced); -} +#define ENT_EMPTY -1 +#define ENT_DELETED -2 -static struct HashRec* hashrealloc(struct naHash* hash) -{ - struct HashRec *h, *h0 = hash->rec; - int lga, cols, need = h0 ? h0->size - h0->dels : MIN_HASH_SIZE; +typedef struct { naRef key, val; } HashEnt; - if(need < MIN_HASH_SIZE) need = MIN_HASH_SIZE; - for(lga=0; 1<lgalloced = lga; - h->nodes = (struct HashNode*)(((char*)h) - + sizeof(struct HashRec) - + cols * sizeof(struct HashNode*)); - for(lga=0; h0 != 0 && lga<(1<lgalloced); lga++) { - struct HashNode* hn = h0->table[lga]; - while(hn) { - INSERT(h, hn->key, hn->val, hashcolumn(h, hn->key)); - hn = hn->next; - } - } - naGC_swapfree((void**)&hash->rec, h); +#define REC(h) (PTR(h).hash->rec) +#define POW2(n) (1<<(n)) +#define NCELLS(hr) (2*POW2((hr)->lgsz)) +#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n) +#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz)) +#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef))) +#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz])) +#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0) + +#define LROT(h,n) (((h)<>((8*sizeof(h))-n))) +static unsigned int mix32(unsigned int h) +{ + h ^= 0x2e63823a; h += LROT(h, 15); h -= LROT(h, 9); + h += LROT(h, 4); h -= LROT(h, 1); h ^= LROT(h, 2); return h; } - -// Special, optimized version of naHash_get for the express purpose of -// looking up symbols in the local variables hash (OP_LOCAL is by far -// the most common opcode and deserves some special case -// optimization). Elides all the typing checks that are normally -// required, presumes that the key is a string and has had its -// hashcode precomputed, checks only for object identity, and inlines -// the column computation. -int naHash_sym(struct naHash* hash, struct naStr* sym, naRef* out) -{ - struct HashRec* h = hash->rec; - if(h) { - int col = (HASH_MAGIC * sym->hashcode) >> (32 - h->lgalloced); - struct HashNode* hn = h->table[col]; - while(hn) { - if(hn->key.ref.ptr.str == sym) { - *out = hn->val; - return 1; - } - hn = hn->next; +static unsigned int hash32(const unsigned char* in, int len) +{ + unsigned int h = len, val = 0; + int i, count = 0; + for(i=0; irec; - if(h) { - struct HashNode* hn = h->table[hashcolumn(h, key)]; - while(hn) { - if(EQUAL(key, hn->key)) - return hn; - hn = hn->next; - } + if(IS_STR(key)) { + struct naStr* s = PTR(key).str; + if(s->hashcode) return s->hashcode; + return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key)); + } else { /* must be a number */ + union { double d; unsigned int u[2]; } n; + n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */ + return mix32(mix32(n.u[0]) ^ n.u[1]); } - return 0; } -// Make a temporary string on the stack -static void tmpStr(naRef* out, struct naStr* str, char* key) +static int equal(naRef a, naRef b) { - str->len = 0; - str->data = (unsigned char*)key; - while(key[str->len]) str->len++; - *out = naNil(); - out->ref.ptr.str = str; + if(IS_NUM(a)) return a.num == b.num; + if(PTR(a).obj == PTR(b).obj) return 1; + if(naStr_len(a) != naStr_len(b)) return 0; + return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0; } -naRef naHash_cget(naRef hash, char* key) +/* Returns the index of a cell that either contains a matching key, or + * is the empty slot to receive a new insertion. */ +static int findcell(struct HashRec *hr, naRef key, unsigned int hash) { - struct naStr str; - naRef result, key2; - tmpStr(&key2, &str, key); - if(naHash_get(hash, key2, &result)) - return result; - return naNil(); + int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask; + for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask) + if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key)) + break; + return i; } -void naHash_cset(naRef hash, char* key, naRef val) +static void hashset(HashRec* hr, naRef key, naRef val) { - struct naStr str; - naRef key2; - tmpStr(&key2, &str, key); - naHash_tryset(hash, key2, val); + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) == ENT_EMPTY) { + ent = hr->next++; + if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */ + TAB(hr)[cell] = ent; + hr->size++; + ENTS(hr)[ent].key = key; + } + ENTS(hr)[ent].val = val; } -int naHash_get(naRef hash, naRef key, naRef* out) +static int recsize(int lgsz) { - if(IS_HASH(hash)) { - struct HashNode* n = find(hash.ref.ptr.hash, key); - if(n) { *out = n->val; return 1; } - } - return 0; + HashRec hr; + hr.lgsz = lgsz; + return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef); } -// Simpler version. Don't create a new node if the value isn't there -int naHash_tryset(naRef hash, naRef key, naRef val) +static HashRec* resize(struct naHash* hash) { - if(IS_HASH(hash)) { - struct HashNode* n = find(hash.ref.ptr.hash, key); - if(n) n->val = val; - return n != 0; + HashRec *hr = hash->rec, *hr2; + int i, lgsz = 0; + if(hr) { + int oldsz = hr->size; + while(oldsz) { oldsz >>= 1; lgsz++; } } - return 0; + hr2 = naAlloc(recsize(lgsz)); + hr2->size = hr2->next = 0; + hr2->lgsz = lgsz; + for(i=0; i<(2*(1<lgsz+1); i++) + if(TAB(hr)[i] >= 0) + hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val); + naGC_swapfree((void*)&hash->rec, hr2); + return hr2; } -// Special purpose optimization for use in function call setups. Sets -// a value that is known *not* to be present in the hash table. As -// for naHash_sym, the key must be a string with a precomputed hash -// code. -void naHash_newsym(struct naHash* hash, naRef* sym, naRef* val) -{ - int col; - struct HashRec* h = hash->rec; - while(!h || h->size >= 1<lgalloced) - h = hashrealloc(hash); - col = (HASH_MAGIC * sym->ref.ptr.str->hashcode) >> (32 - h->lgalloced); - INSERT(h, *sym, *val, col); -} +int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; } -// The cycle check is an integrity requirement for multithreading, -// where raced inserts can potentially cause cycles. This ensures -// that the "last" thread to hold a reference to an inserted node -// breaks any cycles that might have happened (at the expense of -// potentially dropping items out of the hash). Under normal -// circumstances, chains will be very short and this will be fast. -static void chkcycle(struct HashNode* node, int count) +int naHash_get(naRef hash, naRef key, naRef* out) { - struct HashNode* hn = node; - while(hn && (hn = hn->next) != 0) - if(count-- <= 0) { node->next = 0; return; } + HashRec* hr = REC(hash); + if(hr) { + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) < 0) return 0; + *out = ENTS(hr)[ent].val; + return 1; + } + return 0; } void naHash_set(naRef hash, naRef key, naRef val) { - int col; - struct HashRec* h; - struct HashNode* n; - if(!IS_HASH(hash)) return; - if((n = find(hash.ref.ptr.hash, key))) { n->val = val; return; } - h = hash.ref.ptr.hash->rec; - while(!h || h->size >= 1<lgalloced) - h = hashrealloc(hash.ref.ptr.hash); - col = hashcolumn(h, key); - INSERT(h, key, val, hashcolumn(h, key)); - chkcycle(h->table[col], h->size - h->dels); + HashRec* hr = REC(hash); + if(!hr || hr->next >= POW2(hr->lgsz)) + hr = resize(PTR(hash).hash); + hashset(hr, key, val); } void naHash_delete(naRef hash, naRef key) { - struct HashRec* h = hash.ref.ptr.hash->rec; - int col; - struct HashNode *last=0, *hn; - if(!IS_HASH(hash) || !h) return; - col = hashcolumn(h, key); - hn = h->table[col]; - while(hn) { - if(EQUAL(hn->key, key)) { - if(last == 0) h->table[col] = hn->next; - else last->next = hn->next; - h->dels++; - return; + HashRec* hr = REC(hash); + if(hr) { + int cell = findcell(hr, key, refhash(key)); + if(TAB(hr)[cell] >= 0) { + TAB(hr)[cell] = ENT_DELETED; + if(--hr->size < POW2(hr->lgsz-1)) + resize(PTR(hash).hash); } - last = hn; - hn = hn->next; } } void naHash_keys(naRef dst, naRef hash) { int i; - struct HashRec* h = hash.ref.ptr.hash->rec; - if(!IS_HASH(hash) || !h) return; - for(i=0; i<(1<lgalloced); i++) { - struct HashNode* hn = h->table[i]; - while(hn) { - naVec_append(dst, hn->key); - hn = hn->next; + HashRec* hr = REC(hash); + for(i=0; hr && i < NCELLS(hr); i++) + if(TAB(hr)[i] >= 0) + naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key); +} + +void naiGCMarkHash(naRef hash) +{ + int i; + HashRec* hr = REC(hash); + for(i=0; hr && i < NCELLS(hr); i++) + if(TAB(hr)[i] >= 0) { + naiGCMark(ENTS(hr)[TAB(hr)[i]].key); + naiGCMark(ENTS(hr)[TAB(hr)[i]].val); } - } } -int naHash_size(naRef hash) +static void tmpStr(naRef* out, struct naStr* str, const char* key) +{ + str->type = T_STR; + str->hashcode = 0; + str->emblen = -1; + str->data.ref.ptr = (unsigned char*)key; + str->data.ref.len = strlen(key); + SETPTR(*out, str); +} + +int naMember_cget(naContext c, naRef obj, const char* field, naRef* out) +{ + naRef key; struct naStr str; + tmpStr(&key, &str, field); + return naMember_get(c, obj, key, out); +} + +naRef naHash_cget(naRef hash, char* key) { - struct HashRec* h = hash.ref.ptr.hash->rec; - if(!IS_HASH(hash) || !h) return 0; - return h->size - h->dels; + struct naStr str; + naRef result, key2; + tmpStr(&key2, &str, key); + return naHash_get(hash, key2, &result) ? result : naNil(); +} + +void naHash_cset(naRef hash, char* key, naRef val) +{ + naRef key2; struct naStr str; + tmpStr(&key2, &str, key); + naiHash_tryset(hash, key2, val); } -void naHash_gcclean(struct naHash* h) +int naiHash_tryset(naRef hash, naRef key, naRef val) +{ + HashRec* hr = REC(hash); + if(hr) { + int ent, cell = findcell(hr, key, refhash(key)); + if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; } + } + return 0; +} + +void naiGCHashClean(struct naHash* h) { naFree(h->rec); h->rec = 0; } + +/* Optimized naHash_get for looking up local variables (OP_LOCAL is by + * far the most common opcode and deserves some special case + * optimization). Assumes that the key is an interned symbol + * (i.e. the hash code is precomputed, and we only need to test for + * pointer identity). */ +int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out) +{ + HashRec* hr = hash->rec; + if(hr) { + int* tab = TAB(hr); + HashEnt* ents = ENTS(hr); + unsigned int hc = sym->hashcode; + int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask; + for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask) + if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) { + *out = ents[tab[cell]].val; + return 1; + } + } + return 0; +} + + +/* As above, a special naHash_set for setting local variables. + * Assumes that the key is interned, and also that it isn't already + * present in the hash. */ +void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val) +{ + HashRec* hr = hash->rec; + int mask, step, cell, ent; + struct naStr *s = PTR(*sym).str; + if(!hr || hr->next >= POW2(hr->lgsz)) + hr = resize(hash); + mask = POW2(hr->lgsz+1) - 1; + step = (2*s->hashcode+1) & mask; + cell = HBITS(hr, s->hashcode); + while(TAB(hr)[cell] != ENT_EMPTY) + cell = (cell + step) & mask; + ent = hr->next++; + if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */ + TAB(hr)[cell] = ent; + hr->size++; + ENTS(hr)[TAB(hr)[cell]].key = *sym; + ENTS(hr)[TAB(hr)[cell]].val = *val; +} +