X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fnasal%2Fhash.c;h=3679a0ca7685134afc0f4e039019c095f14569e4;hb=e1abab393b69d052186d799fd6182369f4877214;hp=966cd95785e7e0b7d3ace0ceda5a0378ce1f6450;hpb=1786692406214447db12b9d5af5364582af23d3b;p=simgear.git

diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c
index 966cd957..3679a0ca 100644
--- a/simgear/nasal/hash.c
+++ b/simgear/nasal/hash.c
@@ -1,223 +1,272 @@
+#include <string.h>
 #include "nasal.h"
 #include "data.h"
 
-static void realloc(naRef hash)
-{
-    struct naHash* h = hash.ref.ptr.hash;
-    int i, sz, oldsz = h->size;
-    int oldcols = h->table ? 1 << h->lgalloced : 0;
-
-    // Keep a handle to our original objects
-    struct HashNode* oldnodes = h->nodes;
-    struct HashNode** oldtable = h->table;
-
-    // Figure out how big we need to be (start with a minimum size of
-    // 16 entries)
-    for(i=3; 1<<i < oldsz; i++);
-    h->lgalloced = i+1;
-    
-    // Allocate new ones (note that all the records are allocated in a
-    // single chunk, to avoid zillions of tiny node allocations)
-    sz = 1<<h->lgalloced;
-    h->nodes = naAlloc(sz * (sizeof(struct HashNode) + sizeof(void*)));
-    h->table = (struct HashNode**)(((char*)h->nodes) + sz*sizeof(struct HashNode));
-    naBZero(h->table, sz * sizeof(void*));
-    h->nextnode = 0;
-    h->size = 0;
-
-    // Re-insert everything from scratch
-    for(i=0; i<oldcols; i++) {
-        struct HashNode* hn = oldtable[i];
-        while(hn) {
-            naHash_set(hash, hn->key, hn->val);
-            hn = hn->next;
+/* A HashRec lives in a single allocated block.  The layout is the
+ * header struct, then a table of 2^lgsz hash entries (key/value
+ * pairs), then an index table of 2*2^lgsz integers storing index
+ * values into the entry table.  There are two tokens needed for
+ * "unused" and "used but empty". */
+
+#define ENT_EMPTY -1
+#define ENT_DELETED -2
+
+typedef struct { naRef key, val; } HashEnt;
+
+typedef struct HashRec {
+    int size; /* number of active entries */
+    int lgsz; /* base-2 logarithm of the allocated (!) size */
+    int next; /* next entry to use */
+} HashRec;
+
+#define REC(h) (PTR(h).hash->rec)
+#define POW2(n) (1<<(n))
+#define NCELLS(hr) (2*POW2((hr)->lgsz))
+#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
+#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
+#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
+#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
+#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)
+
+#define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))
+static unsigned int mix32(unsigned int h)
+{
+    h ^= 0x2e63823a;  h += LROT(h, 15); h -= LROT(h, 9);
+    h += LROT(h, 4);  h -= LROT(h, 1);  h ^= LROT(h, 2);
+    return h;
+}
+static unsigned int hash32(const unsigned char* in, int len)
+{
+    unsigned int h = len, val = 0;
+    int i, count = 0;
+    for(i=0; i<len; i++) {
+        val = (val<<8) ^ in[i];
+        if(++count == 4) {
+            h = mix32(h ^ val);
+            val = count = 0;
         }
     }
+    return mix32(h ^ val);
+}
 
-    // Free the old memory
-    naFree(oldnodes);
-}
-
-// Computes a hash code for a given scalar
-static unsigned int hashcode(naRef r)
-{
-    if(IS_NUM(r))
-    {
-        // Numbers get the number as a hash.  Just use the bits and
-        // xor them together.  Note assumption that sizeof(double) >=
-        // 2*sizeof(int).
-        unsigned int* p = (unsigned int*)&(r.num);
-        return p[0] ^ p[1];
-    } else {
-        // This is Daniel Bernstein's djb2 hash function that I found
-        // on the web somewhere.  It appears to work pretty well.
-        unsigned int i, hash = 5831;
-        for(i=0; i<r.ref.ptr.str->len; i++)
-            hash = (hash * 33) ^ r.ref.ptr.str->data[i];
-        return hash;
+static unsigned int refhash(naRef key)
+{
+    if(IS_STR(key)) {
+        struct naStr* s = PTR(key).str;
+        if(s->hashcode) return s->hashcode;
+        return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
+    } else { /* must be a number */
+        union { double d; unsigned int u[2]; } n;
+        n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */ 
+        return mix32(mix32(n.u[0]) ^ n.u[1]);
     }
 }
 
-// Which column in a given hash does the key correspond to.
-static unsigned int hashcolumn(struct naHash* h, naRef key)
+static int equal(naRef a, naRef b)
 {
-    // Multiply by a big number, and take the top N bits.  Note
-    // assumption that sizeof(unsigned int) == 4.
-    return (2654435769u * hashcode(key)) >> (32 - h->lgalloced);
+    if(IS_NUM(a)) return a.num == b.num;
+    if(PTR(a).obj == PTR(b).obj) return 1;
+    if(naStr_len(a) != naStr_len(b)) return 0;
+    return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
 }
 
-struct HashNode* find(struct naHash* h, naRef key)
+/* Returns the index of a cell that either contains a matching key, or
+ * is the empty slot to receive a new insertion. */
+static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
 {
-    struct HashNode* hn;
-    if(h->table == 0)
-        return 0;
-    hn = h->table[hashcolumn(h, key)];
-    while(hn) {
-        if(naEqual(key, hn->key))
-            return hn;
-        hn = hn->next;
+    int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
+    for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
+        if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
+            break;
+    return i;
+}
+
+static void hashset(HashRec* hr, naRef key, naRef val)
+{
+    int ent, cell = findcell(hr, key, refhash(key));
+    if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
+        ent = hr->next++;
+        if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+        TAB(hr)[cell] = ent;
+        hr->size++;
+        ENTS(hr)[ent].key = key;
     }
-    return 0;
+    ENTS(hr)[ent].val = val;
 }
 
-void naHash_init(naRef hash)
+static int recsize(int lgsz)
 {
-    struct naHash* h = hash.ref.ptr.hash;
-    h->size = 0;
-    h->lgalloced = 0;
-    h->table = 0;
-    h->nodes = 0;
+    HashRec hr;
+    hr.lgsz = lgsz;
+    return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
 }
 
-// Make a temporary string on the stack
-static naRef tmpStr(struct naStr* str, char* key)
+static HashRec* resize(struct naHash* hash)
 {
-    char* p = key;
-    while(*p) { p++; }
-    str->len = p - key;
-    str->data = key;
-    return naObj(T_STR, (struct naObj*)str);
+    HashRec *hr = hash->rec, *hr2;
+    int i, lgsz = 0;
+    if(hr) {
+        int oldsz = hr->size;
+        while(oldsz) { oldsz >>= 1; lgsz++; }
+    }
+    hr2 = naAlloc(recsize(lgsz));
+    hr2->size = hr2->next = 0;
+    hr2->lgsz = lgsz;
+    for(i=0; i<(2*(1<<lgsz)); i++)
+        TAB(hr2)[i] = ENT_EMPTY;
+    for(i=0; hr && i < POW2(hr->lgsz+1); i++)
+        if(TAB(hr)[i] >= 0)
+            hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
+    naGC_swapfree((void*)&hash->rec, hr2);
+    return hr2;
 }
 
-naRef naHash_cget(naRef hash, char* key)
+int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
+
+int naHash_get(naRef hash, naRef key, naRef* out)
 {
-    struct naStr str;
-    naRef result, key2 = tmpStr(&str, key);
-    if(naHash_get(hash, key2, &result))
-        return result;
-    return naNil();
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int ent, cell = findcell(hr, key, refhash(key));
+        if((ent = TAB(hr)[cell]) < 0) return 0;
+        *out = ENTS(hr)[ent].val;
+        return 1;
+    }
+    return 0;
 }
 
-void naHash_cset(naRef hash, char* key, naRef val)
+void naHash_set(naRef hash, naRef key, naRef val)
 {
-    struct naStr str;
-    naRef key2 = tmpStr(&str, key);
-    naHash_tryset(hash, key2, val);
+    HashRec* hr = REC(hash);
+    if(!hr || hr->next >= POW2(hr->lgsz))
+        hr = resize(PTR(hash).hash);
+    hashset(hr, key, val);
 }
 
-int naHash_get(naRef hash, naRef key, naRef* out)
+void naHash_delete(naRef hash, naRef key)
 {
-    struct naHash* h = hash.ref.ptr.hash;
-    struct HashNode* n;
-    if(!IS_HASH(hash)) return 0;
-    n = find(h, key);
-    if(n) {
-        *out = n->val;
-        return 1;
-    } else {
-        *out = naNil();
-        return 0;
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int cell = findcell(hr, key, refhash(key));
+        if(TAB(hr)[cell] >= 0) {
+            TAB(hr)[cell] = ENT_DELETED;
+            if(--hr->size < POW2(hr->lgsz-1))
+                resize(PTR(hash).hash);
+        }
     }
 }
 
-// Simpler version.  Don't create a new node if the value isn't there
-int naHash_tryset(naRef hash, naRef key, naRef val)
+void naHash_keys(naRef dst, naRef hash)
 {
-    struct HashNode* n;
-    if(!IS_HASH(hash)) return 0;
-    n = find(hash.ref.ptr.hash, key);
-    if(n) n->val = val;
-    return n != 0;
+    int i;
+    HashRec* hr = REC(hash);
+    for(i=0; hr && i < NCELLS(hr); i++)
+        if(TAB(hr)[i] >= 0)
+            naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
 }
 
-void naHash_set(naRef hash, naRef key, naRef val)
+void naiGCMarkHash(naRef hash)
 {
-    struct naHash* h = hash.ref.ptr.hash;
-    unsigned int col;
-    struct HashNode* n;
+    int i;
+    HashRec* hr = REC(hash);
+    for(i=0; hr && i < NCELLS(hr); i++)
+        if(TAB(hr)[i] >= 0) {
+            naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+            naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
+        }
+}
 
-    if(!IS_HASH(hash)) return;
+static void tmpStr(naRef* out, struct naStr* str, const char* key)
+{
+    str->type = T_STR;
+    str->hashcode = 0;
+    str->emblen = -1;
+    str->data.ref.ptr = (unsigned char*)key;
+    str->data.ref.len = strlen(key);
+    SETPTR(*out, str);
+}
 
-    n = find(h, key);
-    if(n) {
-        n->val = val;
-        return;
-    }
+int naMember_cget(naContext c, naRef obj, const char* field, naRef* out)
+{
+    naRef key; struct naStr str;
+    tmpStr(&key, &str, field);
+    return naMember_get(c, obj, key, out);
+}
 
-    if(h->size+1 >= 1<<h->lgalloced)
-        realloc(hash);
+naRef naHash_cget(naRef hash, char* key)
+{
+    struct naStr str;
+    naRef result, key2;
+    tmpStr(&key2, &str, key);
+    return naHash_get(hash, key2, &result) ? result : naNil();
+}
 
-    col = hashcolumn(h, key);
-    n = h->nodes + h->nextnode++;
-    n->key = key;
-    n->val = val;
-    n->next = h->table[col];
-    h->table[col] = n;
-    h->size++;
+void naHash_cset(naRef hash, char* key, naRef val)
+{
+    naRef key2; struct naStr str;
+    tmpStr(&key2, &str, key);
+    naiHash_tryset(hash, key2, val);
 }
 
-// FIXME: this implementation does a realloc() after each delete, and
-// is therefore needlessly O(N).  (The reason is that this avoids the
-// need to keep a free list around for the much more common case of
-// adding a new value.  Modifying an existing value is O(1), of
-// course.)
-void naHash_delete(naRef hash, naRef key)
+int naiHash_tryset(naRef hash, naRef key, naRef val)
 {
-    struct naHash* h = hash.ref.ptr.hash;
-    int col;
-    struct HashNode *last=0, *hn;
-    if(!IS_HASH(hash)) return;
-    col = hashcolumn(h, key);
-    hn = h->table[col];
-    while(hn) {
-        if(naEqual(hn->key, key)) {
-            if(last == 0) h->table[col] = hn->next;
-            else last->next = hn->next;
-            h->size--;
-            realloc(hash);
-            return;
-        }
-        last = hn;
-        hn = hn->next;
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int ent, cell = findcell(hr, key, refhash(key));
+        if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
     }
+    return 0;
 }
 
-void naHash_keys(naRef dst, naRef hash)
+void naiGCHashClean(struct naHash* h)
 {
-    struct naHash* h = hash.ref.ptr.hash;
-    int i;
-    if(!IS_HASH(hash)) return;
-    for(i=0; i<(1<<h->lgalloced); i++) {
-        struct HashNode* hn = h->table[i];
-        while(hn) {
-            naVec_append(dst, hn->key);
-            hn = hn->next;
-        }
-    }
+    naFree(h->rec);
+    h->rec = 0;
 }
 
-int naHash_size(naRef h)
+/* Optimized naHash_get for looking up local variables (OP_LOCAL is by
+ * far the most common opcode and deserves some special case
+ * optimization).  Assumes that the key is an interned symbol
+ * (i.e. the hash code is precomputed, and we only need to test for
+ * pointer identity). */
+int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
 {
-    if(!IS_HASH(h)) return 0;
-    return h.ref.ptr.hash->size;
+    HashRec* hr = hash->rec;
+    if(hr) {
+        int* tab = TAB(hr);
+        HashEnt* ents = ENTS(hr);
+        unsigned int hc = sym->hashcode;
+        int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
+        for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
+            if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
+                *out = ents[tab[cell]].val;
+                return 1;
+            }
+    }
+    return 0;
 }
 
-void naHash_gcclean(struct naHash* h)
+
+/* As above, a special naHash_set for setting local variables.
+ * Assumes that the key is interned, and also that it isn't already
+ * present in the hash. */
+void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
 {
-    naFree(h->nodes);
-    h->nodes = 0;
-    h->size = 0;
-    h->lgalloced = 0;
-    h->table = 0;
-    h->nextnode = 0;
+    HashRec* hr = hash->rec;
+    int mask, step, cell, ent;
+    struct naStr *s = PTR(*sym).str;
+    if(!hr || hr->next >= POW2(hr->lgsz))
+        hr = resize(hash);
+    mask = POW2(hr->lgsz+1) - 1;
+    step = (2*s->hashcode+1) & mask;
+    cell = HBITS(hr, s->hashcode);
+    while(TAB(hr)[cell] != ENT_EMPTY)
+        cell = (cell + step) & mask;
+    ent = hr->next++;
+    if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+    TAB(hr)[cell] = ent;
+    hr->size++;
+    ENTS(hr)[TAB(hr)[cell]].key = *sym;
+    ENTS(hr)[TAB(hr)[cell]].val = *val;
 }
+