X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fnasal%2Fhash.c;h=3679a0ca7685134afc0f4e039019c095f14569e4;hb=e1abab393b69d052186d799fd6182369f4877214;hp=55946535fe921fbbfcfea8f630a54df1309bf031;hpb=a807e66d4a40fc722e84a2fb99cb4d7b3d6223e1;p=simgear.git

diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c
index 55946535..3679a0ca 100644
--- a/simgear/nasal/hash.c
+++ b/simgear/nasal/hash.c
@@ -1,252 +1,272 @@
+#include <string.h>
 #include "nasal.h"
 #include "data.h"
 
-#define MIN_HASH_SIZE 4
-
-#define EQUAL(a, b) (((a).ref.reftag == (b).ref.reftag \
-                      && (a).ref.ptr.obj == (b).ref.ptr.obj) \
-                     || naEqual(a, b))
-
-#define HASH_MAGIC 2654435769u
-
-#define INSERT(hh, hkey, hval, hcol) do {                       \
-        unsigned int cc = (hcol), iidx=(hh)->size++;            \
-        if(iidx < (1<<(hh)->lgalloced)) {                       \
-            struct HashNode* hnn = &(hh)->nodes[iidx];  \
-            hnn->key = (hkey); hnn->val = (hval);               \
-            hnn->next = (hh)->table[cc];                        \
-            (hh)->table[cc] = hnn;                              \
-        }} while(0)
-
-// Computes a hash code for a given scalar
-static unsigned int hashcode(naRef r)
-{
-    if(IS_NUM(r))
-    {
-        // Numbers get the number as a hash.  Just use the bits and
-        // xor them together.  Note assumption that sizeof(double) >=
-        // 2*sizeof(int).
-        unsigned int* p = (unsigned int*)&(r.num);
-        return p[0] ^ p[1];
-    } else if(r.ref.ptr.str->hashcode) {
-        return r.ref.ptr.str->hashcode;
-    } else {
-        // This is Daniel Bernstein's djb2 hash function that I found
-        // on the web somewhere.  It appears to work pretty well.
-        unsigned int i, hash = 5831;
-        for(i=0; i<r.ref.ptr.str->len; i++)
-            hash = (hash * 33) ^ r.ref.ptr.str->data[i];
-        r.ref.ptr.str->hashcode = hash;
-        return hash;
-    }
-}
+/* A HashRec lives in a single allocated block.  The layout is the
+ * header struct, then a table of 2^lgsz hash entries (key/value
+ * pairs), then an index table of 2*2^lgsz integers storing index
+ * values into the entry table.  There are two tokens needed for
+ * "unused" and "used but empty". */
 
-// Which column in a given hash does the key correspond to.
-static unsigned int hashcolumn(struct HashRec* h, naRef key)
-{
-    // Multiply by a big number, and take the top N bits.  Note
-    // assumption that sizeof(unsigned int) == 4.
-    return (HASH_MAGIC * hashcode(key)) >> (32 - h->lgalloced);
-}
+#define ENT_EMPTY -1
+#define ENT_DELETED -2
 
-static struct HashRec* hashrealloc(struct naHash* hash)
-{
-    struct HashRec *h, *h0 = hash->rec;
-    int lga, cols, need = h0 ? h0->size - h0->dels : MIN_HASH_SIZE;
+typedef struct { naRef key, val; } HashEnt;
 
-    if(need < MIN_HASH_SIZE) need = MIN_HASH_SIZE;
-    for(lga=0; 1<<lga <= need; lga++);
-    cols = 1<<lga;
-    h = naAlloc(sizeof(struct HashRec) +
-                cols * (sizeof(struct HashNode*) + sizeof(struct HashNode)));
-    naBZero(h, sizeof(struct HashRec) + cols * sizeof(struct HashNode*));
+typedef struct HashRec {
+    int size; /* number of active entries */
+    int lgsz; /* base-2 logarithm of the allocated (!) size */
+    int next; /* next entry to use */
+} HashRec;
 
-    h->lgalloced = lga;
-    h->nodes = (struct HashNode*)(((char*)h)
-                                  + sizeof(struct HashRec)
-                                  + cols * sizeof(struct HashNode*));
-    for(lga=0; h0 != 0 && lga<(1<<h0->lgalloced); lga++) {
-        struct HashNode* hn = h0->table[lga];
-        while(hn) {
-            INSERT(h, hn->key, hn->val, hashcolumn(h, hn->key));
-            hn = hn->next;
-        }
-    }
-    naGC_swapfree((void**)&hash->rec, h);
+#define REC(h) (PTR(h).hash->rec)
+#define POW2(n) (1<<(n))
+#define NCELLS(hr) (2*POW2((hr)->lgsz))
+#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
+#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
+#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
+#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
+#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)
+
+#define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))
+static unsigned int mix32(unsigned int h)
+{
+    h ^= 0x2e63823a;  h += LROT(h, 15); h -= LROT(h, 9);
+    h += LROT(h, 4);  h -= LROT(h, 1);  h ^= LROT(h, 2);
     return h;
 }
-
-// Special, optimized version of naHash_get for the express purpose of
-// looking up symbols in the local variables hash (OP_LOCAL is by far
-// the most common opcode and deserves some special case
-// optimization).  Elides all the typing checks that are normally
-// required, presumes that the key is a string and has had its
-// hashcode precomputed, checks only for object identity, and inlines
-// the column computation.
-int naHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
-{
-    struct HashRec* h = hash->rec;
-    if(h) {
-        int col = (HASH_MAGIC * sym->hashcode) >> (32 - h->lgalloced);
-        struct HashNode* hn = h->table[col];
-        while(hn) {
-            if(hn->key.ref.ptr.str == sym) {
-                *out = hn->val;
-                return 1;
-            }
-            hn = hn->next;
+static unsigned int hash32(const unsigned char* in, int len)
+{
+    unsigned int h = len, val = 0;
+    int i, count = 0;
+    for(i=0; i<len; i++) {
+        val = (val<<8) ^ in[i];
+        if(++count == 4) {
+            h = mix32(h ^ val);
+            val = count = 0;
         }
     }
-    return 0;
+    return mix32(h ^ val);
 }
 
-static struct HashNode* find(struct naHash* hash, naRef key)
+static unsigned int refhash(naRef key)
 {
-    struct HashRec* h = hash->rec;
-    if(h) {
-        struct HashNode* hn = h->table[hashcolumn(h, key)];
-        while(hn) {
-            if(EQUAL(key, hn->key))
-                return hn;
-            hn = hn->next;
-        }
+    if(IS_STR(key)) {
+        struct naStr* s = PTR(key).str;
+        if(s->hashcode) return s->hashcode;
+        return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
+    } else { /* must be a number */
+        union { double d; unsigned int u[2]; } n;
+        n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */ 
+        return mix32(mix32(n.u[0]) ^ n.u[1]);
     }
-    return 0;
 }
 
-// Make a temporary string on the stack
-static void tmpStr(naRef* out, struct naStr* str, char* key)
+static int equal(naRef a, naRef b)
 {
-    str->len = 0;
-    str->data = (unsigned char*)key;
-    while(key[str->len]) str->len++;
-    *out = naNil();
-    out->ref.ptr.str = str;
+    if(IS_NUM(a)) return a.num == b.num;
+    if(PTR(a).obj == PTR(b).obj) return 1;
+    if(naStr_len(a) != naStr_len(b)) return 0;
+    return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
 }
 
-naRef naHash_cget(naRef hash, char* key)
+/* Returns the index of a cell that either contains a matching key, or
+ * is the empty slot to receive a new insertion. */
+static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
 {
-    struct naStr str;
-    naRef result, key2;
-    tmpStr(&key2, &str, key);
-    if(naHash_get(hash, key2, &result))
-        return result;
-    return naNil();
+    int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
+    for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
+        if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
+            break;
+    return i;
 }
 
-void naHash_cset(naRef hash, char* key, naRef val)
+static void hashset(HashRec* hr, naRef key, naRef val)
 {
-    struct naStr str;
-    naRef key2;
-    tmpStr(&key2, &str, key);
-    naHash_tryset(hash, key2, val);
+    int ent, cell = findcell(hr, key, refhash(key));
+    if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
+        ent = hr->next++;
+        if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+        TAB(hr)[cell] = ent;
+        hr->size++;
+        ENTS(hr)[ent].key = key;
+    }
+    ENTS(hr)[ent].val = val;
 }
 
-int naHash_get(naRef hash, naRef key, naRef* out)
+static int recsize(int lgsz)
 {
-    if(IS_HASH(hash)) {
-        struct HashNode* n = find(hash.ref.ptr.hash, key);
-        if(n) { *out = n->val; return 1; }
-    }
-    return 0;
+    HashRec hr;
+    hr.lgsz = lgsz;
+    return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
 }
 
-// Simpler version.  Don't create a new node if the value isn't there
-int naHash_tryset(naRef hash, naRef key, naRef val)
+static HashRec* resize(struct naHash* hash)
 {
-    if(IS_HASH(hash)) {
-        struct HashNode* n = find(hash.ref.ptr.hash, key);
-        if(n) n->val = val;
-        return n != 0;
+    HashRec *hr = hash->rec, *hr2;
+    int i, lgsz = 0;
+    if(hr) {
+        int oldsz = hr->size;
+        while(oldsz) { oldsz >>= 1; lgsz++; }
     }
-    return 0;
+    hr2 = naAlloc(recsize(lgsz));
+    hr2->size = hr2->next = 0;
+    hr2->lgsz = lgsz;
+    for(i=0; i<(2*(1<<lgsz)); i++)
+        TAB(hr2)[i] = ENT_EMPTY;
+    for(i=0; hr && i < POW2(hr->lgsz+1); i++)
+        if(TAB(hr)[i] >= 0)
+            hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
+    naGC_swapfree((void*)&hash->rec, hr2);
+    return hr2;
 }
 
-// Special purpose optimization for use in function call setups.  Sets
-// a value that is known *not* to be present in the hash table.  As
-// for naHash_sym, the key must be a string with a precomputed hash
-// code.
-void naHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
-{
-    int col;
-    struct HashRec* h = hash->rec;
-    while(!h || h->size >= 1<<h->lgalloced)
-        h = hashrealloc(hash);
-    col = (HASH_MAGIC * sym->ref.ptr.str->hashcode) >> (32 - h->lgalloced);
-    INSERT(h, *sym, *val, col);
-}
+int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
 
-// The cycle check is an integrity requirement for multithreading,
-// where raced inserts can potentially cause cycles.  This ensures
-// that the "last" thread to hold a reference to an inserted node
-// breaks any cycles that might have happened (at the expense of
-// potentially dropping items out of the hash).  Under normal
-// circumstances, chains will be very short and this will be fast.
-static void chkcycle(struct HashNode* node, int count)
+int naHash_get(naRef hash, naRef key, naRef* out)
 {
-    struct HashNode* hn = node;
-    while(hn && (hn = hn->next) != 0)
-        if(count-- <= 0) { node->next = 0; return; }
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int ent, cell = findcell(hr, key, refhash(key));
+        if((ent = TAB(hr)[cell]) < 0) return 0;
+        *out = ENTS(hr)[ent].val;
+        return 1;
+    }
+    return 0;
 }
 
 void naHash_set(naRef hash, naRef key, naRef val)
 {
-    int col;
-    struct HashRec* h;
-    struct HashNode* n;
-    if(!IS_HASH(hash)) return;
-    if((n = find(hash.ref.ptr.hash, key))) { n->val = val; return; }
-    h = hash.ref.ptr.hash->rec;
-    while(!h || h->size >= 1<<h->lgalloced)
-        h = hashrealloc(hash.ref.ptr.hash);
-    col = hashcolumn(h, key);
-    INSERT(h, key, val, hashcolumn(h, key));
-    chkcycle(h->table[col], h->size - h->dels);
+    HashRec* hr = REC(hash);
+    if(!hr || hr->next >= POW2(hr->lgsz))
+        hr = resize(PTR(hash).hash);
+    hashset(hr, key, val);
 }
 
 void naHash_delete(naRef hash, naRef key)
 {
-    struct HashRec* h = hash.ref.ptr.hash->rec;
-    int col;
-    struct HashNode *last=0, *hn;
-    if(!IS_HASH(hash) || !h) return;
-    col = hashcolumn(h, key);
-    hn = h->table[col];
-    while(hn) {
-        if(EQUAL(hn->key, key)) {
-            if(last == 0) h->table[col] = hn->next;
-            else last->next = hn->next;
-            h->dels++;
-            return;
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int cell = findcell(hr, key, refhash(key));
+        if(TAB(hr)[cell] >= 0) {
+            TAB(hr)[cell] = ENT_DELETED;
+            if(--hr->size < POW2(hr->lgsz-1))
+                resize(PTR(hash).hash);
         }
-        last = hn;
-        hn = hn->next;
     }
 }
 
 void naHash_keys(naRef dst, naRef hash)
 {
     int i;
-    struct HashRec* h = hash.ref.ptr.hash->rec;
-    if(!IS_HASH(hash) || !h) return;
-    for(i=0; i<(1<<h->lgalloced); i++) {
-        struct HashNode* hn = h->table[i];
-        while(hn) {
-            naVec_append(dst, hn->key);
-            hn = hn->next;
+    HashRec* hr = REC(hash);
+    for(i=0; hr && i < NCELLS(hr); i++)
+        if(TAB(hr)[i] >= 0)
+            naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
+}
+
+void naiGCMarkHash(naRef hash)
+{
+    int i;
+    HashRec* hr = REC(hash);
+    for(i=0; hr && i < NCELLS(hr); i++)
+        if(TAB(hr)[i] >= 0) {
+            naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+            naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
         }
-    }
 }
 
-int naHash_size(naRef hash)
+static void tmpStr(naRef* out, struct naStr* str, const char* key)
+{
+    str->type = T_STR;
+    str->hashcode = 0;
+    str->emblen = -1;
+    str->data.ref.ptr = (unsigned char*)key;
+    str->data.ref.len = strlen(key);
+    SETPTR(*out, str);
+}
+
+int naMember_cget(naContext c, naRef obj, const char* field, naRef* out)
+{
+    naRef key; struct naStr str;
+    tmpStr(&key, &str, field);
+    return naMember_get(c, obj, key, out);
+}
+
+naRef naHash_cget(naRef hash, char* key)
 {
-    struct HashRec* h = hash.ref.ptr.hash->rec;
-    if(!IS_HASH(hash) || !h) return 0;
-    return h->size - h->dels;
+    struct naStr str;
+    naRef result, key2;
+    tmpStr(&key2, &str, key);
+    return naHash_get(hash, key2, &result) ? result : naNil();
+}
+
+void naHash_cset(naRef hash, char* key, naRef val)
+{
+    naRef key2; struct naStr str;
+    tmpStr(&key2, &str, key);
+    naiHash_tryset(hash, key2, val);
 }
 
-void naHash_gcclean(struct naHash* h)
+int naiHash_tryset(naRef hash, naRef key, naRef val)
+{
+    HashRec* hr = REC(hash);
+    if(hr) {
+        int ent, cell = findcell(hr, key, refhash(key));
+        if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
+    }
+    return 0;
+}
+
+void naiGCHashClean(struct naHash* h)
 {
     naFree(h->rec);
     h->rec = 0;
 }
+
+/* Optimized naHash_get for looking up local variables (OP_LOCAL is by
+ * far the most common opcode and deserves some special case
+ * optimization).  Assumes that the key is an interned symbol
+ * (i.e. the hash code is precomputed, and we only need to test for
+ * pointer identity). */
+int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
+{
+    HashRec* hr = hash->rec;
+    if(hr) {
+        int* tab = TAB(hr);
+        HashEnt* ents = ENTS(hr);
+        unsigned int hc = sym->hashcode;
+        int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
+        for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
+            if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
+                *out = ents[tab[cell]].val;
+                return 1;
+            }
+    }
+    return 0;
+}
+
+
+/* As above, a special naHash_set for setting local variables.
+ * Assumes that the key is interned, and also that it isn't already
+ * present in the hash. */
+void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
+{
+    HashRec* hr = hash->rec;
+    int mask, step, cell, ent;
+    struct naStr *s = PTR(*sym).str;
+    if(!hr || hr->next >= POW2(hr->lgsz))
+        hr = resize(hash);
+    mask = POW2(hr->lgsz+1) - 1;
+    step = (2*s->hashcode+1) & mask;
+    cell = HBITS(hr, s->hashcode);
+    while(TAB(hr)[cell] != ENT_EMPTY)
+        cell = (cell + step) & mask;
+    ent = hr->next++;
+    if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+    TAB(hr)[cell] = ent;
+    hr->size++;
+    ENTS(hr)[TAB(hr)[cell]].key = *sym;
+    ENTS(hr)[TAB(hr)[cell]].val = *val;
+}
+