#include <string.h>

#include "nasal.h"
#include "data.h"
-#define MIN_HASH_SIZE 4
-
-#define EQUAL(a, b) (((a).ref.reftag == (b).ref.reftag \
- && (a).ref.ptr.obj == (b).ref.ptr.obj) \
- || naEqual(a, b))
-
-#define HASH_MAGIC 2654435769u
-
-#define INSERT(hh, hkey, hval, hcol) do { \
- unsigned int cc = (hcol), iidx=(hh)->size++; \
- if(iidx < (1<<(hh)->lgalloced)) { \
- struct HashNode* hnn = &(hh)->nodes[iidx]; \
- hnn->key = (hkey); hnn->val = (hval); \
- hnn->next = (hh)->table[cc]; \
- (hh)->table[cc] = hnn; \
- }} while(0)
-
-// Computes a hash code for a given scalar
-static unsigned int hashcode(naRef r)
-{
- if(IS_NUM(r))
- {
- // Numbers get the number as a hash. Just use the bits and
- // xor them together. Note assumption that sizeof(double) >=
- // 2*sizeof(int).
- unsigned int* p = (unsigned int*)&(r.num);
- return p[0] ^ p[1];
- } else if(r.ref.ptr.str->hashcode) {
- return r.ref.ptr.str->hashcode;
- } else {
- // This is Daniel Bernstein's djb2 hash function that I found
- // on the web somewhere. It appears to work pretty well.
- unsigned int i, hash = 5831;
- for(i=0; i<r.ref.ptr.str->len; i++)
- hash = (hash * 33) ^ r.ref.ptr.str->data[i];
- r.ref.ptr.str->hashcode = hash;
- return hash;
- }
-}
+/* A HashRec lives in a single allocated block. The layout is the
+ * header struct, then a table of 2^lgsz hash entries (key/value
+ * pairs), then an index table of 2*2^lgsz integers storing index
+ * values into the entry table. There are two tokens needed for
+ * "unused" and "used but empty". */
-// Which column in a given hash does the key correspond to.
-static unsigned int hashcolumn(struct HashRec* h, naRef key)
-{
- // Multiply by a big number, and take the top N bits. Note
- // assumption that sizeof(unsigned int) == 4.
- return (HASH_MAGIC * hashcode(key)) >> (32 - h->lgalloced);
-}
+#define ENT_EMPTY -1
+#define ENT_DELETED -2
-static struct HashRec* realloc(struct naHash* hash)
-{
- struct HashRec *h, *h0 = hash->rec;
- int lga, cols, need = h0 ? h0->size - h0->dels : MIN_HASH_SIZE;
+typedef struct { naRef key, val; } HashEnt;
- if(need < MIN_HASH_SIZE) need = MIN_HASH_SIZE;
- for(lga=0; 1<<lga <= need; lga++);
- cols = 1<<lga;
- h = naAlloc(sizeof(struct HashRec) +
- cols * (sizeof(struct HashNode*) + sizeof(struct HashNode)));
- naBZero(h, sizeof(struct HashRec) + cols * sizeof(struct HashNode*));
+typedef struct HashRec {
+ int size; /* number of active entries */
+ int lgsz; /* base-2 logarithm of the allocated (!) size */
+ int next; /* next entry to use */
+} HashRec;
- h->lgalloced = lga;
- h->nodes = (struct HashNode*)(((char*)h)
- + sizeof(struct HashRec)
- + cols * sizeof(struct HashNode*));
- for(lga=0; h0 != 0 && lga<(1<<h0->lgalloced); lga++) {
- struct HashNode* hn = h0->table[lga];
- while(hn) {
- INSERT(h, hn->key, hn->val, hashcolumn(h, hn->key));
- hn = hn->next;
- }
- }
- naGC_swapfree((void**)&hash->rec, h);
/* Accessors for the pieces of the single-allocation record (see the
 * layout comment above).  ENTS() aligns past the header to naRef
 * alignment; TAB() starts immediately after the 2^lgsz entries. */
#define REC(h) (PTR(h).hash->rec)
#define POW2(n) (1<<(n))
#define NCELLS(hr) (2*POW2((hr)->lgsz)) /* cells in the index table */
#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
/* Top lgsz bits of a 32-bit hash code select the starting cell */
#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)

#define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))

/* Scrambles a 32-bit word with xor/add/rotate rounds so that nearby
 * inputs produce well-spread hash codes. */
static unsigned int mix32(unsigned int h)
{
    h ^= 0x2e63823a; h += LROT(h, 15); h -= LROT(h, 9);
    h += LROT(h, 4); h -= LROT(h, 1); h ^= LROT(h, 2);
    return h;
}

/* Hashes len bytes of in: packs the input into 32-bit chunks, mixing
 * after every full chunk, then folds in the trailing 0-3 bytes. */
static unsigned int hash32(const unsigned char* in, int len)
{
    unsigned int h = len, val = 0;
    int i, count = 0;
    for(i=0; i<len; i++) {
        val = (val<<8) ^ in[i];
        if(++count == 4) { /* full 4-byte chunk accumulated */
            h = mix32(h ^ val);
            val = count = 0;
        }
    }
    return mix32(h ^ val);
}
-static struct HashNode* find(struct naHash* hash, naRef key)
+static unsigned int refhash(naRef key)
{
- struct HashRec* h = hash->rec;
- if(h) {
- struct HashNode* hn = h->table[hashcolumn(h, key)];
- while(hn) {
- if(EQUAL(key, hn->key))
- return hn;
- hn = hn->next;
- }
+ if(IS_STR(key)) {
+ struct naStr* s = PTR(key).str;
+ if(s->hashcode) return s->hashcode;
+ return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
+ } else { /* must be a number */
+ union { double d; unsigned int u[2]; } n;
+ n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */
+ return mix32(mix32(n.u[0]) ^ n.u[1]);
}
- return 0;
}
-// Make a temporary string on the stack
-static void tmpStr(naRef* out, struct naStr* str, char* key)
+static int equal(naRef a, naRef b)
{
- str->len = 0;
- str->data = key;
- while(key[str->len]) str->len++;
- *out = naNil();
- out->ref.ptr.str = str;
+ if(IS_NUM(a)) return a.num == b.num;
+ if(PTR(a).obj == PTR(b).obj) return 1;
+ if(naStr_len(a) != naStr_len(b)) return 0;
+ return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
}
-naRef naHash_cget(naRef hash, char* key)
+/* Returns the index of a cell that either contains a matching key, or
+ * is the empty slot to receive a new insertion. */
+static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
{
- struct naStr str;
- naRef result, key2;
- tmpStr(&key2, &str, key);
- if(naHash_get(hash, key2, &result))
- return result;
- return naNil();
+ int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
+ for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
+ if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
+ break;
+ return i;
}
-void naHash_cset(naRef hash, char* key, naRef val)
+static void hashset(HashRec* hr, naRef key, naRef val)
{
- struct naStr str;
- naRef key2;
- tmpStr(&key2, &str, key);
- naHash_tryset(hash, key2, val);
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
+ ent = hr->next++;
+ if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+ TAB(hr)[cell] = ent;
+ hr->size++;
+ ENTS(hr)[ent].key = key;
+ }
+ ENTS(hr)[ent].val = val;
}
-int naHash_get(naRef hash, naRef key, naRef* out)
+static int recsize(int lgsz)
{
- if(IS_HASH(hash)) {
- struct HashNode* n = find(hash.ref.ptr.hash, key);
- if(n) { *out = n->val; return 1; }
- }
- return 0;
+ HashRec hr;
+ hr.lgsz = lgsz;
+ return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
}
-// Simpler version. Don't create a new node if the value isn't there
-int naHash_tryset(naRef hash, naRef key, naRef val)
+static HashRec* resize(struct naHash* hash)
{
- if(IS_HASH(hash)) {
- struct HashNode* n = find(hash.ref.ptr.hash, key);
- if(n) n->val = val;
- return n != 0;
+ HashRec *hr = hash->rec, *hr2;
+ int i, lgsz = 0;
+ if(hr) {
+ int oldsz = hr->size;
+ while(oldsz) { oldsz >>= 1; lgsz++; }
}
- return 0;
+ hr2 = naAlloc(recsize(lgsz));
+ hr2->size = hr2->next = 0;
+ hr2->lgsz = lgsz;
+ for(i=0; i<(2*(1<<lgsz)); i++)
+ TAB(hr2)[i] = ENT_EMPTY;
+ for(i=0; hr && i < POW2(hr->lgsz+1); i++)
+ if(TAB(hr)[i] >= 0)
+ hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
+ naGC_swapfree((void*)&hash->rec, hr2);
+ return hr2;
}
-// Special purpose optimization for use in function call setups. Sets
-// a value that is known *not* to be present in the hash table. As
-// for naHash_sym, the key must be a string with a precomputed hash
-// code.
-void naHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
-{
- int col;
- struct HashRec* h = hash->rec;
- if(!h || h->size >= 1<<h->lgalloced)
- h = realloc(hash);
- col = (HASH_MAGIC * sym->ref.ptr.str->hashcode) >> (32 - h->lgalloced);
- INSERT(h, *sym, *val, col);
-}
+int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
-// The cycle check is an integrity requirement for multithreading,
-// where raced inserts can potentially cause cycles. This ensures
-// that the "last" thread to hold a reference to an inserted node
-// breaks any cycles that might have happened (at the expense of
-// potentially dropping items out of the hash). Under normal
-// circumstances, chains will be very short and this will be fast.
-static void chkcycle(struct HashNode* node, int count)
+int naHash_get(naRef hash, naRef key, naRef* out)
{
- struct HashNode* hn = node;
- while(hn && (hn = hn->next) != 0)
- if(count-- <= 0) { node->next = 0; return; }
+ HashRec* hr = REC(hash);
+ if(hr) {
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) < 0) return 0;
+ *out = ENTS(hr)[ent].val;
+ return 1;
+ }
+ return 0;
}
/* Inserts or overwrites key with val, growing the record first when
 * all entry slots have been handed out. */
void naHash_set(naRef hash, naRef key, naRef val)
{
    HashRec* hr = REC(hash);
    if(!hr || hr->next >= POW2(hr->lgsz))
        hr = resize(PTR(hash).hash);
    hashset(hr, key, val);
}
/* Removes key if present by tombstoning its cell; shrinks the record
 * once it is less than half full to reclaim tombstones. */
void naHash_delete(naRef hash, naRef key)
{
    HashRec* hr = REC(hash);
    if(hr) {
        int cell = findcell(hr, key, refhash(key));
        if(TAB(hr)[cell] >= 0) {
            TAB(hr)[cell] = ENT_DELETED;
            /* POW2(lgsz)/2 rather than POW2(lgsz-1): when lgsz is 0
             * the latter shifts by -1, which is undefined behavior;
             * for lgsz >= 1 the two are identical. */
            if(--hr->size < POW2(hr->lgsz)/2)
                resize(PTR(hash).hash);
        }
    }
}
/* Appends every live key in hash to the vector dst. */
void naHash_keys(naRef dst, naRef hash)
{
    int i;
    HashRec* hr = REC(hash);
    for(i=0; hr && i < NCELLS(hr); i++)
        if(TAB(hr)[i] >= 0)
            naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
}
+
+void naiGCMarkHash(naRef hash)
+{
+ int i;
+ HashRec* hr = REC(hash);
+ for(i=0; hr && i < NCELLS(hr); i++)
+ if(TAB(hr)[i] >= 0) {
+ naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+ naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
}
- }
}
-int naHash_size(naRef hash)
+static void tmpStr(naRef* out, struct naStr* str, const char* key)
+{
+ str->type = T_STR;
+ str->hashcode = 0;
+ str->emblen = -1;
+ str->data.ref.ptr = (unsigned char*)key;
+ str->data.ref.len = strlen(key);
+ SETPTR(*out, str);
+}
+
+int naMember_cget(naContext c, naRef obj, const char* field, naRef* out)
+{
+ naRef key; struct naStr str;
+ tmpStr(&key, &str, field);
+ return naMember_get(c, obj, key, out);
+}
+
+naRef naHash_cget(naRef hash, char* key)
{
- struct HashRec* h = hash.ref.ptr.hash->rec;
- if(!IS_HASH(hash) || !h) return 0;
- return h->size - h->dels;
+ struct naStr str;
+ naRef result, key2;
+ tmpStr(&key2, &str, key);
+ return naHash_get(hash, key2, &result) ? result : naNil();
+}
+
+void naHash_cset(naRef hash, char* key, naRef val)
+{
+ naRef key2; struct naStr str;
+ tmpStr(&key2, &str, key);
+ naiHash_tryset(hash, key2, val);
}
-void naHash_gcclean(struct naHash* h)
+int naiHash_tryset(naRef hash, naRef key, naRef val)
+{
+ HashRec* hr = REC(hash);
+ if(hr) {
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
+ }
+ return 0;
+}
+
+void naiGCHashClean(struct naHash* h)
{
naFree(h->rec);
h->rec = 0;
}
+
+/* Optimized naHash_get for looking up local variables (OP_LOCAL is by
+ * far the most common opcode and deserves some special case
+ * optimization). Assumes that the key is an interned symbol
+ * (i.e. the hash code is precomputed, and we only need to test for
+ * pointer identity). */
+int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
+{
+ HashRec* hr = hash->rec;
+ if(hr) {
+ int* tab = TAB(hr);
+ HashEnt* ents = ENTS(hr);
+ unsigned int hc = sym->hashcode;
+ int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
+ for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
+ if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
+ *out = ents[tab[cell]].val;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/* As above, a special naHash_set for setting local variables.
+ * Assumes that the key is interned, and also that it isn't already
+ * present in the hash. */
+void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
+{
+ HashRec* hr = hash->rec;
+ int mask, step, cell, ent;
+ struct naStr *s = PTR(*sym).str;
+ if(!hr || hr->next >= POW2(hr->lgsz))
+ hr = resize(hash);
+ mask = POW2(hr->lgsz+1) - 1;
+ step = (2*s->hashcode+1) & mask;
+ cell = HBITS(hr, s->hashcode);
+ while(TAB(hr)[cell] != ENT_EMPTY)
+ cell = (cell + step) & mask;
+ ent = hr->next++;
+ if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+ TAB(hr)[cell] = ent;
+ hr->size++;
+ ENTS(hr)[TAB(hr)[cell]].key = *sym;
+ ENTS(hr)[TAB(hr)[cell]].val = *val;
+}
+