+#include <string.h>
#include "nasal.h"
#include "data.h"
-static void realloc(naRef hash)
-{
- struct naHash* h = hash.ref.ptr.hash;
- int i, sz, oldsz = h->size;
- int oldcols = h->table ? 1 << h->lgalloced : 0;
-
- // Keep a handle to our original objects
- struct HashNode* oldnodes = h->nodes;
- struct HashNode** oldtable = h->table;
-
- // Figure out how big we need to be (start with a minimum size of
- // 16 entries)
- for(i=3; 1<<i < oldsz; i++);
- h->lgalloced = i+1;
-
- // Allocate new ones (note that all the records are allocated in a
- // single chunk, to avoid zillions of tiny node allocations)
- sz = 1<<h->lgalloced;
- h->nodes = naAlloc(sz * (sizeof(struct HashNode) + sizeof(void*)));
- h->table = (struct HashNode**)(((char*)h->nodes) + sz*sizeof(struct HashNode));
- naBZero(h->table, sz * sizeof(void*));
- h->nextnode = 0;
- h->size = 0;
-
- // Re-insert everything from scratch
- for(i=0; i<oldcols; i++) {
- struct HashNode* hn = oldtable[i];
- while(hn) {
- naHash_set(hash, hn->key, hn->val);
- hn = hn->next;
+/* A HashRec lives in a single allocated block. The layout is the
+ * header struct, then a table of 2^lgsz hash entries (key/value
+ * pairs), then an index table of 2*2^lgsz integers storing index
+ * values into the entry table. There are two tokens needed for
+ * "unused" and "used but empty". */
+
+#define ENT_EMPTY -1
+#define ENT_DELETED -2
+
+typedef struct { naRef key, val; } HashEnt;
+
+typedef struct HashRec {
+ int size; /* number of active entries */
+ int lgsz; /* base-2 logarithm of the allocated (!) size */
+ int next; /* next entry to use */
+} HashRec;
+
+#define REC(h) (PTR(h).hash->rec)
+#define POW2(n) (1<<(n))
+#define NCELLS(hr) (2*POW2((hr)->lgsz))
+#define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
+#define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
+#define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
+#define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
+#define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)
+
+#define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))
+static unsigned int mix32(unsigned int h)
+{
+ h ^= 0x2e63823a; h += LROT(h, 15); h -= LROT(h, 9);
+ h += LROT(h, 4); h -= LROT(h, 1); h ^= LROT(h, 2);
+ return h;
+}
+static unsigned int hash32(const unsigned char* in, int len)
+{
+ unsigned int h = len, val = 0;
+ int i, count = 0;
+ for(i=0; i<len; i++) {
+ val = (val<<8) ^ in[i];
+ if(++count == 4) {
+ h = mix32(h ^ val);
+ val = count = 0;
}
}
+ return mix32(h ^ val);
+}
- // Free the old memory
- naFree(oldnodes);
-}
-
-// Computes a hash code for a given scalar
-static unsigned int hashcode(naRef r)
-{
- if(IS_NUM(r))
- {
- // Numbers get the number as a hash. Just use the bits and
- // xor them together. Note assumption that sizeof(double) >=
- // 2*sizeof(int).
- unsigned int* p = (unsigned int*)&(r.num);
- return p[0] ^ p[1];
- } else {
- // This is Daniel Bernstein's djb2 hash function that I found
- // on the web somewhere. It appears to work pretty well.
- unsigned int i, hash = 5831;
- for(i=0; i<r.ref.ptr.str->len; i++)
- hash = (hash * 33) ^ r.ref.ptr.str->data[i];
- return hash;
+static unsigned int refhash(naRef key)
+{
+ if(IS_STR(key)) {
+ struct naStr* s = PTR(key).str;
+ if(s->hashcode) return s->hashcode;
+ return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
+ } else { /* must be a number */
+ union { double d; unsigned int u[2]; } n;
+ n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */
+ return mix32(mix32(n.u[0]) ^ n.u[1]);
}
}
-// Which column in a given hash does the key correspond to.
-static unsigned int hashcolumn(struct naHash* h, naRef key)
+static int equal(naRef a, naRef b)
{
- // Multiply by a big number, and take the top N bits. Note
- // assumption that sizeof(unsigned int) == 4.
- return (2654435769u * hashcode(key)) >> (32 - h->lgalloced);
+ if(IS_NUM(a)) return a.num == b.num;
+ if(PTR(a).obj == PTR(b).obj) return 1;
+ if(naStr_len(a) != naStr_len(b)) return 0;
+ return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
}
-struct HashNode* find(struct naHash* h, naRef key)
+/* Returns the index of a cell that either contains a matching key, or
+ * is the empty slot to receive a new insertion. */
+static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
{
- struct HashNode* hn;
- if(h->table == 0)
- return 0;
- hn = h->table[hashcolumn(h, key)];
- while(hn) {
- if(naEqual(key, hn->key))
- return hn;
- hn = hn->next;
+ int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
+ for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
+ if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
+ break;
+ return i;
+}
+
+static void hashset(HashRec* hr, naRef key, naRef val)
+{
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
+ ent = hr->next++;
+ if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+ TAB(hr)[cell] = ent;
+ hr->size++;
+ ENTS(hr)[ent].key = key;
}
- return 0;
+ ENTS(hr)[ent].val = val;
}
-void naHash_init(naRef hash)
+static int recsize(int lgsz)
{
- struct naHash* h = hash.ref.ptr.hash;
- h->size = 0;
- h->lgalloced = 0;
- h->table = 0;
- h->nodes = 0;
+ HashRec hr;
+ hr.lgsz = lgsz;
+ return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
}
-// Make a temporary string on the stack
-static naRef tmpStr(struct naStr* str, char* key)
+static HashRec* resize(struct naHash* hash)
{
- char* p = key;
- while(*p) { p++; }
- str->len = p - key;
- str->data = key;
- return naObj(T_STR, (struct naObj*)str);
+ HashRec *hr = hash->rec, *hr2;
+ int i, lgsz = 0;
+ if(hr) {
+ int oldsz = hr->size;
+ while(oldsz) { oldsz >>= 1; lgsz++; }
+ }
+ hr2 = naAlloc(recsize(lgsz));
+ hr2->size = hr2->next = 0;
+ hr2->lgsz = lgsz;
+ for(i=0; i<(2*(1<<lgsz)); i++)
+ TAB(hr2)[i] = ENT_EMPTY;
+ for(i=0; hr && i < POW2(hr->lgsz+1); i++)
+ if(TAB(hr)[i] >= 0)
+ hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
+ naGC_swapfree((void*)&hash->rec, hr2);
+ return hr2;
}
-naRef naHash_cget(naRef hash, char* key)
+int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
+
+int naHash_get(naRef hash, naRef key, naRef* out)
{
- struct naStr str;
- naRef result, key2 = tmpStr(&str, key);
- if(naHash_get(hash, key2, &result))
- return result;
- return naNil();
+ HashRec* hr = REC(hash);
+ if(hr) {
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) < 0) return 0;
+ *out = ENTS(hr)[ent].val;
+ return 1;
+ }
+ return 0;
}
-void naHash_cset(naRef hash, char* key, naRef val)
+void naHash_set(naRef hash, naRef key, naRef val)
{
- struct naStr str;
- naRef key2 = tmpStr(&str, key);
- naHash_tryset(hash, key2, val);
+ HashRec* hr = REC(hash);
+ if(!hr || hr->next >= POW2(hr->lgsz))
+ hr = resize(PTR(hash).hash);
+ hashset(hr, key, val);
}
-int naHash_get(naRef hash, naRef key, naRef* out)
+void naHash_delete(naRef hash, naRef key)
{
- struct naHash* h = hash.ref.ptr.hash;
- struct HashNode* n;
- if(!IS_HASH(hash)) return 0;
- n = find(h, key);
- if(n) {
- *out = n->val;
- return 1;
- } else {
- *out = naNil();
- return 0;
+ HashRec* hr = REC(hash);
+ if(hr) {
+ int cell = findcell(hr, key, refhash(key));
+ if(TAB(hr)[cell] >= 0) {
+ TAB(hr)[cell] = ENT_DELETED;
+ if(--hr->size < POW2(hr->lgsz-1))
+ resize(PTR(hash).hash);
+ }
}
}
-// Simpler version. Don't create a new node if the value isn't there
-int naHash_tryset(naRef hash, naRef key, naRef val)
+void naHash_keys(naRef dst, naRef hash)
{
- struct HashNode* n;
- if(!IS_HASH(hash)) return 0;
- n = find(hash.ref.ptr.hash, key);
- if(n) n->val = val;
- return n != 0;
+ int i;
+ HashRec* hr = REC(hash);
+ for(i=0; hr && i < NCELLS(hr); i++)
+ if(TAB(hr)[i] >= 0)
+ naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
}
-void naHash_set(naRef hash, naRef key, naRef val)
+void naiGCMarkHash(naRef hash)
{
- struct naHash* h = hash.ref.ptr.hash;
- unsigned int col;
- struct HashNode* n;
+ int i;
+ HashRec* hr = REC(hash);
+ for(i=0; hr && i < NCELLS(hr); i++)
+ if(TAB(hr)[i] >= 0) {
+ naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+ naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
+ }
+}
- if(!IS_HASH(hash)) return;
+static void tmpStr(naRef* out, struct naStr* str, const char* key)
+{
+ str->type = T_STR;
+ str->hashcode = str->emblen = 0;
+ str->data.ref.ptr = (unsigned char*)key;
+ str->data.ref.len = strlen(key);
+ SETPTR(*out, str);
+}
- n = find(h, key);
- if(n) {
- n->val = val;
- return;
- }
+int naMember_cget(naRef obj, const char* field, naRef* out)
+{
+ naRef key; struct naStr str;
+ tmpStr(&key, &str, field);
+ return naMember_get(obj, key, out);
+}
- if(h->size+1 >= 1<<h->lgalloced)
- realloc(hash);
+naRef naHash_cget(naRef hash, char* key)
+{
+ struct naStr str;
+ naRef result, key2;
+ tmpStr(&key2, &str, key);
+ return naHash_get(hash, key2, &result) ? result : naNil();
+}
- col = hashcolumn(h, key);
- n = h->nodes + h->nextnode++;
- n->key = key;
- n->val = val;
- n->next = h->table[col];
- h->table[col] = n;
- h->size++;
+void naHash_cset(naRef hash, char* key, naRef val)
+{
+ naRef key2; struct naStr str;
+ tmpStr(&key2, &str, key);
+ naiHash_tryset(hash, key2, val);
}
-// FIXME: this implementation does a realloc() after each delete, and
-// is therefore needlessly O(N). (The reason is that this avoids the
-// need to keep a free list around for the much more common case of
-// adding a new value. Modifying an existing value is O(1), of
-// course.)
-void naHash_delete(naRef hash, naRef key)
+int naiHash_tryset(naRef hash, naRef key, naRef val)
{
- struct naHash* h = hash.ref.ptr.hash;
- int col;
- struct HashNode *last=0, *hn;
- if(!IS_HASH(hash)) return;
- col = hashcolumn(h, key);
- hn = h->table[col];
- while(hn) {
- if(naEqual(hn->key, key)) {
- if(last == 0) h->table[col] = hn->next;
- else last->next = hn->next;
- h->size--;
- realloc(hash);
- return;
- }
- last = hn;
- hn = hn->next;
+ HashRec* hr = REC(hash);
+ if(hr) {
+ int ent, cell = findcell(hr, key, refhash(key));
+ if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
}
+ return 0;
}
-void naHash_keys(naRef dst, naRef hash)
+void naiGCHashClean(struct naHash* h)
{
- struct naHash* h = hash.ref.ptr.hash;
- int i;
- if(!IS_HASH(hash) || !h->table) return;
- for(i=0; i<(1<<h->lgalloced); i++) {
- struct HashNode* hn = h->table[i];
- while(hn) {
- naVec_append(dst, hn->key);
- hn = hn->next;
- }
- }
+ naFree(h->rec);
+ h->rec = 0;
}
-int naHash_size(naRef h)
+/* Optimized naHash_get for looking up local variables (OP_LOCAL is by
+ * far the most common opcode and deserves some special case
+ * optimization). Assumes that the key is an interned symbol
+ * (i.e. the hash code is precomputed, and we only need to test for
+ * pointer identity). */
+int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
{
- if(!IS_HASH(h)) return 0;
- return h.ref.ptr.hash->size;
+ HashRec* hr = hash->rec;
+ if(hr) {
+ int* tab = TAB(hr);
+ HashEnt* ents = ENTS(hr);
+ unsigned int hc = sym->hashcode;
+ int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
+ for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
+ if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
+ *out = ents[tab[cell]].val;
+ return 1;
+ }
+ }
+ return 0;
}
-void naHash_gcclean(struct naHash* h)
+
+/* As above, a special naHash_set for setting local variables.
+ * Assumes that the key is interned, and also that it isn't already
+ * present in the hash. */
+void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
{
- naFree(h->nodes);
- h->nodes = 0;
- h->size = 0;
- h->lgalloced = 0;
- h->table = 0;
- h->nextnode = 0;
+ HashRec* hr = hash->rec;
+ int mask, step, cell, ent;
+ struct naStr *s = PTR(*sym).str;
+ if(!hr || hr->next >= POW2(hr->lgsz))
+ hr = resize(hash);
+ mask = POW2(hr->lgsz+1) - 1;
+ step = (2*s->hashcode+1) & mask;
+ cell = HBITS(hr, s->hashcode);
+ while(TAB(hr)[cell] != ENT_EMPTY)
+ cell = (cell + step) & mask;
+ ent = hr->next++;
+ if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
+ TAB(hr)[cell] = ent;
+ hr->size++;
+ ENTS(hr)[TAB(hr)[cell]].key = *sym;
+ ENTS(hr)[TAB(hr)[cell]].val = *val;
}
+