#include "nasal.h"
#include "data.h"
-static void realloc(naRef hash)
-{
- struct naHash* h = hash.ref.ptr.hash;
- int i, sz, oldsz = h->size;
- int oldcols = h->table ? 1 << h->lgalloced : 0;
-
- // Keep a handle to our original objects
- struct HashNode* oldnodes = h->nodes;
- struct HashNode** oldtable = h->table;
-
- // Figure out how big we need to be (start with a minimum size of
- // 16 entries)
- for(i=3; 1<<i < oldsz; i++);
- h->lgalloced = i+1;
-
- // Allocate new ones (note that all the records are allocated in a
- // single chunk, to avoid zillions of tiny node allocations)
- sz = 1<<h->lgalloced;
- h->nodes = naAlloc(sz * (sizeof(struct HashNode) + sizeof(void*)));
- h->table = (struct HashNode**)(((char*)h->nodes) + sz*sizeof(struct HashNode));
- naBZero(h->table, sz * sizeof(void*));
- h->nextnode = 0;
- h->size = 0;
-
- // Re-insert everything from scratch
- for(i=0; i<oldcols; i++) {
- struct HashNode* hn = oldtable[i];
- while(hn) {
- naHash_set(hash, hn->key, hn->val);
- hn = hn->next;
- }
- }
+#define MIN_HASH_SIZE 4
- // Free the old memory
- naFree(oldnodes);
-}
+#define EQUAL(a, b) (IDENTICAL(a, b) || naEqual(a, b))
+
+#define HASH_MAGIC 2654435769u
+
+#define INSERT(hh, hkey, hval, hcol) do { \
+ unsigned int cc = (hcol), iidx=(hh)->size++; \
+ if(iidx < (1<<(hh)->lgalloced)) { \
+ struct HashNode* hnn = &(hh)->nodes[iidx]; \
+ hnn->key = (hkey); hnn->val = (hval); \
+ hnn->next = (hh)->table[cc]; \
+ (hh)->table[cc] = hnn; \
+ }} while(0)
// Computes a hash code for a given scalar
static unsigned int hashcode(naRef r)
// 2*sizeof(int).
unsigned int* p = (unsigned int*)&(r.num);
return p[0] ^ p[1];
+ } else if(PTR(r).str->hashcode) {
+ return PTR(r).str->hashcode;
} else {
// This is Daniel Bernstein's djb2 hash function that I found
// on the web somewhere. It appears to work pretty well.
unsigned int i, hash = 5831;
- for(i=0; i<r.ref.ptr.str->len; i++)
- hash = (hash * 33) ^ r.ref.ptr.str->data[i];
+ for(i=0; i<PTR(r).str->len; i++)
+ hash = (hash * 33) ^ PTR(r).str->data[i];
+ PTR(r).str->hashcode = hash;
return hash;
}
}
// Which column in a given hash does the key correspond to.
-static unsigned int hashcolumn(struct naHash* h, naRef key)
+static unsigned int hashcolumn(struct HashRec* h, naRef key)
{
// Multiply by a big number, and take the top N bits. Note
// assumption that sizeof(unsigned int) == 4.
- return (2654435769u * hashcode(key)) >> (32 - h->lgalloced);
+ return (HASH_MAGIC * hashcode(key)) >> (32 - h->lgalloced);
}
-struct HashNode* find(struct naHash* h, naRef key)
+static struct HashRec* resize(struct naHash* hash)
{
- struct HashNode* hn;
- if(h->table == 0)
- return 0;
- hn = h->table[hashcolumn(h, key)];
- while(hn) {
- if(naEqual(key, hn->key))
- return hn;
- hn = hn->next;
+ struct HashRec *h, *h0 = hash->rec;
+ int lga, cols, need = h0 ? h0->size - h0->dels : MIN_HASH_SIZE;
+
+ if(need < MIN_HASH_SIZE) need = MIN_HASH_SIZE;
+ for(lga=0; 1<<lga <= need; lga++);
+ cols = 1<<lga;
+ h = naAlloc(sizeof(struct HashRec) +
+ cols * (sizeof(struct HashNode*) + sizeof(struct HashNode)));
+ naBZero(h, sizeof(struct HashRec) + cols * sizeof(struct HashNode*));
+
+ h->lgalloced = lga;
+ h->nodes = (struct HashNode*)(((char*)h)
+ + sizeof(struct HashRec)
+ + cols * sizeof(struct HashNode*));
+ for(lga=0; h0 != 0 && lga<(1<<h0->lgalloced); lga++) {
+ struct HashNode* hn = h0->table[lga];
+ while(hn) {
+ INSERT(h, hn->key, hn->val, hashcolumn(h, hn->key));
+ hn = hn->next;
+ }
+ }
+ naGC_swapfree((void**)&hash->rec, h);
+ return h;
+}
+
+// Special, optimized version of naHash_get for the express purpose of
+// looking up symbols in the local variables hash (OP_LOCAL is by far
+// the most common opcode and deserves some special case
+// optimization). Elides all the typing checks that are normally
+// required, presumes that the key is a string and has had its
+// hashcode precomputed, checks only for object identity, and inlines
+// the column computation.
+int naHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
+{
+ struct HashRec* h = hash->rec;
+ if(h) {
+ int col = (HASH_MAGIC * sym->hashcode) >> (32 - h->lgalloced);
+ struct HashNode* hn = h->table[col];
+ while(hn) {
+ if(PTR(hn->key).str == sym) {
+ *out = hn->val;
+ return 1;
+ }
+ hn = hn->next;
+ }
}
return 0;
}
-void naHash_init(naRef hash)
+static struct HashNode* find(struct naHash* hash, naRef key)
{
- struct naHash* h = hash.ref.ptr.hash;
- h->size = 0;
- h->lgalloced = 0;
- h->table = 0;
- h->nodes = 0;
+ struct HashRec* h = hash->rec;
+ struct HashNode* hn;
+ if(!h) return 0;
+ for(hn = h->table[hashcolumn(h, key)]; hn; hn = hn->next)
+ if(EQUAL(key, hn->key))
+ return hn;
+ return 0;
}
// Make a temporary string on the stack
-static naRef tmpStr(struct naStr* str, char* key)
+static void tmpStr(naRef* out, struct naStr* str, const char* key)
{
- char* p = key;
- while(*p) { p++; }
- str->len = p - key;
- str->data = key;
- return naObj(T_STR, (struct naObj*)str);
+ str->len = 0;
+ str->type = T_STR;
+ str->data = (unsigned char*)key;
+ str->hashcode = 0;
+ while(key[str->len]) str->len++;
+ *out = naNil();
+ SETPTR(*out, str);
+}
+
+int naMember_cget(naRef obj, const char* field, naRef* out)
+{
+ naRef key;
+ struct naStr str;
+ tmpStr(&key, &str, field);
+ return naMember_get(obj, key, out);
}
naRef naHash_cget(naRef hash, char* key)
{
struct naStr str;
- naRef result, key2 = tmpStr(&str, key);
+ naRef result, key2;
+ tmpStr(&key2, &str, key);
if(naHash_get(hash, key2, &result))
return result;
return naNil();
void naHash_cset(naRef hash, char* key, naRef val)
{
struct naStr str;
- naRef key2 = tmpStr(&str, key);
+ naRef key2;
+ tmpStr(&key2, &str, key);
naHash_tryset(hash, key2, val);
}
int naHash_get(naRef hash, naRef key, naRef* out)
{
- struct naHash* h = hash.ref.ptr.hash;
- struct HashNode* n;
- if(!IS_HASH(hash)) return 0;
- n = find(h, key);
- if(n) {
- *out = n->val;
- return 1;
- } else {
- *out = naNil();
- return 0;
+ if(IS_HASH(hash)) {
+ struct HashNode* n = find(PTR(hash).hash, key);
+ if(n) { *out = n->val; return 1; }
}
+ return 0;
}
// Simpler version. Don't create a new node if the value isn't there
int naHash_tryset(naRef hash, naRef key, naRef val)
{
- struct HashNode* n;
- if(!IS_HASH(hash)) return 0;
- n = find(hash.ref.ptr.hash, key);
- if(n) n->val = val;
- return n != 0;
+ if(IS_HASH(hash)) {
+ struct HashNode* n = find(PTR(hash).hash, key);
+ if(n) n->val = val;
+ return n != 0;
+ }
+ return 0;
+}
+
+// Special purpose optimization for use in function call setups. Sets
+// a value that is known *not* to be present in the hash table. As
+// for naHash_sym, the key must be a string with a precomputed hash
+// code.
+void naHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
+{
+ int col;
+ struct HashRec* h = hash->rec;
+ while(!h || h->size >= 1<<h->lgalloced)
+ h = resize(hash);
+ col = (HASH_MAGIC * PTR(*sym).str->hashcode) >> (32 - h->lgalloced);
+ INSERT(h, *sym, *val, col);
+}
+
+// The cycle check is an integrity requirement for multithreading,
+// where raced inserts can potentially cause cycles. This ensures
+// that the "last" thread to hold a reference to an inserted node
+// breaks any cycles that might have happened (at the expense of
+// potentially dropping items out of the hash). Under normal
+// circumstances, chains will be very short and this will be fast.
+static void chkcycle(struct HashNode* node, int count)
+{
+ struct HashNode* hn = node;
+ while(hn && (hn = hn->next) != 0)
+ if(count-- <= 0) { node->next = 0; return; }
}
void naHash_set(naRef hash, naRef key, naRef val)
{
- struct naHash* h = hash.ref.ptr.hash;
- unsigned int col;
+ int col;
+ struct HashRec* h;
struct HashNode* n;
-
if(!IS_HASH(hash)) return;
-
- n = find(h, key);
- if(n) {
- n->val = val;
- return;
- }
-
- if(h->size+1 >= 1<<h->lgalloced)
- realloc(hash);
-
+ if((n = find(PTR(hash).hash, key))) { n->val = val; return; }
+ h = PTR(hash).hash->rec;
+ while(!h || h->size >= 1<<h->lgalloced)
+ h = resize(PTR(hash).hash);
col = hashcolumn(h, key);
- n = h->nodes + h->nextnode++;
- n->key = key;
- n->val = val;
- n->next = h->table[col];
- h->table[col] = n;
- h->size++;
-}
-
-// FIXME: this implementation does a realloc() after each delete, and
-// is therefore needlessly O(N). (The reason is that this avoids the
-// need to keep a free list around for the much more common case of
-// adding a new value. Modifying an existing value is O(1), of
-// course.)
+ INSERT(h, key, val, hashcolumn(h, key));
+ chkcycle(h->table[col], h->size - h->dels);
+}
+
void naHash_delete(naRef hash, naRef key)
{
- struct naHash* h = hash.ref.ptr.hash;
+ struct HashRec* h = PTR(hash).hash->rec;
int col;
struct HashNode *last=0, *hn;
- if(!IS_HASH(hash)) return;
+ if(!IS_HASH(hash) || !h) return;
col = hashcolumn(h, key);
hn = h->table[col];
while(hn) {
- if(naEqual(hn->key, key)) {
+ if(EQUAL(hn->key, key)) {
if(last == 0) h->table[col] = hn->next;
else last->next = hn->next;
- h->size--;
- realloc(hash);
+ h->dels++;
return;
}
last = hn;
void naHash_keys(naRef dst, naRef hash)
{
- struct naHash* h = hash.ref.ptr.hash;
int i;
- if(!IS_HASH(hash) || !h->table) return;
+ struct HashRec* h = PTR(hash).hash->rec;
+ if(!IS_HASH(hash) || !h) return;
for(i=0; i<(1<<h->lgalloced); i++) {
struct HashNode* hn = h->table[i];
while(hn) {
}
}
-int naHash_size(naRef h)
+int naHash_size(naRef hash)
{
- if(!IS_HASH(h)) return 0;
- return h.ref.ptr.hash->size;
+ struct HashRec* h = PTR(hash).hash->rec;
+ if(!IS_HASH(hash) || !h) return 0;
+ return h->size - h->dels;
}
void naHash_gcclean(struct naHash* h)
{
- naFree(h->nodes);
- h->nodes = 0;
- h->size = 0;
- h->lgalloced = 0;
- h->table = 0;
- h->nextnode = 0;
+ naFree(h->rec);
+ h->rec = 0;
}