commit 458b9ee284a08361b9b07c9f8dc751cac1ed95e0
parent e080606786e9e9851804c35ffdb664cf97e1204c
Author: Andres Navarro <canavarro82@gmail.com>
Date: Wed, 16 Nov 2011 23:30:13 -0300
Added immutable bytevector interning in the string table.
Diffstat:
7 files changed, 82 insertions(+), 35 deletions(-)
diff --git a/src/Makefile b/src/Makefile
@@ -117,7 +117,7 @@ kapplicative.o: kapplicative.c kobject.h klimits.h klisp.h klispconf.h \
kauxlib.o: kauxlib.c klisp.h kobject.h klimits.h klispconf.h kstate.h \
ktoken.h kmem.h
kbytevector.o: kbytevector.c kbytevector.h kobject.h klimits.h klisp.h \
-klispconf.h kstate.h ktoken.h kmem.h kgc.h
+klispconf.h kstate.h ktoken.h kmem.h kgc.h kstring.h
kcontinuation.o: kcontinuation.c kcontinuation.h kobject.h klimits.h \
klisp.h klispconf.h kstate.h ktoken.h kmem.h kgc.h
kencapsulation.o: kencapsulation.c kobject.h klimits.h klisp.h \
@@ -273,7 +273,7 @@ kstate.o: kstate.c klisp.h kobject.h klimits.h klispconf.h kstate.h \
kstring.o: kstring.c kstring.h kobject.h klimits.h klisp.h klispconf.h \
kstate.h ktoken.h kmem.h kgc.h
ksymbol.o: ksymbol.c ksymbol.h kobject.h klimits.h klisp.h klispconf.h \
- kstate.h ktoken.h kmem.h kstring.h kgc.h
+ kstate.h ktoken.h kmem.h kstring.h kgc.h kstring.h
ktable.o: ktable.c klisp.h kobject.h klimits.h klispconf.h kgc.h kstate.h \
ktoken.h kmem.h ktable.h kapplicative.h koperative.h kgeqp.h kinteger.h \
imath.h krational.h imrat.h kghelpers.h kerror.h kpair.h kcontinuation.h \
diff --git a/src/kbytevector.c b/src/kbytevector.c
@@ -11,6 +11,8 @@
#include "kstate.h"
#include "kmem.h"
#include "kgc.h"
+/* for immutable table */
+#include "kstring.h"
/* Constructors */
@@ -29,14 +31,35 @@ TValue kbytevector_new_bs_g(klisp_State *K, bool m, const uint8_t *buf,
/* main constructor for immutable bytevectors */
TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size)
{
- /* Does it make sense to put them in the string table
- (i.e. interning them)?, we have two different constructors just in case */
-
- /* XXX: find a better way to do this! */
- if (size == 0 && ttisbytevector(K->empty_bytevector)) {
- return K->empty_bytevector;
- }
-
+ /* first check to see if it's in the stringtable */
+ GCObject *o;
+ uint32_t h = size; /* seed */
+ size_t step = (size>>5)+1; /* if bytevector is too long, don't hash all
+ its bytes */
+ size_t size1;
+ for (size1 = size; size1 >= step; size1 -= step) /* compute hash */
+ h = h ^ ((h<<5)+(h>>2)+ buf[size1-1]);
+
+ for (o = K->strt.hash[lmod(h, K->strt.size)];
+ o != NULL; o = o->gch.next) {
+ Bytevector *tb = NULL;
+ if (o->gch.tt == K_TBYTEVECTOR) {
+ tb = (Bytevector *) o;
+ } else if (o->gch.tt == K_TSYMBOL || o->gch.tt == K_TSTRING) {
+ continue;
+ } else {
+ /* only symbols, immutable bytevectors and immutable strings */
+ klisp_assert(0);
+ }
+ if (tb->size == size && (memcmp(buf, tb->b, size) == 0)) {
+ /* bytevector may be dead */
+ if (isdead(K, o)) changewhite(o);
+ return gc2bytevector(o);
+ }
+ }
+
+ /* If it exits the loop, it means it wasn't found, hash is still in h */
+ /* REFACTOR: move all of these to a new function */
Bytevector *new_bb;
if (size > (SIZE_MAX - sizeof(Bytevector)))
@@ -45,9 +68,15 @@ TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size)
new_bb = (Bytevector *) klispM_malloc(K, sizeof(Bytevector) + size);
/* header + gc_fields */
- klispC_link(K, (GCObject *) new_bb, K_TBYTEVECTOR, K_FLAG_IMMUTABLE);
+ /* can't use klispC_link, because strings use the next pointer
+ differently */
+ new_bb->gct = klispC_white(K);
+ new_bb->tt = K_TBYTEVECTOR;
+ new_bb->kflags = K_FLAG_IMMUTABLE;
+ new_bb->si = NULL;
/* bytevector specific fields */
+ new_bb->hash = h;
new_bb->mark = KFALSE;
new_bb->size = size;
@@ -55,7 +84,21 @@ TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size)
memcpy(new_bb->b, buf, size);
}
- return gc2bytevector(new_bb);
+ /* add to the string/symbol table (and link it) */
+ stringtable *tb;
+ tb = &K->strt;
+ h = lmod(h, tb->size);
+ new_bb->next = tb->hash[h]; /* chain new entry */
+ tb->hash[h] = (GCObject *)(new_bb);
+ tb->nuse++;
+ TValue ret_tv = gc2bytevector(new_bb);
+ if (tb->nuse > ((uint32_t) tb->size) && tb->size <= INT32_MAX / 2) {
+ krooted_tvs_push(K, ret_tv); /* save in case of gc */
+ klispS_resize(K, tb->size*2); /* too crowded */
+ krooted_tvs_pop(K);
+ }
+
+ return ret_tv;
}
/*
diff --git a/src/kgc.c b/src/kgc.c
@@ -261,7 +261,7 @@ static int32_t propagatemark (klisp_State *K) {
case K_TSTRING: {
String *s = cast(String *, o);
markvalue(K, s->mark);
- return sizeof(String) + s->size * sizeof(char);
+ return sizeof(String) + (s->size + 1 * sizeof(char));
}
case K_TENVIRONMENT: {
Environment *e = cast(Environment *, o);
@@ -324,7 +324,7 @@ static int32_t propagatemark (klisp_State *K) {
case K_TBYTEVECTOR: {
Bytevector *b = cast(Bytevector *, o);
markvalue(K, b->mark);
- return sizeof(String) + b->size * sizeof(uint8_t);
+ return sizeof(Bytevector) + b->size * sizeof(uint8_t);
}
default:
fprintf(stderr, "Unknown GCObject type (in GC propagate): %d\n",
@@ -347,6 +347,8 @@ static size_t propagateall (klisp_State *K) {
** other objects: if really collected, cannot keep them; for userdata
** being finalized, keep them in keys, but not in values
*/
+/* XXX what the hell is this, I should reread this part of the lua
+ source Andres Navarro */
static int32_t iscleared (TValue o, int iskey) {
if (!iscollectable(o)) return 0;
#if 0 /* klisp: strings may be mutable... */
@@ -457,6 +459,9 @@ static void freeobj (klisp_State *K, GCObject *o) {
klispE_free(K, (Error *)o);
break;
case K_TBYTEVECTOR:
+ /* immutable bytevectors are in the string/symbol table */
+ if (kbytevector_immutablep(gc2str(o)))
+ K->strt.nuse--;
klispM_freemem(K, o, sizeof(Bytevector)+o->bytevector.size);
break;
default:
@@ -552,7 +557,7 @@ void klispC_freeall (klisp_State *K) {
K->currentwhite = WHITEBITS | bitmask(SFIXEDBIT); /* in klisp this may not be
necessary */
sweepwholelist(K, &K->rootgc);
- /* free all symbol/string lists */
+ /* free all symbol/string/bytevectors lists */
for (int32_t i = 0; i < K->strt.size; i++)
sweepwholelist(K, &K->strt.hash[i]);
}
@@ -779,7 +784,8 @@ void klispC_barrierback (klisp_State *K, Table *t) {
}
/* NOTE: kflags is added for klisp */
-/* NOTE: both symbols & strings do this "by hand", they don't call this */
+/* NOTE: symbols, immutable strings and immutable bytevectors do this
+ "by hand", they don't call this */
void klispC_link (klisp_State *K, GCObject *o, uint8_t tt, uint8_t kflags) {
o->gch.next = K->rootgc;
K->rootgc = o;
diff --git a/src/kgeqp.h b/src/kgeqp.h
@@ -20,7 +20,6 @@
#include "krational.h" /* for kbigrat_eqp */
#include "klisp.h"
#include "kghelpers.h"
-#include "kbytevector.h" /* temp until interned, for kbytevector_equalp */
/* 4.2.1 eq? */
/* 6.5.1 eq? */
@@ -29,8 +28,6 @@ void eqp(klisp_State *K, TValue *xparams, TValue ptree, TValue denv);
/* Helper (also used in equal?) */
inline bool eq2p(klisp_State *K, TValue obj1, TValue obj2)
{
- /* MAYBE: immutable bytevectors aren't interned and so we have to compare
- them everytime, maybe we should intern them */
bool res = (tv_equal(obj1, obj2));
if (!res && (ttype(obj1) == ttype(obj2))) {
switch (ttype(obj1)) {
@@ -57,13 +54,8 @@ inline bool eq2p(klisp_State *K, TValue obj1, TValue obj2)
(eq? obj1 obj2) */
res = kbigrat_eqp(K, obj1, obj2);
break;
- case K_TBYTEVECTOR:
- if (kbytevector_immutablep(obj1) && kbytevector_immutablep(obj2))
- res = kbytevector_equalp(obj1, obj2);
- else
- res = false;
- break;
- } /* immutable strings are interned so are covered already */
+ } /* immutable strings & bytevectors are interned so they are
+ covered already by tv_equalp */
}
return res;
diff --git a/src/kobject.h b/src/kobject.h
@@ -493,7 +493,7 @@ typedef struct __attribute__ ((__packed__)) {
CommonHeader;
TValue mark; /* for cycle/sharing aware algorithms */
uint32_t size;
- int32_t __dummy; /* for alignment to 64 bits */
+ uint32_t hash; /* only used for immutable strings */
uint8_t b[]; /* buffer */
} Bytevector;
diff --git a/src/kstring.c b/src/kstring.c
@@ -15,7 +15,7 @@
#include "kmem.h"
#include "kgc.h"
-/* for immutable string/symbols table */
+/* for immutable string/symbols/bytevector table */
void klispS_resize (klisp_State *K, int32_t newsize)
{
GCObject **newhash;
@@ -30,8 +30,8 @@ void klispS_resize (klisp_State *K, int32_t newsize)
for (i = 0; i < tb->size; i++) {
GCObject *p = tb->hash[i];
while (p) { /* for each node in the list */
- /* imm string & symbols aren't chained with all other
- objs, but with each other in strt */
+ /* imm string, imm bytevectors & symbols aren't chained with
+ all other objs, but with each other in strt */
GCObject *next = p->gch.next; /* save next */
uint32_t h = 0;
@@ -40,6 +40,8 @@ void klispS_resize (klisp_State *K, int32_t newsize)
h = ((Symbol *) p)->hash;
} else if (p->gch.tt == K_TSTRING) {
h = ((String *) p)->hash;
+ } else if (p->gch.tt == K_TBYTEVECTOR) {
+ h = ((Bytevector *) p)->hash;
} else {
klisp_assert(0);
}
@@ -85,10 +87,11 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size)
String *ts = NULL;
if (o->gch.tt == K_TSTRING) {
ts = (String *) o;
- } else if (o->gch.tt == K_TSYMBOL) {
+ } else if (o->gch.tt == K_TSYMBOL || o->gch.tt == K_TBYTEVECTOR) {
continue;
} else {
- klisp_assert(0); /* only symbols and immutable strings */
+ /* only symbols, immutable bytevectors and immutable strings */
+ klisp_assert(0);
}
if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) {
/* string may be dead */
@@ -101,7 +104,7 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size)
/* REFACTOR: move all of these to a new function */
String *new_str;
- if (size+1 > (SIZE_MAX - sizeof(String)))
+ if (size > (SIZE_MAX - sizeof(String) - 1))
klispM_toobig(K);
new_str = (String *) klispM_malloc(K, sizeof(String) + size + 1);
diff --git a/src/ksymbol.c b/src/ksymbol.c
@@ -13,6 +13,8 @@
#include "kstate.h"
#include "kmem.h"
#include "kgc.h"
+/* for immutable table */
+#include "kstring.h"
/* NOTE: symbols can have source info, they should be compared with
tv_sym_equal, NOT tv_equal */
@@ -39,12 +41,13 @@ TValue ksymbol_new_g(klisp_State *K, const char *buf, int32_t size,
for (o = K->strt.hash[lmod(h, K->strt.size)];
o != NULL; o = o->gch.next) {
String *ts = NULL;
- if (o->gch.tt == K_TSTRING) {
+ if (o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR) {
continue;
} else if (o->gch.tt == K_TSYMBOL) {
ts = tv2str(((Symbol *) o)->str);
} else {
- klisp_assert(0); /* only symbols and immutable strings */
+ /* only symbols, immutable bytevectors and immutable strings */
+ klisp_assert(0);
}
if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) {
/* symbol may be dead */