commit 31ec01ba055f2f33dd5b76c2520f1321432a18be
parent d59b74abdb62573d6f040fc79c5eb52baf6afab4
Author: Andres Navarro <canavarro82@gmail.com>
Date: Tue, 19 Apr 2011 20:29:49 -0300
Implemented string/symbol table for immutable strings. Symbols & GC pending.
Diffstat:
6 files changed, 138 insertions(+), 31 deletions(-)
diff --git a/src/Makefile b/src/Makefile
@@ -143,6 +143,6 @@ kgnumbers.o: kgnumbers.c kgnumbers.h kghelpers.h kstate.h klisp.h \
ksymbol.h kinteger.h
kgstrings.o: kgstrings.c kgstrings.h kghelpers.h kstate.h klisp.h \
kobject.h kerror.h kapplicative.h koperative.h kcontinuation.h \
- ksymbol.h kgnumbers.h
+ kstring.h ksymbol.h kgnumbers.h
imath.o: kobject.h kstate.h kmem.h kerror.h
kgc.o: kgc.c kgc.h kobject.h kmem.h kstate.h kport.h imath.h ktable.h
diff --git a/src/klimits.h b/src/klimits.h
@@ -49,4 +49,9 @@
*/
#define IntPoint(p) ((uint32_t)(p))
+/* minimum size for the string table (must be power of 2) */
+#ifndef MINSTRTABSIZE
+#define MINSTRTABSIZE 32
+#endif
+
#endif
diff --git a/src/kstate.c b/src/kstate.c
@@ -124,9 +124,16 @@ klisp_State *klisp_newstate (klisp_Alloc f, void *ud) {
K->dummy_pair3 = kcons(K, KINERT, KNIL);
/* initialize strings */
+
+ /* initial size of string/symbol table */
+ K->strt.size = 0;
+ K->strt.nuse = 0;
+ K->strt.hash = NULL;
+ klispS_resize(K, MINSTRTABSIZE);
+
/* Empty string */
- /* TODO: make it uncollectible */
- K->empty_string = kstring_new_empty(K);
+ /* MAYBE: fix it so we can remove empty_string from roots */
+ K->empty_string = kstring_new_b_imm(K, "");
/* initialize tokenizer */
@@ -172,6 +179,7 @@ klisp_State *klisp_newstate (klisp_Alloc f, void *ud) {
K->eval_op = kmake_operative(K, keval_ofn, 0);
K->list_app = kmake_applicative(K, list, 0);
K->ground_env = kmake_empty_environment(K);
+ /* MAYBE: fix it so we can remove module_params_sym from roots */
K->module_params_sym = ksymbol_new(K, "module-parameters");
kinit_ground_env(K);
diff --git a/src/kstring.c b/src/kstring.c
@@ -56,26 +56,101 @@ void klispS_resize (klisp_State *K, int32_t newsize)
tb->hash = newhash;
}
-/* TEMP: this is for initializing the above value */
-TValue kstring_new_empty(klisp_State *K)
+/* General constructor for strings */
+TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf,
+ uint32_t size)
{
+ return m? kstring_new_bs(K, buf, size) :
+ kstring_new_bs_imm(K, buf, size);
+}
+
+/*
+** Constructors for immutable strings
+*/
+
+/* main constructor for immutable strings */
+TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size)
+{
+ /* first check to see if it's in the stringtable */
+ GCObject *o;
+ uint32_t h = size; /* seed */
+ size_t step = (size>>5)+1; /* if string is too long, don't hash all
+ its chars */
+ size_t size1;
+ for (size1 = size; size1 >= step; size1 -= step) /* compute hash */
+ h = h ^ ((h<<5)+(h>>2)+ ((unsigned char) buf[size1-1]));
+
+ for (o = K->strt.hash[lmod(h, K->strt.size)];
+ o != NULL; o = o->gch.next) {
+ String *ts = NULL;
+ if (o->gch.tt == K_TSTRING) {
+ ts = (String *) o;
+ } else if (o->gch.tt == K_TSYMBOL) {
+ continue;
+ } else {
+ klisp_assert(0); /* only symbols and immutable strings */
+ }
+ if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) {
+ /* string may be dead */
+ if (isdead(K, o)) changewhite(o);
+ return gc2str(o);
+ }
+ }
+
+ /* If it exits the loop, it means it wasn't found, hash is still in h */
+ /* REFACTOR: move all of these to a new function */
String *new_str;
- new_str = klispM_malloc(K, sizeof(String) + 1);
+ if (size+1 > (SIZE_MAX - sizeof(String)))
+ klispM_toobig(K);
- /* header + gc_fields */
- klispC_link(K, (GCObject *) new_str, K_TSTRING, K_FLAG_IMMUTABLE);
+ new_str = (String *) klispM_malloc(K, sizeof(String) + size + 1);
+ /* header + gc_fields */
+ /* can't use klispC_link, because strings use the next pointer
+ differently */
+ new_str->gct = klispC_white(K);
+ new_str->tt = K_TSTRING;
+ new_str->kflags = K_FLAG_IMMUTABLE;
/* string specific fields */
+ new_str->hash = h;
new_str->mark = KFALSE;
- new_str->size = 0;
- new_str->b[0] = '\0';
+ new_str->size = size;
+ if (size != 0) {
+ memcpy(new_str->b, buf, size);
+ }
+ new_str->b[size] = '\0'; /* final 0 for printing */
- return gc2str(new_str);
+ /* add to the string table (and link it) */
+ stringtable *tb;
+ tb = &K->strt;
+ h = lmod(h, tb->size);
+ new_str->next = tb->hash[h]; /* chain new entry */
+ tb->hash[h] = (GCObject *)(new_str);
+ tb->nuse++;
+ TValue ret_tv = gc2str(new_str);
+ if (tb->nuse > ((uint32_t) tb->size) && tb->size <= INT32_MAX / 2) {
+ krooted_tvs_push(K, ret_tv); /* save in case of gc */
+ klispS_resize(K, tb->size*2); /* too crowded */
+ krooted_tvs_pop(K);
+ }
+
+ return ret_tv;
}
+/* with just buffer, no embedded '\0's */
+TValue kstring_new_b_imm(klisp_State *K, const char *buf)
+{
+ return (kstring_new_bs_imm(K, buf, strlen(buf)));
+}
+
+/*
+** Constructors for mutable strings
+*/
+
+/* main constructor for mutable strings */
/* with just size */
-TValue kstring_new_s_g(klisp_State *K, bool m, uint32_t size)
+TValue kstring_new_s(klisp_State *K, uint32_t size)
{
String *new_str;
@@ -86,10 +161,10 @@ TValue kstring_new_s_g(klisp_State *K, bool m, uint32_t size)
new_str = klispM_malloc(K, sizeof(String) + size + 1);
/* header + gc_fields */
- klispC_link(K, (GCObject *) new_str, K_TSTRING,
- m? 0 : K_FLAG_IMMUTABLE);
+ klispC_link(K, (GCObject *) new_str, K_TSTRING, 0);
/* string specific fields */
+ new_str->hash = 0; /* unimportant for mutable strings */
new_str->mark = KFALSE;
new_str->size = size;
@@ -103,23 +178,23 @@ TValue kstring_new_s_g(klisp_State *K, bool m, uint32_t size)
}
/* with buffer & size */
-TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf, uint32_t size)
+TValue kstring_new_bs(klisp_State *K, const char *buf, uint32_t size)
{
- TValue new_str = kstring_new_s_g(K, m, size);
+ TValue new_str = kstring_new_s(K, size);
memcpy(kstring_buf(new_str), buf, size);
return new_str;
}
/* with buffer but no size, no embedded '\0's */
-TValue kstring_new_b_g(klisp_State *K, bool m, const char *buf)
+TValue kstring_new_b(klisp_State *K, const char *buf)
{
- return (kstring_new_bs_g(K, m, buf, strlen(buf)));
+ return (kstring_new_bs(K, buf, strlen(buf)));
}
/* with size and fill char */
-TValue kstring_new_sf_g(klisp_State *K, bool m, uint32_t size, char fill)
+TValue kstring_new_sf(klisp_State *K, uint32_t size, char fill)
{
- TValue new_str = kstring_new_s_g(K, m, size);
+ TValue new_str = kstring_new_s(K, size);
memset(kstring_buf(new_str), fill, size);
return new_str;
}
diff --git a/src/kstring.h b/src/kstring.h
@@ -17,19 +17,37 @@
/* for immutable string table */
void klispS_resize (klisp_State *K, int32_t newsize);
-/* used for initialization */
-TValue kstring_new_empty(klisp_State *K);
-/* general string constructor, buf remains uninit
- (except for an extra trailing zero used for printing */
-TValue kstring_new_s_g(klisp_State *K, bool m, uint32_t size);
+/*
+** Constructors for immutable strings
+*/
+
+/* General constructor for strings */
+TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf,
+ uint32_t size);
+
+/* main immutable string constructor */
+/* with buffer & size */
+TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size);
+
+/* with just buffer, no embedded '\0's */
+TValue kstring_new_b_imm(klisp_State *K, const char *buf);
+
+/*
+** Constructors for mutable strings
+*/
+
+/* main mutable string constructor */
+/* with just size */
+TValue kstring_new_s(klisp_State *K, uint32_t size);
/* with buffer & size */
-TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf, uint32_t size);
-/* with buffer but no size, no embedded '\0's */
-TValue kstring_new_b_g(klisp_State *K, bool m, const char *buf);
+TValue kstring_new_bs(klisp_State *K, const char *buf, uint32_t size);
+/* with just buffer, no embedded '\0's */
+TValue kstring_new_b(klisp_State *K, const char *buf);
/* with size & fill char */
-TValue kstring_new_sf_g(klisp_State *K, bool m, uint32_t size, char fill);
+TValue kstring_new_sf(klisp_State *K, uint32_t size, char fill);
/* macros for mutable & immutable versions of the above */
+#if 0
#define kstring_new_s(K_, size_) \
kstring_new_s_g(K_, true, size_)
#define kstring_new_bs(K_, buf_, size_) \
@@ -47,7 +65,7 @@ TValue kstring_new_sf_g(klisp_State *K, bool m, uint32_t size, char fill);
kstring_new_b_g(K_, false, buf_)
#define kstring_new_sf_imm(K_, size_, fill_) \
kstring_new_sf_g(K_, false, size_, fill_)
-
+#endif
/* some macros to access the parts of the string */
#define kstring_buf(tv_) (tv2str(tv_)->b)
#define kstring_size(tv_) (tv2str(tv_)->size)
diff --git a/src/ksymbol.c b/src/ksymbol.c
@@ -34,7 +34,8 @@ TValue ksymbol_new_g(klisp_State *K, const char *buf, int32_t size,
}
/* Didn't find it, alloc new immutable string and save in symbol table */
- TValue new_str = kstring_new_bs_imm(K, buf, size);
+ TValue new_str = kstring_new_bs_imm(K, buf, size);
+ printf("new symbol. buf: %s, str: %s\n", buf, kstring_buf(new_str));
krooted_tvs_push(K, new_str);
Symbol *new_sym = klispM_new(K, Symbol);
krooted_tvs_pop(K);