klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit 458b9ee284a08361b9b07c9f8dc751cac1ed95e0
parent e080606786e9e9851804c35ffdb664cf97e1204c
Author: Andres Navarro <canavarro82@gmail.com>
Date:   Wed, 16 Nov 2011 23:30:13 -0300

Added immutable bytevector interning in the string table.

Diffstat:
Msrc/Makefile | 4++--
Msrc/kbytevector.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Msrc/kgc.c | 14++++++++++----
Msrc/kgeqp.h | 12++----------
Msrc/kobject.h | 2+-
Msrc/kstring.c | 15+++++++++------
Msrc/ksymbol.c | 7+++++--
7 files changed, 82 insertions(+), 35 deletions(-)

diff --git a/src/Makefile b/src/Makefile @@ -117,7 +117,7 @@ kapplicative.o: kapplicative.c kobject.h klimits.h klisp.h klispconf.h \ kauxlib.o: kauxlib.c klisp.h kobject.h klimits.h klispconf.h kstate.h \ ktoken.h kmem.h kbytevector.o: kbytevector.c kbytevector.h kobject.h klimits.h klisp.h \ -klispconf.h kstate.h ktoken.h kmem.h kgc.h +klispconf.h kstate.h ktoken.h kmem.h kgc.h kstring.h kcontinuation.o: kcontinuation.c kcontinuation.h kobject.h klimits.h \ klisp.h klispconf.h kstate.h ktoken.h kmem.h kgc.h kencapsulation.o: kencapsulation.c kobject.h klimits.h klisp.h \ @@ -273,7 +273,7 @@ kstate.o: kstate.c klisp.h kobject.h klimits.h klispconf.h kstate.h \ kstring.o: kstring.c kstring.h kobject.h klimits.h klisp.h klispconf.h \ kstate.h ktoken.h kmem.h kgc.h ksymbol.o: ksymbol.c ksymbol.h kobject.h klimits.h klisp.h klispconf.h \ - kstate.h ktoken.h kmem.h kstring.h kgc.h + kstate.h ktoken.h kmem.h kstring.h kgc.h kstring.h ktable.o: ktable.c klisp.h kobject.h klimits.h klispconf.h kgc.h kstate.h \ ktoken.h kmem.h ktable.h kapplicative.h koperative.h kgeqp.h kinteger.h \ imath.h krational.h imrat.h kghelpers.h kerror.h kpair.h kcontinuation.h \ diff --git a/src/kbytevector.c b/src/kbytevector.c @@ -11,6 +11,8 @@ #include "kstate.h" #include "kmem.h" #include "kgc.h" +/* for immutable table */ +#include "kstring.h" /* Constructors */ @@ -29,14 +31,35 @@ TValue kbytevector_new_bs_g(klisp_State *K, bool m, const uint8_t *buf, /* main constructor for immutable bytevectors */ TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size) { - /* Does it make sense to put them in the string table - (i.e. interning them)?, we have two different constructors just in case */ - - /* XXX: find a better way to do this! */ - if (size == 0 && ttisbytevector(K->empty_bytevector)) { - return K->empty_bytevector; - } - + /* first check to see if it's in the stringtable */ + GCObject *o; + uint32_t h = size; /* seed */ + size_t step = (size>>5)+1; /* if bytevector is too long, don't hash all + its bytes */ + size_t size1; + for (size1 = size; size1 >= step; size1 -= step) /* compute hash */ + h = h ^ ((h<<5)+(h>>2)+ buf[size1-1]); + + for (o = K->strt.hash[lmod(h, K->strt.size)]; + o != NULL; o = o->gch.next) { + Bytevector *tb = NULL; + if (o->gch.tt == K_TBYTEVECTOR) { + tb = (Bytevector *) o; + } else if (o->gch.tt == K_TSYMBOL || o->gch.tt == K_TSTRING) { + continue; + } else { + /* only symbols, immutable bytevectors and immutable strings */ + klisp_assert(0); + } + if (tb->size == size && (memcmp(buf, tb->b, size) == 0)) { + /* bytevector may be dead */ + if (isdead(K, o)) changewhite(o); + return gc2bytevector(o); + } + } + + /* If it exits the loop, it means it wasn't found, hash is still in h */ + /* REFACTOR: move all of these to a new function */ Bytevector *new_bb; if (size > (SIZE_MAX - sizeof(Bytevector))) @@ -45,9 +68,15 @@ TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size) new_bb = (Bytevector *) klispM_malloc(K, sizeof(Bytevector) + size); /* header + gc_fields */ - klispC_link(K, (GCObject *) new_bb, K_TBYTEVECTOR, K_FLAG_IMMUTABLE); + /* can't use klispC_link, because strings use the next pointer + differently */ + new_bb->gct = klispC_white(K); + new_bb->tt = K_TBYTEVECTOR; + new_bb->kflags = K_FLAG_IMMUTABLE; + new_bb->si = NULL; /* bytevector specific fields */ + new_bb->hash = h; new_bb->mark = KFALSE; new_bb->size = size; @@ -55,7 +84,21 @@ TValue kbytevector_new_bs_imm(klisp_State *K, const uint8_t *buf, uint32_t size) memcpy(new_bb->b, buf, size); } - return gc2bytevector(new_bb); + /* add to the string/symbol table (and link it) */ + stringtable *tb; + tb = &K->strt; + h = lmod(h, tb->size); + new_bb->next = tb->hash[h]; /* chain new entry */ + tb->hash[h] = (GCObject *)(new_bb); + tb->nuse++; + TValue ret_tv = gc2bytevector(new_bb); + if (tb->nuse > ((uint32_t) tb->size) && tb->size <= INT32_MAX / 2) { + krooted_tvs_push(K, ret_tv); /* save in case of gc */ + klispS_resize(K, tb->size*2); /* too crowded */ + krooted_tvs_pop(K); + } + + return ret_tv; } /* diff --git a/src/kgc.c b/src/kgc.c @@ -261,7 +261,7 @@ static int32_t propagatemark (klisp_State *K) { case K_TSTRING: { String *s = cast(String *, o); markvalue(K, s->mark); - return sizeof(String) + s->size * sizeof(char); + return sizeof(String) + (s->size + 1 * sizeof(char)); } case K_TENVIRONMENT: { Environment *e = cast(Environment *, o); @@ -324,7 +324,7 @@ static int32_t propagatemark (klisp_State *K) { case K_TBYTEVECTOR: { Bytevector *b = cast(Bytevector *, o); markvalue(K, b->mark); - return sizeof(String) + b->size * sizeof(uint8_t); + return sizeof(Bytevector) + b->size * sizeof(uint8_t); } default: fprintf(stderr, "Unknown GCObject type (in GC propagate): %d\n", @@ -347,6 +347,8 @@ static size_t propagateall (klisp_State *K) { ** other objects: if really collected, cannot keep them; for userdata ** being finalized, keep them in keys, but not in values */ +/* XXX what the hell is this, I should reread this part of the lua + source Andres Navarro */ static int32_t iscleared (TValue o, int iskey) { if (!iscollectable(o)) return 0; #if 0 /* klisp: strings may be mutable... */ @@ -457,6 +459,9 @@ static void freeobj (klisp_State *K, GCObject *o) { klispE_free(K, (Error *)o); break; case K_TBYTEVECTOR: + /* immutable bytevectors are in the string/symbol table */ + if (kbytevector_immutablep(gc2str(o))) + K->strt.nuse--; klispM_freemem(K, o, sizeof(Bytevector)+o->bytevector.size); break; default: @@ -552,7 +557,7 @@ void klispC_freeall (klisp_State *K) { K->currentwhite = WHITEBITS | bitmask(SFIXEDBIT); /* in klisp this may not be necessary */ sweepwholelist(K, &K->rootgc); - /* free all symbol/string lists */ + /* free all symbol/string/bytevectors lists */ for (int32_t i = 0; i < K->strt.size; i++) sweepwholelist(K, &K->strt.hash[i]); } @@ -779,7 +784,8 @@ void klispC_barrierback (klisp_State *K, Table *t) { } /* NOTE: kflags is added for klisp */ -/* NOTE: both symbols & strings do this "by hand", they don't call this */ +/* NOTE: symbols, immutable strings and immutable bytevectors do this + "by hand", they don't call this */ void klispC_link (klisp_State *K, GCObject *o, uint8_t tt, uint8_t kflags) { o->gch.next = K->rootgc; K->rootgc = o; diff --git a/src/kgeqp.h b/src/kgeqp.h @@ -20,7 +20,6 @@ #include "krational.h" /* for kbigrat_eqp */ #include "klisp.h" #include "kghelpers.h" -#include "kbytevector.h" /* temp until interned, for kbytevector_equalp */ /* 4.2.1 eq? */ /* 6.5.1 eq? */ @@ -29,8 +28,6 @@ void eqp(klisp_State *K, TValue *xparams, TValue ptree, TValue denv); /* Helper (also used in equal?) */ inline bool eq2p(klisp_State *K, TValue obj1, TValue obj2) { - /* MAYBE: immutable bytevectors aren't interned and so we have to compare - them everytime, maybe we should intern them */ bool res = (tv_equal(obj1, obj2)); if (!res && (ttype(obj1) == ttype(obj2))) { switch (ttype(obj1)) { @@ -57,13 +54,8 @@ inline bool eq2p(klisp_State *K, TValue obj1, TValue obj2) (eq? obj1 obj2) */ res = kbigrat_eqp(K, obj1, obj2); break; - case K_TBYTEVECTOR: - if (kbytevector_immutablep(obj1) && kbytevector_immutablep(obj2)) - res = kbytevector_equalp(obj1, obj2); - else - res = false; - break; - } /* immutable strings are interned so are covered already */ + } /* immutable strings & bytevectors are interned so they are + covered already by tv_equalp */ } return res; diff --git a/src/kobject.h b/src/kobject.h @@ -493,7 +493,7 @@ typedef struct __attribute__ ((__packed__)) { CommonHeader; TValue mark; /* for cycle/sharing aware algorithms */ uint32_t size; - int32_t __dummy; /* for alignment to 64 bits */ + uint32_t hash; /* only used for immutable strings */ uint8_t b[]; /* buffer */ } Bytevector; diff --git a/src/kstring.c b/src/kstring.c @@ -15,7 +15,7 @@ #include "kmem.h" #include "kgc.h" -/* for immutable string/symbols table */ +/* for immutable string/symbols/bytevector table */ void klispS_resize (klisp_State *K, int32_t newsize) { GCObject **newhash; @@ -30,8 +30,8 @@ void klispS_resize (klisp_State *K, int32_t newsize) for (i = 0; i < tb->size; i++) { GCObject *p = tb->hash[i]; while (p) { /* for each node in the list */ - /* imm string & symbols aren't chained with all other - objs, but with each other in strt */ + /* imm string, imm bytevectors & symbols aren't chained with + all other objs, but with each other in strt */ GCObject *next = p->gch.next; /* save next */ uint32_t h = 0; @@ -40,6 +40,8 @@ void klispS_resize (klisp_State *K, int32_t newsize) h = ((Symbol *) p)->hash; } else if (p->gch.tt == K_TSTRING) { h = ((String *) p)->hash; + } else if (p->gch.tt == K_TBYTEVECTOR) { + h = ((Bytevector *) p)->hash; } else { klisp_assert(0); } @@ -85,10 +87,11 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) String *ts = NULL; if (o->gch.tt == K_TSTRING) { ts = (String *) o; - } else if (o->gch.tt == K_TSYMBOL) { + } else if (o->gch.tt == K_TSYMBOL || o->gch.tt == K_TBYTEVECTOR) { continue; } else { - klisp_assert(0); /* only symbols and immutable strings */ + /* only symbols, immutable bytevectors and immutable strings */ + klisp_assert(0); } if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { /* string may be dead */ @@ -101,7 +104,7 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) /* REFACTOR: move all of these to a new function */ String *new_str; - if (size+1 > (SIZE_MAX - sizeof(String))) + if (size > (SIZE_MAX - sizeof(String) - 1)) klispM_toobig(K); new_str = (String *) klispM_malloc(K, sizeof(String) + size + 1); diff --git a/src/ksymbol.c b/src/ksymbol.c @@ -13,6 +13,8 @@ #include "kstate.h" #include "kmem.h" #include "kgc.h" +/* for immutable table */ +#include "kstring.h" /* NOTE: symbols can have source info, they should be compared with tv_sym_equal, NOT tv_equal */ @@ -39,12 +41,13 @@ TValue ksymbol_new_g(klisp_State *K, const char *buf, int32_t size, for (o = K->strt.hash[lmod(h, K->strt.size)]; o != NULL; o = o->gch.next) { String *ts = NULL; - if (o->gch.tt == K_TSTRING) { + if (o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR) { continue; } else if (o->gch.tt == K_TSYMBOL) { ts = tv2str(((Symbol *) o)->str); } else { - klisp_assert(0); /* only symbols and immutable strings */ + /* only symbols, immutable bytevectors and immutable strings */ + klisp_assert(0); } if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { /* symbol may be dead */