klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit 38507b52be58fb458813073fd5b15d045929960e
parent 2ae03fef7b1da19bd6ba56d788c83b05058cedc0
Author: Andres Navarro <canavarro82@gmail.com>
Date:   Sat, 25 Aug 2012 15:19:56 -0300

Refactored symbol, string, and keyword immutable constructors in the bytevector fashion (separate functions for calculating the hash and looking in the stringtable)

Diffstat:
Msrc/kbytevector.c | 5++---
Msrc/kkeyword.c | 35+++++++++++++++++++++++++++--------
Msrc/kkeyword.h | 2+-
Msrc/kstring.c | 35++++++++++++++++++++++++++---------
Msrc/ksymbol.c | 61++++++++++++++++++++++++++++++++++++++++---------------------
Msrc/ksymbol.h | 2+-
6 files changed, 97 insertions(+), 43 deletions(-)

diff --git a/src/kbytevector.c b/src/kbytevector.c @@ -24,9 +24,6 @@ TValue kbytevector_new_bs_g(klisp_State *K, bool m, const uint8_t *buf, kbytevector_new_bs_imm(K, buf, size); } -/* Looks for a bytevector in the stringtable and returns a pointer - to it if found or NULL otherwise. */ - /* LOCK: GIL should be acquired */ static uint32_t get_bytevector_hash(const uint8_t *buf, uint32_t size) { @@ -40,6 +37,8 @@ static uint32_t get_bytevector_hash(const uint8_t *buf, uint32_t size) return h; } +/* Looks for a bytevector in the stringtable and returns a pointer + to it if found or NULL otherwise. */ static Bytevector *search_in_bb_table(klisp_State *K, const uint8_t *buf, uint32_t size, uint32_t h) { diff --git a/src/kkeyword.c b/src/kkeyword.c @@ -14,11 +14,8 @@ /* for immutable table */ #include "kstring.h" -/* XXX lock? */ -/* No case folding is performed by these constructors */ -TValue kkeyword_new_bs(klisp_State *K, const char *buf, int32_t size) +static uint32_t get_keyword_hash(const char *buf, uint32_t size) { - /* First calculate the hash */ uint32_t h = size; /* seed */ size_t step = (size>>5)+1; /* if string is too long, don't hash all its chars */ @@ -30,7 +27,14 @@ TValue kkeyword_new_bs(klisp_State *K, const char *buf, int32_t size) /* keyword hash should be different from string & symbol hash otherwise keywords and their respective immutable string would always fall in the same bucket */ - /* look for it in the table */ + return h; +} + +/* Looks for a keyword in the stringtable and returns a pointer + to it if found or NULL otherwise. */ +static Keyword *search_in_keyword_table(klisp_State *K, const char *buf, + uint32_t size, uint32_t h) +{ for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)]; o != NULL; o = o->gch.next) { klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || @@ -43,15 +47,30 @@ TValue kkeyword_new_bs(klisp_State *K, const char *buf, int32_t size) /* keyword and/or string may be dead */ if (isdead(G(K), o)) changewhite(o); if (isdead(G(K), (GCObject *) ts)) changewhite((GCObject *) ts); - return gc2keyw(o); + return (Keyword *) o; } } - /* REFACTOR: move this to a new function */ + /* If it exits the loop, it means it wasn't found */ + return NULL; +} + +/* No case folding is performed by these constructors */ +TValue kkeyword_new_bs(klisp_State *K, const char *buf, uint32_t size) +{ + /* First calculate the hash */ + uint32_t h = get_keyword_hash(buf, size); + + /* look for it in the table */ + Keyword *new_keyw = search_in_keyword_table(K, buf, size, h); + + if (new_keyw != NULL) { + return gc2keyw(new_keyw); + } /* Didn't find it, alloc new immutable string and save in keyword table, note that the hash value remained in h */ TValue new_str = kstring_new_bs_imm(K, buf, size); krooted_tvs_push(K, new_str); - Keyword *new_keyw = klispM_new(K, Keyword); + new_keyw = klispM_new(K, Keyword); TValue ret_tv = gc2keyw(new_keyw); krooted_tvs_pop(K); diff --git a/src/kkeyword.h b/src/kkeyword.h @@ -16,7 +16,7 @@ /* No case folding is performed by these constructors */ /* buffer + size, may contain nulls */ -TValue kkeyword_new_bs(klisp_State *K, const char *buf, int32_t size); +TValue kkeyword_new_bs(klisp_State *K, const char *buf, uint32_t size); /* null terminated buffer */ TValue kkeyword_new_b(klisp_State *K, const char *buf); /* copies str if not immutable */ diff --git a/src/kstring.c b/src/kstring.c @@ -15,7 +15,6 @@ #include "kmem.h" #include "kgc.h" -/* XXX lock? */ /* for immutable string/symbols/bytevector table */ void klispS_resize (klisp_State *K, int32_t newsize) { @@ -77,11 +76,8 @@ TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf, ** Constructors for immutable strings */ -/* XXX lock? */ -/* main constructor for immutable strings */ -TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) +static uint32_t get_string_hash(const char *buf, uint32_t size) { - /* first check to see if it's in the stringtable */ uint32_t h = size; /* seed */ size_t step = (size>>5)+1; /* if string is too long, don't hash all its chars */ @@ -89,6 +85,14 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) for (size1 = size; size1 >= step; size1 -= step) /* compute hash */ h = h ^ ((h<<5)+(h>>2)+ ((unsigned char) buf[size1-1])); + return h; +} + +/* Looks for a string in the stringtable and returns a pointer + to it if found or NULL otherwise. */ +static String *search_in_string_table(klisp_State *K, const char *buf, + uint32_t size, uint32_t h) +{ for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)]; o != NULL; o = o->gch.next) { klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || @@ -100,13 +104,26 @@ TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { /* string may be dead */ if (isdead(G(K), o)) changewhite(o); - return gc2str(o); + return ts; } } - /* If it exits the loop, it means it wasn't found, hash is still in h */ - /* REFACTOR: move all of these to a new function */ - String *new_str; + /* If it exits the loop, it means it wasn't found */ + return NULL; +} + + +/* main constructor for immutable strings */ +TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) +{ + uint32_t h = get_string_hash(buf, size); + + /* first check to see if it's in the stringtable */ + String *new_str = search_in_string_table(K, buf, size, h); + + if (new_str != NULL) { /* found */ + return gc2str(new_str); + } if (size > (SIZE_MAX - sizeof(String) - 1)) klispM_toobig(K); diff --git a/src/ksymbol.c b/src/ksymbol.c @@ -19,15 +19,12 @@ /* No case folding is performed by these constructors */ - -/* XXX lock? */ -/* +/* ** Interned symbols are only the ones that don't have source info ** (like those created with string->symbol) */ -TValue ksymbol_new_bs(klisp_State *K, const char *buf, int32_t size, TValue si) +static uint32_t get_symbol_hash(const char *buf, uint32_t size) { - /* First calculate the hash */ uint32_t h = size; /* seed */ size_t step = (size>>5)+1; /* if string is too long, don't hash all its chars */ @@ -38,25 +35,47 @@ TValue ksymbol_new_bs(klisp_State *K, const char *buf, int32_t size, TValue si) h = ~h; /* symbol hash should be different from string hash otherwise symbols and their respective immutable string would always fall in the same bucket */ + return h; +} + +/* Looks for a symbol in the stringtable and returns a pointer + to it if found or NULL otherwise. */ +static Symbol *search_in_symbol_table(klisp_State *K, const char *buf, + uint32_t size, uint32_t h) +{ + for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)]; + o != NULL; o = o->gch.next) { + klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || + o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR); + + if (o->gch.tt != K_TSYMBOL) continue; + + String *ts = tv2str(((Symbol *) o)->str); + if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { + /* symbol and/or string may be dead */ + if (isdead(G(K), o)) changewhite(o); + if (isdead(G(K), (GCObject *) ts)) changewhite((GCObject *) ts); + return (Symbol *) o; + } + } + + /* If it exits the loop, it means it wasn't found */ + return NULL; +} + +TValue ksymbol_new_bs(klisp_State *K, const char *buf, uint32_t size, TValue si) +{ + /* First calculate the hash */ + uint32_t h = get_symbol_hash(buf, size); + /* look for it in the table only if it doesn't have source info */ if (ttisnil(si)) { - for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)]; - o != NULL; o = o->gch.next) { - klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || - o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR); - - if (o->gch.tt != K_TSYMBOL) continue; - - String *ts = tv2str(((Symbol *) o)->str); - if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { - /* symbol and/or string may be dead */ - if (isdead(G(K), o)) changewhite(o); - if (isdead(G(K), (GCObject *) ts)) changewhite((GCObject *) ts); - return gc2sym(o); - } - } + Symbol *new_sym = search_in_symbol_table(K, buf, size, h); + if (new_sym != NULL) { + return gc2sym(new_sym); + } } - /* REFACTOR: move this to a new function */ + /* Didn't find it, alloc new immutable string and save in symbol table, note that the hash value remained in h */ TValue new_str = kstring_new_bs_imm(K, buf, size); diff --git a/src/ksymbol.h b/src/ksymbol.h @@ -20,7 +20,7 @@ /* No case folding is performed by these constructors */ /* buffer + size, may contain nulls */ -TValue ksymbol_new_bs(klisp_State *K, const char *buf, int32_t size, +TValue ksymbol_new_bs(klisp_State *K, const char *buf, uint32_t size, TValue si); /* null terminated buffer */ TValue ksymbol_new_b(klisp_State *K, const char *buf, TValue si);