klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

kstring.c (7438B)


      1 /*
      2 ** kstring.c
      3 ** Kernel Strings
      4 ** See Copyright Notice in klisp.h
      5 */
      6 
      7 /* SOURCE NOTE: the string table & hashing code is from lua */
      8 
      9 #include <string.h>
     10 #include <stdbool.h>
     11 
     12 #include "kstring.h"
     13 #include "kobject.h"
     14 #include "kstate.h"
     15 #include "kmem.h"
     16 #include "kgc.h"
     17 
     18 /* for immutable string/symbols/bytevector table */
     19 void klispS_resize (klisp_State *K, int32_t newsize)
     20 {
     21     GCObject **newhash;
     22     stringtable *tb;
     23     int32_t i;
     24     if (G(K)->gcstate == GCSsweepstring)
     25         return;  /* cannot resize during GC traverse */
     26     newhash = klispM_newvector(K, newsize, GCObject *);
     27     tb = &G(K)->strt;
     28     for (i = 0; i < newsize; i++) newhash[i] = NULL;
     29     /* rehash */
     30     for (i = 0; i < tb->size; i++) {
     31         GCObject *p = tb->hash[i];
     32         while (p) {  /* for each node in the list */
     33             /* imm string, imm bytevectors & symbols aren't chained with 
     34                all other objs, but with each other in strt */
     35             GCObject *next = p->gch.next;  /* save next */
     36             uint32_t h = 0;
     37             klisp_assert(p->gch.tt == K_TKEYWORD || p->gch.tt == K_TSYMBOL || 
     38                          p->gch.tt == K_TSTRING || p->gch.tt == K_TBYTEVECTOR);
     39 
     40             switch(p->gch.tt) {
     41             case K_TSYMBOL:
     42                 h = ((Symbol *) p)->hash;
     43                 break;
     44             case K_TSTRING:
     45                 h = ((String *) p)->hash;
     46                 break;
     47             case K_TBYTEVECTOR:
     48                 h = ((Bytevector *) p)->hash;
     49                 break;
     50             case K_TKEYWORD:
     51                 h = ((Keyword *) p)->hash;
     52                 break;
     53             }
     54 
     55             int32_t h1 = lmod(h, newsize);  /* new position */
     56             klisp_assert((int32_t) (h%newsize) == lmod(h, newsize));
     57             p->gch.next = newhash[h1];  /* chain it */
     58             newhash[h1] = p;
     59             p = next;
     60         }
     61     }
     62     klispM_freearray(K, tb->hash, tb->size, GCObject *);
     63     tb->size = newsize;
     64     tb->hash = newhash;
     65 }
     66 
     67 /* General constructor for strings */
     68 TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf, 
     69                         uint32_t size)
     70 {
     71     return m? kstring_new_bs(K, buf, size) :
     72         kstring_new_bs_imm(K, buf, size);
     73 }
     74 
     75 /* 
     76 ** Constructors for immutable strings
     77 */
     78 
     79 static uint32_t get_string_hash(const char *buf, uint32_t size)
     80 {
     81     uint32_t h = size; /* seed */
     82     size_t step = (size>>5)+1; /* if string is too long, don't hash all 
     83                                   its chars */
     84     size_t size1;
     85     for (size1 = size; size1 >= step; size1 -= step)  /* compute hash */
     86         h = h ^ ((h<<5)+(h>>2)+ ((unsigned char) buf[size1-1]));
     87 
     88     return h;
     89 }
     90 
     91 /* Looks for a string in the stringtable and returns a pointer
     92    to it if found or NULL otherwise.  */
     93 static String *search_in_string_table(klisp_State *K, const char *buf,
     94 				      uint32_t size, uint32_t h)
     95 {
     96     for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)];
     97          o != NULL; o = o->gch.next) {
     98         klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || 
     99                      o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR);
    100 
    101         if (o->gch.tt != K_TSTRING) continue;
    102 
    103         String *ts = (String *) o;
    104         if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) {
    105             /* string may be dead */
    106             if (isdead(G(K), o)) changewhite(o);
    107             return ts;
    108         }
    109     } 
    110 
    111     /* If it exits the loop, it means it wasn't found */
    112     return NULL;
    113 }
    114 
    115 
    116 /* main constructor for immutable strings */
    117 TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size)
    118 {
    119     uint32_t h = get_string_hash(buf, size);
    120     
    121     /* first check to see if it's in the stringtable */
    122     String *new_str  = search_in_string_table(K, buf, size, h);
    123 
    124     if (new_str != NULL) { /* found */
    125       return gc2str(new_str);
    126     }
    127 
    128     if (size > (SIZE_MAX - sizeof(String) - 1))
    129         klispM_toobig(K);
    130 
    131     new_str = (String *) klispM_malloc(K, sizeof(String) + size + 1);
    132 
    133     /* header + gc_fields */
    134     /* can't use klispC_link, because strings use the next pointer
    135        differently */
    136     new_str->gct = klispC_white(G(K));
    137     new_str->tt = K_TSTRING;
    138     new_str->kflags = K_FLAG_IMMUTABLE;
    139     new_str->si = NULL;
    140     /* string specific fields */
    141     new_str->hash = h;
    142     new_str->mark = KFALSE;
    143     new_str->size = size;
    144     if (size != 0) {
    145         memcpy(new_str->b, buf, size);
    146     }
    147     new_str->b[size] = '\0'; /* final 0 for printing */
    148 
    149     /* add to the string/symbol table (and link it) */
    150     stringtable *tb;
    151     tb = &G(K)->strt;
    152     h = lmod(h, tb->size);
    153     new_str->next = tb->hash[h];  /* chain new entry */
    154     tb->hash[h] = (GCObject *)(new_str);
    155     tb->nuse++;
    156     TValue ret_tv = gc2str(new_str);
    157     if (tb->nuse > ((uint32_t) tb->size) && tb->size <= INT32_MAX / 2) {
    158         krooted_tvs_push(K, ret_tv); /* save in case of gc */
    159         klispS_resize(K, tb->size*2);  /* too crowded */
    160         krooted_tvs_pop(K);
    161     }
    162     
    163     return ret_tv;
    164 }
    165 
    166 /* with just buffer, no embedded '\0's */
    167 TValue kstring_new_b_imm(klisp_State *K, const char *buf)
    168 {
    169     return (kstring_new_bs_imm(K, buf, strlen(buf)));
    170 }
    171 
    172 /* 
    173 ** Constructors for mutable strings
    174 */
    175 
    176 /* main constructor for mutable strings */
    177 /* with just size */
    178 TValue kstring_new_s(klisp_State *K, uint32_t size)
    179 {
    180     String *new_str;
    181 
    182     if (size == 0) {
    183         klisp_assert(ttisstring(G(K)->empty_string));
    184         return G(K)->empty_string;
    185     }
    186 
    187     new_str = klispM_malloc(K, sizeof(String) + size + 1);
    188 
    189     /* header + gc_fields */
    190     klispC_link(K, (GCObject *) new_str, K_TSTRING, 0);
    191 
    192     /* string specific fields */
    193     new_str->hash = 0; /* unimportant for mutable strings */
    194     new_str->mark = KFALSE;
    195     new_str->size = size;
    196 
    197     /* the buffer is initialized elsewhere */
    198 
    199     /* NOTE: all string end with a '\0' for convenience in printing 
    200        even if they have embedded '\0's */
    201     new_str->b[size] = '\0';
    202 
    203     return gc2str(new_str);
    204 }
    205 
    206 /* with buffer & size */
    207 TValue kstring_new_bs(klisp_State *K, const char *buf, uint32_t size)
    208 {
    209     TValue new_str = kstring_new_s(K, size);
    210     memcpy(kstring_buf(new_str), buf, size);
    211     return new_str;
    212 }
    213 
    214 /* with buffer but no size, no embedded '\0's */
    215 TValue kstring_new_b(klisp_State *K, const char *buf)
    216 {
    217     return (kstring_new_bs(K, buf, strlen(buf)));
    218 }
    219 
    220 /* with size and fill char */
    221 TValue kstring_new_sf(klisp_State *K, uint32_t size, char fill)
    222 {
    223     TValue new_str = kstring_new_s(K, size);
    224     memset(kstring_buf(new_str), fill, size);
    225     return new_str;
    226 }
    227 
    228 /* both obj1 and obj2 should be strings */
    229 bool kstring_equalp(TValue obj1, TValue obj2)
    230 {
    231     klisp_assert(ttisstring(obj1) && ttisstring(obj2));
    232 
    233     String *str1 = tv2str(obj1);
    234     String *str2 = tv2str(obj2);
    235 
    236     if (str1->size == str2->size) {
    237         return (str1->size == 0) ||
    238             (memcmp(str1->b, str2->b, str1->size) == 0);
    239     } else {
    240         return false;
    241     }
    242 }
    243 
    244 bool kstringp(TValue obj) { return ttisstring(obj); }
    245 bool kimmutable_stringp(TValue obj)
    246 { 
    247     return ttisstring(obj) && kis_immutable(obj); 
    248 }
    249 bool kmutable_stringp(TValue obj)
    250 { 
    251     return ttisstring(obj) && kis_mutable(obj); 
    252 }
    253 
    254 int32_t kstring_cstr_cmp(TValue str, const char *buf)
    255 {
    256     int32_t len1 = kstring_size(str);
    257     int32_t len2 = strlen(buf);
    258     if (len1 != len2) 
    259         return len1 < len2? -1 : 1;
    260     else 
    261         return memcmp(kstring_buf(str), buf, len1);
    262 }