kstring.c (7438B)
1 /* 2 ** kstring.c 3 ** Kernel Strings 4 ** See Copyright Notice in klisp.h 5 */ 6 7 /* SOURCE NOTE: the string table & hashing code is from lua */ 8 9 #include <string.h> 10 #include <stdbool.h> 11 12 #include "kstring.h" 13 #include "kobject.h" 14 #include "kstate.h" 15 #include "kmem.h" 16 #include "kgc.h" 17 18 /* for immutable string/symbols/bytevector table */ 19 void klispS_resize (klisp_State *K, int32_t newsize) 20 { 21 GCObject **newhash; 22 stringtable *tb; 23 int32_t i; 24 if (G(K)->gcstate == GCSsweepstring) 25 return; /* cannot resize during GC traverse */ 26 newhash = klispM_newvector(K, newsize, GCObject *); 27 tb = &G(K)->strt; 28 for (i = 0; i < newsize; i++) newhash[i] = NULL; 29 /* rehash */ 30 for (i = 0; i < tb->size; i++) { 31 GCObject *p = tb->hash[i]; 32 while (p) { /* for each node in the list */ 33 /* imm string, imm bytevectors & symbols aren't chained with 34 all other objs, but with each other in strt */ 35 GCObject *next = p->gch.next; /* save next */ 36 uint32_t h = 0; 37 klisp_assert(p->gch.tt == K_TKEYWORD || p->gch.tt == K_TSYMBOL || 38 p->gch.tt == K_TSTRING || p->gch.tt == K_TBYTEVECTOR); 39 40 switch(p->gch.tt) { 41 case K_TSYMBOL: 42 h = ((Symbol *) p)->hash; 43 break; 44 case K_TSTRING: 45 h = ((String *) p)->hash; 46 break; 47 case K_TBYTEVECTOR: 48 h = ((Bytevector *) p)->hash; 49 break; 50 case K_TKEYWORD: 51 h = ((Keyword *) p)->hash; 52 break; 53 } 54 55 int32_t h1 = lmod(h, newsize); /* new position */ 56 klisp_assert((int32_t) (h%newsize) == lmod(h, newsize)); 57 p->gch.next = newhash[h1]; /* chain it */ 58 newhash[h1] = p; 59 p = next; 60 } 61 } 62 klispM_freearray(K, tb->hash, tb->size, GCObject *); 63 tb->size = newsize; 64 tb->hash = newhash; 65 } 66 67 /* General constructor for strings */ 68 TValue kstring_new_bs_g(klisp_State *K, bool m, const char *buf, 69 uint32_t size) 70 { 71 return m? kstring_new_bs(K, buf, size) : 72 kstring_new_bs_imm(K, buf, size); 73 } 74 75 /* 76 ** Constructors for immutable strings 77 */ 78 79 static uint32_t get_string_hash(const char *buf, uint32_t size) 80 { 81 uint32_t h = size; /* seed */ 82 size_t step = (size>>5)+1; /* if string is too long, don't hash all 83 its chars */ 84 size_t size1; 85 for (size1 = size; size1 >= step; size1 -= step) /* compute hash */ 86 h = h ^ ((h<<5)+(h>>2)+ ((unsigned char) buf[size1-1])); 87 88 return h; 89 } 90 91 /* Looks for a string in the stringtable and returns a pointer 92 to it if found or NULL otherwise. */ 93 static String *search_in_string_table(klisp_State *K, const char *buf, 94 uint32_t size, uint32_t h) 95 { 96 for (GCObject *o = G(K)->strt.hash[lmod(h, G(K)->strt.size)]; 97 o != NULL; o = o->gch.next) { 98 klisp_assert(o->gch.tt == K_TKEYWORD || o->gch.tt == K_TSYMBOL || 99 o->gch.tt == K_TSTRING || o->gch.tt == K_TBYTEVECTOR); 100 101 if (o->gch.tt != K_TSTRING) continue; 102 103 String *ts = (String *) o; 104 if (ts->size == size && (memcmp(buf, ts->b, size) == 0)) { 105 /* string may be dead */ 106 if (isdead(G(K), o)) changewhite(o); 107 return ts; 108 } 109 } 110 111 /* If it exits the loop, it means it wasn't found */ 112 return NULL; 113 } 114 115 116 /* main constructor for immutable strings */ 117 TValue kstring_new_bs_imm(klisp_State *K, const char *buf, uint32_t size) 118 { 119 uint32_t h = get_string_hash(buf, size); 120 121 /* first check to see if it's in the stringtable */ 122 String *new_str = search_in_string_table(K, buf, size, h); 123 124 if (new_str != NULL) { /* found */ 125 return gc2str(new_str); 126 } 127 128 if (size > (SIZE_MAX - sizeof(String) - 1)) 129 klispM_toobig(K); 130 131 new_str = (String *) klispM_malloc(K, sizeof(String) + size + 1); 132 133 /* header + gc_fields */ 134 /* can't use klispC_link, because strings use the next pointer 135 differently */ 136 new_str->gct = klispC_white(G(K)); 137 new_str->tt = K_TSTRING; 138 new_str->kflags = K_FLAG_IMMUTABLE; 139 new_str->si = NULL; 140 /* string specific fields */ 141 new_str->hash = h; 142 new_str->mark = KFALSE; 143 new_str->size = size; 144 if (size != 0) { 145 memcpy(new_str->b, buf, size); 146 } 147 new_str->b[size] = '\0'; /* final 0 for printing */ 148 149 /* add to the string/symbol table (and link it) */ 150 stringtable *tb; 151 tb = &G(K)->strt; 152 h = lmod(h, tb->size); 153 new_str->next = tb->hash[h]; /* chain new entry */ 154 tb->hash[h] = (GCObject *)(new_str); 155 tb->nuse++; 156 TValue ret_tv = gc2str(new_str); 157 if (tb->nuse > ((uint32_t) tb->size) && tb->size <= INT32_MAX / 2) { 158 krooted_tvs_push(K, ret_tv); /* save in case of gc */ 159 klispS_resize(K, tb->size*2); /* too crowded */ 160 krooted_tvs_pop(K); 161 } 162 163 return ret_tv; 164 } 165 166 /* with just buffer, no embedded '\0's */ 167 TValue kstring_new_b_imm(klisp_State *K, const char *buf) 168 { 169 return (kstring_new_bs_imm(K, buf, strlen(buf))); 170 } 171 172 /* 173 ** Constructors for mutable strings 174 */ 175 176 /* main constructor for mutable strings */ 177 /* with just size */ 178 TValue kstring_new_s(klisp_State *K, uint32_t size) 179 { 180 String *new_str; 181 182 if (size == 0) { 183 klisp_assert(ttisstring(G(K)->empty_string)); 184 return G(K)->empty_string; 185 } 186 187 new_str = klispM_malloc(K, sizeof(String) + size + 1); 188 189 /* header + gc_fields */ 190 klispC_link(K, (GCObject *) new_str, K_TSTRING, 0); 191 192 /* string specific fields */ 193 new_str->hash = 0; /* unimportant for mutable strings */ 194 new_str->mark = KFALSE; 195 new_str->size = size; 196 197 /* the buffer is initialized elsewhere */ 198 199 /* NOTE: all string end with a '\0' for convenience in printing 200 even if they have embedded '\0's */ 201 new_str->b[size] = '\0'; 202 203 return gc2str(new_str); 204 } 205 206 /* with buffer & size */ 207 TValue kstring_new_bs(klisp_State *K, const char *buf, uint32_t size) 208 { 209 TValue new_str = kstring_new_s(K, size); 210 memcpy(kstring_buf(new_str), buf, size); 211 return new_str; 212 } 213 214 /* with buffer but no size, no embedded '\0's */ 215 TValue kstring_new_b(klisp_State *K, const char *buf) 216 { 217 return (kstring_new_bs(K, buf, strlen(buf))); 218 } 219 220 /* with size and fill char */ 221 TValue kstring_new_sf(klisp_State *K, uint32_t size, char fill) 222 { 223 TValue new_str = kstring_new_s(K, size); 224 memset(kstring_buf(new_str), fill, size); 225 return new_str; 226 } 227 228 /* both obj1 and obj2 should be strings */ 229 bool kstring_equalp(TValue obj1, TValue obj2) 230 { 231 klisp_assert(ttisstring(obj1) && ttisstring(obj2)); 232 233 String *str1 = tv2str(obj1); 234 String *str2 = tv2str(obj2); 235 236 if (str1->size == str2->size) { 237 return (str1->size == 0) || 238 (memcmp(str1->b, str2->b, str1->size) == 0); 239 } else { 240 return false; 241 } 242 } 243 244 bool kstringp(TValue obj) { return ttisstring(obj); } 245 bool kimmutable_stringp(TValue obj) 246 { 247 return ttisstring(obj) && kis_immutable(obj); 248 } 249 bool kmutable_stringp(TValue obj) 250 { 251 return ttisstring(obj) && kis_mutable(obj); 252 } 253 254 int32_t kstring_cstr_cmp(TValue str, const char *buf) 255 { 256 int32_t len1 = kstring_size(str); 257 int32_t len2 = strlen(buf); 258 if (len1 != len2) 259 return len1 < len2? -1 : 1; 260 else 261 return memcmp(kstring_buf(str), buf, len1); 262 }