ktable.c (20314B)
1 /* 2 ** ktable.c 3 ** Kernel Hashtables 4 ** See Copyright Notice in klisp.h 5 */ 6 7 /* 8 ** SOURCE NOTE: This is almost textually from lua. 9 ** Parts that don't apply, or don't apply yet to klisp are in comments. 10 ** In klisp arrays are indexed from 0, (while in Lua they are indexed from 11 ** one). So watch out for off by one errors! Andres Navarro 12 ** To indicate a missing entry, klisp uses 'free' instead of 'nil'. 13 ** 'free' is a special type that is unavailable to Kernel programs. 14 */ 15 16 /* 17 ** Implementation of tables (aka arrays, objects, or hash tables). 18 ** Tables keep its elements in two parts: an array part and a hash part. 19 ** Non-negative integer keys are all candidates to be kept in the array 20 ** part. The actual size of the array is the largest `n' such that at 21 ** least half the slots between 0 and n are in use. 22 ** Hash uses a mix of chained scatter table with Brent's variation. 23 ** A main invariant of these tables is that, if an element is not 24 ** in its main position (i.e. the `original' position that its hash gives 25 ** to it), then the colliding element is in its own main position. 26 ** Hence even when the load factor reaches 100%, performance remains good. 27 */ 28 29 #include <math.h> 30 #include <string.h> 31 32 #include "klisp.h" 33 #include "kgc.h" 34 #include "kmem.h" 35 #include "kobject.h" 36 #include "kstate.h" 37 #include "ktable.h" 38 #include "kapplicative.h" 39 #include "kghelpers.h" /* for eq2p */ 40 #include "kstring.h" 41 42 /* 43 ** max size of array part is 2^MAXBITS 44 */ 45 #define MAXBITS 26 46 #define MAXASIZE (1 << MAXBITS) 47 48 49 #define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t)))) 50 51 #define hashstr(t,str) hashpow2(t, (str)->hash) 52 #define hashsym(t,sym) hashpow2(t, (sym)->hash) 53 #define hashboolean(t,p) hashpow2(t, p? 1 : 0) 54 55 56 /* 57 ** for some types, it is better to avoid modulus by power of 2, as 58 ** they tend to have many 2 factors. 59 */ 60 #define hashmod(t,n) (gnode(t, ((n) % ((sizenode(t)-1)|1)))) 61 62 63 #define hashpointer(t,p) hashmod(t, IntPoint(p)) 64 65 #define dummynode (&dummynode_) 66 67 static const Node dummynode_ = { 68 .i_val = KFREE_, 69 .i_key = { .nk = { .this = KFREE_, .next = NULL}} 70 }; 71 72 73 /* 74 ** hash for klisp numbers 75 */ 76 static inline Node *hashfixint (const Table *t, int32_t n) { 77 return hashmod(t, (uint32_t) n); 78 } 79 80 /* XXX: this accesses the internal representation of bigints... 81 maybe it should be in kbigint.c. 82 This may also not be the best hashing for bigints, I just 83 made it up... 84 */ 85 static Node *hashbigint (const Table *t, Bigint *b) { 86 uint32_t n = (b->sign == 0)? 0 : 1; 87 for (uint32_t i = 0; i < b->used; i++) 88 n += b->digits[i]; 89 90 return hashmod(t, n); 91 } 92 93 /* 94 ** returns the `main' position of an element in a table (that is, the index 95 ** of its hash value) 96 */ 97 static Node *mainposition (const Table *t, TValue key) { 98 switch (ttype(key)) { 99 case K_TNIL: 100 case K_TIGNORE: 101 case K_TINERT: 102 case K_TEOF: 103 case K_TFIXINT: 104 case K_TEINF: /* infinites have -1 or 1 as ivalues */ 105 return hashfixint(t, ivalue(key)); 106 case K_TCHAR: 107 return hashfixint(t, chvalue(key)); 108 case K_TBIGINT: 109 return hashbigint(t, tv2bigint(key)); 110 case K_TBOOLEAN: 111 return hashboolean(t, bvalue(key)); 112 case K_TSTRING: 113 if (kstring_immutablep(key)) 114 return hashstr(t, tv2str(key)); 115 else /* mutable strings are eq iff they are the same object */ 116 return hashpointer(t, gcvalue(key)); 117 case K_TSYMBOL: 118 return hashsym(t, tv2sym(key)); 119 case K_TUSER: 120 return hashpointer(t, pvalue(key)); 121 case K_TAPPLICATIVE: 122 /* applicatives are eq if wrapping the same number of times the 123 same applicative, just in case make the hash of an applicative 124 the same as the hash of the operative is ultimately wraps */ 125 while(ttisapplicative(key)) { 126 key = kunwrap(key); 127 } 128 /* fall through */ 129 default: 130 return hashpointer(t, gcvalue(key)); 131 } 132 } 133 134 135 /* 136 ** returns the index for `key' if `key' is an appropriate key to live in 137 ** the array part of the table, -1 otherwise. 138 */ 139 static int32_t arrayindex (const TValue key) { 140 return (ttisfixint(key) && ivalue(key) >= 0)? ivalue(key) : -1; 141 } 142 143 144 /* 145 ** returns the index of a `key' for table traversals. First goes all 146 ** elements in the array part, then elements in the hash part. The 147 ** beginning of a traversal is signalled by -1. 148 */ 149 static int32_t findindex (klisp_State *K, Table *t, TValue key) 150 { 151 int32_t i; 152 if (ttisfree(key)) return -1; /* first iteration */ 153 i = arrayindex(key); 154 if (0 <= i && i < t->sizearray) /* is `key' inside array part? */ 155 return i; /* yes; that's the index */ 156 else { 157 Node *n = mainposition(t, key); 158 do { /* check whether `key' is somewhere in the chain */ 159 /* key may be dead already, but it is ok to use it in `next' */ 160 /* klisp: i'm not so sure about this... */ 161 if (eq2p(K, key2tval(n), key) || 162 (ttype(gkey(n)->this) == K_TDEADKEY && iscollectable(key) && 163 gcvalue(gkey(n)->this) == gcvalue(key))) { 164 i = (int32_t) (n - gnode(t, 0)); /* key index in hash table */ 165 /* hash elements are numbered after array ones */ 166 return i + t->sizearray; 167 } 168 else n = gnext(n); 169 } while (n); 170 klispE_throw_simple(K, "invalid key to next"); /* key not found */ 171 return 0; /* to avoid warnings */ 172 } 173 } 174 175 int32_t klispH_next (klisp_State *K, Table *t, TValue *key, TValue *data) 176 { 177 int32_t i = findindex(K, t, *key); /* find original element */ 178 for (i++; i < t->sizearray; i++) { /* try first array part */ 179 if (!ttisfree(t->array[i])) { /* a non-nil value? */ 180 *key = i2tv(i); 181 *data = t->array[i]; 182 return 1; 183 } 184 } 185 for (i -= t->sizearray; i < sizenode(t); i++) { /* then hash part */ 186 if (!ttisfree(gval(gnode(t, i)))) { /* a non-nil value? */ 187 *key = key2tval(gnode(t, i)); 188 *data = gval(gnode(t, i)); 189 return 1; 190 } 191 } 192 return 0; /* no more elements */ 193 } 194 195 196 /* 197 ** {============================================================= 198 ** Rehash 199 ** ============================================================== 200 */ 201 202 203 static int32_t computesizes (int32_t nums[], int32_t *narray) 204 { 205 int32_t i; 206 int32_t twotoi; /* 2^i */ 207 int32_t a = 0; /* number of elements smaller than 2^i */ 208 int32_t na = 0; /* number of elements to go to array part */ 209 int32_t n = 0; /* optimal size for array part */ 210 for (i = 0, twotoi = 1; twotoi/2 < *narray; i++, twotoi *= 2) { 211 if (nums[i] > 0) { 212 a += nums[i]; 213 if (a > twotoi/2) { /* more than half elements present? */ 214 n = twotoi; /* optimal size (till now) */ 215 na = a; /* all elements smaller than n will go to array part */ 216 } 217 } 218 if (a == *narray) break; /* all elements already counted */ 219 } 220 *narray = n; 221 klisp_assert(*narray/2 <= na && na <= *narray); 222 return na; 223 } 224 225 226 static int32_t countint (const TValue key, int32_t *nums) 227 { 228 int32_t k = arrayindex(key); 229 if (0 < k && k <= MAXASIZE) { /* is `key' an appropriate array index? */ 230 nums[ceillog2(k)]++; /* count as such */ 231 return 1; 232 } 233 else 234 return 0; 235 } 236 237 238 static int32_t numusearray (const Table *t, int32_t *nums) 239 { 240 int32_t lg; 241 int32_t ttlg; /* 2^lg */ 242 int32_t ause = 0; /* summation of `nums' */ 243 int32_t i = 1; /* count to traverse all array keys */ 244 for (lg=0, ttlg=1; lg<=MAXBITS; lg++, ttlg*=2) { /* for each slice */ 245 int32_t lc = 0; /* counter */ 246 int32_t lim = ttlg; 247 if (lim > t->sizearray) { 248 lim = t->sizearray; /* adjust upper limit */ 249 if (i > lim) 250 break; /* no more elements to count */ 251 } 252 /* count elements in range (2^(lg-1), 2^lg] */ 253 for (; i <= lim; i++) { 254 if (!ttisfree(t->array[i-1])) 255 lc++; 256 } 257 nums[lg] += lc; 258 ause += lc; 259 } 260 return ause; 261 } 262 263 264 static int32_t numusehash (const Table *t, int32_t *nums, int32_t *pnasize) 265 { 266 int32_t totaluse = 0; /* total number of elements */ 267 int32_t ause = 0; /* summation of `nums' */ 268 int32_t i = sizenode(t); 269 while (i--) { 270 Node *n = &t->node[i]; 271 if (!ttisfree(gval(n))) { 272 ause += countint(key2tval(n), nums); 273 totaluse++; 274 } 275 } 276 *pnasize += ause; 277 return totaluse; 278 } 279 280 281 static void setarrayvector (klisp_State *K, Table *t, int32_t size) 282 { 283 int32_t i; 284 klispM_reallocvector(K, t->array, t->sizearray, size, TValue); 285 for (i=t->sizearray; i<size; i++) 286 t->array[i] = KFREE; 287 t->sizearray = size; 288 } 289 290 291 static void setnodevector (klisp_State *K, Table *t, int32_t size) 292 { 293 int32_t lsize; 294 if (size == 0) { /* no elements to hash part? */ 295 t->node = cast(Node *, dummynode); /* use common `dummynode' */ 296 lsize = 0; 297 } 298 else { 299 int32_t i; 300 lsize = ceillog2(size); 301 if (lsize > MAXBITS) 302 klispE_throw_simple(K, "table overflow"); 303 size = twoto(lsize); 304 t->node = klispM_newvector(K, size, Node); 305 for (i=0; i<size; i++) { 306 Node *n = gnode(t, i); 307 gnext(n) = NULL; 308 gkey(n)->this = KFREE; 309 gval(n) = KFREE; 310 } 311 } 312 t->lsizenode = (uint8_t) (lsize); 313 t->lastfree = gnode(t, size); /* all positions are free */ 314 } 315 316 317 static void resize (klisp_State *K, Table *t, int32_t nasize, int32_t nhsize) 318 { 319 int32_t i; 320 int32_t oldasize = t->sizearray; 321 int32_t oldhsize = t->lsizenode; 322 Node *nold = t->node; /* save old hash ... */ 323 if (nasize > oldasize) /* array part must grow? */ 324 setarrayvector(K, t, nasize); 325 /* create new hash part with appropriate size */ 326 setnodevector(K, t, nhsize); 327 if (nasize < oldasize) { /* array part must shrink? */ 328 t->sizearray = nasize; 329 /* re-insert elements from vanishing slice */ 330 for (i=nasize; i<oldasize; i++) { 331 if (!ttisfree(t->array[i])) { 332 TValue v = t->array[i]; 333 *klispH_setfixint(K, t, i) = v; 334 checkliveness(G(K), v); 335 } 336 } 337 /* shrink array */ 338 klispM_reallocvector(K, t->array, oldasize, nasize, TValue); 339 } 340 /* re-insert elements from hash part */ 341 for (i = twoto(oldhsize) - 1; i >= 0; i--) { 342 Node *old = nold+i; 343 if (!ttisfree(gval(old))) { 344 TValue v = gval(old); 345 *klispH_set(K, t, key2tval(old)) = v; 346 checkliveness(G(K), v); 347 } 348 } 349 if (nold != dummynode) 350 klispM_freearray(K, nold, twoto(oldhsize), Node); /* free old array */ 351 } 352 353 354 void klispH_resizearray (klisp_State *K, Table *t, int32_t nasize) 355 { 356 int32_t nsize = (t->node == dummynode) ? 0 : sizenode(t); 357 resize(K, t, nasize, nsize); 358 } 359 360 361 static void rehash (klisp_State *K, Table *t, const TValue ek) { 362 int32_t nasize, na; 363 int32_t nums[MAXBITS+1]; /* nums[i] = number of keys between 2^(i-1) and 2^i */ 364 int32_t i; 365 int32_t totaluse; 366 for (i=0; i<=MAXBITS; i++) nums[i] = 0; /* reset counts */ 367 nasize = numusearray(t, nums); /* count keys in array part */ 368 totaluse = nasize; /* all those keys are integer keys */ 369 totaluse += numusehash(t, nums, &nasize); /* count keys in hash part */ 370 /* count extra key */ 371 nasize += countint(ek, nums); 372 totaluse++; 373 /* compute new size for array part */ 374 na = computesizes(nums, &nasize); 375 /* resize the table to new computed sizes */ 376 resize(K, t, nasize, totaluse - na); 377 } 378 379 380 381 /* 382 ** }============================================================= 383 */ 384 385 /* wflags should be either or both of K_FLAG_WEAK_KEYS or K_FLAG_WEAK VALUES */ 386 TValue klispH_new (klisp_State *K, int32_t narray, int32_t nhash, 387 int32_t wflags) 388 { 389 klisp_assert((wflags & (K_FLAG_WEAK_KEYS | K_FLAG_WEAK_VALUES)) == 390 wflags); 391 Table *t = klispM_new(K, Table); 392 klispC_link(K, (GCObject *) t, K_TTABLE, wflags); 393 /* temporary values (kept only if some malloc fails) */ 394 t->array = NULL; 395 t->sizearray = 0; 396 t->lsizenode = 0; 397 t->node = cast(Node *, dummynode); 398 /* root in case gc is run while allocating array or nodes */ 399 TValue tv_t = gc2table(t); 400 krooted_tvs_push(K, tv_t); 401 402 setarrayvector(K, t, narray); 403 setnodevector(K, t, nhash); 404 krooted_tvs_pop(K); 405 return tv_t; 406 } 407 408 409 void klispH_free (klisp_State *K, Table *t) 410 { 411 if (t->node != dummynode) 412 klispM_freearray(K, t->node, sizenode(t), Node); 413 klispM_freearray(K, t->array, t->sizearray, TValue); 414 klispM_free(K, t); 415 } 416 417 418 static Node *getfreepos (Table *t) 419 { 420 while (t->lastfree-- > t->node) { 421 if (ttisfree(gkey(t->lastfree)->this)) 422 return t->lastfree; 423 } 424 return NULL; /* could not find a free place */ 425 } 426 427 428 /* 429 ** inserts a new key into a hash table; first, check whether key's main 430 ** position is free. If not, check whether colliding node is in its main 431 ** position or not: if it is not, move colliding node to an empty place and 432 ** put new key in its main position; otherwise (colliding node is in its main 433 ** position), new key goes to an empty position. 434 */ 435 static TValue *newkey (klisp_State *K, Table *t, TValue key) 436 { 437 Node *mp = mainposition(t, key); 438 if (!ttisfree(gval(mp)) || mp == dummynode) { 439 Node *othern; 440 Node *n = getfreepos(t); /* get a free place */ 441 if (n == NULL) { /* cannot find a free place? */ 442 rehash(K, t, key); /* grow table */ 443 return klispH_set(K, t, key); /* re-insert key into grown table */ 444 } 445 klisp_assert(n != dummynode); 446 othern = mainposition(t, key2tval(mp)); 447 if (othern != mp) { /* is colliding node out of its main position? */ 448 /* yes; move colliding node into free position */ 449 while (gnext(othern) != mp) othern = gnext(othern); /* find previous */ 450 gnext(othern) = n; /* redo the chain with `n' in place of `mp' */ 451 *n = *mp; /* copy colliding node into free pos. (mp->next also goes) */ 452 gnext(mp) = NULL; /* now `mp' is free */ 453 gval(mp) = KFREE; 454 } else { /* colliding node is in its own main position */ 455 /* new node will go into free position */ 456 gnext(n) = gnext(mp); /* chain new position */ 457 gnext(mp) = n; 458 mp = n; 459 } 460 } 461 gkey(mp)->this = key; 462 klispC_barriert(K, t, key); 463 klisp_assert(ttisfree(gval(mp))); 464 return &gval(mp); 465 } 466 467 468 /* 469 ** search function for integers 470 */ 471 const TValue *klispH_getfixint (Table *t, int32_t key) 472 { 473 if (key >= 0 && key < t->sizearray) 474 return &t->array[key]; 475 else { 476 Node *n = hashfixint(t, key); 477 do { /* check whether `key' is somewhere in the chain */ 478 if (ttisfixint(gkey(n)->this) && ivalue(gkey(n)->this) == key) 479 return &gval(n); /* that's it */ 480 else n = gnext(n); 481 } while (n); 482 return &kfree; 483 } 484 } 485 486 487 /* 488 ** search function for immutable strings 489 */ 490 const TValue *klispH_getstr (Table *t, String *key) { 491 klisp_assert(kstring_immutablep(gc2str(key))); 492 Node *n = hashstr(t, key); 493 do { /* check whether `key' is somewhere in the chain */ 494 if (ttisstring(gkey(n)->this) && tv2str(gkey(n)->this) == key) 495 return &gval(n); /* that's it */ 496 else n = gnext(n); 497 } while (n); 498 return &kfree; 499 } 500 501 /* 502 ** search function for symbol 503 */ 504 const TValue *klispH_getsym (Table *t, Symbol *key) { 505 Node *n = hashsym(t, key); 506 TValue tv_key = gc2sym(key); 507 do { /* check whether `key' is somewhere in the chain */ 508 if (ttissymbol(gkey(n)->this) && 509 tv_sym_equal(gkey(n)->this, tv_key)) 510 return &gval(n); /* that's it */ 511 else n = gnext(n); 512 } while (n); 513 return &kfree; 514 } 515 516 517 /* 518 ** main search function 519 */ 520 const TValue *klispH_get (Table *t, TValue key) 521 { 522 switch (ttype(key)) { 523 case K_TFREE: return &kfree; 524 case K_TSYMBOL: return klispH_getsym(t, tv2sym(key)); 525 case K_TFIXINT: return klispH_getfixint(t, ivalue(key)); 526 case K_TSTRING: 527 if (kstring_immutablep(key)) 528 return klispH_getstr(t, tv2str(key)); 529 /* else fall through */ 530 default: { 531 Node *n = mainposition(t, key); 532 do { /* check whether `key' is somewhere in the chain */ 533 /* XXX: for some reason eq2p takes klisp_State but 534 doesn't use it */ 535 if (eq2p((klisp_State *)NULL, key2tval(n), key)) 536 return &gval(n); /* that's it */ 537 else n = gnext(n); 538 } while (n); 539 return &kfree; 540 } 541 } 542 } 543 544 545 TValue *klispH_set (klisp_State *K, Table *t, TValue key) 546 { 547 const TValue *p = klispH_get(t, key); 548 if (p != &kfree) 549 return cast(TValue *, p); 550 else { 551 if (ttisfree(key)) 552 klispE_throw_simple(K, "table index is free"); 553 /* 554 else if (ttisnumber(key) && luai_numisnan(nvalue(key))) 555 luaG_runerror(L, "table index is NaN"); 556 */ 557 return newkey(K, t, key); 558 } 559 } 560 561 562 TValue *klispH_setfixint (klisp_State *K, Table *t, int32_t key) 563 { 564 const TValue *p = klispH_getfixint(t, key); 565 if (p != &kfree) 566 return cast(TValue *, p); 567 else 568 return newkey(K, t, i2tv(key)); 569 } 570 571 572 TValue *klispH_setstr (klisp_State *K, Table *t, String *key) 573 { 574 klisp_assert(kstring_immutablep(gc2str(key))); 575 const TValue *p = klispH_getstr(t, key); 576 if (p != &kfree) 577 return cast(TValue *, p); 578 else { 579 return newkey(K, t, gc2str(key)); 580 } 581 } 582 583 584 TValue *klispH_setsym (klisp_State *K, Table *t, Symbol *key) 585 { 586 const TValue *p = klispH_getsym(t, key); 587 if (p != &kfree) 588 return cast(TValue *, p); 589 else { 590 return newkey(K, t, gc2sym(key)); 591 } 592 } 593 594 595 /* klisp: Untested, may have off by one errors, check before using */ 596 static int32_t unbound_search (Table *t, int32_t j) { 597 int32_t i = j; /* i -1 or a present index */ 598 j++; 599 /* find `i' and `j' such that i is present and j is not */ 600 while (!ttisfree(*klispH_getfixint(t, j))) { 601 i = j; 602 if (j <= (INT32_MAX - i) / 2) 603 j *= 2; 604 else { /* overflow? */ 605 /* table was built with bad purposes: resort to linear search */ 606 i = 0; 607 while (!ttisfree(*klispH_getfixint(t, i))) i++; 608 return i-1; 609 } 610 } 611 /* now do a binary search between them */ 612 while (j - i > 1) { 613 int32_t m = (i+j)/2; 614 if (ttisfree(*klispH_getfixint(t, m))) j = m; 615 else i = m; 616 } 617 return i; 618 } 619 620 621 /* 622 ** Try to find a boundary in table `t'. A `boundary' is an integer index 623 ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). 624 ** klisp: in klisp that indexes are from zero, this returns -1 if t[0] is nil 625 ** also klisp uses free instead of nil 626 */ 627 int32_t klispH_getn (Table *t) { 628 int32_t j = t->sizearray - 1; 629 if (j >= 0 && ttisfree(t->array[j])) { 630 /* there is a boundary in the array part: (binary) search for it */ 631 int32_t i = -1; 632 while (j - i > 1) { 633 int32_t m = (i+j)/2; 634 if (ttisfree(t->array[m])) j = m; 635 else i = m; 636 } 637 return i; 638 } 639 /* else must find a boundary in hash part */ 640 else if (t->node == dummynode) /* hash part is empty? */ 641 return j; /* that is easy... */ 642 else return unbound_search(t, j); 643 } 644 645 /* Return number of used elements in the hashtable. Code copied 646 * from rehash(). */ 647 648 int32_t klispH_numuse(Table *t) 649 { 650 int32_t nasize; 651 int32_t nums[MAXBITS+1]; /* nums[i] = number of keys between 2^(i-1) and 2^i */ 652 int32_t i; 653 int32_t totaluse; 654 for (i=0; i<=MAXBITS; i++) nums[i] = 0; /* reset counts */ 655 nasize = numusearray(t, nums); /* count keys in array part */ 656 totaluse = nasize; /* all those keys are integer keys */ 657 totaluse += numusehash(t, nums, &nasize); /* count keys in hash part */ 658 return totaluse; 659 } 660 661 bool ktablep(TValue obj) 662 { 663 return ttistable(obj); 664 }