klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit f086542f31e6f088370a8fb7eefe28f446e9b4cb
parent a82abb4dcb78d08961369a5360b5cedaf14bc9c7
Author: Andres Navarro <canavarro82@gmail.com>
Date:   Wed, 23 Mar 2011 16:12:56 -0300

Modified symbol internal struct to include an array.
Added non-identifier symbols ouput only representation to kwrite.

Diffstat:
Msrc/Makefile | 3++-
Msrc/kobject.h | 10++++++----
Msrc/kstate.c | 3++-
Msrc/kstring.h | 4++--
Msrc/ksymbol.c | 82++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Msrc/ksymbol.h | 9++++++++-
Msrc/ktoken.c | 34+++++++---------------------------
Msrc/ktoken.h | 29+++++++++++++++++++++++++++++
Msrc/kwrite.c | 9++++++---
9 files changed, 131 insertions(+), 52 deletions(-)

diff --git a/src/Makefile b/src/Makefile @@ -48,7 +48,8 @@ ktoken.o: ktoken.c ktoken.h kobject.h kstate.h kpair.h kstring.h ksymbol.h \ kpair.o: kpair.c kpair.h kobject.h kstate.h kmem.h klisp.h kstring.o: kstring.c kstring.h kobject.h kstate.h kmem.h klisp.h # XXX: kpair.h because of use of list as symbol table -ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h kstate.h kmem.h klisp.h +ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h kstring.h kstate.h kmem.h \ + klisp.h kread.o: kread.c kread.h kobject.h ktoken.h kpair.h kstate.h kerror.h klisp.h \ kport.h kwrite.o: kwrite.c kwrite.h kobject.h kpair.h kstring.h kstate.h kerror.h \ diff --git a/src/kobject.h b/src/kobject.h @@ -249,15 +249,12 @@ typedef struct __attribute__ ((__packed__)) { TValue si; /* source code info (either () or (filename line col) */ } Pair; -/* XXX: Symbol should probably contain a String instead of a char buf */ typedef struct __attribute__ ((__packed__)) { CommonHeader; TValue mark; /* for cycle/sharing aware algorithms */ - uint32_t size; - char b[]; + TValue str; /* could use String * here, but for now... */ } Symbol; - typedef struct __attribute__ ((__packed__)) { CommonHeader; TValue mark; /* for cycle/sharing aware algorithms */ @@ -477,6 +474,11 @@ extern char *ktv_names[]; #define gch_get_flags(o_) (obj2gch(o_)->flags) #define tv_get_flags(o_) (gch_get_flags(tv2gch(o_))) +/* Flags for symbols */ +/* has external representation (identifiers) */ +#define K_FLAG_EXT_REP 0x01 +#define khas_ext_rep(s_) ((tv_get_flags(s_) & K_FLAG_EXT_REP) != 0) + /* Flags for marking continuations */ #define K_FLAG_OUTER 0x01 #define K_FLAG_INNER 0x02 diff --git a/src/kstate.c b/src/kstate.c @@ -439,7 +439,8 @@ void klisp_close (klisp_State *K) klispM_free(K, (Pair *)obj); break; case K_TSYMBOL: - klispM_freemem(K, obj, sizeof(Symbol)+obj->sym.size+1); + /* The string will be freed before/after */ + klispM_free(K, (Symbol *)obj); break; case K_TSTRING: klispM_freemem(K, obj, sizeof(String)+obj->str.size+1); diff --git a/src/kstring.h b/src/kstring.h @@ -19,8 +19,8 @@ TValue kstring_new(klisp_State *K, const char *buf, uint32_t size); TValue kstring_new_g(klisp_State *K, uint32_t size); TValue kstring_new_sc(klisp_State *K, uint32_t size, char fill); -#define kstring_buf(tv_) (((String *) ((tv_).tv.v.gc))->b) -#define kstring_size(tv_) (((String *) ((tv_).tv.v.gc))->size) +#define kstring_buf(tv_) (tv2str(tv_)->b) +#define kstring_size(tv_) (tv2str(tv_)->size) #define kstring_is_empty(tv_) (kstring_size(tv_) == 0) diff --git a/src/ksymbol.c b/src/ksymbol.c @@ -8,45 +8,101 @@ #include "ksymbol.h" #include "kobject.h" +/* for identifier checking */ +#include "ktoken.h" #include "kpair.h" #include "kstate.h" #include "kmem.h" -TValue ksymbol_new(klisp_State *K, const char *buf) +TValue ksymbol_new_g(klisp_State *K, const char *buf, int32_t size, + bool identifierp) { /* TODO: replace symbol list with hashtable */ /* First look for it in the symbol table */ TValue tbl = K->symbol_table; + while (!ttisnil(tbl)) { TValue first = kcar(tbl); - /* NOTE: there are no embedded '\0's in symbols */ - if (strcmp(buf, tv2sym(first)->b) == 0) + /* NOTE: there are no embedded '\0's in identifiers but + they could be in other symbols */ + if (size == ksymbol_size(first) && + memcmp(buf, ksymbol_buf(first), size) == 0) { return first; - else + } else tbl = kcdr(tbl); } - /* Didn't find it, alloc new and save in symbol table */ + /* Didn't find it, alloc new string and save in symbol table */ /* NOTE: there are no embedded '\0's in symbols */ - int32_t size = strlen(buf); - Symbol *new_sym = klispM_malloc(K, sizeof(Symbol) + size + 1); - + /* GC: root new_str */ + TValue new_str = kstring_new(K, buf, size); /* this copies the buf */ + Symbol *new_sym = klispM_new(K, Symbol); /* header + gc_fields */ new_sym->next = K->root_gc; K->root_gc = (GCObject *)new_sym; new_sym->gct = 0; new_sym->tt = K_TSYMBOL; - new_sym->flags = 0; + new_sym->flags = identifierp? K_FLAG_EXT_REP : 0; /* symbol specific fields */ new_sym->mark = KFALSE; - new_sym->size = size; - memcpy(new_sym->b, buf, size); - new_sym->b[size] = '\0'; + new_sym->str = new_str; TValue new_symv = gc2sym(new_sym); - /* XXX: new_symv unrooted */ + /* GC: root new_symb */ K->symbol_table = kcons(K, new_symv, K->symbol_table); return new_symv; } + +/* for indentifiers */ +TValue ksymbol_new_i(klisp_State *K, const char *buf, int32_t size) +{ + return ksymbol_new_g(K, buf, size, true); +} + +/* for indentifiers with no size */ +TValue ksymbol_new(klisp_State *K, const char *buf) +{ + int32_t size = (int32_t) strlen(buf); + return ksymbol_new_g(K, buf, size, true); +} + +/* for string->symbol */ +TValue ksymbol_new_check_i(klisp_State *K, TValue str) +{ + int32_t size = kstring_size(str); + char *buf = kstring_buf(str); + bool identifierp; + + /* this is necessary because the empty symbol isn't an identifier */ + /* MAYBE it should throw an error if the string is empty */ + /* XXX: The exact syntax for identifiers isn't there in the report + yet, here we use something like scheme, and the same as in ktoken.h + (details, leading numbers '.', '+' and '-' are a no go, but '+' and + '-' are an exception. + */ + identifierp = (size > 0); + if (identifierp) { + char first = *buf; + buf++; + size--; + if (first == '+' || first == '-') + identifierp = (size == 0); + else if (first == '.' || ktok_is_numeric(first)) + identifierp = false; + else + identifierp = ktok_is_subsequent(first); + + while(identifierp && size--) { + if (ktok_is_subsequent(*buf)) + identifierp = false; + else + buf++; + } + } + /* recover size & buf*/ + size = kstring_size(str); + buf = kstring_buf(str); + return ksymbol_new_g(K, buf, size, identifierp); +} diff --git a/src/ksymbol.h b/src/ksymbol.h @@ -9,11 +9,18 @@ #include "kobject.h" #include "kstate.h" +#include "kstring.h" #include "kmem.h" /* TEMP: for now all symbols are interned */ +/* For identifiers */ +TValue ksymbol_new_i(klisp_State *K, const char *buf, int32_t size); +/* For identifiers, simplified for unknown size */ TValue ksymbol_new(klisp_State *K, const char *buf); +/* For general strings */ +TValue ksymbol_new_check_i(klisp_State *K, TValue str); -#define ksymbol_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b) +#define ksymbol_buf(tv_) (kstring_buf(tv2sym(tv_)->str)) +#define ksymbol_size(tv_) (kstring_size(tv2sym(tv_)->str)) #endif diff --git a/src/ktoken.c b/src/ktoken.c @@ -44,26 +44,14 @@ ** Char sets for fast ASCII char classification */ -/* Each bit correspond to a char in the 0-255 range */ -typedef uint32_t kcharset[8]; - /* ** Char set function/macro interface */ void kcharset_empty(kcharset); void kcharset_fill(kcharset, char *); void kcharset_union(kcharset, kcharset); -#define kcharset_contains(kch_, ch_) \ - ({ unsigned char ch__ = (unsigned char) (ch_); \ - kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); }) - +/* contains in .h */ -/* -** Char set contains macro interface -*/ -#define KCHS_OCTANT(ch) ((ch) >> 5) -#define KCHS_BIT(ch) (1 << ((ch) & 0x1f)) - void kcharset_empty(kcharset chs) { for (int i = 0; i < 8; i++) { @@ -96,16 +84,6 @@ void kcharset_union(kcharset chs, kcharset chs2) kcharset ktok_alphabetic, ktok_numeric, ktok_whitespace; kcharset ktok_delimiter, ktok_extended, ktok_subsequent; -#define ktok_is_alphabetic(chi_) kcharset_contains(ktok_alphabetic, chi_) -/* TODO: add is_digit, that takes the base as parameter */ -#define ktok_is_numeric(chi_) kcharset_contains(ktok_numeric, chi_) -/* TODO: add hex digits */ -#define ktok_digit_value(ch_) (ch_ - '0') -#define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_) -#define ktok_is_delimiter(chi_) ((chi_) == EOF || \ - kcharset_contains(ktok_delimiter, chi_)) -#define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_) - /* ** Special Tokens ** @@ -408,7 +386,7 @@ TValue ktok_read_maybe_signed_numeric(klisp_State *K) if (ktok_check_delimiter(K)) { ks_tbadd(K, ch); ks_tbadd(K, '\0'); - TValue new_sym = ksymbol_new(K, ks_tbget_buffer(K)); + TValue new_sym = ksymbol_new_i(K, ks_tbget_buffer(K), 1); ks_tbclear(K); return new_sym; } else { @@ -626,18 +604,20 @@ TValue ktok_read_special(klisp_State *K) */ TValue ktok_read_identifier(klisp_State *K) { + int32_t i = 1; while (!ktok_check_delimiter(K)) { /* NOTE: can't be eof, because eof is a delimiter */ char ch = (char) ktok_getc(K); /* NOTE: is_subsequent of '\0' is false, so no embedded '\0' */ - if (ktok_is_subsequent(ch)) + if (ktok_is_subsequent(ch)) { ks_tbadd(K, ch); - else + i++; + } else ktok_error(K, "Invalid char in identifier"); } ks_tbadd(K, '\0'); - TValue new_sym = ksymbol_new(K, ks_tbget_buffer(K)); + TValue new_sym = ksymbol_new_i(K, ks_tbget_buffer(K), i-1); ks_tbclear(K); return new_sym; } diff --git a/src/ktoken.h b/src/ktoken.h @@ -23,5 +23,34 @@ TValue ktok_get_source_info(klisp_State *K); /* This is needed here to allow cleanup of shared dict from tokenizer */ void clear_shared_dict(klisp_State *K); +/* This is needed for string->symbol to check if a symbol has external + representation as an identifier */ +/* REFACTOR: think out a better interface to all this */ + +/* Each bit correspond to a char in the 0-255 range */ +typedef uint32_t kcharset[8]; + +extern kcharset ktok_alphabetic, ktok_numeric, ktok_whitespace; +extern kcharset ktok_delimiter, ktok_extended, ktok_subsequent; + +#define ktok_is_alphabetic(chi_) kcharset_contains(ktok_alphabetic, chi_) +/* TODO: add is_digit, that takes the base as parameter */ +#define ktok_is_numeric(chi_) kcharset_contains(ktok_numeric, chi_) +/* TODO: add hex digits */ +#define ktok_digit_value(ch_) (ch_ - '0') +#define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_) +#define ktok_is_delimiter(chi_) ((chi_) == EOF || \ + kcharset_contains(ktok_delimiter, chi_)) +#define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_) + +#define kcharset_contains(kch_, ch_) \ + ({ unsigned char ch__ = (unsigned char) (ch_); \ + kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); }) + +/* +** Char set contains macro interface +*/ +#define KCHS_OCTANT(ch) ((ch) >> 5) +#define KCHS_BIT(ch) (1 << ((ch) & 0x1f)) #endif diff --git a/src/kwrite.c b/src/kwrite.c @@ -188,9 +188,12 @@ void kwrite_simple(klisp_State *K, TValue obj) kw_printf(K, "#%c", bvalue(obj)? 't' : 'f'); break; case K_TSYMBOL: - /* TEMP: access symbol structure directly */ - /* TEMP: for now assume all symbols have external representations */ - kw_printf(K, "%s", ksymbol_buf(obj)); + if (khas_ext_rep(obj)) { + /* TEMP: access symbol structure directly */ + kw_printf(K, "%s", ksymbol_buf(obj)); + } else { + kw_printf(K, "#[symbol]"); + } break; case K_TINERT: kw_printf(K, "#inert");