commit f086542f31e6f088370a8fb7eefe28f446e9b4cb
parent a82abb4dcb78d08961369a5360b5cedaf14bc9c7
Author: Andres Navarro <canavarro82@gmail.com>
Date: Wed, 23 Mar 2011 16:12:56 -0300
Modified symbol internal struct to include an array.
Added non-identifier symbols ouput only representation to kwrite.
Diffstat:
9 files changed, 131 insertions(+), 52 deletions(-)
diff --git a/src/Makefile b/src/Makefile
@@ -48,7 +48,8 @@ ktoken.o: ktoken.c ktoken.h kobject.h kstate.h kpair.h kstring.h ksymbol.h \
kpair.o: kpair.c kpair.h kobject.h kstate.h kmem.h klisp.h
kstring.o: kstring.c kstring.h kobject.h kstate.h kmem.h klisp.h
# XXX: kpair.h because of use of list as symbol table
-ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h kstate.h kmem.h klisp.h
+ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h kstring.h kstate.h kmem.h \
+ klisp.h
kread.o: kread.c kread.h kobject.h ktoken.h kpair.h kstate.h kerror.h klisp.h \
kport.h
kwrite.o: kwrite.c kwrite.h kobject.h kpair.h kstring.h kstate.h kerror.h \
diff --git a/src/kobject.h b/src/kobject.h
@@ -249,15 +249,12 @@ typedef struct __attribute__ ((__packed__)) {
TValue si; /* source code info (either () or (filename line col) */
} Pair;
-/* XXX: Symbol should probably contain a String instead of a char buf */
typedef struct __attribute__ ((__packed__)) {
CommonHeader;
TValue mark; /* for cycle/sharing aware algorithms */
- uint32_t size;
- char b[];
+ TValue str; /* could use String * here, but for now... */
} Symbol;
-
typedef struct __attribute__ ((__packed__)) {
CommonHeader;
TValue mark; /* for cycle/sharing aware algorithms */
@@ -477,6 +474,11 @@ extern char *ktv_names[];
#define gch_get_flags(o_) (obj2gch(o_)->flags)
#define tv_get_flags(o_) (gch_get_flags(tv2gch(o_)))
+/* Flags for symbols */
+/* has external representation (identifiers) */
+#define K_FLAG_EXT_REP 0x01
+#define khas_ext_rep(s_) ((tv_get_flags(s_) & K_FLAG_EXT_REP) != 0)
+
/* Flags for marking continuations */
#define K_FLAG_OUTER 0x01
#define K_FLAG_INNER 0x02
diff --git a/src/kstate.c b/src/kstate.c
@@ -439,7 +439,8 @@ void klisp_close (klisp_State *K)
klispM_free(K, (Pair *)obj);
break;
case K_TSYMBOL:
- klispM_freemem(K, obj, sizeof(Symbol)+obj->sym.size+1);
+ /* The string will be freed before/after */
+ klispM_free(K, (Symbol *)obj);
break;
case K_TSTRING:
klispM_freemem(K, obj, sizeof(String)+obj->str.size+1);
diff --git a/src/kstring.h b/src/kstring.h
@@ -19,8 +19,8 @@ TValue kstring_new(klisp_State *K, const char *buf, uint32_t size);
TValue kstring_new_g(klisp_State *K, uint32_t size);
TValue kstring_new_sc(klisp_State *K, uint32_t size, char fill);
-#define kstring_buf(tv_) (((String *) ((tv_).tv.v.gc))->b)
-#define kstring_size(tv_) (((String *) ((tv_).tv.v.gc))->size)
+#define kstring_buf(tv_) (tv2str(tv_)->b)
+#define kstring_size(tv_) (tv2str(tv_)->size)
#define kstring_is_empty(tv_) (kstring_size(tv_) == 0)
diff --git a/src/ksymbol.c b/src/ksymbol.c
@@ -8,45 +8,101 @@
#include "ksymbol.h"
#include "kobject.h"
+/* for identifier checking */
+#include "ktoken.h"
#include "kpair.h"
#include "kstate.h"
#include "kmem.h"
-TValue ksymbol_new(klisp_State *K, const char *buf)
+TValue ksymbol_new_g(klisp_State *K, const char *buf, int32_t size,
+ bool identifierp)
{
/* TODO: replace symbol list with hashtable */
/* First look for it in the symbol table */
TValue tbl = K->symbol_table;
+
while (!ttisnil(tbl)) {
TValue first = kcar(tbl);
- /* NOTE: there are no embedded '\0's in symbols */
- if (strcmp(buf, tv2sym(first)->b) == 0)
+ /* NOTE: there are no embedded '\0's in identifiers but
+ they could be in other symbols */
+ if (size == ksymbol_size(first) &&
+ memcmp(buf, ksymbol_buf(first), size) == 0) {
return first;
- else
+ } else
tbl = kcdr(tbl);
}
- /* Didn't find it, alloc new and save in symbol table */
+ /* Didn't find it, alloc new string and save in symbol table */
/* NOTE: there are no embedded '\0's in symbols */
- int32_t size = strlen(buf);
- Symbol *new_sym = klispM_malloc(K, sizeof(Symbol) + size + 1);
-
+ /* GC: root new_str */
+ TValue new_str = kstring_new(K, buf, size); /* this copies the buf */
+ Symbol *new_sym = klispM_new(K, Symbol);
/* header + gc_fields */
new_sym->next = K->root_gc;
K->root_gc = (GCObject *)new_sym;
new_sym->gct = 0;
new_sym->tt = K_TSYMBOL;
- new_sym->flags = 0;
+ new_sym->flags = identifierp? K_FLAG_EXT_REP : 0;
/* symbol specific fields */
new_sym->mark = KFALSE;
- new_sym->size = size;
- memcpy(new_sym->b, buf, size);
- new_sym->b[size] = '\0';
+ new_sym->str = new_str;
TValue new_symv = gc2sym(new_sym);
- /* XXX: new_symv unrooted */
+ /* GC: root new_symb */
K->symbol_table = kcons(K, new_symv, K->symbol_table);
return new_symv;
}
+
+/* for indentifiers */
+TValue ksymbol_new_i(klisp_State *K, const char *buf, int32_t size)
+{
+ return ksymbol_new_g(K, buf, size, true);
+}
+
+/* for indentifiers with no size */
+TValue ksymbol_new(klisp_State *K, const char *buf)
+{
+ int32_t size = (int32_t) strlen(buf);
+ return ksymbol_new_g(K, buf, size, true);
+}
+
+/* for string->symbol */
+TValue ksymbol_new_check_i(klisp_State *K, TValue str)
+{
+ int32_t size = kstring_size(str);
+ char *buf = kstring_buf(str);
+ bool identifierp;
+
+ /* this is necessary because the empty symbol isn't an identifier */
+ /* MAYBE it should throw an error if the string is empty */
+ /* XXX: The exact syntax for identifiers isn't there in the report
+ yet, here we use something like scheme, and the same as in ktoken.h
+ (details, leading numbers '.', '+' and '-' are a no go, but '+' and
+ '-' are an exception.
+ */
+ identifierp = (size > 0);
+ if (identifierp) {
+ char first = *buf;
+ buf++;
+ size--;
+ if (first == '+' || first == '-')
+ identifierp = (size == 0);
+ else if (first == '.' || ktok_is_numeric(first))
+ identifierp = false;
+ else
+ identifierp = ktok_is_subsequent(first);
+
+ while(identifierp && size--) {
+ if (ktok_is_subsequent(*buf))
+ identifierp = false;
+ else
+ buf++;
+ }
+ }
+ /* recover size & buf*/
+ size = kstring_size(str);
+ buf = kstring_buf(str);
+ return ksymbol_new_g(K, buf, size, identifierp);
+}
diff --git a/src/ksymbol.h b/src/ksymbol.h
@@ -9,11 +9,18 @@
#include "kobject.h"
#include "kstate.h"
+#include "kstring.h"
#include "kmem.h"
/* TEMP: for now all symbols are interned */
+/* For identifiers */
+TValue ksymbol_new_i(klisp_State *K, const char *buf, int32_t size);
+/* For identifiers, simplified for unknown size */
TValue ksymbol_new(klisp_State *K, const char *buf);
+/* For general strings */
+TValue ksymbol_new_check_i(klisp_State *K, TValue str);
-#define ksymbol_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b)
+#define ksymbol_buf(tv_) (kstring_buf(tv2sym(tv_)->str))
+#define ksymbol_size(tv_) (kstring_size(tv2sym(tv_)->str))
#endif
diff --git a/src/ktoken.c b/src/ktoken.c
@@ -44,26 +44,14 @@
** Char sets for fast ASCII char classification
*/
-/* Each bit correspond to a char in the 0-255 range */
-typedef uint32_t kcharset[8];
-
/*
** Char set function/macro interface
*/
void kcharset_empty(kcharset);
void kcharset_fill(kcharset, char *);
void kcharset_union(kcharset, kcharset);
-#define kcharset_contains(kch_, ch_) \
- ({ unsigned char ch__ = (unsigned char) (ch_); \
- kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); })
-
+/* contains in .h */
-/*
-** Char set contains macro interface
-*/
-#define KCHS_OCTANT(ch) ((ch) >> 5)
-#define KCHS_BIT(ch) (1 << ((ch) & 0x1f))
-
void kcharset_empty(kcharset chs)
{
for (int i = 0; i < 8; i++) {
@@ -96,16 +84,6 @@ void kcharset_union(kcharset chs, kcharset chs2)
kcharset ktok_alphabetic, ktok_numeric, ktok_whitespace;
kcharset ktok_delimiter, ktok_extended, ktok_subsequent;
-#define ktok_is_alphabetic(chi_) kcharset_contains(ktok_alphabetic, chi_)
-/* TODO: add is_digit, that takes the base as parameter */
-#define ktok_is_numeric(chi_) kcharset_contains(ktok_numeric, chi_)
-/* TODO: add hex digits */
-#define ktok_digit_value(ch_) (ch_ - '0')
-#define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_)
-#define ktok_is_delimiter(chi_) ((chi_) == EOF || \
- kcharset_contains(ktok_delimiter, chi_))
-#define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_)
-
/*
** Special Tokens
**
@@ -408,7 +386,7 @@ TValue ktok_read_maybe_signed_numeric(klisp_State *K)
if (ktok_check_delimiter(K)) {
ks_tbadd(K, ch);
ks_tbadd(K, '\0');
- TValue new_sym = ksymbol_new(K, ks_tbget_buffer(K));
+ TValue new_sym = ksymbol_new_i(K, ks_tbget_buffer(K), 1);
ks_tbclear(K);
return new_sym;
} else {
@@ -626,18 +604,20 @@ TValue ktok_read_special(klisp_State *K)
*/
TValue ktok_read_identifier(klisp_State *K)
{
+ int32_t i = 1;
while (!ktok_check_delimiter(K)) {
/* NOTE: can't be eof, because eof is a delimiter */
char ch = (char) ktok_getc(K);
/* NOTE: is_subsequent of '\0' is false, so no embedded '\0' */
- if (ktok_is_subsequent(ch))
+ if (ktok_is_subsequent(ch)) {
ks_tbadd(K, ch);
- else
+ i++;
+ } else
ktok_error(K, "Invalid char in identifier");
}
ks_tbadd(K, '\0');
- TValue new_sym = ksymbol_new(K, ks_tbget_buffer(K));
+ TValue new_sym = ksymbol_new_i(K, ks_tbget_buffer(K), i-1);
ks_tbclear(K);
return new_sym;
}
diff --git a/src/ktoken.h b/src/ktoken.h
@@ -23,5 +23,34 @@ TValue ktok_get_source_info(klisp_State *K);
/* This is needed here to allow cleanup of shared dict from tokenizer */
void clear_shared_dict(klisp_State *K);
+/* This is needed for string->symbol to check if a symbol has external
+ representation as an identifier */
+/* REFACTOR: think out a better interface to all this */
+
+/* Each bit correspond to a char in the 0-255 range */
+typedef uint32_t kcharset[8];
+
+extern kcharset ktok_alphabetic, ktok_numeric, ktok_whitespace;
+extern kcharset ktok_delimiter, ktok_extended, ktok_subsequent;
+
+#define ktok_is_alphabetic(chi_) kcharset_contains(ktok_alphabetic, chi_)
+/* TODO: add is_digit, that takes the base as parameter */
+#define ktok_is_numeric(chi_) kcharset_contains(ktok_numeric, chi_)
+/* TODO: add hex digits */
+#define ktok_digit_value(ch_) (ch_ - '0')
+#define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_)
+#define ktok_is_delimiter(chi_) ((chi_) == EOF || \
+ kcharset_contains(ktok_delimiter, chi_))
+#define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_)
+
+#define kcharset_contains(kch_, ch_) \
+ ({ unsigned char ch__ = (unsigned char) (ch_); \
+ kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); })
+
+/*
+** Char set contains macro interface
+*/
+#define KCHS_OCTANT(ch) ((ch) >> 5)
+#define KCHS_BIT(ch) (1 << ((ch) & 0x1f))
#endif
diff --git a/src/kwrite.c b/src/kwrite.c
@@ -188,9 +188,12 @@ void kwrite_simple(klisp_State *K, TValue obj)
kw_printf(K, "#%c", bvalue(obj)? 't' : 'f');
break;
case K_TSYMBOL:
- /* TEMP: access symbol structure directly */
- /* TEMP: for now assume all symbols have external representations */
- kw_printf(K, "%s", ksymbol_buf(obj));
+ if (khas_ext_rep(obj)) {
+ /* TEMP: access symbol structure directly */
+ kw_printf(K, "%s", ksymbol_buf(obj));
+ } else {
+ kw_printf(K, "#[symbol]");
+ }
break;
case K_TINERT:
kw_printf(K, "#inert");