commit 321b066952b5a857461b2b68a06b179d3923b260
parent 0d4bbc12d1a40a7fd001712b81d7dd49cb84df03
Author: Andres Navarro <canavarro82@gmail.com>
Date: Sun, 20 Feb 2011 05:55:46 -0300
Added a writer featuring srfi-38 style shared defs/refs.
Diffstat:
13 files changed, 399 insertions(+), 18 deletions(-)
diff --git a/src/Makefile b/src/Makefile
@@ -7,7 +7,8 @@ MYCFLAGS=
MYLDFLAGS=
MYLIBS=
-CORE_O= kobject.o ktoken.o kpair.o kstring.o ksymbol.o kread.o
+CORE_O= kobject.o ktoken.o kpair.o kstring.o ksymbol.o kread.o \
+ kwrite.o
KRN_T= klisp
KRN_O= klisp.o
@@ -39,3 +40,4 @@ kstring.o: kstring.c kstring.h kobject.h
# XXX: kpair.h because of use of list as symbol table
ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h
kread.o: kread.c kread.h kobject.h ktoken.h kpair.h
+kwrite.o: kwrite.c kwrite.h kobject.h kpair.h kstring.h
diff --git a/src/klisp.c b/src/klisp.c
@@ -11,23 +11,27 @@
#include "kobject.h"
#include "kread.h"
+#include "kwrite.h"
int main(int argc, char *argv[])
{
/*
- ** Simple read loop
+ ** Simple read/write loop
*/
- printf("Read Type Test\n");
+ printf("Read/Write Test\n");
kread_file = stdin;
kread_filename = "*STDIN*";
+ kwrite_file = stdout;
kread_init();
+ kwrite_init();
TValue obj = KNIL;
while(!ttiseof(obj)) {
obj = kread();
- printf("\nRead Object Type: %s\n", ttname(obj));
+ kwrite(obj);
+ knewline();
}
return 0;
diff --git a/src/kobject.h b/src/kobject.h
@@ -116,7 +116,7 @@ typedef struct __attribute__ ((__packed__)) GCheader {
#define K_TSTRING 31
#define K_TSYMBOL 32
-#define K_MAKE_VTAG(t) (K_TAG_TAGGED | t)
+#define K_MAKE_VTAG(t) (K_TAG_TAGGED | (t))
/*
** TODO:
@@ -144,7 +144,7 @@ typedef struct __attribute__ ((__packed__)) GCheader {
*/
/* NOTE: This is intended for use in switch statements */
-#define ttype(o) ({ TValue o_ = o; \
+#define ttype(o) ({ TValue o_ = (o); \
ttisdouble(o_)? K_TDOUBLE : ttype_(o_); })
/* This is intended for internal use below. DON'T USE OUTSIDE THIS FILE */
@@ -195,6 +195,7 @@ typedef struct __attribute__ ((__packed__)) InnerTV {
typedef __attribute__((aligned (8))) union {
double d;
InnerTV tv;
+ int64_t raw;
} TValue;
/*
@@ -211,6 +212,7 @@ typedef struct __attribute__ ((__packed__)) {
/* XXX: Symbol should probably contain a String instead of a char buf */
typedef struct __attribute__ ((__packed__)) {
CommonHeader;
+ TValue mark; /* for cycle/sharing aware algorithms */
uint32_t size;
char b[];
} Symbol;
@@ -225,16 +227,26 @@ typedef struct __attribute__ ((__packed__)) {
*/
typedef struct __attribute__ ((__packed__)) {
CommonHeader;
+ TValue mark; /* for cycle/sharing aware algorithms */
uint32_t size;
char b[]; // buffer
} String;
/*
+** Common header for markable objects
+*/
+typedef struct __attribute__ ((__packed__)) {
+ CommonHeader;
+ TValue mark;
+} MGCheader;
+
+/*
** Union of all Kernel heap-allocated values
*/
/* LUA NOTE: In Lua the corresponding union is in lstate.h */
union GCObject {
GCheader gch;
+ MGCheader mgch;
Pair pair;
Symbol sym;
String str;
@@ -274,9 +286,9 @@ const TValue kepinf;
const TValue keminf;
/* Macros to create TValues of non-heap allocated types (for initializers) */
-#define ch2tv_(ch_) {.tv = {.t = K_TAG_CHAR, .v = { .ch = ch_ }}}
-#define i2tv_(i_) {.tv = {.t = K_TAG_FIXINT, .v = { .i = i_ }}}
-#define b2tv_(b_) {.tv = {.t = K_TAG_BOOLEAN, .v = { .b = b_ }}}
+#define ch2tv_(ch_) {.tv = {.t = K_TAG_CHAR, .v = { .ch = (ch_) }}}
+#define i2tv_(i_) {.tv = {.t = K_TAG_FIXINT, .v = { .i = (i_) }}}
+#define b2tv_(b_) {.tv = {.t = K_TAG_BOOLEAN, .v = { .b = (b_) }}}
/* Macros to create TValues of non-heap allocated types */
#define ch2tv(ch_) ((TValue) ch2tv_(ch_))
@@ -286,7 +298,7 @@ const TValue keminf;
/* Macros to convert a GCObject * into a tagged value */
/* TODO: add assertions */
/* LUA NOTE: the corresponding defines are in lstate.h */
-#define gc2tv(t_, o_) ((TValue) {.tv = {.t = t_, \
+#define gc2tv(t_, o_) ((TValue) {.tv = {.t = (t_), \
.v = { .gc = obj2gco(o_)}}})
#define gc2pair(o_) (gc2tv(K_TAG_PAIR, o_))
#define gc2str(o_) (gc2tv(K_TAG_STRING, o_))
@@ -297,6 +309,8 @@ const TValue keminf;
#define tv2str(v_) ((String *) gcvalue(v_))
#define tv2sym(v_) ((Symbol *) gcvalue(v_))
+#define tv2mgch(v_) ((MGCheader *) gcvalue(v_))
+
/* Macro to convert any Kernel object into a GCObject */
#define obj2gco(v_) ((GCObject *) (v_))
@@ -312,4 +326,17 @@ const TValue keminf;
extern char *ktv_names[];
+/* Macros to handle marks */
+/* NOTE: this only works in markable objects */
+#define kget_mark(p_) (tv2mgch(p_)->mark)
+#define kset_mark(p_, m_) (kget_mark(p_) = (m_))
+/* simple boolean #t mark */
+#define kmark(p_) (kset_mark(p_, KTRUE))
+#define kunmark(p_) (kset_mark(p_, KFALSE))
+#define kis_marked(p_) (!kis_unmarked(p_))
+#define kis_unmarked(p_) (tv_equal(kget_mark(p_), KFALSE))
+
+/* Macro to test the most basic equality on TValues */
+#define tv_equal(tv1_, tv2_) ((tv1_).raw == (tv2_).raw)
+
#endif
diff --git a/src/kpair.c b/src/kpair.c
@@ -20,6 +20,7 @@ TValue kcons(TValue car, TValue cdr)
new_pair->next = NULL;
new_pair->gct = 0;
new_pair->tt = K_TPAIR;
+ new_pair->mark = KFALSE;
new_pair->car = car;
new_pair->cdr = cdr;
diff --git a/src/kpair.h b/src/kpair.h
@@ -14,8 +14,8 @@
#define kcar(p_) (tv2pair(p_)->car)
#define kcdr(p_) (tv2pair(p_)->cdr)
-#define kset_car(p_, v_) (kcar(p_) = v_)
-#define kset_cdr(p_, v_) (kcdr(p_) = v_)
+#define kset_car(p_, v_) (kcar(p_) = (v_))
+#define kset_cdr(p_, v_) (kcdr(p_) = (v_))
#define kdummy_cons() (kcons(KNIL, KNIL))
@@ -23,6 +23,6 @@
TValue kcons(TValue, TValue);
#define kget_source_info(p_) (tv2pair(p_)->si)
-#define kset_source_info(p_, si_) (kget_source_info(p_) = si_)
+#define kset_source_info(p_, si_) (kget_source_info(p_) = (si_))
#endif
diff --git a/src/kread.h b/src/kread.h
@@ -7,6 +7,8 @@
#ifndef kread_h
#define kread_h
+#include <stdio.h>
+
#include "kobject.h"
/*
diff --git a/src/kstring.c b/src/kstring.c
@@ -13,15 +13,25 @@
#include "kstring.h"
#include "kobject.h"
+/* TEMP: for now initialized in ktoken.c */
+TValue kempty_string = KINERT_;
+
/* TODO: Out of memory errors */
/* TEMP: for now all strings are mutable */
TValue kstring_new(const char *buf, uint32_t size)
{
- String *new_str = malloc(sizeof(String) + size + 1);
+ String *new_str;
+
+ if (size == 0 && ttisstring(kempty_string)) {
+ return kempty_string;
+ }
+
+ new_str = malloc(sizeof(String) + size + 1);
new_str->next = NULL;
new_str->gct = 0;
new_str->tt = K_TSTRING;
+ new_str->mark = KFALSE;
new_str->size = size;
/* NOTE: there can be embedded '\0's in a string */
memcpy(new_str->b, buf, size);
diff --git a/src/kstring.h b/src/kstring.h
@@ -11,5 +11,11 @@
/* TEMP: for now all strings are mutable */
TValue kstring_new(const char *, uint32_t);
+#define kstring_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b)
+
+/* The only empty string */
+/* TEMP: for now initialized in ktoken.c */
+TValue kempty_string;
+#define kstring_is_empty(tv_) (tv_equal(tv_, kempty_string))
#endif
diff --git a/src/ksymbol.h b/src/ksymbol.h
@@ -16,4 +16,6 @@ TValue ksymbol_table;
/* TEMP: for now all symbols are interned */
TValue ksymbol_new(const char *);
+#define ksymbol_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b)
+
#endif
diff --git a/src/ktoken.c b/src/ktoken.c
@@ -47,15 +47,15 @@ void kcharset_empty(kcharset);
void kcharset_fill(kcharset, char *);
void kcharset_union(kcharset, kcharset);
#define kcharset_contains(kch_, ch_) \
- ({ unsigned char ch__ = (unsigned char) ch_; \
+ ({ unsigned char ch__ = (unsigned char) (ch_); \
kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); })
/*
** Char set contains macro interface
*/
-#define KCHS_OCTANT(ch) (ch >> 5)
-#define KCHS_BIT(ch) (1 << (ch & 0x1f))
+#define KCHS_OCTANT(ch) ((ch) >> 5)
+#define KCHS_BIT(ch) (1 << ((ch) & 0x1f))
void kcharset_empty(kcharset chs)
{
@@ -95,7 +95,7 @@ kcharset ktok_delimiter, ktok_extended, ktok_subsequent;
/* TODO: add hex digits */
#define ktok_digit_value(ch_) (ch_ - '0')
#define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_)
-#define ktok_is_delimiter(chi_) (chi == EOF || \
+#define ktok_is_delimiter(chi_) ((chi_) == EOF || \
kcharset_contains(ktok_delimiter, chi_))
#define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_)
@@ -127,6 +127,9 @@ bool ktok_seen_eof;
void ktok_init()
{
+ /* TEMP: for now initialize empty string here */
+ kempty_string = kstring_new("", 0);
+
assert(ktok_file != NULL);
assert(ktok_source_info.filename != NULL);
diff --git a/src/ktoken.h b/src/ktoken.h
@@ -9,6 +9,8 @@
#include "kobject.h"
+#include <stdio.h>
+
/*
** Tokenizer interface
*/
diff --git a/src/kwrite.c b/src/kwrite.c
@@ -0,0 +1,297 @@
+/*
+** kwrite.c
+** Writer for the Kernel Programming Language
+** See Copyright Notice in klisp.h
+*/
+
+#include <stdio.h>
+/* XXX for malloc */
+#include <stdlib.h>
+/* TODO: use a generalized alloc function */
+/* TEMP: for out of mem errors */
+#include <assert.h>
+#include <inttypes.h>
+
+#include "kwrite.h"
+#include "kobject.h"
+#include "kpair.h"
+#include "kstring.h"
+#include "ksymbol.h"
+
+/*
+** TODO:
+**
+** - Write a print function for strings that works on strings
+** with embedded '\0's
+**
+*/
+
+/* TODO: move to the global state */
+FILE *kwrite_file = NULL;
+/* TEMP: for now use fixints for shared refs */
+int32_t kw_shared_count;
+
+/*
+** Stack for the write FSM
+**
+*/
+
+/* TODO: move to the global state */
+TValue *kw_dstack;
+int kw_dstack_size;
+int kw_dstack_i;
+
+/* TEMP: for now stacks are fixed size, use asserts to check */
+#define STACK_INIT_SIZE 1024
+
+#define push_data(data_) ({ assert(kw_dstack_i < kw_dstack_size); \
+ kw_dstack[kw_dstack_i++] = data_; })
+#define pop_data() (--kw_dstack_i)
+#define get_data() (kw_dstack[kw_dstack_i-1])
+#define data_is_empty() (kw_dstack_i == 0)
+#define clear_data() (kw_dstack_i = 0)
+
+/* macro for printing */
+#define kw_printf(...) fprintf(kwrite_file, __VA_ARGS__)
+#define kw_flush() fflush(kwrite_file)
+
+/*
+** Writer initialization
+*/
+void kwrite_init()
+{
+ assert(kwrite_file != NULL);
+
+ /* XXX: for now use a fixed size for stack */
+ kw_dstack_size = STACK_INIT_SIZE;
+ clear_data();
+ kw_dstack = malloc(STACK_INIT_SIZE*sizeof(TValue));
+ assert(kw_dstack != NULL);
+}
+
+/*
+** Mark initialization and clearing
+*/
+void kw_clear_marks(TValue root)
+{
+ push_data(root);
+
+ while(!data_is_empty()) {
+ TValue obj = get_data();
+ pop_data();
+
+ if (ttispair(obj)) {
+ if (kis_marked(obj)) {
+ kunmark(obj);
+ push_data(kcdr(obj));
+ push_data(kcar(obj));
+ }
+ } else if (ttisstring(obj) && (kis_marked(obj))) {
+ kunmark(obj);
+ }
+ }
+}
+
+/*
+** NOTE:
+** - The objects that appear more than once are marked with a -1.
+** that way, the first time they are found in write, a shared def
+** token will be generated and the mark updated with the number;
+** from then on, the writer will generate a shared ref each time
+** it appears again.
+** - The objects that appear only once are marked with a #t to
+** find repetitions and to allow unmarking after write
+*/
+
+void kw_set_initial_marks(TValue root)
+{
+ push_data(root);
+
+ while(!data_is_empty()) {
+ TValue obj = get_data();
+ pop_data();
+
+ if (ttispair(obj)) {
+ if (kis_unmarked(obj)) {
+ kmark(obj); /* this mark just means visited */
+ push_data(kcdr(obj));
+ push_data(kcar(obj));
+ } else {
+ /* this mark means it will need a ref number */
+ kset_mark(obj, i2tv(-1));
+ }
+ } else if (ttisstring(obj)) {
+ if (kis_unmarked(obj)) {
+ kmark(obj); /* this mark just means visited */
+ } else {
+ /* this mark means it will need a ref number */
+ kset_mark(obj, i2tv(-1));
+ }
+ }
+ /* all other types of object don't matter */
+ }
+}
+
+/*
+** Writes all values except strings and pairs
+*/
+void kwrite_simple(TValue obj)
+{
+ switch(ttype(obj)) {
+ case K_TSTRING:
+ /* this shouldn't happen */
+ assert(0);
+ case K_TEINF:
+ kw_printf("#e%cinfinity", tv_equal(obj, KEPINF)? '+' : '-');
+ break;
+ case K_TFIXINT:
+ kw_printf("%" PRId32, ivalue(obj));
+ break;
+ case K_TNIL:
+ kw_printf("()");
+ break;
+ case K_TCHAR: {
+ char ch_buf[4];
+ char ch = chvalue(obj);
+ char *ch_ptr;
+
+ if (ch == '\n') {
+ ch_ptr = "newline";
+ } else if (ch == ' ') {
+ ch_ptr = "space";
+ } else {
+ ch_buf[0] = ch;
+ ch_buf[1] = '\0';
+ ch_ptr = ch_buf;
+ }
+ kw_printf("#\\%s", ch_ptr);
+ break;
+ }
+ case K_TBOOLEAN:
+ kw_printf("#%c", bvalue(obj)? 't' : 'f');
+ break;
+ case K_TSYMBOL:
+ /* TEMP: access symbol structure directly */
+ /* TEMP: for now assume all symbols have external representations */
+ kw_printf("%s", ksymbol_buf(obj));
+ break;
+ case K_TINERT:
+ kw_printf("#inert");
+ break;
+ case K_TIGNORE:
+ kw_printf("#ignore");
+ break;
+ case K_TEOF:
+ kw_printf("[eof]");
+ break;
+ default:
+ /* shouldn't happen */
+ assert(0);
+ }
+}
+
+
+void kwrite_fsm()
+{
+ bool middle_list = false;
+ while (!data_is_empty()) {
+ TValue obj = get_data();
+ pop_data();
+
+ if (middle_list) {
+ if (ttisnil(obj)) { /* end of list */
+ kw_printf(")");
+ /* middle_list = true; */
+ } else if (ttispair(obj) && ttisboolean(kget_mark(obj))) {
+ push_data(kcdr(obj));
+ push_data(kcar(obj));
+ kw_printf(" ");
+ middle_list = false;
+ } else { /* improper list is the same as shared ref */
+ kw_printf(" . ");
+ push_data(KNIL);
+ push_data(obj);
+ middle_list = false;
+ }
+ } else { /* if (middle_list) */
+ switch(ttype(obj)) {
+ case K_TPAIR: {
+ TValue mark = kget_mark(obj);
+ if (ttisboolean(mark)) { /* simple pair (only once) */
+ kw_printf("(");
+ push_data(kcdr(obj));
+ push_data(kcar(obj));
+ middle_list = false;
+ } else if (ivalue(mark) < 0) { /* pair with no assigned # */
+ /* TEMP: for now only fixints in shared refs */
+ assert(kw_shared_count >= 0);
+ kset_mark(obj, i2tv(kw_shared_count));
+ kw_printf("#%" PRId32 "=(", kw_shared_count);
+ kw_shared_count++;
+ push_data(kcdr(obj));
+ push_data(kcar(obj));
+ middle_list = false;
+ } else { /* string with an assigned number */
+ kw_printf("#%" PRId32 "#", ivalue(mark));
+ middle_list = true;
+ }
+ break;
+ }
+ case K_TSTRING: {
+ if (kstring_is_empty(obj)) {
+ kw_printf("\"\"");
+ } else {
+ TValue mark = kget_mark(obj);
+ if (ttisboolean(mark)) { /* simple string (only once) */
+ /* XXX: this doesn't correctly print strings with '\0's */
+ kw_printf("\"%s\"", kstring_buf(obj));
+ } else if (ivalue(mark) < 0) { /* string with no assigned # */
+ /* TEMP: for now only fixints in shared refs */
+ assert(kw_shared_count >= 0);
+ kset_mark(obj, i2tv(kw_shared_count));
+ /* XXX: this doesn't correctly print strings with '\0's */
+ kw_printf("#%" PRId32 "=\"%s\"", kw_shared_count,
+ kstring_buf(obj));
+ kw_shared_count++;
+ } else { /* string with an assigned number */
+ kw_printf("#%" PRId32 "#", ivalue(mark));
+ }
+ }
+ middle_list = true;
+ break;
+ }
+ default:
+ kwrite_simple(obj);
+ middle_list = true;
+ }
+ }
+ }
+ return;
+}
+
+/*
+** Writer Main function
+*/
+void kwrite(TValue obj)
+{
+ assert(data_is_empty());
+
+ kw_shared_count = 0;
+ kw_set_initial_marks(obj);
+
+ push_data(obj);
+ kwrite_fsm();
+ kw_flush();
+
+ kw_clear_marks(obj);
+
+ assert(data_is_empty());
+ return;
+}
+
+void knewline()
+{
+ kw_printf("\n");
+ kw_flush();
+ return;
+}
diff --git a/src/kwrite.h b/src/kwrite.h
@@ -0,0 +1,25 @@
+/*
+** kwrite.h
+** Writer for the Kernel Programming Language
+** See Copyright Notice in klisp.h
+*/
+
+#ifndef kwrite_h
+#define kwrite_h
+
+#include <stdio.h>
+
+#include "kobject.h"
+
+/*
+** Writer interface
+*/
+void kwrite_init();
+void kwrite(TValue);
+void knewline();
+
+/* TODO: move this to the global state */
+FILE *kwrite_file;
+
+#endif
+