klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit 321b066952b5a857461b2b68a06b179d3923b260
parent 0d4bbc12d1a40a7fd001712b81d7dd49cb84df03
Author: Andres Navarro <canavarro82@gmail.com>
Date:   Sun, 20 Feb 2011 05:55:46 -0300

Added a writer featuring srfi-38 style shared defs/refs.

Diffstat:
Msrc/Makefile | 4+++-
Msrc/klisp.c | 10+++++++---
Msrc/kobject.h | 39+++++++++++++++++++++++++++++++++------
Msrc/kpair.c | 1+
Msrc/kpair.h | 6+++---
Msrc/kread.h | 2++
Msrc/kstring.c | 12+++++++++++-
Msrc/kstring.h | 6++++++
Msrc/ksymbol.h | 2++
Msrc/ktoken.c | 11+++++++----
Msrc/ktoken.h | 2++
Asrc/kwrite.c | 297+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/kwrite.h | 25+++++++++++++++++++++++++
13 files changed, 399 insertions(+), 18 deletions(-)

diff --git a/src/Makefile b/src/Makefile @@ -7,7 +7,8 @@ MYCFLAGS= MYLDFLAGS= MYLIBS= -CORE_O= kobject.o ktoken.o kpair.o kstring.o ksymbol.o kread.o +CORE_O= kobject.o ktoken.o kpair.o kstring.o ksymbol.o kread.o \ + kwrite.o KRN_T= klisp KRN_O= klisp.o @@ -39,3 +40,4 @@ kstring.o: kstring.c kstring.h kobject.h # XXX: kpair.h because of use of list as symbol table ksymbol.o: ksymbol.c ksymbol.h kobject.h kpair.h kread.o: kread.c kread.h kobject.h ktoken.h kpair.h +kwrite.o: kwrite.c kwrite.h kobject.h kpair.h kstring.h diff --git a/src/klisp.c b/src/klisp.c @@ -11,23 +11,27 @@ #include "kobject.h" #include "kread.h" +#include "kwrite.h" int main(int argc, char *argv[]) { /* - ** Simple read loop + ** Simple read/write loop */ - printf("Read Type Test\n"); + printf("Read/Write Test\n"); kread_file = stdin; kread_filename = "*STDIN*"; + kwrite_file = stdout; kread_init(); + kwrite_init(); TValue obj = KNIL; while(!ttiseof(obj)) { obj = kread(); - printf("\nRead Object Type: %s\n", ttname(obj)); + kwrite(obj); + knewline(); } return 0; diff --git a/src/kobject.h b/src/kobject.h @@ -116,7 +116,7 @@ typedef struct __attribute__ ((__packed__)) GCheader { #define K_TSTRING 31 #define K_TSYMBOL 32 -#define K_MAKE_VTAG(t) (K_TAG_TAGGED | t) +#define K_MAKE_VTAG(t) (K_TAG_TAGGED | (t)) /* ** TODO: @@ -144,7 +144,7 @@ typedef struct __attribute__ ((__packed__)) GCheader { */ /* NOTE: This is intended for use in switch statements */ -#define ttype(o) ({ TValue o_ = o; \ +#define ttype(o) ({ TValue o_ = (o); \ ttisdouble(o_)? K_TDOUBLE : ttype_(o_); }) /* This is intended for internal use below. DON'T USE OUTSIDE THIS FILE */ @@ -195,6 +195,7 @@ typedef struct __attribute__ ((__packed__)) InnerTV { typedef __attribute__((aligned (8))) union { double d; InnerTV tv; + int64_t raw; } TValue; /* @@ -211,6 +212,7 @@ typedef struct __attribute__ ((__packed__)) { /* XXX: Symbol should probably contain a String instead of a char buf */ typedef struct __attribute__ ((__packed__)) { CommonHeader; + TValue mark; /* for cycle/sharing aware algorithms */ uint32_t size; char b[]; } Symbol; @@ -225,16 +227,26 @@ typedef struct __attribute__ ((__packed__)) { */ typedef struct __attribute__ ((__packed__)) { CommonHeader; + TValue mark; /* for cycle/sharing aware algorithms */ uint32_t size; char b[]; // buffer } String; /* +** Common header for markable objects +*/ +typedef struct __attribute__ ((__packed__)) { + CommonHeader; + TValue mark; +} MGCheader; + +/* ** Union of all Kernel heap-allocated values */ /* LUA NOTE: In Lua the corresponding union is in lstate.h */ union GCObject { GCheader gch; + MGCheader mgch; Pair pair; Symbol sym; String str; @@ -274,9 +286,9 @@ const TValue kepinf; const TValue keminf; /* Macros to create TValues of non-heap allocated types (for initializers) */ -#define ch2tv_(ch_) {.tv = {.t = K_TAG_CHAR, .v = { .ch = ch_ }}} -#define i2tv_(i_) {.tv = {.t = K_TAG_FIXINT, .v = { .i = i_ }}} -#define b2tv_(b_) {.tv = {.t = K_TAG_BOOLEAN, .v = { .b = b_ }}} +#define ch2tv_(ch_) {.tv = {.t = K_TAG_CHAR, .v = { .ch = (ch_) }}} +#define i2tv_(i_) {.tv = {.t = K_TAG_FIXINT, .v = { .i = (i_) }}} +#define b2tv_(b_) {.tv = {.t = K_TAG_BOOLEAN, .v = { .b = (b_) }}} /* Macros to create TValues of non-heap allocated types */ #define ch2tv(ch_) ((TValue) ch2tv_(ch_)) @@ -286,7 +298,7 @@ const TValue keminf; /* Macros to convert a GCObject * into a tagged value */ /* TODO: add assertions */ /* LUA NOTE: the corresponding defines are in lstate.h */ -#define gc2tv(t_, o_) ((TValue) {.tv = {.t = t_, \ +#define gc2tv(t_, o_) ((TValue) {.tv = {.t = (t_), \ .v = { .gc = obj2gco(o_)}}}) #define gc2pair(o_) (gc2tv(K_TAG_PAIR, o_)) #define gc2str(o_) (gc2tv(K_TAG_STRING, o_)) @@ -297,6 +309,8 @@ const TValue keminf; #define tv2str(v_) ((String *) gcvalue(v_)) #define tv2sym(v_) ((Symbol *) gcvalue(v_)) +#define tv2mgch(v_) ((MGCheader *) gcvalue(v_)) + /* Macro to convert any Kernel object into a GCObject */ #define obj2gco(v_) ((GCObject *) (v_)) @@ -312,4 +326,17 @@ const TValue keminf; extern char *ktv_names[]; +/* Macros to handle marks */ +/* NOTE: this only works in markable objects */ +#define kget_mark(p_) (tv2mgch(p_)->mark) +#define kset_mark(p_, m_) (kget_mark(p_) = (m_)) +/* simple boolean #t mark */ +#define kmark(p_) (kset_mark(p_, KTRUE)) +#define kunmark(p_) (kset_mark(p_, KFALSE)) +#define kis_marked(p_) (!kis_unmarked(p_)) +#define kis_unmarked(p_) (tv_equal(kget_mark(p_), KFALSE)) + +/* Macro to test the most basic equality on TValues */ +#define tv_equal(tv1_, tv2_) ((tv1_).raw == (tv2_).raw) + #endif diff --git a/src/kpair.c b/src/kpair.c @@ -20,6 +20,7 @@ TValue kcons(TValue car, TValue cdr) new_pair->next = NULL; new_pair->gct = 0; new_pair->tt = K_TPAIR; + new_pair->mark = KFALSE; new_pair->car = car; new_pair->cdr = cdr; diff --git a/src/kpair.h b/src/kpair.h @@ -14,8 +14,8 @@ #define kcar(p_) (tv2pair(p_)->car) #define kcdr(p_) (tv2pair(p_)->cdr) -#define kset_car(p_, v_) (kcar(p_) = v_) -#define kset_cdr(p_, v_) (kcdr(p_) = v_) +#define kset_car(p_, v_) (kcar(p_) = (v_)) +#define kset_cdr(p_, v_) (kcdr(p_) = (v_)) #define kdummy_cons() (kcons(KNIL, KNIL)) @@ -23,6 +23,6 @@ TValue kcons(TValue, TValue); #define kget_source_info(p_) (tv2pair(p_)->si) -#define kset_source_info(p_, si_) (kget_source_info(p_) = si_) +#define kset_source_info(p_, si_) (kget_source_info(p_) = (si_)) #endif diff --git a/src/kread.h b/src/kread.h @@ -7,6 +7,8 @@ #ifndef kread_h #define kread_h +#include <stdio.h> + #include "kobject.h" /* diff --git a/src/kstring.c b/src/kstring.c @@ -13,15 +13,25 @@ #include "kstring.h" #include "kobject.h" +/* TEMP: for now initialized in ktoken.c */ +TValue kempty_string = KINERT_; + /* TODO: Out of memory errors */ /* TEMP: for now all strings are mutable */ TValue kstring_new(const char *buf, uint32_t size) { - String *new_str = malloc(sizeof(String) + size + 1); + String *new_str; + + if (size == 0 && ttisstring(kempty_string)) { + return kempty_string; + } + + new_str = malloc(sizeof(String) + size + 1); new_str->next = NULL; new_str->gct = 0; new_str->tt = K_TSTRING; + new_str->mark = KFALSE; new_str->size = size; /* NOTE: there can be embedded '\0's in a string */ memcpy(new_str->b, buf, size); diff --git a/src/kstring.h b/src/kstring.h @@ -11,5 +11,11 @@ /* TEMP: for now all strings are mutable */ TValue kstring_new(const char *, uint32_t); +#define kstring_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b) + +/* The only empty string */ +/* TEMP: for now initialized in ktoken.c */ +TValue kempty_string; +#define kstring_is_empty(tv_) (tv_equal(tv_, kempty_string)) #endif diff --git a/src/ksymbol.h b/src/ksymbol.h @@ -16,4 +16,6 @@ TValue ksymbol_table; /* TEMP: for now all symbols are interned */ TValue ksymbol_new(const char *); +#define ksymbol_buf(tv_) (((Symbol *) ((tv_).tv.v.gc))->b) + #endif diff --git a/src/ktoken.c b/src/ktoken.c @@ -47,15 +47,15 @@ void kcharset_empty(kcharset); void kcharset_fill(kcharset, char *); void kcharset_union(kcharset, kcharset); #define kcharset_contains(kch_, ch_) \ - ({ unsigned char ch__ = (unsigned char) ch_; \ + ({ unsigned char ch__ = (unsigned char) (ch_); \ kch_[KCHS_OCTANT(ch__)] & KCHS_BIT(ch__); }) /* ** Char set contains macro interface */ -#define KCHS_OCTANT(ch) (ch >> 5) -#define KCHS_BIT(ch) (1 << (ch & 0x1f)) +#define KCHS_OCTANT(ch) ((ch) >> 5) +#define KCHS_BIT(ch) (1 << ((ch) & 0x1f)) void kcharset_empty(kcharset chs) { @@ -95,7 +95,7 @@ kcharset ktok_delimiter, ktok_extended, ktok_subsequent; /* TODO: add hex digits */ #define ktok_digit_value(ch_) (ch_ - '0') #define ktok_is_whitespace(chi_) kcharset_contains(ktok_whitespace, chi_) -#define ktok_is_delimiter(chi_) (chi == EOF || \ +#define ktok_is_delimiter(chi_) ((chi_) == EOF || \ kcharset_contains(ktok_delimiter, chi_)) #define ktok_is_subsequent(chi_) kcharset_contains(ktok_subsequent, chi_) @@ -127,6 +127,9 @@ bool ktok_seen_eof; void ktok_init() { + /* TEMP: for now initialize empty string here */ + kempty_string = kstring_new("", 0); + assert(ktok_file != NULL); assert(ktok_source_info.filename != NULL); diff --git a/src/ktoken.h b/src/ktoken.h @@ -9,6 +9,8 @@ #include "kobject.h" +#include <stdio.h> + /* ** Tokenizer interface */ diff --git a/src/kwrite.c b/src/kwrite.c @@ -0,0 +1,297 @@ +/* +** kwrite.c +** Writer for the Kernel Programming Language +** See Copyright Notice in klisp.h +*/ + +#include <stdio.h> +/* XXX for malloc */ +#include <stdlib.h> +/* TODO: use a generalized alloc function */ +/* TEMP: for out of mem errors */ +#include <assert.h> +#include <inttypes.h> + +#include "kwrite.h" +#include "kobject.h" +#include "kpair.h" +#include "kstring.h" +#include "ksymbol.h" + +/* +** TODO: +** +** - Write a print function for strings that works on strings +** with embedded '\0's +** +*/ + +/* TODO: move to the global state */ +FILE *kwrite_file = NULL; +/* TEMP: for now use fixints for shared refs */ +int32_t kw_shared_count; + +/* +** Stack for the write FSM +** +*/ + +/* TODO: move to the global state */ +TValue *kw_dstack; +int kw_dstack_size; +int kw_dstack_i; + +/* TEMP: for now stacks are fixed size, use asserts to check */ +#define STACK_INIT_SIZE 1024 + +#define push_data(data_) ({ assert(kw_dstack_i < kw_dstack_size); \ + kw_dstack[kw_dstack_i++] = data_; }) +#define pop_data() (--kw_dstack_i) +#define get_data() (kw_dstack[kw_dstack_i-1]) +#define data_is_empty() (kw_dstack_i == 0) +#define clear_data() (kw_dstack_i = 0) + +/* macro for printing */ +#define kw_printf(...) fprintf(kwrite_file, __VA_ARGS__) +#define kw_flush() fflush(kwrite_file) + +/* +** Writer initialization +*/ +void kwrite_init() +{ + assert(kwrite_file != NULL); + + /* XXX: for now use a fixed size for stack */ + kw_dstack_size = STACK_INIT_SIZE; + clear_data(); + kw_dstack = malloc(STACK_INIT_SIZE*sizeof(TValue)); + assert(kw_dstack != NULL); +} + +/* +** Mark initialization and clearing +*/ +void kw_clear_marks(TValue root) +{ + push_data(root); + + while(!data_is_empty()) { + TValue obj = get_data(); + pop_data(); + + if (ttispair(obj)) { + if (kis_marked(obj)) { + kunmark(obj); + push_data(kcdr(obj)); + push_data(kcar(obj)); + } + } else if (ttisstring(obj) && (kis_marked(obj))) { + kunmark(obj); + } + } +} + +/* +** NOTE: +** - The objects that appear more than once are marked with a -1. +** that way, the first time they are found in write, a shared def +** token will be generated and the mark updated with the number; +** from then on, the writer will generate a shared ref each time +** it appears again. +** - The objects that appear only once are marked with a #t to +** find repetitions and to allow unmarking after write +*/ + +void kw_set_initial_marks(TValue root) +{ + push_data(root); + + while(!data_is_empty()) { + TValue obj = get_data(); + pop_data(); + + if (ttispair(obj)) { + if (kis_unmarked(obj)) { + kmark(obj); /* this mark just means visited */ + push_data(kcdr(obj)); + push_data(kcar(obj)); + } else { + /* this mark means it will need a ref number */ + kset_mark(obj, i2tv(-1)); + } + } else if (ttisstring(obj)) { + if (kis_unmarked(obj)) { + kmark(obj); /* this mark just means visited */ + } else { + /* this mark means it will need a ref number */ + kset_mark(obj, i2tv(-1)); + } + } + /* all other types of object don't matter */ + } +} + +/* +** Writes all values except strings and pairs +*/ +void kwrite_simple(TValue obj) +{ + switch(ttype(obj)) { + case K_TSTRING: + /* this shouldn't happen */ + assert(0); + case K_TEINF: + kw_printf("#e%cinfinity", tv_equal(obj, KEPINF)? '+' : '-'); + break; + case K_TFIXINT: + kw_printf("%" PRId32, ivalue(obj)); + break; + case K_TNIL: + kw_printf("()"); + break; + case K_TCHAR: { + char ch_buf[4]; + char ch = chvalue(obj); + char *ch_ptr; + + if (ch == '\n') { + ch_ptr = "newline"; + } else if (ch == ' ') { + ch_ptr = "space"; + } else { + ch_buf[0] = ch; + ch_buf[1] = '\0'; + ch_ptr = ch_buf; + } + kw_printf("#\\%s", ch_ptr); + break; + } + case K_TBOOLEAN: + kw_printf("#%c", bvalue(obj)? 't' : 'f'); + break; + case K_TSYMBOL: + /* TEMP: access symbol structure directly */ + /* TEMP: for now assume all symbols have external representations */ + kw_printf("%s", ksymbol_buf(obj)); + break; + case K_TINERT: + kw_printf("#inert"); + break; + case K_TIGNORE: + kw_printf("#ignore"); + break; + case K_TEOF: + kw_printf("[eof]"); + break; + default: + /* shouldn't happen */ + assert(0); + } +} + + +void kwrite_fsm() +{ + bool middle_list = false; + while (!data_is_empty()) { + TValue obj = get_data(); + pop_data(); + + if (middle_list) { + if (ttisnil(obj)) { /* end of list */ + kw_printf(")"); + /* middle_list = true; */ + } else if (ttispair(obj) && ttisboolean(kget_mark(obj))) { + push_data(kcdr(obj)); + push_data(kcar(obj)); + kw_printf(" "); + middle_list = false; + } else { /* improper list is the same as shared ref */ + kw_printf(" . "); + push_data(KNIL); + push_data(obj); + middle_list = false; + } + } else { /* if (middle_list) */ + switch(ttype(obj)) { + case K_TPAIR: { + TValue mark = kget_mark(obj); + if (ttisboolean(mark)) { /* simple pair (only once) */ + kw_printf("("); + push_data(kcdr(obj)); + push_data(kcar(obj)); + middle_list = false; + } else if (ivalue(mark) < 0) { /* pair with no assigned # */ + /* TEMP: for now only fixints in shared refs */ + assert(kw_shared_count >= 0); + kset_mark(obj, i2tv(kw_shared_count)); + kw_printf("#%" PRId32 "=(", kw_shared_count); + kw_shared_count++; + push_data(kcdr(obj)); + push_data(kcar(obj)); + middle_list = false; + } else { /* string with an assigned number */ + kw_printf("#%" PRId32 "#", ivalue(mark)); + middle_list = true; + } + break; + } + case K_TSTRING: { + if (kstring_is_empty(obj)) { + kw_printf("\"\""); + } else { + TValue mark = kget_mark(obj); + if (ttisboolean(mark)) { /* simple string (only once) */ + /* XXX: this doesn't correctly print strings with '\0's */ + kw_printf("\"%s\"", kstring_buf(obj)); + } else if (ivalue(mark) < 0) { /* string with no assigned # */ + /* TEMP: for now only fixints in shared refs */ + assert(kw_shared_count >= 0); + kset_mark(obj, i2tv(kw_shared_count)); + /* XXX: this doesn't correctly print strings with '\0's */ + kw_printf("#%" PRId32 "=\"%s\"", kw_shared_count, + kstring_buf(obj)); + kw_shared_count++; + } else { /* string with an assigned number */ + kw_printf("#%" PRId32 "#", ivalue(mark)); + } + } + middle_list = true; + break; + } + default: + kwrite_simple(obj); + middle_list = true; + } + } + } + return; +} + +/* +** Writer Main function +*/ +void kwrite(TValue obj) +{ + assert(data_is_empty()); + + kw_shared_count = 0; + kw_set_initial_marks(obj); + + push_data(obj); + kwrite_fsm(); + kw_flush(); + + kw_clear_marks(obj); + + assert(data_is_empty()); + return; +} + +void knewline() +{ + kw_printf("\n"); + kw_flush(); + return; +} diff --git a/src/kwrite.h b/src/kwrite.h @@ -0,0 +1,25 @@ +/* +** kwrite.h +** Writer for the Kernel Programming Language +** See Copyright Notice in klisp.h +*/ + +#ifndef kwrite_h +#define kwrite_h + +#include <stdio.h> + +#include "kobject.h" + +/* +** Writer interface +*/ +void kwrite_init(); +void kwrite(TValue); +void knewline(); + +/* TODO: move this to the global state */ +FILE *kwrite_file; + +#endif +