klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit ec54b3d633a47512af7075ebf4ac4f9da677ca01
parent bb6d6a061ac7a25e316e567b51e3c764b78cd2af
Author: Andres Navarro <canavarro82@gmail.com>
Date:   Tue, 29 Nov 2011 20:43:04 -0300

Added hex escaped character constants (reading and writing)

Diffstat:
MTODO | 1-
Msrc/kinteger.h | 1+
Msrc/ktoken.c | 39+++++++++++++++++++++++++++++++++------
Msrc/kwrite.c | 32++++++++++++++++++++++++++++----
4 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/TODO b/TODO @@ -41,7 +41,6 @@ * reader ** symbol escapes (r7rs) ** string escapes (r7rs) -** char escapes (r7rs) ** syntax support for complex numbers (Kernel report) * other ** optional argument to member? (r7rs) diff --git a/src/kinteger.h b/src/kinteger.h @@ -94,6 +94,7 @@ void kbigint_invert_sign(klisp_State *K, TValue tv_bigint); /* read/write interface */ /* this works for bigints & fixints, returns true if ok */ +/* only positive numbers? */ bool kinteger_read(klisp_State *K, char *buf, int32_t base, TValue *out, char **end); diff --git a/src/ktoken.c b/src/ktoken.c @@ -740,7 +740,7 @@ struct kspecial_token { { "#\\nul", KNULL_ } /* same as r7rs \NULL */ }; -#define MAX_EXT_REP_SIZE 64 /* all special tokens have much than 64 chars */ +#define MAX_EXT_REP_SIZE 64 /* all special tokens have less than 64 chars */ TValue ktok_read_special(klisp_State *K) { @@ -812,7 +812,7 @@ TValue ktok_read_special(klisp_State *K) *str2 = tolower(*str2); /* REFACTOR: move this to a new function */ - /* then check the known constants */ + /* then check the known constants (including named characters) */ size_t stok_size = sizeof(kspecial_tokens) / sizeof(struct kspecial_token); size_t i; @@ -826,15 +826,41 @@ TValue ktok_read_special(klisp_State *K) } } + /* It wasn't a special token or named char, but it can still be a srfi-38 + token or a character escape */ + if (buf[1] == '\\') { /* this is to have a meaningful error msg */ - ktok_error(K, "Unrecognized character name"); - /* avoid warning */ - return KINERT; + if (buf[2] != 'x') {/* this will also accept 'X' */ + ktok_error(K, "Unrecognized character name"); + return KINERT; + } + /* We already checked that length != 3, so there's at least on + more char */ + TValue n; + char *end; + + /* test for - and + explicitly, becayse kinteger read would parse them + without complaining (it will also parse spaces, but we read until + delimiter so... */ + if (buf[3] == '-' || buf[3] == '+' || + !kinteger_read(K, buf+3, 16, &n, &end) || + end - buf != buf_len) { + ktok_error(K, "Bad char in hex escaped character constant"); + return KINERT; + } else if (!ttisfixint(n) || ivalue(n) > 127) { + ktok_error(K, "Non ASCII char found in hex escaped character constant"); + /* avoid warning */ + return KINERT; + } else { + /* all ok, we just clean up and return the char */ + ks_tbclear(K); + return ch2tv(ivalue(n)); + } } /* REFACTOR: move this to a new function */ /* It was not a special token so it must be either a srfi-38 style - token, or a char constant or a number. srfi-38 tokens are a '#' a + token, or a number. srfi-38 tokens are a '#' a decimal number and end with a '=' or a '#' */ if (buf_len > 2 && ktok_is_numeric(buf[1])) { /* NOTE: it's important to check is_numeric to avoid problems with @@ -853,6 +879,7 @@ TValue ktok_read_special(klisp_State *K) char *end; /* 10 is the radix for srfi-38 tokens, buf+1 to jump over the '#', end+1 to count the last char */ + /* N.B. buf+1 can't be + or -, we already tested numeric before */ if (!kinteger_read(K, buf+1, 10, &n, &end) || end+1 - buf != buf_len) { ktok_error(K, "Bad char in srfi-38 token"); return KINERT; diff --git a/src/kwrite.c b/src/kwrite.c @@ -368,7 +368,7 @@ void kwrite_scalar(klisp_State *K, TValue obj) if (K->write_displayp) { kw_printf(K, "%c", chvalue(obj)); } else { - char ch_buf[4]; + char ch_buf[16]; /* should be able to contain hex escapes */ char ch = chvalue(obj); char *ch_ptr; @@ -403,11 +403,35 @@ void kwrite_scalar(klisp_State *K, TValue obj) case '\v': ch_ptr = "vtab"; break; - default: - ch_buf[0] = ch; - ch_buf[1] = '\0'; + default: { + int i = 0; + if (ch >= 32 && ch < 127) { + /* printable ASCII range */ + /* (del(127) and space(32) were already considered, + but it's clearer this way) */ + ch_buf[i++] = ch; + } else { + /* use an hex escape for non printing, unnamed chars */ + ch_buf[i++] = 'x'; + int res = snprintf(ch_buf+i, sizeof(ch_buf) - i, + "%x", ch); + if (res < 0) { + /* shouldn't happen, but for the sake of + completeness... */ + TValue port = K->curr_port; + if (ttisfport(port)) { + FILE *file = kfport_file(port); + clearerr(file); /* clear error for next time */ + } + kwrite_error(K, "error writing"); + return; + } + i += res; /* res doesn't include the '\0' */ + } + ch_buf[i++] = '\0'; ch_ptr = ch_buf; } + } kw_printf(K, "#\\%s", ch_ptr); } break;