commit ec54b3d633a47512af7075ebf4ac4f9da677ca01
parent bb6d6a061ac7a25e316e567b51e3c764b78cd2af
Author: Andres Navarro <canavarro82@gmail.com>
Date: Tue, 29 Nov 2011 20:43:04 -0300
Added hex escaped character constants (reading and writing)
Diffstat:
4 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/TODO b/TODO
@@ -41,7 +41,6 @@
* reader
** symbol escapes (r7rs)
** string escapes (r7rs)
-** char escapes (r7rs)
** syntax support for complex numbers (Kernel report)
* other
** optional argument to member? (r7rs)
diff --git a/src/kinteger.h b/src/kinteger.h
@@ -94,6 +94,7 @@ void kbigint_invert_sign(klisp_State *K, TValue tv_bigint);
/* read/write interface */
/* this works for bigints & fixints, returns true if ok */
+/* only positive numbers? */
bool kinteger_read(klisp_State *K, char *buf, int32_t base, TValue *out,
char **end);
diff --git a/src/ktoken.c b/src/ktoken.c
@@ -740,7 +740,7 @@ struct kspecial_token {
{ "#\\nul", KNULL_ } /* same as r7rs \NULL */
};
-#define MAX_EXT_REP_SIZE 64 /* all special tokens have much than 64 chars */
+#define MAX_EXT_REP_SIZE 64 /* all special tokens have less than 64 chars */
TValue ktok_read_special(klisp_State *K)
{
@@ -812,7 +812,7 @@ TValue ktok_read_special(klisp_State *K)
*str2 = tolower(*str2);
/* REFACTOR: move this to a new function */
- /* then check the known constants */
+ /* then check the known constants (including named characters) */
size_t stok_size = sizeof(kspecial_tokens) /
sizeof(struct kspecial_token);
size_t i;
@@ -826,15 +826,41 @@ TValue ktok_read_special(klisp_State *K)
}
}
+ /* It wasn't a special token or named char, but it can still be a srfi-38
+ token or a character escape */
+
if (buf[1] == '\\') { /* this is to have a meaningful error msg */
- ktok_error(K, "Unrecognized character name");
- /* avoid warning */
- return KINERT;
+ if (buf[2] != 'x') {/* this will also accept 'X' */
+ ktok_error(K, "Unrecognized character name");
+ return KINERT;
+ }
+ /* We already checked that length != 3, so there's at least on
+ more char */
+ TValue n;
+ char *end;
+
+ /* test for - and + explicitly, becayse kinteger read would parse them
+ without complaining (it will also parse spaces, but we read until
+ delimiter so... */
+ if (buf[3] == '-' || buf[3] == '+' ||
+ !kinteger_read(K, buf+3, 16, &n, &end) ||
+ end - buf != buf_len) {
+ ktok_error(K, "Bad char in hex escaped character constant");
+ return KINERT;
+ } else if (!ttisfixint(n) || ivalue(n) > 127) {
+ ktok_error(K, "Non ASCII char found in hex escaped character constant");
+ /* avoid warning */
+ return KINERT;
+ } else {
+ /* all ok, we just clean up and return the char */
+ ks_tbclear(K);
+ return ch2tv(ivalue(n));
+ }
}
/* REFACTOR: move this to a new function */
/* It was not a special token so it must be either a srfi-38 style
- token, or a char constant or a number. srfi-38 tokens are a '#' a
+ token, or a number. srfi-38 tokens are a '#' a
decimal number and end with a '=' or a '#' */
if (buf_len > 2 && ktok_is_numeric(buf[1])) {
/* NOTE: it's important to check is_numeric to avoid problems with
@@ -853,6 +879,7 @@ TValue ktok_read_special(klisp_State *K)
char *end;
/* 10 is the radix for srfi-38 tokens, buf+1 to jump over the '#',
end+1 to count the last char */
+ /* N.B. buf+1 can't be + or -, we already tested numeric before */
if (!kinteger_read(K, buf+1, 10, &n, &end) || end+1 - buf != buf_len) {
ktok_error(K, "Bad char in srfi-38 token");
return KINERT;
diff --git a/src/kwrite.c b/src/kwrite.c
@@ -368,7 +368,7 @@ void kwrite_scalar(klisp_State *K, TValue obj)
if (K->write_displayp) {
kw_printf(K, "%c", chvalue(obj));
} else {
- char ch_buf[4];
+ char ch_buf[16]; /* should be able to contain hex escapes */
char ch = chvalue(obj);
char *ch_ptr;
@@ -403,11 +403,35 @@ void kwrite_scalar(klisp_State *K, TValue obj)
case '\v':
ch_ptr = "vtab";
break;
- default:
- ch_buf[0] = ch;
- ch_buf[1] = '\0';
+ default: {
+ int i = 0;
+ if (ch >= 32 && ch < 127) {
+ /* printable ASCII range */
+ /* (del(127) and space(32) were already considered,
+ but it's clearer this way) */
+ ch_buf[i++] = ch;
+ } else {
+ /* use an hex escape for non printing, unnamed chars */
+ ch_buf[i++] = 'x';
+ int res = snprintf(ch_buf+i, sizeof(ch_buf) - i,
+ "%x", ch);
+ if (res < 0) {
+ /* shouldn't happen, but for the sake of
+ completeness... */
+ TValue port = K->curr_port;
+ if (ttisfport(port)) {
+ FILE *file = kfport_file(port);
+ clearerr(file); /* clear error for next time */
+ }
+ kwrite_error(K, "error writing");
+ return;
+ }
+ i += res; /* res doesn't include the '\0' */
+ }
+ ch_buf[i++] = '\0';
ch_ptr = ch_buf;
}
+ }
kw_printf(K, "#\\%s", ch_ptr);
}
break;