klisp

an open source interpreter for the Kernel Programming Language.
git clone http://git.hanabi.in/repos/klisp.git
Log | Files | Refs | README

commit 6283d55edd30579b0851420ec5d208d7fc46c3a0
parent 0421fd85d63a911b730df305eb2fcc119697fa2b
Author: Oto Havle <havleoto@gmail.com>
Date:   Fri, 28 Oct 2011 12:15:56 +0200

Merged recent changes from original repository.

Diffstat:
Msrc/kgports.c | 4----
Msrc/kgstrings.c | 6++++--
Msrc/kscript.c | 24------------------------
Msrc/kscript.h | 3---
Msrc/ktoken.c | 178+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Msrc/tests/strings.k | 11++++++++++-
Msrc/tests/test-all.k | 2+-
7 files changed, 120 insertions(+), 108 deletions(-)

diff --git a/src/kgports.c b/src/kgports.c @@ -331,10 +331,6 @@ void call_with_file(klisp_State *K, TValue *xparams, TValue ptree, /* GC: assume port is rooted */ TValue read_all_expr(klisp_State *K, TValue port) { - /* support unix script directive #! */ - int line_count = kscript_eat_directive(kport_file(port)); - kport_line(port) += line_count; - /* GC: root dummy and obj */ TValue tail = kget_dummy1(K); TValue obj = KINERT; diff --git a/src/kgstrings.c b/src/kgstrings.c @@ -239,7 +239,7 @@ bool kstring_ci_gep(TValue str1, TValue str2) } /* 13.2.5? substring */ -/* TEMP: at least for now this always returns mutable strings */ +/* Note: This will return an mutable string iff the source string is mutable */ void substring(klisp_State *K, TValue *xparams, TValue ptree, TValue denv) { UNUSED(xparams); @@ -276,8 +276,10 @@ void substring(klisp_State *K, TValue *xparams, TValue ptree, TValue denv) /* the if isn't strictly necessary but it's clearer this way */ if (size == 0) { new_str = K->empty_string; - } else { + } else if (kstring_mutablep(str)) { new_str = kstring_new_bs(K, kstring_buf(str)+start, size); + } else { + new_str = kstring_new_bs_imm(K, kstring_buf(str)+start, size); } kapply_cc(K, new_str); } diff --git a/src/kscript.c b/src/kscript.c @@ -242,27 +242,3 @@ void kinit_script(klisp_State *K, int argc, char *argv[]) #undef RSI #undef G } - -/* skips the unix script directive (#!), if present. - returns number of lines skipped */ -int kscript_eat_directive(FILE *fr) -{ - static const char pattern[] = "#! "; - int c, n = 0; - - while (pattern[n] != '\0' && (c = getc(fr), c == pattern[n])) - n++; - - if (pattern[n] == '\0') { - while (c = getc(fr), c != EOF && c != '\n') - ; - return 1; - } else { - ungetc(c, fr); - /* XXX/Temp notice that the standard doesn't guarantee that more than one - ungetc in a row will be honored. Andres Navarro */ - while (n > 0) - ungetc(pattern[--n], fr); - return 0; - } -} diff --git a/src/kscript.h b/src/kscript.h @@ -18,9 +18,6 @@ void kinit_script(klisp_State *K, int argc, char *argv[]); void do_script_exit(klisp_State *K, TValue *xparams, TValue obj); void do_script_error(klisp_State *K, TValue *xparams, TValue obj); -/* unix script directive handling */ -int kscript_eat_directive(FILE *fr); - /* default exit code in case of error according to SRFI-22 */ #define KSCRIPT_DEFAULT_ERROR_EXIT_CODE 70 diff --git a/src/ktoken.c b/src/ktoken.c @@ -238,7 +238,8 @@ void ktok_set_source_info(klisp_State *K, TValue filename, int32_t line, /* ** ktok_read_token() helpers */ -void ktok_ignore_whitespace_and_comments(klisp_State *K); +void ktok_ignore_whitespace(klisp_State *K); +void ktok_ignore_single_line_comment(klisp_State *K); bool ktok_check_delimiter(klisp_State *K); TValue ktok_read_string(klisp_State *K); TValue ktok_read_special(klisp_State *K); @@ -256,82 +257,92 @@ TValue ktok_read_token(klisp_State *K) { assert(ks_tbisempty(K)); - ktok_ignore_whitespace_and_comments(K); - /* - ** NOTE: We jumped over all whitespace - ** so either the next token starts here or eof was reached, - ** in any case we save the location of the port - */ + while(true) { + ktok_ignore_whitespace(K); - /* save the source info of the start of the next token */ - ktok_save_source_info(K); + /* save the source info in case a token starts here */ + ktok_save_source_info(K); - int chi = ktok_peekc(K); + int chi = ktok_peekc(K); - switch(chi) { - case EOF: - ktok_getc(K); - return KEOF; - case '(': - ktok_getc(K); - return K->ktok_lparen; - case ')': - ktok_getc(K); - return K->ktok_rparen; - case '.': - ktok_getc(K); - if (ktok_check_delimiter(K)) - return K->ktok_dot; - else { - ktok_error(K, "no delimiter found after dot"); + switch(chi) { + case EOF: + ktok_getc(K); + return KEOF; + case ';': + ktok_ignore_single_line_comment(K); + continue; + case '(': + ktok_getc(K); + return K->ktok_lparen; + case ')': + ktok_getc(K); + return K->ktok_rparen; + case '.': + ktok_getc(K); + if (ktok_check_delimiter(K)) + return K->ktok_dot; + else { + ktok_error(K, "no delimiter found after dot"); + /* avoid warning */ + return KINERT; + } + case '"': + return ktok_read_string(K); +/* TODO use read_until_delimiter in all these cases */ + case '#': { + ktok_getc(K); + chi = ktok_peekc(K); + if ((chi != EOF) && (char) chi == '!') { + /* this handles the #! style script header too! */ + ktok_ignore_single_line_comment(K); + continue; + } else { + /* also handles EOF case */ + return ktok_read_special(K); + } + } + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + /* positive number, no exactness or radix indicator */ + int32_t buf_len = ktok_read_until_delimiter(K); + char *buf = ks_tbget_buffer(K); + /* read number should free the tbbuffer */ + return ktok_read_number(K, buf, buf_len, false, false, false, 10); + } + case '+': case '-': + /* signed number, no exactness or radix indicator */ + return ktok_read_maybe_signed_numeric(K); + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '!': case '$': case '%': case '&': case '*': case '/': case ':': + case '<': case '=': case '>': case '?': case '@': case '^': case '_': + case '~': + /* + ** NOTE: the cases for '+', '-', '.' and numbers were already + ** considered so identifier-subsequent is used instead of + ** identifier-first-char (in the cases above) + */ + return ktok_read_identifier(K); + default: + ktok_getc(K); + ktok_error(K, "unrecognized token starting char"); /* avoid warning */ return KINERT; } - case '"': - return ktok_read_string(K); -/* TODO use read_until_delimiter in all these cases */ - case '#': - return ktok_read_special(K); - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - /* positive number, no exactness or radix indicator */ - int32_t buf_len = ktok_read_until_delimiter(K); - char *buf = ks_tbget_buffer(K); - /* read number should free the tbbuffer */ - return ktok_read_number(K, buf, buf_len, false, false, false, 10); - } - case '+': case '-': - /* signed number, no exactness or radix indicator */ - return ktok_read_maybe_signed_numeric(K); - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': - case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': - case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': - case 'V': case 'W': case 'X': case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': - case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': - case 'v': case 'w': case 'x': case 'y': case 'z': - case '!': case '$': case '%': case '&': case '*': case '/': case ':': - case '<': case '=': case '>': case '?': case '@': case '^': case '_': - case '~': - /* - ** NOTE: the cases for '+', '-', '.' and numbers were already - ** considered so identifier-subsequent is used instead of - ** identifier-first-char (in the cases above) - */ - return ktok_read_identifier(K); - default: - ktok_getc(K); - ktok_error(K, "unrecognized token starting char"); - /* avoid warning */ - return KINERT; } } /* ** Comments and Whitespace */ -void ktok_ignore_comment(klisp_State *K) +void ktok_ignore_single_line_comment(klisp_State *K) { int chi; do { @@ -339,28 +350,48 @@ void ktok_ignore_comment(klisp_State *K) } while (chi != EOF && chi != '\n'); } +void ktok_ignore_whitespace(klisp_State *K) +{ + /* NOTE: if it's not whitespace do nothing (even on eof) */ + while(true) { + int chi = ktok_peekc(K); + + if (chi == EOF) { + return; + } else { + char ch = (char) chi; + if (ktok_is_whitespace(ch)) { + ktok_getc(K); + } else { + return; + } + } + } +} + +/* XXX temp for repl */ void ktok_ignore_whitespace_and_comments(klisp_State *K) { - /* NOTE: if it's not a whitespace or comment do nothing (even on eof) */ - bool end = false; - while(!end) { + /* NOTE: if it's not whitespace do nothing (even on eof) */ + while(true) { int chi = ktok_peekc(K); if (chi == EOF) { - end = true; + return; } else { char ch = (char) chi; if (ktok_is_whitespace(ch)) { ktok_getc(K); } else if (ch == ';') { - ktok_ignore_comment(K); /* NOTE: this also reads again the ';' */ + ktok_ignore_single_line_comment(K); } else { - end = true; + return; } } } } + /* ** Delimiter checking */ @@ -541,8 +572,9 @@ struct kspecial_token { TValue ktok_read_special(klisp_State *K) { - /* the # is still pending (was only peeked) */ - int32_t buf_len = ktok_read_until_delimiter(K); + /* the # is already consumed, add it manually */ + ks_tbadd(K, '#'); + int32_t buf_len = ktok_read_until_delimiter(K) + 1; char *buf = ks_tbget_buffer(K); if (buf_len < 2) { diff --git a/src/tests/strings.k b/src/tests/strings.k @@ -129,10 +129,19 @@ ($check-error (substring "abcdef" 3 10)) ($check-error (substring "abcdef" 4 2)) -($check-not-predicate + +;; immutable strings are eq? iff string=? +;; Andres Navarro +($check-predicate ($let* ((p "abc") (q (substring p 0 3))) (eq? p q))) +;; string-copy always generate mutable strings +;; Andres Navarro +($check-not-predicate + ($let* ((p (string-copy "abc")) (q (substring p 0 3))) + (eq? p q))) + ($check-predicate (immutable-string? (substring "abc" 0 0))) ($check-predicate (immutable-string? (substring "abc" 0 1))) diff --git a/src/tests/test-all.k b/src/tests/test-all.k @@ -15,7 +15,7 @@ (load "tests/environment-mutation.k") (load "tests/combiners.k") ;; XXX Oto, you forgot to add tests/encapsulations.k to the repo! -;;(load "tests/encapsulations.k") +(load "tests/encapsulations.k") (load "tests/numbers.k") (load "tests/strings.k") (load "tests/characters.k")