Merged recent changes from original repository. - klisp - an open source interpreter for the Kernel Programming Language.

commit 6283d55edd30579b0851420ec5d208d7fc46c3a0
parent 0421fd85d63a911b730df305eb2fcc119697fa2b
Author: Oto Havle <havleoto@gmail.com>
Date:   Fri, 28 Oct 2011 12:15:56 +0200

Merged recent changes from original repository.

Diffstat:
M src/kgports.c  | 4 ----
M src/kgstrings.c  | 6 ++++--
M src/kscript.c  | 24 ------------------------
M src/kscript.h  | 3 ---
M src/ktoken.c  | 178 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M src/tests/strings.k  | 11 ++++++++++-
M src/tests/test-all.k  | 2 +-

7 files changed, 120 insertions(+), 108 deletions(-)
diff --git a/src/kgports.c b/src/kgports.c
@@ -331,10 +331,6 @@ void call_with_file(klisp_State *K, TValue *xparams, TValue ptree,
 /* GC: assume port is rooted */
 TValue read_all_expr(klisp_State *K, TValue port)
 {
-    /* support unix script directive #! */
-    int line_count = kscript_eat_directive(kport_file(port));
-    kport_line(port) += line_count;
-
     /* GC: root dummy and obj */
     TValue tail = kget_dummy1(K);
     TValue obj = KINERT;
diff --git a/src/kgstrings.c b/src/kgstrings.c
@@ -239,7 +239,7 @@ bool kstring_ci_gep(TValue str1, TValue str2)
 }
 
 /* 13.2.5? substring */
-/* TEMP: at least for now this always returns mutable strings */
+/* Note: This will return an mutable string iff the source string is mutable */
 void substring(klisp_State *K, TValue *xparams, TValue ptree, TValue denv)
 {
     UNUSED(xparams);
@@ -276,8 +276,10 @@ void substring(klisp_State *K, TValue *xparams, TValue ptree, TValue denv)
     /* the if isn't strictly necessary but it's clearer this way */
     if (size == 0) {
 	new_str = K->empty_string;
-    } else {
+    } else if (kstring_mutablep(str)) {
 	new_str = kstring_new_bs(K, kstring_buf(str)+start, size);
+    } else {
+	new_str = kstring_new_bs_imm(K, kstring_buf(str)+start, size);
     }
     kapply_cc(K, new_str);
 }
diff --git a/src/kscript.c b/src/kscript.c
@@ -242,27 +242,3 @@ void kinit_script(klisp_State *K, int argc, char *argv[])
 #undef RSI
 #undef G
 }
-
-/* skips the unix script directive (#!), if present.
-   returns number of lines skipped */
-int kscript_eat_directive(FILE *fr)
-{
-  static const char pattern[] = "#! ";
-  int c, n = 0;
-
-  while (pattern[n] != '\0' && (c = getc(fr), c == pattern[n]))
-    n++;
-
-  if (pattern[n] == '\0') {
-    while (c = getc(fr), c != EOF && c != '\n')
-      ;
-    return 1;
-  } else {
-    ungetc(c, fr);
-    /* XXX/Temp notice that the standard doesn't guarantee that more than one
-       ungetc in a row will be honored. Andres Navarro */
-    while (n > 0)
-      ungetc(pattern[--n], fr);
-    return 0;
-  }
-}
diff --git a/src/kscript.h b/src/kscript.h
@@ -18,9 +18,6 @@ void kinit_script(klisp_State *K, int argc, char *argv[]);
 void do_script_exit(klisp_State *K, TValue *xparams, TValue obj);
 void do_script_error(klisp_State *K, TValue *xparams, TValue obj);
 
-/* unix script directive handling */
-int kscript_eat_directive(FILE *fr);
-
 /* default exit code in case of error according to SRFI-22 */
 
 #define KSCRIPT_DEFAULT_ERROR_EXIT_CODE   70
diff --git a/src/ktoken.c b/src/ktoken.c
@@ -238,7 +238,8 @@ void ktok_set_source_info(klisp_State *K, TValue filename, int32_t line,
 /*
 ** ktok_read_token() helpers
 */
-void ktok_ignore_whitespace_and_comments(klisp_State *K);
+void ktok_ignore_whitespace(klisp_State *K);
+void ktok_ignore_single_line_comment(klisp_State *K);
 bool ktok_check_delimiter(klisp_State *K);
 TValue ktok_read_string(klisp_State *K);
 TValue ktok_read_special(klisp_State *K);
@@ -256,82 +257,92 @@ TValue ktok_read_token(klisp_State *K)
 {
     assert(ks_tbisempty(K));
 
-    ktok_ignore_whitespace_and_comments(K);
-    /*
-    ** NOTE: We jumped over all whitespace
-    ** so either the next token starts here or eof was reached, 
-    ** in any case we save the location of the port
-    */
+    while(true) {
+	ktok_ignore_whitespace(K);
 
-    /* save the source info of the start of the next token */
-    ktok_save_source_info(K);
+	/* save the source info in case a token starts here */
+	ktok_save_source_info(K);
 
-    int chi = ktok_peekc(K);
+	int chi = ktok_peekc(K);
 
-    switch(chi) {
-    case EOF:
-	ktok_getc(K);
-	return KEOF;
-    case '(':
-	ktok_getc(K);
-	return K->ktok_lparen;
-    case ')':
-	ktok_getc(K);
-	return K->ktok_rparen;
-    case '.':
-	ktok_getc(K);
-	if (ktok_check_delimiter(K))
-	    return K->ktok_dot;
-	else {
-	    ktok_error(K, "no delimiter found after dot");
+	switch(chi) {
+	case EOF:
+	    ktok_getc(K);
+	    return KEOF;
+	case ';':
+	    ktok_ignore_single_line_comment(K);
+	    continue;
+	case '(':
+	    ktok_getc(K);
+	    return K->ktok_lparen;
+	case ')':
+	    ktok_getc(K);
+	    return K->ktok_rparen;
+	case '.':
+	    ktok_getc(K);
+	    if (ktok_check_delimiter(K))
+		return K->ktok_dot;
+	    else {
+		ktok_error(K, "no delimiter found after dot");
+		/* avoid warning */
+		return KINERT;
+	    }
+	case '"':
+	    return ktok_read_string(K);
+/* TODO use read_until_delimiter in all these cases */
+	case '#': {
+	    ktok_getc(K);
+	    chi = ktok_peekc(K);
+	    if ((chi != EOF) && (char) chi == '!') {
+		/* this handles the #! style script header too! */
+		ktok_ignore_single_line_comment(K);
+		continue;
+	    } else {
+		/* also handles EOF case */
+		return ktok_read_special(K);
+	    }
+	}
+	case '0': case '1': case '2': case '3': case '4': 
+	case '5': case '6': case '7': case '8': case '9': {
+	    /* positive number, no exactness or radix indicator */
+	    int32_t buf_len = ktok_read_until_delimiter(K);
+	    char *buf = ks_tbget_buffer(K);
+	    /* read number should free the tbbuffer */
+	    return ktok_read_number(K, buf, buf_len, false, false, false, 10);
+	}
+	case '+': case '-':
+	    /* signed number, no exactness or radix indicator */
+	    return ktok_read_maybe_signed_numeric(K);
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 
+	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': 
+	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': 
+	case 'V': case 'W': case 'X': case 'Y': case 'Z': 
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': 
+	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 
+	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': 
+	case 'v': case 'w': case 'x': case 'y': case 'z': 
+	case '!': case '$': case '%': case '&': case '*': case '/': case ':': 
+	case '<': case '=': case '>': case '?': case '@': case '^': case '_': 
+	case '~': 
+	    /*
+	    ** NOTE: the cases for '+', '-', '.' and numbers were already 
+	    ** considered so identifier-subsequent is used instead of 
+	    ** identifier-first-char (in the cases above)
+	    */
+	    return ktok_read_identifier(K);
+	default:
+	    ktok_getc(K);
+	    ktok_error(K, "unrecognized token starting char");
 	    /* avoid warning */
 	    return KINERT;
 	}
-    case '"':
-	return ktok_read_string(K);
-/* TODO use read_until_delimiter in all these cases */
-    case '#':
-	return ktok_read_special(K);
-    case '0': case '1': case '2': case '3': case '4': 
-    case '5': case '6': case '7': case '8': case '9': {
-        /* positive number, no exactness or radix indicator */
-	int32_t buf_len = ktok_read_until_delimiter(K);
-	char *buf = ks_tbget_buffer(K);
-	/* read number should free the tbbuffer */
-	return ktok_read_number(K, buf, buf_len, false, false, false, 10);
-	}
-    case '+': case '-':
-        /* signed number, no exactness or radix indicator */
-	return ktok_read_maybe_signed_numeric(K);
-    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 
-    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': 
-    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': 
-    case 'V': case 'W': case 'X': case 'Y': case 'Z': 
-    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': 
-    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 
-    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': 
-    case 'v': case 'w': case 'x': case 'y': case 'z': 
-    case '!': case '$': case '%': case '&': case '*': case '/': case ':': 
-    case '<': case '=': case '>': case '?': case '@': case '^': case '_': 
-    case '~': 
-	/*
-	** NOTE: the cases for '+', '-', '.' and numbers were already 
-	** considered so identifier-subsequent is used instead of 
-	** identifier-first-char (in the cases above)
-	*/
-	return ktok_read_identifier(K);
-    default:
-	ktok_getc(K);
-	ktok_error(K, "unrecognized token starting char");
-	/* avoid warning */
-	return KINERT;
     }
 }
 
 /*
 ** Comments and Whitespace
 */
-void ktok_ignore_comment(klisp_State *K)
+void ktok_ignore_single_line_comment(klisp_State *K)
 {
     int chi;
     do {
@@ -339,28 +350,48 @@ void ktok_ignore_comment(klisp_State *K)
     } while (chi != EOF && chi != '\n');
 }
 
+void ktok_ignore_whitespace(klisp_State *K)
+{
+    /* NOTE: if it's not whitespace do nothing (even on eof) */
+    while(true) {
+	int chi = ktok_peekc(K);
+
+	if (chi == EOF) {
+	    return;
+	} else {
+	    char ch = (char) chi;
+	    if (ktok_is_whitespace(ch)) {
+		ktok_getc(K);
+	    } else {
+		return;
+	    }
+	}
+    }
+}
+
+/* XXX temp for repl */
 void ktok_ignore_whitespace_and_comments(klisp_State *K)
 {
-    /* NOTE: if it's not a whitespace or comment do nothing (even on eof) */
-    bool end = false;
-    while(!end) {
+    /* NOTE: if it's not whitespace do nothing (even on eof) */
+    while(true) {
 	int chi = ktok_peekc(K);
 
 	if (chi == EOF) {
-	    end = true; 
+	    return;
 	} else {
 	    char ch = (char) chi;
 	    if (ktok_is_whitespace(ch)) {
 		ktok_getc(K);
 	    } else if (ch == ';') {
-		ktok_ignore_comment(K); /* NOTE: this also reads again the ';' */
+		ktok_ignore_single_line_comment(K);
 	    } else {
-		end = true;
+		return;
 	    }
 	}
     }
 }
 
+
 /*
 ** Delimiter checking
 */
@@ -541,8 +572,9 @@ struct kspecial_token {
 
 TValue ktok_read_special(klisp_State *K)
 {
-    /* the # is still pending (was only peeked) */
-    int32_t buf_len = ktok_read_until_delimiter(K);
+    /* the # is already consumed, add it manually */
+    ks_tbadd(K, '#');
+    int32_t buf_len = ktok_read_until_delimiter(K) + 1;
     char *buf = ks_tbget_buffer(K);
 
     if (buf_len < 2) {
diff --git a/src/tests/strings.k b/src/tests/strings.k
@@ -129,10 +129,19 @@
 ($check-error (substring "abcdef" 3 10))
 ($check-error (substring "abcdef" 4 2))
 
-($check-not-predicate
+
+;; immutable strings are eq? iff string=?
+;; Andres Navarro
+($check-predicate
   ($let* ((p "abc") (q (substring p 0 3)))
     (eq? p q)))
 
+;; string-copy always generate mutable strings
+;; Andres Navarro
+($check-not-predicate
+ ($let* ((p (string-copy "abc")) (q (substring p 0 3)))
+   (eq? p q)))
+
 ($check-predicate (immutable-string? (substring "abc" 0 0)))
 ($check-predicate (immutable-string? (substring "abc" 0 1)))
 
diff --git a/src/tests/test-all.k b/src/tests/test-all.k
@@ -15,7 +15,7 @@
 (load "tests/environment-mutation.k")
 (load "tests/combiners.k")
 ;; XXX Oto, you forgot to add tests/encapsulations.k to the repo!
-;;(load "tests/encapsulations.k")
+(load "tests/encapsulations.k")
 (load "tests/numbers.k")
 (load "tests/strings.k")
 (load "tests/characters.k")

	klisp an open source interpreter for the Kernel Programming Language.
	git clone http://git.hanabi.in/repos/klisp.git
	Log \| Files \| Refs \| README

M	src/kgports.c	\|	4	----
M	src/kgstrings.c	\|	6	++++--
M	src/kscript.c	\|	24	------------------------
M	src/kscript.h	\|	3	---
M	src/ktoken.c	\|	178	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M	src/tests/strings.k	\|	11	++++++++++-
M	src/tests/test-all.k	\|	2	+-