From d59cc89a3c82abdc7ada13c481303838fdad85cd Mon Sep 17 00:00:00 2001
From: Ary Borenszweig <aborenszweig@manas.com.ar>
Date: Sat, 3 Aug 2013 11:09:16 -0300
Subject: [PATCH] String interpolation in bootstrap's lexer

---
 bootstrap/crystal/lexer.cr                    | 117 ++++++-
 bootstrap/crystal/token.cr                    |   3 +
 .../spec/crystal/codegen/primitives_spec.cr   |   2 -
 bootstrap/spec/crystal/lexer/lexer_spec.cr    |  27 +-
 .../crystal/lexer/lexer_string_array_spec.cr  |  53 +++
 .../spec/crystal/lexer/lexer_string_spec.cr   | 316 ++++++++++++++++++
 bootstrap/spec/crystal/lexer/location_spec.cr |  39 +++
 bootstrap/spec/crystal/parser/parser_spec.cr  |   2 -
 bootstrap/spec/spec_helper.cr                 |   2 +
 lib/crystal/lexer.rb                          |   3 -
 spec/lexer/lexer_string_spec.rb               |   2 +-
 std/char.cr                                   |   6 +-
 12 files changed, 538 insertions(+), 34 deletions(-)
 create mode 100644 bootstrap/spec/crystal/lexer/lexer_string_array_spec.cr
 create mode 100644 bootstrap/spec/crystal/lexer/lexer_string_spec.cr
 create mode 100644 bootstrap/spec/crystal/lexer/location_spec.cr

diff --git a/bootstrap/crystal/lexer.cr b/bootstrap/crystal/lexer.cr
index 1f1386724a..38841f0e28 100644
--- a/bootstrap/crystal/lexer.cr
+++ b/bootstrap/crystal/lexer.cr
@@ -188,6 +188,14 @@ module Crystal
       case next_char
       when '='
         next_char :"%="
+      when '('
+        string_start_pair '(', ')'
+      when '['
+        string_start_pair '[', ']'
+      when '{'
+        string_start_pair '{', '}'
+      when '<'
+        string_start_pair '<', '>'
       when 'w'
         if @buffer[1] == '('
           next_char
@@ -196,7 +204,11 @@ module Crystal
           @token.type = :"%"
         end
       else
-        @token.type = :"%"
+        if @buffer.value.alphanumeric?
+          @token.type = :"%"
+        else
+          string_start_pair @buffer.value, @buffer.value
+        end
       end
     when '(' then next_char :"("
     when ')' then next_char :")"
@@ -327,17 +339,11 @@ module Crystal
        end
        next_char
      when '"'
-        start = @buffer + 1
-        count = 0
-        while (char = next_char) != '"' && char != :EOF
-          count += 1
-        end
-        if char != '"'
-          raise "unterminated string literal"
-        end
        next_char
-        @token.type = :STRING
-        @token.value = String.from_cstr(start, count)
+        @token.type = :STRING_START
+        @token.string_nest = '"'
+        @token.string_end = '"'
+        @token.string_open_count = 0
      when '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
        scan_number @buffer, 1
      when '@'
@@ -767,6 +773,95 @@ module Crystal
        @token.number_kind = yield
      end
    end
+
+    def next_string_token(string_nest, string_end, string_open_count)
+      case @buffer.value
+      when '\0'
+        raise "unterminated string literal"
+      when string_end
+        next_char
+        if string_open_count == 0
+          @token.type = :STRING_END
+        else
+          @token.type = :STRING
+          @token.value = string_end.to_s
+          @token.string_open_count = string_open_count - 1
+        end
+      when string_nest
+        next_char
+        @token.type = :STRING
+        @token.value = string_nest.to_s
+        @token.string_open_count = string_open_count + 1
+      when '\\'
+        case @buffer[1]
+        when 'n'
+          string_token_escape_value "\n"
+        when 'r'
+          string_token_escape_value "\r"
+        when 't'
+          string_token_escape_value "\t"
+        when 'v'
+          string_token_escape_value "\v"
+        when 'f'
+          string_token_escape_value "\f"
+        when '0'
+          string_token_escape_value "\0"
+        else
+          next_char
+          @token.type = :STRING
+          @token.value = @buffer.value.to_s
+          next_char
+        end
+      when '#'
+        if @buffer[1] == '{'
+          next_char
+          next_char
+          @token.type = :INTERPOLATION_START
+        else
+          next_char
+          @token.type = :STRING
+          @token.value = "#"
+        end
+      when '\n'
+        next_char
+        @column_number = 1
+        @line_number += 1
+        @token.type = :STRING
+        @token.value = "\n"
+      else
+        start = @buffer
+        count = 0
+        while @buffer.value != string_end &&
+              @buffer.value != string_nest &&
+              @buffer.value != '\0' &&
+              @buffer.value != '\\' &&
+              @buffer.value != '#' &&
+              @buffer.value != '\n'
+          next_char
+          count += 1
+        end
+
+        @token.type = :STRING
+        @token.value = String.from_cstr(start, count)
+      end
+
+      @token
+    end
+
+    def string_token_escape_value(value)
+      next_char
+      next_char
+      @token.type = :STRING
+      @token.value = value
+    end
+
+    def string_start_pair(string_nest, string_end)
+      next_char
+      @token.type = :STRING_START
+      @token.string_nest = string_nest
+      @token.string_end = string_end
+      @token.string_open_count = 0
+    end
 
     def next_string_array_token
       while true
diff --git a/bootstrap/crystal/token.cr b/bootstrap/crystal/token.cr
index 0a77609686..dcb800817d 100644
--- a/bootstrap/crystal/token.cr
+++ b/bootstrap/crystal/token.cr
@@ -5,6 +5,9 @@ module Crystal
     attr_accessor :type
     attr_accessor :value
     attr_accessor :number_kind
+    attr_accessor :string_end
+    attr_accessor :string_nest
+    attr_accessor :string_open_count
     attr_accessor :line_number
     attr_accessor :column_number
     attr_accessor :filename
diff --git a/bootstrap/spec/crystal/codegen/primitives_spec.cr b/bootstrap/spec/crystal/codegen/primitives_spec.cr
index af09729141..7ec419f3ba 100644
--- a/bootstrap/spec/crystal/codegen/primitives_spec.cr
+++ b/bootstrap/spec/crystal/codegen/primitives_spec.cr
@@ -1,7 +1,5 @@
 require "../../spec_helper"
 
-include Crystal
-
 describe "Code gen: primitives" do
   it "codegens bool" do
     run("true").to_b.should be_true
diff --git a/bootstrap/spec/crystal/lexer/lexer_spec.cr b/bootstrap/spec/crystal/lexer/lexer_spec.cr
index d644d33308..67e80fdd70 100755
--- a/bootstrap/spec/crystal/lexer/lexer_spec.cr
+++ b/bootstrap/spec/crystal/lexer/lexer_spec.cr
@@ -3,7 +3,7 @@ require "../../spec_helper"
 
 def it_lexes(string, type)
   it "lexes #{string}" do
-    lexer = Crystal::Lexer.new string
+    lexer = Lexer.new string
     token = lexer.next_token
     token.type.should eq(type)
   end
@@ -11,7 +11,7 @@ end
 
 def it_lexes(string, type, value)
   it "lexes #{string}" do
-    lexer = Crystal::Lexer.new string
+    lexer = Lexer.new string
     token = lexer.next_token
     token.type.should eq(type)
     token.value.should eq(value)
@@ -20,7 +20,7 @@ end
 
 def it_lexes(string, type, value, number_kind)
   it "lexes #{string}" do
-    lexer = Crystal::Lexer.new string
+    lexer = Lexer.new string
     token = lexer.next_token
     token.type.should eq(type)
     token.value.should eq(value)
@@ -72,7 +72,7 @@ end
 
 def it_lexes_char(string, value)
   it "lexes #{string}" do
-    lexer = Crystal::Lexer.new string
+    lexer = Lexer.new string
     token = lexer.next_token
     token.type.should eq(:CHAR)
     token.value.to_s.should eq(value.to_s)
@@ -121,7 +121,6 @@ describe "Lexer" do
   it_lexes "\t", :SPACE
   it_lexes "\n", :NEWLINE
   it_lexes "\n\n\n", :NEWLINE
-  it_lexes %("foo"), :STRING, "foo"
   it_lexes_keywords [:"def", :"if", :"else", :"elsif", :"end", :"true", :"false", :"class", :"module", :"include", :"while", :"nil", :"do", :"yield", :"return", :"unless", :"next", :"break", :"begin", :"lib", :"fun", :"type", :"struct", :"macro", :"ptr", :"out", :"require", :"case", :"when", :"then", :"of", :"abstract"]
   it_lexes_idents ["ident", "something", "with_underscores", "with_1", "foo?", "bar!"]
   it_lexes_idents ["def?", "if?", "else?", "elsif?", "end?", "true?", "false?", "class?", "while?", "nil?", "do?", "yield?", "return?", "unless?", "next?", "break?", "begin?"]
@@ -183,7 +182,7 @@ describe "Lexer" do
   it_lexes_global_match ["$1", "$10"]
 
   it "lexes not instance var" do
-    lexer = Crystal::Lexer.new "!@foo"
+    lexer = Lexer.new "!@foo"
     token = lexer.next_token
     token.type.should eq(:"!")
     token = lexer.next_token
@@ -192,7 +191,7 @@ describe "Lexer" do
   end
 
   it "lexes space after keyword" do
-    lexer = Crystal::Lexer.new "end 1"
+    lexer = Lexer.new "end 1"
     token = lexer.next_token
     token.type.should eq(:IDENT)
     token.value.should eq(:end)
@@ -201,7 +200,7 @@ describe "Lexer" do
   end
 
   it "lexes space after char" do
-    lexer = Crystal::Lexer.new "'a' "
+    lexer = Lexer.new "'a' "
     token = lexer.next_token
     token.type.should eq(:CHAR)
     token.value.should eq('a')
@@ -210,7 +209,7 @@ describe "Lexer" do
   end
 
   it "lexes comment and token" do
-    lexer = Crystal::Lexer.new "# comment\n="
+    lexer = Lexer.new "# comment\n="
     token = lexer.next_token
     token.type.should eq(:NEWLINE)
     token = lexer.next_token
@@ -218,20 +217,20 @@ describe "Lexer" do
   end
 
   it "lexes comment at the end" do
-    lexer = Crystal::Lexer.new "# comment"
+    lexer = Lexer.new "# comment"
     token = lexer.next_token
     token.type.should eq(:EOF)
   end
 
   it "lexes __LINE__" do
-    lexer = Crystal::Lexer.new "__LINE__"
+    lexer = Lexer.new "__LINE__"
     token = lexer.next_token
     token.type.should eq(:INT)
     token.value.should eq(1)
   end
 
   it "lexes __FILE__" do
-    lexer = Crystal::Lexer.new "__FILE__"
+    lexer = Lexer.new "__FILE__"
     lexer.filename = "foo"
     token = lexer.next_token
     token.type.should eq(:STRING)
@@ -239,7 +238,7 @@ describe "Lexer" do
   end
 
   it "lexes __DIR__" do
-    lexer = Crystal::Lexer.new "__DIR__"
+    lexer = Lexer.new "__DIR__"
     lexer.filename = "/Users/foo/bar.cr"
     token = lexer.next_token
     token.type.should eq(:STRING)
@@ -247,7 +246,7 @@ describe "Lexer" do
   end
 
   it "lexes dot and ident" do
-    lexer = Crystal::Lexer.new ".read"
+    lexer = Lexer.new ".read"
     token = lexer.next_token
     token.type.should eq(:".")
     token = lexer.next_token
diff --git a/bootstrap/spec/crystal/lexer/lexer_string_array_spec.cr b/bootstrap/spec/crystal/lexer/lexer_string_array_spec.cr
new file mode 100644
index 0000000000..2001ba7338
--- /dev/null
+++ b/bootstrap/spec/crystal/lexer/lexer_string_array_spec.cr
@@ -0,0 +1,53 @@
+#!/usr/bin/env bin/crystal -run
+require "../../spec_helper"
+
+describe "Lexer string array" do
+  it "lexes simple string array" do
+    lexer = Lexer.new("%w(one two)")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_ARRAY_START)
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING)
+    token.value.should eq("one")
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING)
+    token.value.should eq("two")
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING_ARRAY_END)
+  end
+
+  it "lexes string array with new line" do
+    lexer = Lexer.new("%w(one \n two)")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_ARRAY_START)
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING)
+    token.value.should eq("one")
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING)
+    token.value.should eq("two")
+
+    token = lexer.next_string_array_token
+    token.type.should eq(:STRING_ARRAY_END)
+  end
+
+  it "lexes string array with new line gives correct column for next token" do
+    lexer = Lexer.new("%w(one \n two).")
+
+    lexer.next_token
+    lexer.next_string_array_token
+    lexer.next_string_array_token
+    lexer.next_string_array_token
+
+    token = lexer.next_token
+    token.line_number.should eq(2)
+    token.column_number.should eq(6)
+  end
+end
diff --git a/bootstrap/spec/crystal/lexer/lexer_string_spec.cr b/bootstrap/spec/crystal/lexer/lexer_string_spec.cr
new file mode 100644
index 0000000000..db59c6f74e
--- /dev/null
+++ b/bootstrap/spec/crystal/lexer/lexer_string_spec.cr
@@ -0,0 +1,316 @@
+#!/usr/bin/env bin/crystal -run
+require "../../spec_helper"
+
+describe "Lexer string" do
+  it "lexes simple string" do
+    lexer = Lexer.new(%("hello"))
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+    token.string_end.should eq('"')
+    token.string_nest.should eq('"')
+    token.string_open_count.should eq(0)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+
+    token = lexer.next_token
+    token.type.should eq(:EOF)
+  end
+
+  it "lexes string with newline" do
+    lexer = Lexer.new("\"hello\\nworld\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\n")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("world")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with slash" do
+    lexer = Lexer.new("\"hello\\\\world\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\\")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("world")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with slash quote" do
+    lexer = Lexer.new("\"\\\"\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\"")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with slash t" do
+    lexer = Lexer.new("\"\\t\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\t")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with interpolation" do
+    lexer = Lexer.new("\"hello \#{world}\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello ")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:INTERPOLATION_START)
+
+    token = lexer.next_token
+    token.type.should eq(:IDENT)
+    token.value.should eq("world")
+
+    token = lexer.next_token
+    token.type.should eq(:"}")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with numeral" do
+    lexer = Lexer.new("\"hello#world\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("#")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("world")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with literal newline" do
+    lexer = Lexer.new("\"hello\nworld\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\n")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("world")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+
+    token = lexer.next_token
+    token.line_number.should eq(2)
+    token.column_number.should eq(7)
+  end
+
+  it "lexes string with only newline" do
+    lexer = Lexer.new("\"\n\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("\n")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes double numeral" do
+    lexer = Lexer.new("\"##\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("#")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("#")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes string with interpolation with double numeral" do
+    lexer = Lexer.new("\"hello \#\#{world}\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello ")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("#")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:INTERPOLATION_START)
+
+    token = lexer.next_token
+    token.type.should eq(:IDENT)
+    token.value.should eq("world")
+
+    token = lexer.next_token
+    token.type.should eq(:"}")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes slash with no-escape char" do
+    lexer = Lexer.new("\"\\h\"")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("h")
+
+    token = lexer.next_string_token('"', '"', 0)
+    token.type.should eq(:STRING_END)
+  end
+
+  it "lexes simple string with %(" do
+    lexer = Lexer.new("%(hello)")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+    token.string_end.should eq(')')
+    token.string_nest.should eq('(')
+
+    token = lexer.next_string_token('(', ')', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('(', ')', 0)
+    token.type.should eq(:STRING_END)
+
+    token = lexer.next_token
+    token.type.should eq(:EOF)
+  end
+
+  [['(', ')'], ['[', ']'], ['{', '}'], ['<', '>']].each do |pair|
+    it "lexes simple string with nested %#{pair[0]}" do
+      lexer = Lexer.new("%#{pair[0]}hello #{pair[0]}world#{pair[1]}#{pair[1]}")
+
+      token = lexer.next_token
+      token.type.should eq(:STRING_START)
+      token.string_nest.should eq(pair[0])
+      token.string_end.should eq(pair[1])
+      token.string_open_count.should eq(0)
+
+      token = lexer.next_string_token(pair[0], pair[1], 0)
+      token.type.should eq(:STRING)
+      token.value.should eq("hello ")
+
+      token = lexer.next_string_token(pair[0], pair[1], 0)
+      token.type.should eq(:STRING)
+      token.value.should eq(pair[0].to_s)
+      token.string_open_count.should eq(1)
+
+      token = lexer.next_string_token(pair[0], pair[1], 1)
+      token.type.should eq(:STRING)
+      token.value.should eq("world")
+
+      token = lexer.next_string_token(pair[0], pair[1], 1)
+      token.type.should eq(:STRING)
+      token.value.should eq(pair[1].to_s)
+      token.string_open_count.should eq(0)
+
+      token = lexer.next_string_token(pair[0], pair[1], 0)
+      token.type.should eq(:STRING_END)
+
+      token = lexer.next_token
+      token.type.should eq(:EOF)
+    end
+  end
+
+  it "lexes simple string with %~" do
+    lexer = Lexer.new("%~hello~")
+
+    token = lexer.next_token
+    token.type.should eq(:STRING_START)
+    token.string_end.should eq('~')
+    token.string_nest.should eq('~')
+
+    token = lexer.next_string_token('~', '~', 0)
+    token.type.should eq(:STRING)
+    token.value.should eq("hello")
+
+    token = lexer.next_string_token('~', '~', 0)
+    token.type.should eq(:STRING_END)
+
+    token = lexer.next_token
+    token.type.should eq(:EOF)
+  end
+end
diff --git a/bootstrap/spec/crystal/lexer/location_spec.cr b/bootstrap/spec/crystal/lexer/location_spec.cr
new file mode 100644
index 0000000000..8216a2f3fe
--- /dev/null
+++ b/bootstrap/spec/crystal/lexer/location_spec.cr
@@ -0,0 +1,39 @@
+#!/usr/bin/env bin/crystal -run
+require "../../spec_helper"
+
+def assert_token_column_number(lexer, type, column_number)
+  token = lexer.next_token
+  token.type.should eq(type)
+  token.column_number.should eq(column_number)
+end
+
+describe "Lexer: location" do
+  it "stores line numbers" do
+    lexer = Lexer.new "1\n2"
+    token = lexer.next_token
+    token.type.should eq(:NUMBER)
+    token.line_number.should eq(1)
+
+    token = lexer.next_token
+    token.type.should eq(:NEWLINE)
+    token.line_number.should eq(1)
+
+    token = lexer.next_token
+    token.type.should eq(:NUMBER)
+    token.line_number.should eq(2)
+  end
+
+  it "stores column numbers" do
+    lexer = Lexer.new "1; ident; def;\n4"
+    assert_token_column_number lexer, :NUMBER, 1
+    assert_token_column_number lexer, :";", 2
+    assert_token_column_number lexer, :SPACE, 3
+    assert_token_column_number lexer, :IDENT, 5
+    assert_token_column_number lexer, :";", 10
+    assert_token_column_number lexer, :SPACE, 11
+    assert_token_column_number lexer, :IDENT, 12
+    assert_token_column_number lexer, :";", 15
+    assert_token_column_number lexer, :NEWLINE, 16
+    assert_token_column_number lexer, :NUMBER, 1
+  end
+end
diff --git a/bootstrap/spec/crystal/parser/parser_spec.cr b/bootstrap/spec/crystal/parser/parser_spec.cr
index 590418f133..b8424e3ae4 100755
--- a/bootstrap/spec/crystal/parser/parser_spec.cr
+++ b/bootstrap/spec/crystal/parser/parser_spec.cr
@@ -1,8 +1,6 @@
 #!/usr/bin/env bin/crystal -run
 require "../../spec_helper"
 
-include Crystal
-
 class Numeric
   def i32
     NumberLiteral.new to_s, :i32
diff --git a/bootstrap/spec/spec_helper.cr b/bootstrap/spec/spec_helper.cr
index b4bf3ffa58..af1ce40a84 100644
--- a/bootstrap/spec/spec_helper.cr
+++ b/bootstrap/spec/spec_helper.cr
@@ -1,6 +1,8 @@
 require "spec"
 require "../crystal/**"
 
+include Crystal
+
 def assert_type(str)
   input = Parser.parse str
   mod = infer_type input
diff --git a/lib/crystal/lexer.rb b/lib/crystal/lexer.rb
index b075b5caf8..34fc4c4f4a 100644
--- a/lib/crystal/lexer.rb
+++ b/lib/crystal/lexer.rb
@@ -235,9 +235,6 @@ module Crystal
       elsif scan(/\\r/)
         @token.type = :STRING
         @token.value = "\r"
-      elsif scan(/\\"/)
-        @token.type = :STRING
-        @token.value = '"'
      elsif scan(/\\t/)
        @token.type = :STRING
        @token.value = "\t"
diff --git a/spec/lexer/lexer_string_spec.rb b/spec/lexer/lexer_string_spec.rb
index 583ce4cf71..778c42420c 100644
--- a/spec/lexer/lexer_string_spec.rb
+++ b/spec/lexer/lexer_string_spec.rb
@@ -67,7 +67,7 @@ describe 'Lexer string' do
     token.type.should eq(:STRING_END)
   end
 
-  it "lexes string with slash quote" do
+  it "lexes string with slash t" do
     lexer = Lexer.new(%("\\t"))
 
     token = lexer.next_token
diff --git a/std/char.cr b/std/char.cr
index c32a0e5aad..471d84775f 100644
--- a/std/char.cr
+++ b/std/char.cr
@@ -16,6 +16,10 @@ class Char
     ('A' <= self && self <= 'Z')
   end
 
+  def alphanumeric?
+    alpha? || digit?
+  end
+
   def whitespace?
     self == ' ' || self == '\t' || self == '\n' || self == '\v' || self == '\f' || self == '\r'
   end
@@ -57,4 +61,4 @@ class Char
       buffer.value = self
     end
   end
-end
\ No newline at end of file
+end
-- 
GitLab