ruby · tompng · Jun 25, 2023 · Jun 21, 2023
diff --git a/lib/irb/cmd/show_source.rb b/lib/irb/cmd/show_source.rb
@@ -58,9 +58,9 @@ def find_end(file, first_line, irb_context)
  tokens.chunk { |tok| tok.pos[0] }.each do |lnum, chunk|
  code = lines[0..lnum].join
  prev_tokens.concat chunk
- continue = lex.process_continue(prev_tokens)
- code_block_open = lex.check_code_block(code, prev_tokens)
- if !continue && !code_block_open
+ continue = lex.should_continue?(prev_tokens)
+ syntax = lex.check_code_syntax(code)
+ if !continue && syntax == :valid
  return first_line + lnum
  end
  end

diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
@@ -85,7 +85,7 @@ def configure_io(io)
  # Avoid appending duplicated token. Tokens that include "\n" like multiline tstring_content can exist in multiple lines.
  tokens_until_line << token if token != tokens_until_line.last
  end
- continue = process_continue(tokens_until_line)
+ continue = should_continue?(tokens_until_line)
  prompt(next_opens, continue, line_num_offset)
  end
  end
@@ -196,7 +196,16 @@ def check_code_state(code)
  end
 
  def code_terminated?(code, tokens, opens)
- opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens)
+ case check_code_syntax(code)
+ when :unrecoverable_error
+ true # treated as terminated even though the code has a syntax error
+ when :recoverable_error
+ false # not terminated yet: keep reading input
+ when :other_error
+ opens.empty? && !should_continue?(tokens) # unclassified error: fall back to open-nesting and continuation checks
+ when :valid
+ !should_continue?(tokens) # valid syntax: terminated unless the last token asks for a continuation line
+ end
  end
 
  def save_prompt_to_context_io(opens, continue, line_num_offset)
@@ -227,7 +236,7 @@ def readmultiline
  return code if terminated
 
  line_offset += 1
- continue = process_continue(tokens)
+ continue = should_continue?(tokens)
  save_prompt_to_context_io(opens, continue, line_offset)
  end
  end
@@ -246,29 +255,33 @@ def each_top_level_statement
  end
  end
 
- def process_continue(tokens)
- # last token is always newline
- if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
- # end of regexp literal
- return false
- elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
- return false
- elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
- return false
- elsif !tokens.empty? and tokens.last.tok == "\\\n"
- return true
- elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
- return false
- elsif tokens.size >= 2 and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
- # end of literal except for regexp
- # endless range at end of line is not a continue
- return true
+ def should_continue?(tokens)
+ # Look at the last significant token and check whether IRB needs to continue reading the next line.
+ # Example code that should continue: `a\` `a +` `a.`
+ # Trailing spaces, newlines, comments and embedded documents (=begin/=end) are skipped.
+ return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n" # backslash-newline always continues
+
+ tokens.reverse_each do |token|
+ case token.event
+ when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
+ # Not significant: skip and keep scanning backwards
+ when :on_regexp_end, :on_heredoc_end, :on_semicolon
+ # State is EXPR_BEG but should not continue
+ return false
+ else
+ # An endless range (`a..` or `a...`) at the end of a line should not continue
+ return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
+
+ # EXPR_DOT and most of the EXPR_BEG should continue
+ return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
+ end
  end
  false
  end
 
- def check_code_block(code, tokens)
- return true if tokens.empty?
+ def check_code_syntax(code)
+ lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
+ code = "#{lvars_code}\n#{code}"
 
  begin # check if parser error are available
  verbose, $VERBOSE = $VERBOSE, nil
@@ -287,6 +300,7 @@ def check_code_block(code, tokens)
  end
  rescue EncodingError
  # This is for a hash with invalid encoding symbol, {"\xAE": 1}
+ return :unrecoverable_error # explicit return: a bare value here is discarded and the method would fall through to :valid
  rescue SyntaxError => e
  case e.message
  when /unterminated (?:string|regexp) meets end of file/
@@ -299,7 +313,7 @@ def check_code_block(code, tokens)
  #
  # example:
  # '
- return true
+ return :recoverable_error
  when /syntax error, unexpected end-of-input/
  # "syntax error, unexpected end-of-input, expecting keyword_end"
  #
@@ -309,7 +323,7 @@ def check_code_block(code, tokens)
  # if false
  # fuga
  # end
- return true
+ return :recoverable_error
  when /syntax error, unexpected keyword_end/
  # "syntax error, unexpected keyword_end"
  #
@@ -319,41 +333,26 @@ def check_code_block(code, tokens)
  #
  # example:
  # end
- return false
+ return :unrecoverable_error
  when /syntax error, unexpected '\.'/
  # "syntax error, unexpected '.'"
  #
  # example:
  # .
- return false
+ return :unrecoverable_error
  when /unexpected tREGEXP_BEG/
  # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
  #
  # example:
  # method / f /
- return false
+ return :unrecoverable_error
+ else
+ return :other_error
  end
  ensure
  $VERBOSE = verbose
  end
-
- last_lex_state = tokens.last.state
-
- if last_lex_state.allbits?(Ripper::EXPR_BEG)
- return false
- elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
- return false
- end
-
- false
+ :valid
  end
 
  def calc_indent_level(opens)

diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb
@@ -82,25 +82,33 @@ def assert_row_indenting(lines, row)
  end
 
  def assert_indent_level(lines, expected, local_variables: [])
- indent_level, _code_block_open = check_state(lines, local_variables: local_variables)
+ indent_level, _continue, _code_block_open = check_state(lines, local_variables: local_variables)
  error_message = "Calculated the wrong number of indent level for:\n #{lines.join("\n")}"
  assert_equal(expected, indent_level, error_message)
  end
 
+ def assert_should_continue(lines, expected, local_variables: [])
+ _indent_level, continue, _code_block_open = check_state(lines, local_variables: local_variables)
+ error_message = "Wrong result of should_continue for:\n #{lines.join("\n")}"
+ assert_equal(expected, continue, error_message)
+ end
+
  def assert_code_block_open(lines, expected, local_variables: [])
- _indent_level, code_block_open = check_state(lines, local_variables: local_variables)
+ _indent_level, _continue, code_block_open = check_state(lines, local_variables: local_variables)
  error_message = "Wrong result of code_block_open for:\n #{lines.join("\n")}"
  assert_equal(expected, code_block_open, error_message)
  end
 
  def check_state(lines, local_variables: [])
  context = build_context(local_variables)
- tokens = RubyLex.ripper_lex_without_warning(lines.join("\n"), context: context)
+ code = lines.join("\n")
+ tokens = RubyLex.ripper_lex_without_warning(code, context: context)
  opens = IRB::NestingParser.open_tokens(tokens)
  ruby_lex = RubyLex.new(context)
  indent_level = ruby_lex.calc_indent_level(opens)
- code_block_open = !opens.empty? || ruby_lex.process_continue(tokens)
- [indent_level, code_block_open]
+ continue = ruby_lex.should_continue?(tokens)
+ terminated = ruby_lex.code_terminated?(code, tokens, opens)
+ [indent_level, continue, !terminated] # third element ("code block open") is the negation of code_terminated?
  end
 
  def test_interpolate_token_with_heredoc_and_unclosed_embexpr
@@ -235,7 +243,7 @@ def test_symbols
  def test_endless_range_at_end_of_line
  input_with_prompt = [
  PromptRow.new('001:0: :> ', %q(a = 3..)),
- PromptRow.new('002:0: :* ', %q()),
+ PromptRow.new('002:0: :> ', %q()),
  ]
 
  lines = input_with_prompt.map(&:content)
@@ -256,7 +264,7 @@ def test_heredoc_with_embexpr
  PromptRow.new('009:0:]:* ', %q(B)),
  PromptRow.new('010:0:]:* ', %q(})),
  PromptRow.new('011:0: :> ', %q(])),
- PromptRow.new('012:0: :* ', %q()),
+ PromptRow.new('012:0: :> ', %q()),
  ]
 
  lines = input_with_prompt.map(&:content)
@@ -285,9 +293,9 @@ def test_heredoc_prompt_with_quotes
  def test_backtick_method
  input_with_prompt = [
  PromptRow.new('001:0: :> ', %q(self.`(arg))),
- PromptRow.new('002:0: :* ', %q()),
+ PromptRow.new('002:0: :> ', %q()),
  PromptRow.new('003:0: :> ', %q(def `(); end)),
- PromptRow.new('004:0: :* ', %q()),
+ PromptRow.new('004:0: :> ', %q()),
  ]
 
  lines = input_with_prompt.map(&:content)
@@ -777,6 +785,36 @@ def test_dynamic_prompt_with_blank_line
  assert_dynamic_prompt(lines, expected_prompt_list)
  end
 
+ def test_should_continue
+ assert_should_continue(['a'], false)
+ assert_should_continue(['/a/'], false) # ends with a regexp literal
+ assert_should_continue(['a;'], false) # ends with a semicolon
+ assert_should_continue(['<<A', 'A'], false) # ends with a heredoc terminator
+ assert_should_continue(['a...'], false) # endless range
+ assert_should_continue(['a\\', ''], true) # backslash line continuation
+ assert_should_continue(['a.'], true) # trailing dot
+ assert_should_continue(['a+'], true) # trailing operator
+ assert_should_continue(['a; #comment', '', '=begin', 'embdoc', '=end', ''], false) # trailing comment, blank line and embdoc are skipped
+ assert_should_continue(['a+ #comment', '', '=begin', 'embdoc', '=end', ''], true) # trailing comment, blank line and embdoc are skipped
+ end
+
+ def test_code_block_open_with_should_continue
+ # Syntax is valid: the result depends only on should_continue?
+ assert_code_block_open(['a'], false) # continue: false
+ assert_code_block_open(['a\\', ''], true) # continue: true
+
+ # Code with a recoverable syntax error is not terminated
+ assert_code_block_open(['a+', ''], true)
+
+ # Code with an unrecoverable syntax error is terminated
+ assert_code_block_open(['.; a+', ''], false)
+
+ # Other syntax errors, where it cannot be determined whether the error is recoverable
+ assert_code_block_open(['@; a'], false)
+ assert_code_block_open(['@; a+'], true)
+ assert_code_block_open(['@; (a'], true)
+ end
+
  def test_broken_percent_literal
  tokens = RubyLex.ripper_lex_without_warning('%wwww')
  pos_to_index = {}