tom-lord
diff --git a/.rubocop.yml
Lines changed: 2 additions & 0 deletions b/.rubocop.yml
Lines changed: 2 additions & 0 deletions
diff --git a/lib/regexp-examples/chargroup_parser.rb
Lines changed: 37 additions & 21 deletions b/lib/regexp-examples/chargroup_parser.rb
Lines changed: 37 additions & 21 deletions
diff --git a/lib/regexp-examples/groups.rb
Lines changed: 24 additions & 17 deletions b/lib/regexp-examples/groups.rb
Lines changed: 24 additions & 17 deletions
diff --git a/lib/regexp-examples/parser.rb
Lines changed: 14 additions & 11 deletions b/lib/regexp-examples/parser.rb
Lines changed: 14 additions & 11 deletions
diff --git a/lib/regexp-examples/version.rb
Lines changed: 1 addition & 1 deletion b/lib/regexp-examples/version.rb
Lines changed: 1 addition & 1 deletion
diff --git a/spec/regexp-examples_spec.rb
Lines changed: 10 additions & 4 deletions b/spec/regexp-examples_spec.rb
Lines changed: 10 additions & 4 deletions
@@ -0,0 +1,2 @@
+Metrics/LineLength:
+ Max: 90
@@ -25,29 +25,11 @@ def parse
  until next_char == ']'
  case next_char
  when '['
- @current_position += 1
- sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
- @charset.concat sub_group_parser.result
- @current_position += sub_group_parser.length
+ parse_sub_group_concat
  when '-'
- if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
- @charset << '-'
- @current_position += 1
- else
- @current_position += 1
- @charset.concat (@charset.last..parse_checking_backlash.first).to_a
- @current_position += 1
- end
+ parse_after_hyphen
  when '&'
- if regexp_string[@current_position + 1] == '&'
- @current_position += 2
- sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
- @charset &= sub_group_parser.result
- @current_position += (sub_group_parser.length - 1)
- else
- @charset << '&'
- @current_position += 1
- end
+ parse_after_ampersand
  else
  @charset.concat parse_checking_backlash
  @current_position += 1
@@ -116,6 +98,40 @@ def parse_after_backslash
  end
  end
 
+ def parse_sub_group_concat
+ @current_position += 1
+ sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
+ @charset.concat sub_group_parser.result
+ @current_position += sub_group_parser.length
+ end
+
+ def parse_after_ampersand
+ if regexp_string[@current_position + 1] == '&'
+ parse_sub_group_intersect
+ else
+ @charset << '&'
+ @current_position += 1
+ end
+ end
+
+ def parse_sub_group_intersect
+ @current_position += 2
+ sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
+ @charset &= sub_group_parser.result
+ @current_position += (sub_group_parser.length - 1)
+ end
+
+ def parse_after_hyphen
+ if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
+ @charset << '-'
+ @current_position += 1
+ else
+ @current_position += 1
+ @charset.concat (@charset.last..parse_checking_backlash.first).to_a
+ @current_position += 1
+ end
+ end
+
  def rest_of_string
  regexp_string[@current_position..-1]
  end
 
@@ -153,38 +153,45 @@ def result_by_method(method)
  end
 
  # A boolean "or" group.
- # It really is boolean: The implementation is to pass in 2 set of
- # (repeaters of) groups. The simplest example is: /a|b/
- # If you have more than one boolean "or" operator, then this is
- # constructed using multiple *boolean* OrGroups, e.g.
- # /a|b|c|d/ is treated like /((a|b)|c)|d/
+ # The implementation is to pass in 2 sets of (repeaters of) groups.
+ # The simplest example is: /a|b/
+ # If you have more than one boolean "or" operator, then this is initially
+ # parsed as an OrGroup containing another OrGroup. However, in order to avoid
+ # probability distribution issues in Regexp#random_example, this then gets
+ # simplified down to one OrGroup containing 3+ repeaters.
  class OrGroup
+ attr_reader :repeaters_list
+
  def initialize(left_repeaters, right_repeaters)
- @left_repeaters = left_repeaters
- @right_repeaters = right_repeaters
+ @repeaters_list = [left_repeaters, *merge_if_orgroup(right_repeaters)]
  end
 
  def result
  result_by_method(:map_results)
  end
 
  def random_result
- # TODO: This logic is flawed in terms of choosing a truly "random" example! E.g.
- # /a|b|c|d/.random_example will choose a letter with the following probabilities:
- # a = 50%, b = 25%, c = 12.5%, d = 12.5%
- # In order to fix this, I must either apply some weighted selection logic,
- # or change how the OrGroup examples are generated
- # - i.e. make this class work with >2 repeaters
  result_by_method(:map_random_result).sample(1)
  end
 
  private
 
  def result_by_method(method)
- left_result = RegexpExamples.public_send(method, @left_repeaters)
- right_result = RegexpExamples.public_send(method, @right_repeaters)
- left_result.concat(right_result).flatten.uniq.map do |result|
- GroupResult.new(result)
+ repeaters_list.map do |repeaters|
+ RegexpExamples.public_send(method, repeaters)
+ end
+ .inject(:concat)
+ .map do |result|
+ GroupResult.new(result)
+ end
+ .uniq
+ end
+
+ def merge_if_orgroup(repeaters)
+ if repeaters.size == 1 && repeaters.first.is_a?(OrGroup)
+ repeaters.first.repeaters_list
+ else
+ [repeaters]
  end
  end
  end
 
@@ -15,7 +15,7 @@ def parse
  repeaters = []
  until end_of_regexp
  group = parse_group(repeaters)
- return [OneTimeRepeater.new(group)] if group.is_a? OrGroup
+ return [group] if group.is_a? OrGroup
  @current_position += 1
  repeaters << parse_repeater(group)
  end
@@ -148,7 +148,7 @@ def parse_after_backslash_group
  ) # Using "\r\n" as one character is little bit hacky...
  when next_char == 'g' # Subexpression call
  fail IllegalSyntaxError,
- 'Subexpression calls (\\g) cannot be supported, as they are not regular'
+  'Subexpression calls (\\g) cannot be supported, as they are not regular'
  when next_char =~ /[bB]/ # Anchors
  raise_anchors_exception!
  when next_char =~ /[AG]/ # Start of string
@@ -159,6 +159,7 @@ def parse_after_backslash_group
  end
  when next_char =~ /[zZ]/ # End of string
  if @current_position == (regexp_string.length - 1)
+ # TODO: /\Z/ should be treated as /\n?/
  group = PlaceHolderGroup.new
  else
  raise_anchors_exception!
@@ -212,10 +213,10 @@ def parse_multi_group
  end
  when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
  fail IllegalSyntaxError,
- 'Lookaheads are not regular; cannot generate examples'
+  'Lookaheads are not regular; cannot generate examples'
  when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
  fail IllegalSyntaxError,
- 'Lookbehinds are not regular; cannot generate examples'
+  'Lookbehinds are not regular; cannot generate examples'
  else # e.g. /(?<name>namedgroup)/
  @current_position += (match[3].length + 3)
  group_id = match[3]
@@ -237,12 +238,14 @@ def remember_old_regexp_options
  end
 
  def regexp_options_toggle(on, off)
- @ignorecase = true if on.include? 'i'
- @ignorecase = false if off.include? 'i'
- @multiline = true if on.include? 'm'
- @multiline = false if off.include? 'm'
- @extended = true if on.include? 'x'
- @extended = false if off.include? 'x'
+ regexp_option_toggle(on, off, '@ignorecase', 'i')
+ regexp_option_toggle(on, off, '@multiline', 'm')
+ regexp_option_toggle(on, off, '@extended', 'x')
+ end
+
+ def regexp_option_toggle(on, off, var, char)
+ instance_variable_set(var, true) if on.include? char
+ instance_variable_set(var, false) if off.include? char
  end
 
  def parse_char_group
@@ -327,7 +330,7 @@ def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
 
  def raise_anchors_exception!
  fail IllegalSyntaxError,
- "Anchors ('#{next_char}') cannot be supported, as they are not regular"
+  "Anchors ('#{next_char}') cannot be supported, as they are not regular"
  end
 
  def parse_one_time_repeater(group)
 
@@ -1,3 +1,3 @@
 module RegexpExamples
- VERSION = '1.1.2'
+ VERSION = '1.1.3'
 end
@@ -4,8 +4,9 @@ def self.examples_exist_and_match(*regexps)
  it "examples for /#{regexp.source}/" do
  regexp_examples = regexp.examples(max_group_results: 99_999)
 
- expect(regexp_examples).not_to be_empty,
- "No examples were generated for regexp: /#{regexp.source}/"
+ expect(regexp_examples)
+ .not_to be_empty,
+ "No examples were generated for regexp: /#{regexp.source}/"
  regexp_examples.each do |example|
  expect(example).to match(/\A(?:#{regexp.source})\z/)
  end
@@ -205,8 +206,9 @@ def self.examples_are_empty(*regexps)
  ).each do |property|
  it "examples for /\p{#{property}}/" do
  regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
- expect(regexp_examples).not_to be_empty,
- "No examples were generated for regexp: /\p{#{property}}/"
+ expect(regexp_examples)
+ .not_to be_empty,
+ "No examples were generated for regexp: /\p{#{property}}/"
  # Just do one big check, for test system performance (~30% faster)
  # (Otherwise, we're doing up to 128 checks on 123 properties!!!)
  expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
@@ -301,6 +303,10 @@ def self.examples_are_empty(*regexps)
  it { expect(/(a|b){2}/.examples).to match_array %w(aa ab ba bb) }
  it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }
 
+ # Only display unique examples:
+ it { expect(/a|a|b|b/.examples).to match_array ['a', 'b'] }
+ it { expect(/[ccdd]/.examples).to match_array ['c', 'd'] }
+
  # a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
  it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
  end