Skip to content

Commit 94d7f48

Browse files
committed
wip
1 parent 3340370 commit 94d7f48

File tree

7 files changed

+62
-14
lines changed

7 files changed

+62
-14
lines changed

lib/lrama/grammar/reference.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@ module Lrama
22
class Grammar
33
# type: :dollar or :at
44
# name: String (e.g. $$, $foo, $expr.right)
5-
# index: Integer (e.g. $1)
5+
# number: Integer (e.g. $1)
6+
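# index: Integer (1-based position in rhs, counting tokens that cannot be referenced, e.g. parser-state push/pop/set tokens)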
67
# ex_tag: "$<tag>1" (Optional)
7-
class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
8+
class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
89
def value
9-
name || index
10+
name || number
1011
end
1112
end
1213
end

lib/lrama/grammar/rule_builder.rb

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,23 +164,33 @@ def numberize_references
164164
next unless token.is_a?(Lrama::Lexer::Token::UserCode)
165165

166166
token.references.each do |ref|
167+
# Derive number reference index from named reference
167168
ref_name = ref.name
168169
if ref_name && ref_name != '$'
169170
if lhs.referred_by?(ref_name)
170171
ref.name = '$'
171172
else
172-
candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
173+
candidates = referable_tokens.each_with_index.select {|token, i| token.referred_by?(ref_name) }
173174

174175
raise "Referring symbol `#{ref_name}` is duplicated. #{token}" if candidates.size >= 2
175176
raise "Referring symbol `#{ref_name}` is not found. #{token}" unless referring_symbol = candidates.first
176177

177-
ref.index = referring_symbol[1] + 1
178+
ref.number = referring_symbol[1] + 1
179+
end
180+
end
181+
182+
if ref.number
183+
# Remapping number reference index to include non referable tokens
184+
# TODO: Is it better to separate "number" of reference from actual "index" (Grammar::Reference)?
185+
ref.index = number_to_index[ref.number]
186+
187+
if !ref.index
188+
raise "Can not refer to not exist component. $#{ref.number}"
178189
end
179190
end
180191

181192
# TODO: Need to check index of @ too?
182193
next if ref.type == :at
183-
184194
if ref.index
185195
# TODO: Prohibit $0 even so Bison allows it?
186196
# See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
@@ -191,6 +201,39 @@ def numberize_references
191201
end
192202
end
193203

204+
# Tells whether +token+ may be the target of a reference.
#
# Parser-state tokens (pop, push and set) are not referable and yield
# false; any other token yields true.
def referable_token?(token)
  case token
  when Lrama::Lexer::Token::ParserStatePop,
       Lrama::Lexer::Token::ParserStatePush,
       Lrama::Lexer::Token::ParserStateSet
    false
  else
    true
  end
end
216+
217+
# Returns the tokens of +rhs+ for which referable_token? is true,
# keeping their original order.
def referable_tokens
  rhs.select { |tok| referable_token?(tok) }
end
222+
223+
# Memoized table mapping a reference number to a 1-based position in +rhs+.
#
# Entry 0 is 0. Entry n (n >= 1) is the 1-based position within +rhs+ of
# the n-th token for which referable_token? is true, so a lookup with a
# number beyond the count of referable tokens returns nil.
def number_to_index
  @number_to_index ||= begin
    table = [0]
    rhs.each_with_index do |tok, pos|
      # Positions are 1-based, hence pos + 1.
      table << pos + 1 if referable_token?(tok)
    end
    table
  end
end
236+
194237
def flush_user_code
195238
if c = @user_code
196239
@rhs << c

lib/lrama/lexer/token/user_code.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def scan_reference(scanner)
3838
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
3939
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
4040
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41-
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
41+
return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
4242
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
4343
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
4444
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
@@ -51,7 +51,7 @@ def scan_reference(scanner)
5151
when scanner.scan(/@\$/) # @$
5252
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
5353
when scanner.scan(/@(\d+)/) # @1
54-
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
54+
return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
5555
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
5656
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
5757
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)

sig/lrama/grammar/reference.rbs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ module Lrama
33
class Reference
44
attr_accessor type: ::Symbol
55
attr_accessor name: String
6+
attr_accessor number: Integer
67
attr_accessor index: Integer
78
attr_accessor ex_tag: Lexer::Token?
89
attr_accessor first_column: Integer
910
attr_accessor last_column: Integer
1011
attr_accessor position_in_rhs: Integer?
1112

1213
def initialize: (
13-
type: ::Symbol, ?name: String, ?index: Integer, ?ex_tag: Lexer::Token?,
14+
type: ::Symbol, ?name: String, ?number: Integer, ?index: Integer, ?ex_tag: Lexer::Token?,
1415
first_column: Integer, last_column: Integer,
1516
?position_in_rhs: Integer?
1617
) -> void

sig/lrama/grammar/rule_builder.rbs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ module Lrama
3737
def process_rhs: (Grammar::ParameterizingRuleResolver parameterizing_resolver) -> void
3838
def process_parser_state_token: (Lexer::Token, String, String, Integer, Grammar::ParameterizingRuleResolver) -> void
3939
def numberize_references: () -> void
40+
def referable_token?: (Lexer::Token) -> bool
41+
def referable_tokens: () -> Array[Lexer::Token]
42+
def number_to_index: () -> Array[Integer]
4043
def flush_user_code: () -> void
4144
end
4245
end

spec/fixtures/integration/parser_state.y

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,28 +86,28 @@ primary : NUM { printf("NUM => %d\n", $1); }
8686
%parser-state-push(in_class, in_class)
8787
cname
8888
{
89-
printf("1. cname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
89+
printf("1. cname => %s. in_def: %s, in_class: %s.\n", $cname, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
9090
}
9191
compstmt
9292
keyword_end
9393
%parser-state-pop(in_def)
9494
%parser-state-pop(in_class)
9595
{
96-
printf("2. cname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
96+
printf("2. cname => %s. in_def: %s, in_class: %s.\n", $2, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
9797
}
9898
| keyword_def
9999
%parser-state-push(in_def, in_def)
100100
%parser-state-push(in_class, not_in_class)
101101
fname
102102
{
103-
printf("1. fname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
103+
printf("1. fname => %s. in_def: %s, in_class: %s.\n", $2, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
104104
}
105105
compstmt
106106
keyword_end
107107
%parser-state-pop(in_def)
108108
%parser-state-pop(in_class)
109109
{
110-
printf("2. fname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
110+
printf("2. fname => %s. in_def: %s, in_class: %s.\n", $fname, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME);
111111
}
112112
;
113113

spec/lrama/grammar/rule_builder_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@
229229
rule_builder.user_code = token_5
230230
rule_builder.complete_input
231231

232-
expect { rule_builder.send(:preprocess_references) }.to raise_error(/Can not refer following component\. 10 >= 4\./)
232+
expect { rule_builder.send(:preprocess_references) }.to raise_error(/Can not refer to not exist component\. \$10/)
233233
end
234234
end
235235

0 commit comments

Comments
 (0)