diff --git a/lib/public_suffix.rb b/lib/public_suffix.rb index 7b79dd3b..5dd11640 100644 --- a/lib/public_suffix.rb +++ b/lib/public_suffix.rb @@ -7,7 +7,7 @@ require_relative "public_suffix/domain" require_relative "public_suffix/version" require_relative "public_suffix/errors" -require_relative "public_suffix/rule" +require_relative "public_suffix/rules" require_relative "public_suffix/list" # PublicSuffix is a Ruby domain name parser based on the Public Suffix List. @@ -64,19 +64,16 @@ module PublicSuffix # If domain is not a valid domain. # @raise [PublicSuffix::DomainNotAllowed] # If a rule for +domain+ is found, but the rule doesn't allow +domain+. - def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false) + def self.parse(name, list: List.default, ignore_private: false) what = normalize(name) raise what if what.is_a?(DomainInvalid) - rule = list.find(what, default: default_rule, ignore_private: ignore_private) + rule = list.find(what, ignore_private: ignore_private) # rubocop:disable Style/IfUnlessModifier if rule.nil? raise DomainInvalid, "`#{what}` is not a valid domain" end - if rule.decompose(what).last.nil? - raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy" - end # rubocop:enable Style/IfUnlessModifier decompose(what, rule) @@ -119,13 +116,8 @@ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore # @param [String, #to_s] name The domain name or fully qualified domain name to validate. # @param [Boolean] ignore_private # @return [Boolean] - def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false) - what = normalize(name) - return false if what.is_a?(DomainInvalid) - - rule = list.find(what, default: default_rule, ignore_private: ignore_private) - - !rule.nil? && !rule.decompose(what).last.nil? 
+ def self.valid?(name, list: List.default, ignore_private: false) + !normalize(name).is_a?(DomainInvalid) end # Attempt to parse the name and returns the domain, if valid. @@ -146,13 +138,13 @@ def self.domain(name, **options) # private def self.decompose(name, rule) - left, right = rule.decompose(name) + rule_len = rule.split(DOT).length + parts = name.split(DOT) - parts = left.split(DOT) # If we have 0 parts left, there is just a tld and no domain or subdomain # If we have 1 part left, there is just a tld, domain and not subdomain # If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain - tld = right + tld = rule.empty? ? nil : parts.pop(rule_len).join(DOT) sld = parts.empty? ? nil : parts.pop trd = parts.empty? ? nil : parts.join(DOT) diff --git a/lib/public_suffix/domain.rb b/lib/public_suffix/domain.rb index 4c4edd44..88e3107f 100644 --- a/lib/public_suffix/domain.rb +++ b/lib/public_suffix/domain.rb @@ -133,7 +133,7 @@ def name # # @return [String] def domain - [@sld, @tld].join(DOT) if domain? + [@sld, @tld].compact.join(DOT) if domain? end # Returns a subdomain-like representation of this object @@ -165,7 +165,7 @@ def domain # # @return [String] def subdomain - [@trd, @sld, @tld].join(DOT) if subdomain? + [@trd, @sld, @tld].compact.join(DOT) if subdomain? end # Checks whether self looks like a domain. @@ -196,7 +196,7 @@ def subdomain # # @return [Boolean] def domain? - !(@tld.nil? || @sld.nil?) + !@sld.nil? end # Checks whether self looks like a subdomain. @@ -227,7 +227,7 @@ def domain? # # @return [Boolean] def subdomain? - !(@tld.nil? || @sld.nil? || @trd.nil?) + !(@sld.nil? || @trd.nil?) 
end end diff --git a/lib/public_suffix/list.rb b/lib/public_suffix/list.rb index 04e53a98..3bd69522 100644 --- a/lib/public_suffix/list.rb +++ b/lib/public_suffix/list.rb @@ -69,6 +69,7 @@ def self.default=(value) def self.parse(input, private_domains: true) comment_token = "//".freeze private_token = "===BEGIN PRIVATE DOMAINS===".freeze + space_re = /\p{Space}/ section = nil # 1 == ICANN, 2 == PRIVATE new do |list| @@ -90,7 +91,8 @@ def self.parse(input, private_domains: true) next else - list.add(Rule.factory(line, private: section == 2)) + rule = line.split(space_re).first + list.add(rule, private: section == 2) end end @@ -103,41 +105,23 @@ def self.parse(input, private_domains: true) # @yield [self] Yields on self. # @yieldparam [PublicSuffix::List] self The newly created instance. def initialize - @rules = {} + @rules = Rules.new + add('*', private: false) yield(self) if block_given? end - - # Checks whether two lists are equal. - # - # List one is equal to two, if two is an instance of - # {PublicSuffix::List} and each +PublicSuffix::Rule::*+ - # in list one is available in list two, in the same order. - # - # @param other [PublicSuffix::List] the List to compare - # @return [Boolean] - def ==(other) - return false unless other.is_a?(List) - equal?(other) || @rules == other.rules - end - alias eql? == - - # Iterates each rule in the list. - def each(&block) - Enumerator.new do |y| - @rules.each do |key, node| - y << entry_to_rule(node, key) - end - end.each(&block) - end - - # Adds the given object to the list and optionally refreshes the rule index. # # @param rule [PublicSuffix::Rule::*] the rule to add to the list # @return [self] - def add(rule) - @rules[rule.value] = rule_to_entry(rule) + def add(rule, private: false) + exception = false + if rule[0] == BANG + exception = true + rule = rule[1..-1] + end + lbls = rule.split(DOT).reverse + @rules.add(lbls, exception, private) self end alias << add @@ -160,7 +144,7 @@ def empty? 
# Removes every rule from the list by replacing the rule tree with a new,
# empty +Rules+ instance.
#
# @return [self]
def clear
  @rules = Rules.new
  self
end

# Finds the public suffix that applies to the given hostname.
#
# The hostname is split on +DOT+ and its labels are handed to the rule tree
# in reverse (right-to-left) order. The tree answers with the labels of the
# registrable domain; dropping the left-most of those labels leaves the
# matching suffix, which is returned joined with +DOT+.
#
# @param name [#to_s] the hostname
# @param ignore_private [Boolean] when true, the rule tree is asked not to
#   use rules flagged as private
# @return [String, nil] the matching suffix (e.g. "com"), or nil when the
#   rule tree reports no match for +name+
def find(name, ignore_private: false)
  labels = name.to_s.split(DOT).reverse
  regdom = @rules.get_regdom(labels, !ignore_private)
  # The rule tree signals "no match" with nil (for instance when the list
  # holds no catch-all rule). Pass that on instead of calling #reverse on it,
  # so callers that test the result with nil? keep working.
  return nil if regdom.nil?

  regdom.reverse[1..-1].join(DOT)
end

# Gets the default rule.
#
# @return [String] the catch-all wildcard rule, "*"
def default_rule
  '*'
end

# File: lib/public_suffix/rules.rb
#
# = Public Suffix
#
# Domain name parser based on the Public Suffix List.
#
# Copyright (c) 2009-2017 Simone Carletti

module PublicSuffix

  # Rules is a tree of Public Suffix List rules keyed by domain label.
  #
  # Each node holds its child nodes in a Hash indexed by label. A node whose
  # +@terminus+ flag is set marks the end of a stored rule and remembers
  # whether that rule is private (+@priv+) and whether it is an exception
  # rule (+@exception+). The label "*" is treated as a wildcard child during
  # lookup.
  #
  #   tree = PublicSuffix::Rules.new
  #   tree.add(%w[uk co], false, false)
  #   tree.get_regdom(%w[uk co example www])
  #   # => ["uk", "co", "example"]
  #
  class Rules
    def initialize
      @children = {}
      @terminus = false
      @priv = false
      @exception = false
    end

    # @return [Boolean] true when no rule ends at this node and it has no children
    def empty?
      @children.empty? && !@terminus
    end

    # @return [Integer] the number of rules stored at or below this node
    def size
      own = @terminus ? 1 : 0
      @children.each_value.reduce(own) { |total, node| total + node.size }
    end

    # Stores a rule in the tree.
    #
    # The labels are consumed from first to last, descending one level per
    # label and creating missing nodes on the way. The array passed in is
    # left untouched.
    #
    # @param x [Array<String>] the labels of the rule, in lookup order
    #   (the first label selects the child of this node)
    # @param excpt [Boolean] whether the rule is an exception rule
    # @param priv [Boolean] whether the rule is flagged as private
    # @return [nil]
    # @raise [RuntimeError] if a rule with the same labels was already added
    def add(x, excpt, priv)
      # Destructure instead of shifting so the caller's array is not emptied.
      lbl, *rest = x
      if lbl.nil?
        raise 'Duplicate rule' if @terminus
        @terminus = true
        @priv = priv
        @exception = excpt
        return
      end
      (@children[lbl] ||= Rules.new).add(rest, excpt, priv)
    end

    # Looks up the labels of the registrable domain for a hostname.
    #
    # The exact-label child is tried first, then the wildcard child; if
    # neither yields a result, the rule ending at this node (if any) is used.
    #
    # @param lbls [Array<String>] the hostname labels still to be matched, in
    #   lookup order; not modified
    # @param priv [Boolean] when false, rules flagged as private are not used
    # @param matched_lbls [Array<String>] the labels consumed so far
    #   (used by the recursion)
    # @return [Array<String>, nil] the consumed labels that make up the
    #   registrable domain, or nil when no rule applies
    # @raise [DomainNotAllowed] if the labels run out exactly on a usable,
    #   non-exception rule
    def get_regdom(lbls, priv = true, matched_lbls = [])
      lbl, *rest = lbls
      usable = @terminus && (!@priv || priv)

      if lbl.nil?
        return nil unless usable
        return matched_lbls if @exception
        raise DomainNotAllowed, "#{matched_lbls.reverse.join(".")} is not allowed according to Registry policy"
      end

      path = matched_lbls + [lbl]
      [lbl, '*'].each do |key|
        child = @children[key]
        next if child.nil?

        found = child.get_regdom(rest, priv, path)
        return found unless found.nil?
      end

      return nil unless usable

      # An exception rule names the registrable domain itself, so the current
      # label is not part of the answer.
      @exception ? matched_lbls : path
    end
  end
end