diff --git a/lib/public_suffix.rb b/lib/public_suffix.rb
index 7b79dd3b..5dd11640 100644
--- a/lib/public_suffix.rb
+++ b/lib/public_suffix.rb
@@ -7,7 +7,7 @@
require_relative "public_suffix/domain"
require_relative "public_suffix/version"
require_relative "public_suffix/errors"
-require_relative "public_suffix/rule"
+require_relative "public_suffix/rules"
require_relative "public_suffix/list"
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
@@ -64,19 +64,16 @@ module PublicSuffix
# If domain is not a valid domain.
# @raise [PublicSuffix::DomainNotAllowed]
# If a rule for +domain+ is found, but the rule doesn't allow +domain+.
- def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
+ def self.parse(name, list: List.default, ignore_private: false)
what = normalize(name)
raise what if what.is_a?(DomainInvalid)
- rule = list.find(what, default: default_rule, ignore_private: ignore_private)
+ rule = list.find(what, ignore_private: ignore_private)
# rubocop:disable Style/IfUnlessModifier
if rule.nil?
raise DomainInvalid, "`#{what}` is not a valid domain"
end
- if rule.decompose(what).last.nil?
- raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
- end
# rubocop:enable Style/IfUnlessModifier
decompose(what, rule)
@@ -119,13 +116,8 @@ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore
# @param [String, #to_s] name The domain name or fully qualified domain name to validate.
# @param [Boolean] ignore_private
# @return [Boolean]
- def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
- what = normalize(name)
- return false if what.is_a?(DomainInvalid)
-
- rule = list.find(what, default: default_rule, ignore_private: ignore_private)
-
- !rule.nil? && !rule.decompose(what).last.nil?
+ def self.valid?(name, list: List.default, ignore_private: false)
+ !normalize(name).is_a?(DomainInvalid)
end
# Attempt to parse the name and returns the domain, if valid.
@@ -146,13 +138,13 @@ def self.domain(name, **options)
# private
def self.decompose(name, rule)
- left, right = rule.decompose(name)
+ rule_len = rule.split(DOT).length
+ parts = name.split(DOT)
- parts = left.split(DOT)
# If we have 0 parts left, there is just a tld and no domain or subdomain
# If we have 1 part left, there is just a tld, domain and not subdomain
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
- tld = right
+ tld = rule.empty? ? nil : parts.pop(rule_len).join(DOT)
sld = parts.empty? ? nil : parts.pop
trd = parts.empty? ? nil : parts.join(DOT)
diff --git a/lib/public_suffix/domain.rb b/lib/public_suffix/domain.rb
index 4c4edd44..88e3107f 100644
--- a/lib/public_suffix/domain.rb
+++ b/lib/public_suffix/domain.rb
@@ -133,7 +133,7 @@ def name
#
# @return [String]
def domain
- [@sld, @tld].join(DOT) if domain?
+ [@sld, @tld].compact.join(DOT) if domain?
end
# Returns a subdomain-like representation of this object
@@ -165,7 +165,7 @@ def domain
#
# @return [String]
def subdomain
- [@trd, @sld, @tld].join(DOT) if subdomain?
+ [@trd, @sld, @tld].compact.join(DOT) if subdomain?
end
# Checks whether self looks like a domain.
@@ -196,7 +196,7 @@ def subdomain
#
# @return [Boolean]
def domain?
- !(@tld.nil? || @sld.nil?)
+ !@sld.nil?
end
# Checks whether self looks like a subdomain.
@@ -227,7 +227,7 @@ def domain?
#
# @return [Boolean]
def subdomain?
- !(@tld.nil? || @sld.nil? || @trd.nil?)
+ !(@sld.nil? || @trd.nil?)
end
end
diff --git a/lib/public_suffix/list.rb b/lib/public_suffix/list.rb
index 04e53a98..3bd69522 100644
--- a/lib/public_suffix/list.rb
+++ b/lib/public_suffix/list.rb
@@ -69,6 +69,7 @@ def self.default=(value)
def self.parse(input, private_domains: true)
comment_token = "//".freeze
private_token = "===BEGIN PRIVATE DOMAINS===".freeze
+ space_re = /\p{Space}/
section = nil # 1 == ICANN, 2 == PRIVATE
new do |list|
@@ -90,7 +91,8 @@ def self.parse(input, private_domains: true)
next
else
- list.add(Rule.factory(line, private: section == 2))
+ rule = line.split(space_re).first
+ list.add(rule, private: section == 2)
end
end
@@ -103,41 +105,23 @@ def self.parse(input, private_domains: true)
# @yield [self] Yields on self.
# @yieldparam [PublicSuffix::List] self The newly created instance.
def initialize
- @rules = {}
+ @rules = Rules.new
+ add('*', private: false)
yield(self) if block_given?
end
-
- # Checks whether two lists are equal.
- #
- # List one is equal to two, if two is an instance of
- # {PublicSuffix::List} and each +PublicSuffix::Rule::*+
- # in list one is available in list two, in the same order.
- #
- # @param other [PublicSuffix::List] the List to compare
- # @return [Boolean]
- def ==(other)
- return false unless other.is_a?(List)
- equal?(other) || @rules == other.rules
- end
- alias eql? ==
-
- # Iterates each rule in the list.
- def each(&block)
- Enumerator.new do |y|
- @rules.each do |key, node|
- y << entry_to_rule(node, key)
- end
- end.each(&block)
- end
-
-
# Adds the given object to the list and optionally refreshes the rule index.
#
# @param rule [PublicSuffix::Rule::*] the rule to add to the list
# @return [self]
- def add(rule)
- @rules[rule.value] = rule_to_entry(rule)
+ def add(rule, private: false)
+ exception = false
+ if rule[0] == BANG
+ exception = true
+ rule = rule[1..-1]
+ end
+ lbls = rule.split(DOT).reverse
+ @rules.add(lbls, exception, private)
self
end
alias << add
@@ -160,7 +144,7 @@ def empty?
#
# @return [self]
def clear
- @rules.clear
+ @rules = Rules.new
self
end
@@ -169,77 +153,19 @@ def clear
# @param name [#to_s] the hostname
# @param default [PublicSuffix::Rule::*] the default rule to return in case no rule matches
# @return [PublicSuffix::Rule::*]
- def find(name, default: default_rule, **options)
- rule = select(name, **options).inject do |l, r|
- return r if r.class == Rule::Exception
- l.length > r.length ? l : r
- end
- rule || default
+ def find(name, ignore_private: false)
+ lbls = name.split(DOT).reverse
+ r = @rules.get_regdom(lbls, !ignore_private)
+ r.reverse[1..-1].join(DOT)
end
- # Selects all the rules matching given hostame.
- #
- # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as
- # private domain. Note that the rules will still be part of the loop.
- # If you frequently need to access lists ignoring the private domains,
- # you should create a list that doesn't include these domains setting the
- # `private_domains: false` option when calling {.parse}.
- #
- # Note that this method is currently private, as you should not rely on it. Instead,
- # the public interface is {#find}. The current internal algorithm allows to return all
- # matching rules, but different data structures may not be able to do it, and instead would
- # return only the match. For this reason, you should rely on {#find}.
- #
- # @param name [#to_s] the hostname
- # @param ignore_private [Boolean]
- # @return [Array]
- def select(name, ignore_private: false)
- name = name.to_s
-
- parts = name.split(DOT).reverse!
- index = 0
- query = parts[index]
- rules = []
-
- loop do
- match = @rules[query]
- if !match.nil? && (ignore_private == false || match.private == false)
- rules << entry_to_rule(match, query)
- end
-
- index += 1
- break if index >= parts.size
- query = parts[index] + DOT + query
- end
-
- rules
- end
- private :select
-
# Gets the default rule.
#
# @see PublicSuffix::Rule.default_rule
#
# @return [PublicSuffix::Rule::*]
def default_rule
- PublicSuffix::Rule.default
- end
-
-
- protected
-
- attr_reader :rules
-
-
- private
-
- def entry_to_rule(entry, value)
- entry.type.new(value: value, length: entry.length, private: entry.private)
- end
-
- def rule_to_entry(rule)
- Rule::Entry.new(rule.class, rule.length, rule.private)
+ '*'
end
-
end
end
diff --git a/lib/public_suffix/rules.rb b/lib/public_suffix/rules.rb
new file mode 100644
index 00000000..b8ae417a
--- /dev/null
+++ b/lib/public_suffix/rules.rb
@@ -0,0 +1,75 @@
+# = Public Suffix
+#
+# Domain name parser based on the Public Suffix List.
+#
+# Copyright (c) 2009-2017 Simone Carletti
+
+module PublicSuffix
+
+ # A node in a tree of Public Suffix List rules, keyed by label.
+ #
+ # Each node keeps its child nodes in a Hash indexed by label (+@children+)
+ # and records whether a rule ends at this node (+@terminus+), whether that
+ # rule is an exception rule (+@exception+) and whether it is private (+@priv+).
+ #
+ # Rules and names are handled as Arrays of labels, consumed from the front,
+ # one label per tree level. PublicSuffix::List passes them reversed, so the
+ # last label of a name (the TLD) is looked up first.
+ #
+ # The label "*" is treated as a wildcard child by #get_regdom.
+ class Rules
+ # Creates an empty node: no children and no rule ending here.
+ def initialize
+ @children = {}
+ @terminus = false
+ @priv = false
+ @exception = false
+ end
+
+ # @return [Boolean] true when the node has no children and no rule ends at it
+ def empty?
+ @children.empty? && !@terminus
+ end
+
+ # Counts the rules ending at this node or at any node below it.
+ #
+ # @return [Integer]
+ def size
+ sz = @terminus ? 1 : 0
+ @children.each{|k,v|sz += v.size}
+ sz
+ end
+
+ # Stores a rule below this node, creating child nodes as needed.
+ #
+ # @param x [Array] the remaining labels of the rule. The array is consumed
+ #   with shift, so the caller's array is empty once the rule is stored.
+ # @param excpt [Boolean] whether the rule is an exception rule
+ # @param priv [Boolean] whether the rule is private
+ # @return [nil]
+ # @raise [RuntimeError] "Duplicate rule" if a rule already ends at the target node
+ def add(x, excpt, priv)
+ lbl = x.shift
+ # No label left: the rule ends at this node.
+ if lbl.nil?
+ raise 'Duplicate rule' if @terminus
+ @terminus = true
+ @priv = priv
+ @exception = excpt
+ return
+ end
+ @children[lbl] ||= Rules.new
+ @children[lbl].add(x, excpt, priv)
+ end
+
+ # Looks up a name, given as an Array of labels, in the tree.
+ #
+ # @param lbls [Array] the remaining labels of the name (left untouched; a copy is consumed)
+ # @param priv [Boolean] when false, rules flagged as private are ignored
+ # @param matched_lbls [Array] the labels already consumed on the way to this node
+ # @return [Array, nil] the labels of the matching rule followed by the next
+ #   label of the name (presumably the registrable domain); for an exception
+ #   rule, only the labels matched up to the exception node; nil when no rule applies
+ # @raise [DomainNotAllowed] when the name ends exactly on a non-exception rule
+ def get_regdom(lbls, priv = true, matched_lbls = [])
+ # Avoid modifying our input by copying it first
+ lbls = lbls.dup
+ lbl = lbls.shift
+ # All labels consumed: the name ends at this node.
+ if lbl.nil?
+ # The rule ending here only counts if it is not private, or private rules are allowed.
+ if @terminus && (!@priv || priv)
+ if @exception
+ return matched_lbls
+ end
+ raise DomainNotAllowed, "#{matched_lbls.reverse.join(".")} is not allowed according to Registry policy"
+ end
+ return nil
+ end
+ # Try the child named exactly like the next label first ...
+ r = @children[lbl].get_regdom(lbls, priv, matched_lbls + [lbl]) if @children.key?(lbl)
+ return r if !r.nil?
+ # ... then the wildcard child, if any.
+ r = @children['*'].get_regdom(lbls, priv, matched_lbls + [lbl]) if @children.key?('*')
+ return r if !r.nil?
+ # Nothing deeper matched: fall back to the rule ending at this node, if usable.
+ if @terminus && (!@priv || priv)
+ # An exception rule yields only the labels matched so far, without the next label.
+ return matched_lbls if @exception
+ return matched_lbls + [lbl]
+ end
+ nil
+ end
+ end
+end