Skip to content

Commit 8b734b1

Browse files
committed
v6.6.0
1 parent 6a43dc4 commit 8b734b1

File tree

2 files changed

+348
-0
lines changed

2 files changed

+348
-0
lines changed
Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
/*
2+
* Copyright (C) 2017 Square, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.aoju.bus.http.metric.suffix;
17+
18+
import org.aoju.bus.core.io.source.BufferSource;
19+
import org.aoju.bus.core.io.source.GzipSource;
20+
import org.aoju.bus.core.toolkit.IoKit;
21+
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.io.InterruptedIOException;
25+
import java.net.IDN;
26+
import java.util.concurrent.CountDownLatch;
27+
import java.util.concurrent.atomic.AtomicBoolean;
28+
29+
import static java.nio.charset.StandardCharsets.UTF_8;
30+
31+
/**
32+
* A database of public suffixes provided by
33+
* <a href="https://publicsuffix.org/">publicsuffix.org</a>.
34+
*/
35+
public final class Suffixes {
36+
37+
public static final String PUBLIC_SUFFIX_RESOURCE = "suffixes.gz";
38+
39+
private static final byte[] WILDCARD_LABEL = new byte[]{'*'};
40+
private static final String[] EMPTY_RULE = new String[0];
41+
private static final String[] PREVAILING_RULE = new String[]{"*"};
42+
43+
private static final byte EXCEPTION_MARKER = '!';
44+
45+
private static final Suffixes instance = new Suffixes();
46+
47+
/**
48+
* True after we've attempted to read the list for the first time.
49+
*/
50+
private final AtomicBoolean listRead = new AtomicBoolean(false);
51+
52+
/**
53+
* Used for concurrent threads reading the list for the first time.
54+
*/
55+
private final CountDownLatch readCompleteLatch = new CountDownLatch(1);
56+
57+
// The lists are held as a large array of UTF-8 bytes. This is to avoid allocating lots of strings
58+
// that will likely never be used. Each rule is separated by '\n'. Please see the
59+
// PublicSuffixListGenerator class for how these lists are generated.
60+
// Guarded by this.
61+
private byte[] publicSuffixListBytes;
62+
private byte[] publicSuffixExceptionListBytes;
63+
64+
public static Suffixes get() {
65+
return instance;
66+
}
67+
68+
private static String binarySearchBytes(byte[] bytesToSearch, byte[][] labels, int labelIndex) {
69+
int low = 0;
70+
int high = bytesToSearch.length;
71+
String match = null;
72+
while (low < high) {
73+
int mid = (low + high) / 2;
74+
// Search for a '\n' that marks the start of a value. Don't go back past the start of the
75+
// array.
76+
while (mid > -1 && bytesToSearch[mid] != '\n') {
77+
mid--;
78+
}
79+
mid++;
80+
81+
// Now look for the ending '\n'.
82+
int end = 1;
83+
while (bytesToSearch[mid + end] != '\n') {
84+
end++;
85+
}
86+
int publicSuffixLength = (mid + end) - mid;
87+
88+
// Compare the bytes. Note that the file stores UTF-8 encoded bytes, so we must compare the
89+
// unsigned bytes.
90+
int compareResult;
91+
int currentLabelIndex = labelIndex;
92+
int currentLabelByteIndex = 0;
93+
int publicSuffixByteIndex = 0;
94+
95+
boolean expectDot = false;
96+
while (true) {
97+
int byte0;
98+
if (expectDot) {
99+
byte0 = '.';
100+
expectDot = false;
101+
} else {
102+
byte0 = labels[currentLabelIndex][currentLabelByteIndex] & 0xff;
103+
}
104+
105+
int byte1 = bytesToSearch[mid + publicSuffixByteIndex] & 0xff;
106+
107+
compareResult = byte0 - byte1;
108+
if (compareResult != 0) break;
109+
110+
publicSuffixByteIndex++;
111+
currentLabelByteIndex++;
112+
if (publicSuffixByteIndex == publicSuffixLength) break;
113+
114+
if (labels[currentLabelIndex].length == currentLabelByteIndex) {
115+
// We've exhausted our current label. Either there are more labels to compare, in which
116+
// case we expect a dot as the next character. Otherwise, we've checked all our labels.
117+
if (currentLabelIndex == labels.length - 1) {
118+
break;
119+
} else {
120+
currentLabelIndex++;
121+
currentLabelByteIndex = -1;
122+
expectDot = true;
123+
}
124+
}
125+
}
126+
127+
if (compareResult < 0) {
128+
high = mid - 1;
129+
} else if (compareResult > 0) {
130+
low = mid + end + 1;
131+
} else {
132+
// We found a match, but are the lengths equal?
133+
int publicSuffixBytesLeft = publicSuffixLength - publicSuffixByteIndex;
134+
int labelBytesLeft = labels[currentLabelIndex].length - currentLabelByteIndex;
135+
for (int i = currentLabelIndex + 1; i < labels.length; i++) {
136+
labelBytesLeft += labels[i].length;
137+
}
138+
139+
if (labelBytesLeft < publicSuffixBytesLeft) {
140+
high = mid - 1;
141+
} else if (labelBytesLeft > publicSuffixBytesLeft) {
142+
low = mid + end + 1;
143+
} else {
144+
// Found a match.
145+
match = new String(bytesToSearch, mid, publicSuffixLength, UTF_8);
146+
break;
147+
}
148+
}
149+
}
150+
return match;
151+
}
152+
153+
/**
154+
* Returns the effective top-level domain plus one (eTLD+1) by referencing the public suffix list.
155+
* Returns null if the domain is a public suffix or a private address.
156+
*
157+
* <p>Here are some examples: <pre>{@code
158+
* assertEquals("google.com", getEffectiveTldPlusOne("google.com"));
159+
* assertEquals("google.com", getEffectiveTldPlusOne("www.google.com"));
160+
* assertNull(getEffectiveTldPlusOne("com"));
161+
* assertNull(getEffectiveTldPlusOne("localhost"));
162+
* assertNull(getEffectiveTldPlusOne("mymacbook"));
163+
* }</pre>
164+
*
165+
* @param domain A canonicalized domain. An International Domain Name (IDN) should be punycode
166+
* encoded.
167+
*/
168+
public String getEffectiveTldPlusOne(String domain) {
169+
if (domain == null) throw new NullPointerException("domain == null");
170+
171+
// We use UTF-8 in the list so we need to convert to Unicode.
172+
String unicodeDomain = IDN.toUnicode(domain);
173+
String[] domainLabels = unicodeDomain.split("\\.");
174+
String[] rule = findMatchingRule(domainLabels);
175+
if (domainLabels.length == rule.length && rule[0].charAt(0) != EXCEPTION_MARKER) {
176+
// The domain is a public suffix.
177+
return null;
178+
}
179+
180+
int firstLabelOffset;
181+
if (rule[0].charAt(0) == EXCEPTION_MARKER) {
182+
// Exception rules hold the effective TLD plus one.
183+
firstLabelOffset = domainLabels.length - rule.length;
184+
} else {
185+
// Otherwise the rule is for a public suffix, so we must take one more label.
186+
firstLabelOffset = domainLabels.length - (rule.length + 1);
187+
}
188+
189+
StringBuilder effectiveTldPlusOne = new StringBuilder();
190+
String[] punycodeLabels = domain.split("\\.");
191+
for (int i = firstLabelOffset; i < punycodeLabels.length; i++) {
192+
effectiveTldPlusOne.append(punycodeLabels[i]).append('.');
193+
}
194+
effectiveTldPlusOne.deleteCharAt(effectiveTldPlusOne.length() - 1);
195+
196+
return effectiveTldPlusOne.toString();
197+
}
198+
199+
private String[] findMatchingRule(String[] domainLabels) {
200+
if (!listRead.get() && listRead.compareAndSet(false, true)) {
201+
readTheListUninterruptibly();
202+
} else {
203+
try {
204+
readCompleteLatch.await();
205+
} catch (InterruptedException ignored) {
206+
Thread.currentThread().interrupt(); // Retain interrupted status.
207+
}
208+
}
209+
210+
synchronized (this) {
211+
if (publicSuffixListBytes == null) {
212+
throw new IllegalStateException("Unable to load " + PUBLIC_SUFFIX_RESOURCE + " resource "
213+
+ "from the classpath.");
214+
}
215+
}
216+
217+
// Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com].
218+
byte[][] domainLabelsUtf8Bytes = new byte[domainLabels.length][];
219+
for (int i = 0; i < domainLabels.length; i++) {
220+
domainLabelsUtf8Bytes[i] = domainLabels[i].getBytes(UTF_8);
221+
}
222+
223+
// Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com
224+
// will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins.
225+
String exactMatch = null;
226+
for (int i = 0; i < domainLabelsUtf8Bytes.length; i++) {
227+
String rule = binarySearchBytes(publicSuffixListBytes, domainLabelsUtf8Bytes, i);
228+
if (rule != null) {
229+
exactMatch = rule;
230+
break;
231+
}
232+
}
233+
234+
// In theory, wildcard rules are not restricted to having the wildcard in the leftmost position.
235+
// In practice, wildcards are always in the leftmost position. For now, this implementation
236+
// cheats and does not attempt every possible permutation. Instead, it only considers wildcards
237+
// in the leftmost position. We assert this fact when we generate the public suffix file. If
238+
// this assertion ever fails we'll need to refactor this implementation.
239+
String wildcardMatch = null;
240+
if (domainLabelsUtf8Bytes.length > 1) {
241+
byte[][] labelsWithWildcard = domainLabelsUtf8Bytes.clone();
242+
for (int labelIndex = 0; labelIndex < labelsWithWildcard.length - 1; labelIndex++) {
243+
labelsWithWildcard[labelIndex] = WILDCARD_LABEL;
244+
String rule = binarySearchBytes(publicSuffixListBytes, labelsWithWildcard, labelIndex);
245+
if (rule != null) {
246+
wildcardMatch = rule;
247+
break;
248+
}
249+
}
250+
}
251+
252+
// Exception rules only apply to wildcard rules, so only try it if we matched a wildcard.
253+
String exception = null;
254+
if (wildcardMatch != null) {
255+
for (int labelIndex = 0; labelIndex < domainLabelsUtf8Bytes.length - 1; labelIndex++) {
256+
String rule = binarySearchBytes(
257+
publicSuffixExceptionListBytes, domainLabelsUtf8Bytes, labelIndex);
258+
if (rule != null) {
259+
exception = rule;
260+
break;
261+
}
262+
}
263+
}
264+
265+
if (exception != null) {
266+
// Signal we've identified an exception rule.
267+
exception = "!" + exception;
268+
return exception.split("\\.");
269+
} else if (exactMatch == null && wildcardMatch == null) {
270+
return PREVAILING_RULE;
271+
}
272+
273+
String[] exactRuleLabels = exactMatch != null
274+
? exactMatch.split("\\.")
275+
: EMPTY_RULE;
276+
277+
String[] wildcardRuleLabels = wildcardMatch != null
278+
? wildcardMatch.split("\\.")
279+
: EMPTY_RULE;
280+
281+
return exactRuleLabels.length > wildcardRuleLabels.length
282+
? exactRuleLabels
283+
: wildcardRuleLabels;
284+
}
285+
286+
/**
287+
* Reads the public suffix list treating the operation as uninterruptible. We always want to read
288+
* the list otherwise we'll be left in a bad state. If the thread was interrupted prior to this
289+
* operation, it will be re-interrupted after the list is read.
290+
*/
291+
private void readTheListUninterruptibly() {
292+
boolean interrupted = false;
293+
try {
294+
while (true) {
295+
try {
296+
readTheList();
297+
return;
298+
} catch (InterruptedIOException e) {
299+
Thread.interrupted(); // Temporarily clear the interrupted state.
300+
interrupted = true;
301+
} catch (IOException e) {
302+
303+
return;
304+
}
305+
}
306+
} finally {
307+
if (interrupted) {
308+
Thread.currentThread().interrupt(); // Retain interrupted status.
309+
}
310+
}
311+
}
312+
313+
private void readTheList() throws IOException {
314+
byte[] publicSuffixListBytes;
315+
byte[] publicSuffixExceptionListBytes;
316+
317+
InputStream resource = Suffixes.class.getResourceAsStream(PUBLIC_SUFFIX_RESOURCE);
318+
if (resource == null) return;
319+
320+
try (BufferSource BufferSource = IoKit.buffer(new GzipSource(IoKit.source(resource)))) {
321+
int totalBytes = BufferSource.readInt();
322+
publicSuffixListBytes = new byte[totalBytes];
323+
BufferSource.readFully(publicSuffixListBytes);
324+
325+
int totalExceptionBytes = BufferSource.readInt();
326+
publicSuffixExceptionListBytes = new byte[totalExceptionBytes];
327+
BufferSource.readFully(publicSuffixExceptionListBytes);
328+
}
329+
330+
synchronized (this) {
331+
this.publicSuffixListBytes = publicSuffixListBytes;
332+
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes;
333+
}
334+
335+
readCompleteLatch.countDown();
336+
}
337+
338+
/**
339+
* Visible for testing.
340+
*/
341+
void setListBytes(byte[] publicSuffixListBytes, byte[] publicSuffixExceptionListBytes) {
342+
this.publicSuffixListBytes = publicSuffixListBytes;
343+
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes;
344+
listRead.set(true);
345+
readCompleteLatch.countDown();
346+
}
347+
348+
}
Binary file not shown.

0 commit comments

Comments
 (0)