Skip to content

Commit dd2c6d6

Browse files
committed
Merge branch 'release/v3.0.0'
2 parents cec4f44 + a14b329 commit dd2c6d6

31 files changed

+944
-226
lines changed

Changes.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changes
22

3+
## 3.0.0
4+
5+
- Pass twitter-text conformance 3.0.0
6+
- Change default configuration to v3 (emojiParsingEnabled=true)
7+
- Add t.co with query string support
8+
- Add Directional Characters support
9+
310
## 2.0.2
411

512
- Pass twitter-text conformance 2.0.5
@@ -17,4 +24,4 @@
1724
- Twitter\Text classes no longer extend the Regex class.
1825
- Deprecated `Validator::isValidTweetText()`, `Validator::getTweetLength()`.
1926
- `Extractor` constructor no longer accepts `$tweet`
20-
- `Validator` constractor no longer accepts `$tweet` and `$config`. `Validator` constractor only accepts `Configuration` incetance.
27+
- `Validator` constructor no longer accepts `$tweet` and `$config`. `Validator` constructor only accepts a `Configuration` instance.

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,14 @@ composer require nojimage/twitter-text-php
6969

7070
## Length validation ##
7171

72-
twitter-text 2.0 introduces configuration files that define how Tweets are parsed for length. This allows for backwards compatibility and flexibility going forward. Old-style traditional 140-character parsing is defined by the v1.json configuration file, whereas v2.json is updated for "weighted" Tweets where ranges of Unicode code points can have independent weights aside from the default weight. The sum of all code points, each weighted appropriately, should not exceed the max weighted length.
72+
twitter-text 3.0 updates the config file with a new `emojiParsingEnabled` option.
73+
When true, twitter-text-php will parse and discount emoji listed in [Unicode Emoji 11.0](http://www.unicode.org/emoji/charts-11.0) (NOTE: the original [twitter-text](https://github.com/twitter/twitter-text) supports the emoji of the [twemoji library](https://github.com/twitter/twemoji) instead).
74+
The length of these emoji will be the default weight (200 or two characters) even if they contain multiple code points combined by zero-width joiners.
75+
This means that emoji with skin tone and gender modifiers no longer count as more characters than those without such modifiers.
76+
77+
twitter-text 2.0 introduced configuration files that define how Tweets are parsed for length. This allows for backwards compatibility and flexibility going forward.
78+
Old-style traditional 140-character parsing is defined by the v1.json configuration file, whereas v2.json is updated for "weighted" Tweets where ranges of Unicode code points can have independent weights aside from the default weight.
79+
The sum of all code points, each weighted appropriately, should not exceed the max weighted length.
7380

7481
Some old methods from twitter-text-php 1.0 have been marked deprecated, such as `Twitter\Text\Validator::isValidTweetText()` and `Twitter\Text\Validator::getTweetLength()`. The new API is based on the following method: `Twitter\Text\Parser::parseTweet()`.
7582

build/build-emoji-regex.php

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php

/**
 * Build script: regenerates the VALID_EMOJI_PATTERN constant of
 * lib/Twitter/Text/EmojiRegex.php from the Unicode emoji-test.txt data file.
 *
 * Steps:
 *  1. download emoji-test.txt and keep only the data lines
 *     ("<code point> [<code point> ...] ; <status> # ...");
 *  2. turn every emoji sequence into a chain of \x{....} escapes;
 *  3. join all sequences, longest first, into one alternation;
 *  4. write the resulting pattern back into the class file.
 *
 * The script aborts with a non-zero exit status, leaving the class file
 * untouched, when any step fails.
 */

require dirname(__DIR__) . '/vendor/autoload.php';

$classFile = dirname(__DIR__) . '/lib/Twitter/Text/EmojiRegex.php';
$emojiDataUrl = 'https://www.unicode.org/Public/emoji/11.0/emoji-test.txt';

// -- load emoji data
$emojiData = file($emojiDataUrl);
if ($emojiData === false) {
    fwrite(STDERR, "Failed to read emoji data from {$emojiDataUrl}\n");
    exit(1);
}

// keep only the lines that start with a code point and contain a "; " field separator
$emojiLines = array_filter($emojiData, function ($line) {
    return preg_match('/^[0-9A-F]{2}[0-9A-F]{2,}.*; /', $line);
});

// each data line becomes a list of upper-case hex code points
$emojiCodes = array_map(function ($line) {
    $value = preg_replace('/^([0-9A-F]{4,}(?: [0-9A-F]{4,})*)\s*;.*$/u', '$1', trim($line));

    return explode(' ', $value);
}, $emojiLines);

// Sort by number of code points, longest first, so that the alternation tries
// a full sequence before any shorter sequence it starts with.
// Ties are broken by the code points themselves: usort() is not stable on
// older PHP versions, and without a tie-break the generated file could change
// from run to run.
usort($emojiCodes, function ($a, $b) {
    $aLength = count($a);
    $bLength = count($b);
    if ($aLength !== $bLength) {
        return ($bLength < $aLength) ? -1 : 1;
    }

    return strcmp(implode(' ', $a), implode(' ', $b));
});

// one "\x{..}\x{..}" chain per emoji sequence
$codeRegexList = array_map(function ($codes) {
    return implode('', array_map(function ($c) {
        return sprintf('\x{%s}', strtolower($c));
    }, $codes));
}, $emojiCodes);

$regex = implode('|', $codeRegexList);
if ($regex === '') {
    // an empty alternation would produce a pattern that matches everywhere
    fwrite(STDERR, "No emoji sequences found in {$emojiDataUrl}\n");
    exit(1);
}

echo "\n";

// -- modify class file
$classContent = file_get_contents($classFile);
if ($classContent === false) {
    fwrite(STDERR, "Failed to read {$classFile}\n");
    exit(1);
}

// A callback is used instead of a replacement string so that the generated
// regex is inserted verbatim (no "$n" / "\n" reference interpretation).
$replacedClassContent = preg_replace_callback(
    '/(\s+const VALID_EMOJI_PATTERN = \')(?:.*)(\';)/',
    function ($matches) use ($regex) {
        return $matches[1] . '/' . $regex . '/u' . $matches[2];
    },
    $classContent,
    -1,
    $replacedCount
);
if ($replacedClassContent === null || $replacedCount === 0) {
    fwrite(STDERR, "VALID_EMOJI_PATTERN constant not found in {$classFile}\n");
    exit(1);
}

echo $replacedClassContent;

if (file_put_contents($classFile, $replacedClassContent) === false) {
    fwrite(STDERR, "Failed to write {$classFile}\n");
    exit(1);
}

build/build-tld-lists.php

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
<?php

/**
 * Build script: generates lib/Twitter/Text/TldLists.php from the TLD list
 * (conformance/tld_lib.yml) shipped with the twitter/twitter-text package.
 *
 * The class source is rendered through output buffering: everything between
 * ob_start() and ob_get_clean() below is the template of the generated file.
 * Control tags are kept at column 0 so that they add no stray indentation to
 * the generated source.
 */

$rootDir = dirname(__DIR__);
require $rootDir . '/vendor/autoload.php';

use Symfony\Component\Yaml\Yaml;

$classFile = $rootDir . '/lib/Twitter/Text/TldLists.php';
$tldFile = $rootDir . '/vendor/twitter/twitter-text/conformance/tld_lib.yml';

// Read the file ourselves: passing a file path to Yaml::parse() is deprecated
// in Symfony Yaml 2.x and no longer supported from 3.0, whereas passing the
// YAML content works with every version.
$tldYaml = file_get_contents($tldFile);
if ($tldYaml === false) {
    fwrite(STDERR, "Failed to read {$tldFile}\n");
    exit(1);
}

$tlds = Yaml::parse($tldYaml);
if (!is_array($tlds) || empty($tlds['generic']) || empty($tlds['country'])) {
    // refuse to generate a class with empty TLD lists
    fwrite(STDERR, "No 'generic' / 'country' TLD lists found in {$tldFile}\n");
    exit(1);
}

ob_start();
echo "<?php\n";
?>

/**
 * @author     Takashi Nojima
 * @copyright  Copyright <?= date('Y') ?>, Takashi Nojima
 * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
 * @package    Twitter.Text
 */

namespace Twitter\Text;

/**
 * TLD Lists
 */
final class TldLists
{
    /**
     * gTLDs
     *
     * @var array
     */
    private static $gTLDs = array(
<?php foreach ($tlds['generic'] as $tld) : ?>
        '<?= addcslashes($tld, "'\\") ?>',
<?php endforeach; ?>
    );

    /**
     * ccTLDs
     *
     * @var array
     */
    private static $ccTLDs = array(
<?php foreach ($tlds['country'] as $tld) : ?>
        '<?= addcslashes($tld, "'\\") ?>',
<?php endforeach; ?>
    );

    /**
     * get valid gTLD regexp
     *
     * @staticvar string $regex
     * @return string
     */
    final public static function getValidGTLD()
    {
        static $regex;

        if (!empty($regex)) {
            return $regex;
        }

        $gTLD = implode('|', static::$gTLDs);
        $regex = '(?:(?:' . $gTLD . ')(?=[^0-9a-z@]|$))';

        return $regex;
    }

    /**
     * get valid ccTLD regexp
     *
     * @staticvar string $regex
     * @return string
     */
    final public static function getValidCcTLD()
    {
        static $regex;

        if (!empty($regex)) {
            return $regex;
        }

        $ccTLD = implode('|', static::$ccTLDs);
        $regex = '(?:(?:' . $ccTLD . ')(?=[^0-9a-z@]|$))';

        return $regex;
    }
}
<?php
$content = ob_get_clean();

echo $content;

if (file_put_contents($classFile, $content) === false) {
    fwrite(STDERR, "Failed to write {$classFile}\n");
    exit(1);
}

composer.json

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@
3131
"type": "package",
3232
"package": {
3333
"name": "twitter/twitter-text",
34-
"version": "2.0.5",
34+
"version": "3.0.0",
3535
"source": {
3636
"url": "https://github.com/twitter/twitter-text.git",
3737
"type": "git",
38-
"reference": "v2.0.5"
38+
"reference": "v3.0.0"
3939
}
4040
}
4141
}
@@ -46,15 +46,21 @@
4646
"ext-intl": "*"
4747
},
4848
"require-dev": {
49+
"ext-json": "*",
4950
"symfony/yaml": "~2.6.0",
5051
"phpunit/phpunit": "4.8.*|5.7.*|6.5.*",
51-
"twitter/twitter-text": "^2.0.0"
52+
"twitter/twitter-text": "^3.0.0"
5253
},
5354
"autoload": {
5455
"psr-0": {
5556
"Twitter\\Text\\": "lib/"
5657
}
5758
},
59+
"autoload-dev": {
60+
"psr-4": {
61+
"Twitter\\Text\\TestCase\\": "tests/TestCase/"
62+
}
63+
},
5864
"scripts": {
5965
"check": [
6066
"@cs",

lib/Twitter/Text/Autolink.php

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010

1111
namespace Twitter\Text;
1212

13-
use Twitter\Text\Regex;
14-
use Twitter\Text\Extractor;
15-
use Twitter\Text\StringUtils;
16-
1713
/**
1814
* Twitter Autolink Class
1915
*
@@ -401,7 +397,7 @@ public function setTarget($v)
401397
*/
402398
public function autoLinkEntities($tweet = null, $entities = null)
403399
{
404-
if (is_null($tweet)) {
400+
if ($tweet === null) {
405401
$tweet = $this->tweet;
406402
}
407403

@@ -445,9 +441,7 @@ public function autoLinkWithJson($tweet = null, $json = null)
445441
$json = $this->object2array($json);
446442
}
447443
if (is_array($json)) {
448-
foreach ($json as $key => $vals) {
449-
$entities = array_merge($entities, $json[$key]);
450-
}
444+
$entities = call_user_func_array('array_merge', $json);
451445
}
452446

453447
// map JSON entity to twitter-text entity
@@ -487,7 +481,7 @@ protected function object2array($obj)
487481
*/
488482
public function autoLink($tweet = null)
489483
{
490-
if (is_null($tweet)) {
484+
if ($tweet === null) {
491485
$tweet = $this->tweet;
492486
}
493487
$entities = $this->extractor->extractURLWithoutProtocol(false)->extractEntitiesWithIndices($tweet);
@@ -504,7 +498,7 @@ public function autoLink($tweet = null)
504498
*/
505499
public function autoLinkUsernamesAndLists($tweet = null)
506500
{
507-
if (is_null($tweet)) {
501+
if ($tweet === null) {
508502
$tweet = $this->tweet;
509503
}
510504
$entities = $this->extractor->extractMentionsOrListsWithIndices($tweet);
@@ -520,7 +514,7 @@ public function autoLinkUsernamesAndLists($tweet = null)
520514
*/
521515
public function autoLinkHashtags($tweet = null)
522516
{
523-
if (is_null($tweet)) {
517+
if ($tweet === null) {
524518
$tweet = $this->tweet;
525519
}
526520
$entities = $this->extractor->extractHashtagsWithIndices($tweet);
@@ -537,7 +531,7 @@ public function autoLinkHashtags($tweet = null)
537531
*/
538532
public function autoLinkURLs($tweet = null)
539533
{
540-
if (is_null($tweet)) {
534+
if ($tweet === null) {
541535
$tweet = $this->tweet;
542536
}
543537
$entities = $this->extractor->extractURLWithoutProtocol(false)->extractURLsWithIndices($tweet);
@@ -553,7 +547,7 @@ public function autoLinkURLs($tweet = null)
553547
*/
554548
public function autoLinkCashtags($tweet = null)
555549
{
556-
if (is_null($tweet)) {
550+
if ($tweet === null) {
557551
$tweet = $this->tweet;
558552
}
559553
$entities = $this->extractor->extractCashtagsWithIndices($tweet);
@@ -650,7 +644,7 @@ public function linkToUrl($entity)
650644
*/
651645
public function linkToHashtag($entity, $tweet = null)
652646
{
653-
if (is_null($tweet)) {
647+
if ($tweet === null) {
654648
$tweet = $this->tweet;
655649
}
656650

@@ -668,7 +662,7 @@ public function linkToHashtag($entity, $tweet = null)
668662
$class[] = 'rtl';
669663
}
670664
if (!empty($class)) {
671-
$attributes['class'] = join(' ', $class);
665+
$attributes['class'] = implode(' ', $class);
672666
}
673667

674668
return $this->linkToText($entity, $linkText, $attributes);
@@ -712,7 +706,7 @@ public function linkToMentionAndList($entity)
712706
*/
713707
public function linkToCashtag($entity, $tweet = null)
714708
{
715-
if (is_null($tweet)) {
709+
if ($tweet === null) {
716710
$tweet = $this->tweet;
717711
}
718712
$attributes = array();
@@ -745,7 +739,7 @@ public function linkToText(array $entity, $text, $attributes = array())
745739
$rel[] = 'nofollow';
746740
}
747741
if (!empty($rel)) {
748-
$attributes['rel'] = join(' ', $rel);
742+
$attributes['rel'] = implode(' ', $rel);
749743
}
750744
if ($this->target) {
751745
$attributes['target'] = $this->target;

0 commit comments

Comments
 (0)