@@ -44,12 +44,118 @@ protected function normalizeHost($url)
4444 $ parts = explode ('. ' , $ host );
4545 $ utf8Host = '' ;
4646 foreach ($ parts as $ part ) {
47- $ utf8Host = $ utf8Host . (($ utf8Host === '' ) ? '' : '. ' ) . idn_to_utf8 ($ part );
47+ $ utf8Host = $ utf8Host . (($ utf8Host === '' ) ? '' : '. ' ) . $ this -> convertPunycode ($ part );
4848 }
4949
5050 return mb_strtolower ($ utf8Host );
5151 }
5252
/**
 * Convert a punycode (ACE) host label to UTF-8 if needed.
 *
 * Labels that do not start with the "xn--" ACE prefix are returned untouched.
 * Prefixed labels are decoded with idn_to_utf8() when that function exists,
 * otherwise with the decodePunycode() fallback.
 *
 * @param string $part host component
 *
 * @return string host component as UTF-8, or the unchanged component when
 *                idn_to_utf8() fails to decode it
 */
protected function convertPunycode($part)
{
    // The ACE prefix is matched case-insensitively so that "XN--..." labels
    // are decoded as well.
    if (stripos($part, 'xn--') !== 0) {
        return $part;
    }

    if (!function_exists('idn_to_utf8')) {
        return $this->decodePunycode($part);
    }

    // idn_to_utf8() signals failure with a non-string value (false). Keep the
    // original label in that case: a false would silently turn into an empty
    // label once the caller concatenates it into the host name.
    $decoded = idn_to_utf8($part);

    return is_string($decoded) ? $decoded : $part;
}
71+
/**
 * Convert a punycode (ACE) label to UTF-8 (the hard way). Used only if idn_to_utf8() is not available.
 *
 * This fallback adapted from https://ckon.wordpress.com/2010/08/24/punycode-to-unicode-converter-php/
 *
 * The part after the "xn--" prefix is decoded following the RFC 3492
 * decoding procedure: everything before the last "-" is copied literally,
 * the remainder is a sequence of base-36 variable-length integers, each of
 * which encodes one code point together with its insertion position.
 *
 * Input that is not a well-formed ACE label (missing prefix, empty payload,
 * invalid digit, truncated number, non-ASCII literal part, numeric overflow)
 * is returned unchanged.
 *
 * @param string $encoded label to decode, e.g. "xn--bcher-kva"
 *
 * @return string the decoded label as UTF-8, or $encoded unchanged when it cannot be decoded
 */
protected function decodePunycode($encoded)
{
    $prefix = 'xn--';
    $prefix_len = strlen($prefix);
    $replacement = "\xEF\xBF\xBC"; // U+FFFC encoded as UTF-8
    $max_int = 0x7FFFFFFF;         // overflow ceiling, safe on 32-bit builds too
    $base = 36;
    $tmin = 1;
    $tmax = 26;
    $skew = 38;
    $damp = 700;

    // The ACE prefix is case-insensitive; nothing to do without it or without a payload.
    if (strncasecmp($encoded, $prefix, $prefix_len) !== 0
        || strlen(trim(str_ireplace($prefix, '', $encoded))) === 0
    ) {
        return $encoded;
    }

    $payload = substr($encoded, $prefix_len);
    $payload_len = strlen($payload);
    $decoded = array();
    $next = 0; // offset of the next unread digit in $payload

    // Everything before the LAST delimiter is literal ASCII.
    $delim_pos = strrpos($payload, '-');
    if ($delim_pos !== false && $delim_pos > 0) {
        for ($k = 0; $k < $delim_pos; ++$k) {
            $cp = ord($payload[$k]);
            if ($cp >= 0x80) {
                return $encoded; // the literal part must be plain ASCII
            }
            $decoded[] = $cp;
        }
        $next = $delim_pos + 1;
    }

    $deco_len = count($decoded);
    $is_first = true;
    $bias = 72;
    $idx = 0;
    $char = 0x80;

    while ($next < $payload_len) {
        // Read one variable-length integer into $idx.
        $old_idx = $idx;
        $w = 1;
        for ($k = $base; ; $k += $base) {
            if ($next >= $payload_len) {
                return $encoded; // input ended in the middle of a number
            }
            $cp = ord($payload[$next++]);
            if ($cp >= 48 && $cp <= 57) {         // '0'..'9' => 26..35
                $digit = $cp - 22;
            } elseif ($cp >= 65 && $cp <= 90) {   // 'A'..'Z' => 0..25
                $digit = $cp - 65;
            } elseif ($cp >= 97 && $cp <= 122) {  // 'a'..'z' => 0..25
                $digit = $cp - 97;
            } else {
                return $encoded; // not a base-36 digit
            }

            if ($digit * $w > $max_int - $idx) {
                return $encoded; // overflow
            }
            $idx += $digit * $w;

            $t = ($k <= $bias) ? $tmin : (($k >= $bias + $tmax) ? $tmax : ($k - $bias));
            if ($digit < $t) {
                break; // last digit of this number
            }

            if ($w * ($base - $t) > $max_int) {
                return $encoded; // overflow
            }
            $w *= ($base - $t);
        }

        // Bias adaptation for the next number.
        $points = $deco_len + 1;
        $delta = $idx - $old_idx;
        $delta = (int) ($is_first ? ($delta / $damp) : ($delta / 2));
        $delta += (int) ($delta / $points);
        for ($k = 0; $delta > (($base - $tmin) * $tmax) / 2; $k += $base) {
            $delta = (int) ($delta / ($base - $tmin));
        }
        $bias = (int) ($k + ($base - $tmin + 1) * $delta / ($delta + $skew));
        $is_first = false;

        // $idx wraps around the output length: the quotient advances the
        // code point, the remainder is the insertion position.
        $char += (int) ($idx / $points);
        $idx %= $points;
        array_splice($decoded, $idx, 0, array($char));
        ++$idx;
        ++$deco_len;
    }

    // Serialise the code points as UTF-8.
    $output = '';
    foreach ($decoded as $v) {
        if ($v < 0x80) {
            // 7bit are transferred literally
            $output .= chr($v);
        } elseif ($v < 0x800) {
            // 2 bytes
            $output .= chr(192 + ($v >> 6)) . chr(128 + ($v & 63));
        } elseif ($v > 0x10FFFF || ($v >= 0xD800 && $v <= 0xDFFF)) {
            // Not encodable as UTF-8 (beyond Unicode, or a surrogate)
            $output .= $replacement;
        } elseif ($v < 0x10000) {
            // 3 bytes
            $output .= chr(224 + ($v >> 12)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63));
        } else {
            // 4 bytes
            $output .= chr(240 + ($v >> 18)) . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63));
        }
    }

    return $output;
}
158+
53159 /**
54160 * Determine the registered domain portion of the supplied host string
55161 *
0 commit comments