|
10 | 10 | class LinkAce |
11 | 11 | { |
12 | 12 | /** |
13 | | - * Get the title of an HTML page b |
| 13 | + * Get the title and description of a website from its URL |
14 | 14 | * |
15 | 15 | * @param string $url |
16 | 16 | * @return string|string[] |
17 | 17 | */ |
18 | | - public static function getTitleFromURL(string $url) |
| 18 | + public static function getMetaFromURL(string $url) |
19 | 19 | { |
20 | | - $fail_return = parse_url($url, PHP_URL_HOST); |
| 20 | + $title_fallback = parse_url($url, PHP_URL_HOST); |
21 | 21 |
|
| 22 | + $fallback = [ |
| 23 | + 'title' => $title_fallback, |
| 24 | + 'description' => null, |
| 25 | + ]; |
| 26 | + |
| 27 | + // Try to get the HTML content of that URL |
22 | 28 | try { |
23 | | - $fp = file_get_contents($url); |
| 29 | + $html = file_get_contents($url); |
24 | 30 | } catch (\Exception $e) { |
25 | | - return $fail_return; |
| 31 | + return $fallback; |
26 | 32 | } |
27 | 33 |
|
28 | | - if (!$fp) { |
29 | | - return $fail_return; |
| 34 | + if (!$html) { |
| 35 | + return $fallback; |
30 | 36 | } |
31 | 37 |
|
32 | | - $res = preg_match("/<title>(.*)<\/title>/siU", $fp, $title_matches); |
| 38 | + // Parse the HTML for the title |
| 39 | + $res = preg_match("/<title>(.*)<\/title>/siU", $html, $title_matches); |
33 | 40 |
|
34 | | - if (!$res) { |
35 | | - return $fail_return; |
| 41 | + if ($res) { |
| 42 | + // Clean up title: remove EOLs and excessive whitespace. |
| 43 | + $title = preg_replace('/\s+/', ' ', $title_matches[1]); |
| 44 | + $title = trim($title); |
36 | 45 | } |
37 | 46 |
|
38 | | - // Clean up title: remove EOL's and excessive whitespace. |
39 | | - $title = preg_replace('/\s+/', ' ', $title_matches[1]); |
40 | | - $title = trim($title); |
| 47 | + // Parse the HTML for the meta description, or alternatively for the og:description property |
| 48 | + $res = preg_match( |
| 49 | + '/<meta (?:property="og:description"|name="description") content="(.*?)"(?:\s\/)?>/i', |
| 50 | + $html, |
| 51 | + $description_matches |
| 52 | + ); |
| 53 | + |
| 54 | + if ($res) { |
| 55 | + // Clean up description: remove EOLs and excessive whitespace. |
| 56 | + $description = preg_replace('/\s+/', ' ', $description_matches[1]); |
| 57 | + $description = trim($description); |
| 58 | + } |
41 | 59 |
|
42 | | - return $title; |
| 60 | + return [ |
| 61 | + 'title' => $title ?? $title_fallback, |
| 62 | + 'description' => $description ?? null, |
| 63 | + ]; |
43 | 64 | } |
44 | 65 |
|
45 | 66 | /** |
|
0 commit comments