Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
<?php
/*
* Copyright (C) 2013 - Rasmus Fuhse <fuhse@data-quest.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*/
/**
* A model class to handle the database table "opengraphdata", fetch data from
* an Opengraph-URL and render a fitting box with the opengraph information to
* the user.
* @property string url database column
* @property string id alias column for url
* @property string is_opengraph database column
* @property string title database column
* @property string image database column
* @property string description database column
* @property string type database column
* @property string data database column
* @property string last_update database column
* @property string chdate database column
* @property string mkdate database column
*/
class OpenGraphURL extends SimpleORMap
{
const EXPIRES_DURATION = 86400; // = 24 * 60 * 60
/**
 * Configures this model: binds it to the database table "opengraphdata",
 * registers the column "data" as a serialized field of type
 * JSONArrayObject and hands the result to the parent configuration.
 *
 * @param array $config Configuration array
 */
protected static function configure($config = [])
{
    // Table this model is stored in
    $config['db_table'] = 'opengraphdata';

    // "data" is (de)serialized through JSONArrayObject
    $config['serialized_fields']['data'] = 'JSONArrayObject';

    parent::configure($config);
}
/**
 * Returns the instance that belongs to the given url. In contrast to
 * findOneByURL() this never yields null: if no record is found, a new
 * object with the url already set is returned instead.
 *
 * @param string $url URL to find
 * @return OpenGraphURL Either the existing instance or a new instance
 *                      for the given url
 */
public static function fromURL($url)
{
    $existing = self::findOneByUrl($url);
    if ($existing) {
        return $existing;
    }

    // Nothing found, hand out a fresh object for this url
    $fresh = new self();
    $fresh->url = $url;

    return $fresh;
}
/**
 * Specialized finder for urls: the lookup is not done on the url itself
 * but on its md5 hash via findOneByHash().
 *
 * @param string $url URL to find record for
 * @return mixed instance of OpenGraphURL if available, null otherwise
 */
public static function findOneByURL($url)
{
    $hash = md5($url);

    return self::findOneByHash($hash);
}
/**
 * Constructor of the object. Provides a fallback if a url is passed
 * instead of the usually expected numeric id in order to not break
 * backward compatibility.
 * If a url is passed that is not in the database, it is handed to the
 * parent constructor unchanged. This was chosen by design to encourage
 * the correct use of an id.
 *
 * Only strings are considered as possible urls. Any other value (null,
 * integers, arrays, ...) is passed through to the parent constructor
 * untouched, because ctype_digit() is only reliable for strings:
 * integers between -128 and 255 are interpreted as ASCII codes and
 * non-string arguments are deprecated as of PHP 8.1.
 *
 * @param mixed $id Numeric id, existing url or null
 */
public function __construct($id = null)
{
    // Try to find matching id when an url is passed instead of an id.
    // This is to ensure that no legacy code will immediately break.
    if (is_string($id) && !ctype_digit($id)) {
        $temp = self::findOneByURL($id);
        if ($temp) {
            $id = $temp->id;
        }
    }

    parent::__construct($id);
}
/**
 * Sets the value of a column. Overwritten so that whenever the field
 * "url" is set, the "hash" entry of the record content is updated with
 * the md5 hash of the new value as well.
 *
 * @param string $field
 * @param string $value
 * @return mixed the result of SimpleORMap::setValue()
 * @see SimpleORMap::setValue
 */
public function setValue($field, $value)
{
    $result = parent::setValue($field, $value);

    // Every field except the url is done at this point
    if ($field !== 'url') {
        return $result;
    }

    // Keep the hash in sync with the url
    $this->content['hash'] = md5($value);

    return $result;
}
/**
 * Stores the object. If the object is new or its last update is older
 * than EXPIRES_DURATION seconds, the opengraph information is fetched
 * again before the final store.
 *
 * @return int Number of updated records (result of the final store)
 */
public function store()
{
    $needsRefresh = $this->isNew()
                 || $this->last_update < time() - self::EXPIRES_DURATION;

    if ($needsRefresh) {
        // The timestamp is persisted BEFORE the (slow) fetch so that
        // another thread will not start fetching the same url again.
        $this->last_update = time();
        parent::store();

        $this->fetch();
    }

    return parent::store();
}
/**
 * Fetches information from the url by getting the contents of the
 * webpage, parsing the webpage and extracting the information from the
 * opengraph meta-tags.
 * If the site doesn't have any opengraph-metatags it is in fact no
 * opengraph node and thus is_opengraph is set to 0.
 * Only $url['is_opengraph'] === '0' indicates that the site is no
 * opengraph node at all.
 *
 * Nothing happens if the configuration OPENGRAPH_ENABLE is off. If the
 * page itself cannot be downloaded, the method returns early and leaves
 * is_opengraph untouched.
 *
 * @todo The combination of FileManager::fetchURLMetadata() and the following request
 *       leads to two requests for the open graph data. This should
 *       be fixed due to performance reasons.
 */
public function fetch()
{
    if (!Config::get()->OPENGRAPH_ENABLE) {
        return;
    }

    $isOpenGraph = false;

    $response = FileManager::fetchURLMetadata($this['url']);

    // Only successful responses that announce an html document are parsed.
    // Media types are case-insensitive, hence the case-insensitive search.
    $isHtmlResponse = isset($response['response_code'], $response['Content-Type'])
        && (int) $response['response_code'] === 200
        && mb_stripos($response['Content-Type'], 'html') !== false;

    if ($isHtmlResponse) {
        $currentEncoding = self::extractCharsetFromContentType($response['Content-Type']);

        $context = get_default_http_stream_context($this['url']);
        stream_context_set_option($context, [
            'http' => [
                'method' => 'GET',
                'header' => sprintf("User-Agent: Stud.IP v%s OpenGraph Parser\r\n", $GLOBALS['SOFTWARE_VERSION']),
            ],
        ]);

        $html = @file_get_contents($this['url'], false, $context);
        if ($html === false) {
            return;
        }

        $isOpenGraph = $this->importOpenGraphMarkup($html, $currentEncoding);
    }

    $this['is_opengraph'] = (int) $isOpenGraph;
}

/**
 * Extracts the charset from a Content-Type header value.
 *
 * @param string $contentType Value of the Content-Type header
 * @return string the declared charset or "UTF-8" if none (or an empty
 *                one) is declared
 */
private static function extractCharsetFromContentType($contentType)
{
    if (preg_match('/(?<=charset=)[^;]*/i', $contentType, $match)) {
        // The value may be quoted and/or padded with whitespace
        $charset = trim($match[0], " \t\"'");
        if ($charset !== '') {
            return $charset;
        }
    }

    return 'UTF-8';
}

/**
 * Parses the given html document and copies the opengraph information
 * found in its meta tags into this object (matching columns and the
 * "data" column).
 *
 * @param string $html     Raw html of the page
 * @param string $encoding Encoding of the page as declared by the server
 * @return bool true if at least one opengraph meta tag was found
 */
private function importOpenGraphMarkup($html, $encoding)
{
    // Convert every non-ascii character (including those beyond the
    // basic multilingual plane) to a numeric entity so that the parser
    // does not depend on the encoding of the document.
    $convmap = [0x80, 0x10ffff, 0, 0x1fffff];

    // The encoding is taken from a remote header and thus might be
    // unknown to mbstring: PHP 8 throws a ValueError in that case while
    // older versions return false. Fall back to UTF-8 in both cases.
    try {
        $encoded = @mb_encode_numericentity($html, $convmap, $encoding);
    } catch (\ValueError $e) {
        $encoded = false;
    }
    if (!is_string($encoded)) {
        $encoded = mb_encode_numericentity($html, $convmap, 'UTF-8');
    }

    // DOMDocument::loadHTML() refuses empty input (ValueError as of
    // PHP 8), an empty document cannot contain any tags anyway.
    if (!is_string($encoded) || $encoded === '') {
        $this['data'] = [];
        return false;
    }

    // Real world markup is rarely valid, so keep libxml quiet while parsing
    $old_libxml_error = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML($encoded);
    libxml_use_internal_errors($old_libxml_error);

    $metatags = $doc->getElementsByTagName('meta');

    // Columns that must never be overwritten by values of a remote page
    $reservedTags = ['id', 'hash', 'url', 'chdate', 'mkdate', 'last_update', 'is_opengraph', 'data'];

    $isOpenGraph = false;
    $ogTags = [];
    $data = [];
    foreach ($metatags as $tag) {
        // Opengraph tags are usually announced via the attribute
        // "property" but some sites use "name" instead.
        $key = false;
        if ($tag->hasAttribute('property')
            && mb_strpos($tag->getAttribute('property'), 'og:') === 0)
        {
            $key = mb_strtolower(mb_substr($tag->getAttribute('property'), 3));
        }
        if (!$key && $tag->hasAttribute('name')
            && mb_strpos($tag->getAttribute('name'), 'og:') === 0)
        {
            $key = mb_strtolower(mb_substr($tag->getAttribute('name'), 3));
        }

        if ($key) {
            $value = $tag->getAttribute('content');
            $data[] = ['og:' . $key => $value];
            $ogTags[$key] = $value;
            $isOpenGraph = true;
        }
    }

    // Copy all tags that match a column of this model
    foreach ($ogTags as $key => $value) {
        if ($this->isField($key) && !in_array($key, $reservedTags, true)) {
            $this[$key] = $value;
        }
    }

    // Fall back to the <title> element if no og:title is present
    if (empty($this['title']) && $isOpenGraph) {
        $titles = $doc->getElementsByTagName('title');
        if ($titles->length > 0) {
            $this['title'] = $titles->item(0)->textContent;
        }
    }

    // Fall back to any description meta tag if no og:description is
    // present (the last matching tag wins)
    if (empty($this['description']) && $isOpenGraph) {
        foreach ($metatags as $tag) {
            if (mb_stripos($tag->getAttribute('name'), "description") !== false
                || mb_stripos($tag->getAttribute('property'), "description") !== false)
            {
                $this['description'] = $tag->getAttribute('content');
            }
        }
    }

    $this['data'] = $data;

    return $isOpenGraph;
}
/**
 * Renders a small box with the information of the opengraph url. Used in
 * blubber and in the forum.
 *
 * @return string html output of the box, an empty string if opengraph
 *                is disabled or this url is no opengraph node
 */
public function render()
{
    // Feature switched off globally
    if (!Config::get()->OPENGRAPH_ENABLE) {
        return '';
    }

    // Nothing to show for urls without opengraph information
    if (!$this->getValue('is_opengraph')) {
        return '';
    }

    $box = $GLOBALS['template_factory']->open('shared/opengraphinfo_wide.php');
    $box->og = $this;

    return $box->render();
}
/**
 * Returns all audio files that are provided by the opengraph node.
 * Each entry is an array itself with the url as first element and the
 * content type (important for <audio/> tags) as second element.
 *
 * @return array[] list of [url, content type] pairs
 */
public function getAudioFiles()
{
    $audioFiles = $this->getMediaFiles('audio');

    return $audioFiles;
}
/**
 * Returns all video files that are provided by the opengraph node.
 * Each entry is an array itself with the url as first element and the
 * content type (important for <video/> tags) as second element.
 *
 * @return array[] list of [url, content type] pairs
 */
public function getVideoFiles()
{
    $videoFiles = $this->getMediaFiles('video');

    return $videoFiles;
}
/**
 * Returns an array with all media files that are provided by the
 * opengraph node. Each entry is an array itself with the url as first
 * element and the content type (important for <audio/> or <video/> tags)
 * as second element. The content type is null if the node does not
 * provide a type for the respective url.
 *
 * If the current request is served via https and the node provides
 * secure urls, only the secure urls are returned.
 *
 * @param string $type "audio" or "video"
 * @return array[] list of [url, content type] pairs
 */
protected function getMediaFiles($type)
{
    $data = $this['data'];
    if (!is_array($data) && !($data instanceof Traversable)) {
        // No opengraph data available at all
        return [];
    }

    $media = [];
    $secure_media = [];
    $media_types = [];

    foreach ($data as $meta) {
        foreach ($meta as $key => $value) {
            // Strict comparisons so that non-string keys never match
            if ($key === "og:{$type}:url" || $key === "og:{$type}") {
                $media[] = $value;
            } elseif ($key === "og:{$type}:secure_url") {
                $secure_media[] = $value;
            } elseif ($key === "og:{$type}:type") {
                $media_types[] = $value;
            }
        }
    }

    // $_SERVER['HTTPS'] is not set for plain http or cli requests
    $is_https = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] === 'on';
    $urls = ($is_https && count($secure_media) > 0) ? $secure_media : $media;

    $files = [];
    foreach ($urls as $index => $url) {
        // Urls and types are matched by position, a type might be missing
        $content_type = isset($media_types[$index]) ? $media_types[$index] : null;
        $files[] = [$url, $content_type];
    }

    return $files;
}
}