<?php /* * Copyright (C) 2013 - Rasmus Fuhse <fuhse@data-quest.de> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. */ /** * A model class to handle the database table "opengraphdata", fetch data from * an Opengraph-URL and render a fitting box with the opengraph information to * the user. * @property string url database column * @property string id alias column for url * @property string is_opengraph database column * @property string title database column * @property string image database column * @property string description database column * @property string type database column * @property string data database column * @property string last_update database column * @property string chdate database column * @property string mkdate database column */ class OpenGraphURL extends SimpleORMap { const EXPIRES_DURATION = 86400; // = 24 * 60 * 60 /** * Configures this model. * * @param Array $config Configuration array */ protected static function configure($config = []) { $config['db_table'] = 'opengraphdata'; $config['serialized_fields']['data'] = 'JSONArrayObject'; parent::configure($config); } /** * Create an instance of this model given url. Differs from findOneByURL * insofar that it will return a new object with the given url set * instead of null. * * @param String $url URL to find * @return OpenGraphURL Either existing instance or a new instance for * the given url */ public static function fromURL($url) { $og = self::findOneByUrl($url); if (!$og) { $og = new self(); $og->url = $url; } return $og; } /** * Specialized findOneByURL function that uses the hash to find the * appropriate record instead. * * @param string $url URL to find record for * @return mixed instance of OpenGraphURL if available, null otherwise */ public static function findOneByURL($url) { return self::findOneByHash(md5($url)); } /** * Constructor of the object. Provides a fallback if a url is passed * instead of the usually expected numeric id in order to not break * backward compatibility. * But this constructor will fail miserably if a url is passed that * is not in the database. This was chosen by design to encourage the * correct use of an id. * * @param mixed $id Numeric id, existing url or null */ public function __construct($id = null) { // Try to find matching id when an url is passed instead of an id. // This is to ensure that no legacy code will immediately break. if ($id !== null && !ctype_digit($id)) { $temp = self::findOneByUrl($id); if ($temp) { $id = $temp->id; } } parent::__construct($id); } /** * Sets value of a column. Overwritten so that the hash is also set when * the url is set. * * @param string $field * @param string $value * @return string * @see SimpleORMap::setValue */ public function setValue($field, $value) { $ret = parent::setValue($field, $value); if ($field === 'url') { $this->content['hash'] = md5($value); } return $ret; } /** * Stores the object and fetches the opengraph information when either * the object is new or outdated. * * @return int Number of updated records */ public function store() { if ($this->isNew() || $this->last_update < time() - self::EXPIRES_DURATION) { // Store last update timestamp BEFORE fetching so another thread // will not fetch again $this->last_update = time(); parent::store(); $this->fetch(); } return parent::store(); } /** * Fetches information from the url by getting the contents of the * webpage, parse the webpage and extract the information from the * opengraph meta-tags. * If the site doesn't have any opengraph-metatags it is in fact no * opengraph node and thus no data will be stored in the database. * Only $url['is_opengraph'] === '0' indicates that the site is no * opengraph node at all. * * @todo The combination of FileManager::fetchURLMetadata() and the following request * leads to two requests for the open graph data. This should * be fixed due to performance reasons. */ public function fetch() { if (!Config::get()->OPENGRAPH_ENABLE) { return; } $isOpenGraph = false; $response = FileManager::fetchURLMetadata($this['url']); if ((int)$response['response_code'] === 200 && isset($response['Content-Type']) && mb_strpos($response['Content-Type'],'html') !== false) { if (preg_match('/(?<=charset=)[^;]*/i', $response['Content-Type'], $match)) { $currentEncoding = trim($match[0], '"'); } else { $currentEncoding = 'UTF-8'; } $context = get_default_http_stream_context($this['url']); stream_context_set_option($context, [ 'http' => [ 'method' => 'GET', 'header' => sprintf("User-Agent: Stud.IP v%s OpenGraph Parser\r\n", $GLOBALS['SOFTWARE_VERSION']), ], ]); $content = @file_get_contents($this['url'], false, $context); if ($content === false) { return; } $content = mb_encode_numericentity($content, [0x80, 0xffff, 0, 0xffff], $currentEncoding); $old_libxml_error = libxml_use_internal_errors(true); $doc = new DOMDocument(); $doc->loadHTML($content); libxml_use_internal_errors($old_libxml_error); $metatags = $doc->getElementsByTagName('meta'); $reservedTags = ['url', 'chdate', 'mkdate', 'last_update', 'is_opengraph', 'data']; $ogTags = []; $data = []; foreach ($metatags as $tag) { $key = false; if ($tag->hasAttribute('property') && mb_strpos($tag->getAttribute('property'), 'og:') === 0) { $key = mb_strtolower(mb_substr($tag->getAttribute('property'), 3)); } if (!$key && $tag->hasAttribute('name') && mb_strpos($tag->getAttribute('name'), 'og:') === 0) { $key = mb_strtolower(mb_substr($tag->getAttribute('name'), 3)); } if ($key) { $content = $tag->getAttribute('content'); $data[] = ['og:'.$key => $content]; $ogTags[$key] = $content; $isOpenGraph = true; } } foreach ($ogTags as $key => $tag) { if ($this->isField($key) && !in_array($key, $reservedTags)) { $this[$key] = $tag; } } if (empty($this['title']) && $isOpenGraph) { $titles = $doc->getElementsByTagName('title'); if ($titles->length > 0) { $this['title'] = $titles->item(0)->textContent; } } if (empty($this['description']) && $isOpenGraph) { foreach ($metatags as $tag) { if (mb_stripos($tag->getAttribute('name'), "description") !== false || mb_stripos($tag->getAttribute('property'), "description") !== false) { $this['description'] = $tag->getAttribute('content'); } } } $this['data'] = $data; } $this['is_opengraph'] = (int) $isOpenGraph; } /** * Renders a small box with the information of the opengraph url. Used in * blubber and in the forum. * * @return string html output of the box. */ public function render() { if (!Config::get()->OPENGRAPH_ENABLE || !$this->getValue('is_opengraph')) { return ''; } $template = $GLOBALS['template_factory']->open('shared/opengraphinfo_wide.php'); $template->og = $this; return $template->render(); } /** * Returns an array with all audiofiles that are provided by the opengraph-node. * Each array-entry is an array itself with the url as first parameter and the * content-type (important for <audio/> tags) as the second. * * @return array(array($url, $content_type), ...) */ public function getAudioFiles() { return $this->getMediaFiles('audio'); } /** * Returns an array with all videofiles that are provided by the opengraph-node. * Each array-entry is an array itself with the url as first parameter and the * content-type (important for <video/> tags) as the second. * * @return array(array($url, $content_type), ...) */ public function getVideoFiles() { return $this->getMediaFiles('video'); } /** * Returns an array with all mediafiles that are provided by the opengraph-node. * Each array-entry is an array itself with the url as first parameter and the * content-type (important for <audio/> or <video/> tags) as the second. * * @param string $type "audio" or "video" * @return array(array($url, $content_type), ...) */ protected function getMediaFiles($type) { $files = []; $media = []; $secure_media = []; $media_types = []; foreach ($this['data'] as $meta) { foreach ($meta as $key => $value) { switch ($key) { case "og:{$type}:url": case "og:{$type}": $media[] = $value; break; case "og:{$type}:secure_url": $secure_media[] = $value; break; case "og:{$type}:type": $media_types[] = $value; break; } } } if ($_SERVER['HTTPS'] === 'on' && count($secure_media) > 0) { foreach ($secure_media as $index => $url) { $files[] = [$url, $media_types[$index]]; } } else { foreach ($media as $index => $url) { $files[] = [$url, $media_types[$index]]; } } return $files; } }