Skip to content
Snippets Groups Projects
Select Git revision
  • ec318155278066032616aa33c1407403eae8f88d
  • main default protected
  • studip-rector
  • ci-opt
  • course-members-export-as-word
  • data-vue-app
  • pipeline-improvements
  • webpack-optimizations
  • rector
  • icon-renewal
  • http-client-and-factories
  • jsonapi-atomic-operations
  • vueify-messages
  • tic-2341
  • 135-translatable-study-areas
  • extensible-sorm-action-parameters
  • sorm-configuration-trait
  • jsonapi-mvv-routes
  • docblocks-for-magic-methods
19 results

OpenGraphURL.class.php

Blame
  • Forked from Stud.IP / Stud.IP
    Source project has a limited visibility.
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    OpenGraphURL.class.php 10.86 KiB
    <?php
    /*
     * Copyright (C) 2013 - Rasmus Fuhse <fuhse@data-quest.de>
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License as
     * published by the Free Software Foundation; either version 2 of
     * the License, or (at your option) any later version.
     */
    
    /**
     * A model class to handle the database table "opengraphdata", fetch data from
     * an Opengraph-URL and render a fitting box with the opengraph information to
     * the user.
     * @property string url database column
     * @property string id alias column for url
     * @property string is_opengraph database column
     * @property string title database column
     * @property string image database column
     * @property string description database column
     * @property string type database column
     * @property string data database column
     * @property string last_update database column
     * @property string chdate database column
     * @property string mkdate database column
     */
    class OpenGraphURL extends SimpleORMap
    {
        const EXPIRES_DURATION = 86400; // = 24 * 60 * 60
    
        /**
         * Configures this model.
         *
         * @param Array $config Configuration array
         */
        protected static function configure($config = [])
        {
            // Bind the model to its backing table.
            $config['db_table'] = 'opengraphdata';

            // Register the "data" column as a serialized field handled by
            // JSONArrayObject, keeping any serialized fields already configured.
            $serialized = isset($config['serialized_fields']) ? $config['serialized_fields'] : [];
            $serialized['data'] = 'JSONArrayObject';
            $config['serialized_fields'] = $serialized;

            parent::configure($config);
        }
    
        /**
         * Creates an instance of this model for the given url. Differs from
         * findOneByURL insofar as it returns a new object with the given url
         * set instead of null when no record exists.
         *
         * @param String $url URL to find
         * @return OpenGraphURL Either existing instance or a new instance for
         *                      the given url
         */
        public static function fromURL($url)
        {
            // Prefer the record that already exists for this url.
            $existing = self::findOneByUrl($url);
            if ($existing) {
                return $existing;
            }

            // No record yet: hand back a new, unsaved instance carrying the url.
            $fresh = new self();
            $fresh->url = $url;

            return $fresh;
        }
    
        /**
         * Specialized findOneByURL function that uses the hash to find the
         * appropriate record instead.
         *
         * @param  string $url URL to find record for
         * @return mixed instance of OpenGraphURL if available, null otherwise
         */
        public static function findOneByURL($url)
        {
            // Records are looked up by the md5 hash of the url rather than by
            // the url itself.
            $hash = md5($url);

            return self::findOneByHash($hash);
        }
    
        /**
         * Constructor of the object. Provides a fallback if a url is passed
         * instead of the usually expected numeric id in order to not break
         * backward compatibility.
         * But this constructor will fail miserably if a url is passed that
         * is not in the database. This was chosen by design to encourage the
         * correct use of an id.
         *
         * @param mixed $id Numeric id, existing url or null
         */
        public function __construct($id = null)
        {
            // ctype_digit() only gives a meaningful answer for strings: an
            // integer between -128 and 255 is interpreted as an ASCII code
            // point, and passing a non-string is deprecated as of PHP 8.1.
            // Integers are therefore recognised as ids explicitly and never
            // handed to ctype_digit().
            $is_numeric_id = is_int($id) || (is_string($id) && ctype_digit($id));

            // Try to find the matching id when a url is passed instead of an
            // id. This is to ensure that no legacy code will immediately break.
            if ($id !== null && !$is_numeric_id) {
                $existing = self::findOneByUrl($id);
                if ($existing) {
                    $id = $existing->id;
                }
            }

            // An unknown url is passed on unchanged, exactly as before.
            parent::__construct($id);
        }
    
        /**
         * Sets value of a column. Overwritten so that the hash is also set when
         * the url is set.
         *
         * @param string $field
         * @param string $value
         * @return string
         * @see SimpleORMap::setValue
         */
        public function setValue($field, $value)
        {
            // Let the parent assign the column first; its result is what the
            // caller gets back in every case.
            $result = parent::setValue($field, $value);

            if ($field !== 'url') {
                return $result;
            }

            // Keep the stored hash in sync with the url it is derived from.
            $this->content['hash'] = md5($value);

            return $result;
        }
    
        /**
         * Stores the object and fetches the opengraph information when either
         * the object is new or outdated.
         *
         * @return int Number of updated records
         */
        public function store()
        {
            // A record needs (re)fetching when it is new or when its last
            // update is older than EXPIRES_DURATION seconds.
            $expiry_threshold = time() - self::EXPIRES_DURATION;
            $needs_refresh = $this->isNew() || $this->last_update < $expiry_threshold;

            if ($needs_refresh) {
                // The timestamp is persisted BEFORE fetching so another thread
                // will not fetch again.
                $this->last_update = time();
                parent::store();

                $this->fetch();
            }

            // Persist whatever is pending now; this call's result is returned.
            return parent::store();
        }
    
        /**
         * Fetches information from the url by getting the contents of the
         * webpage, parsing the webpage and extracting the information from the
         * opengraph meta-tags.
         * If the site doesn't have any opengraph-metatags it is in fact no
         * opengraph node and thus no data will be stored in the database.
         * Only $url['is_opengraph'] === '0' indicates that the site is no
         * opengraph node at all.
         *
         * @todo The combination of FileManager::fetchURLMetadata() and the following request
         *       leads to two requests for the open graph data. This should
         *       be fixed due to performance reasons.
         */
        public function fetch()
        {
            // Nothing is fetched and no column is touched while the feature is
            // switched off.
            if (!Config::get()->OPENGRAPH_ENABLE) {
                return;
            }

            // Only successful responses announcing an HTML payload are parsed.
            // isset() guards against metadata that lacks either key.
            $response = FileManager::fetchURLMetadata($this['url']);
            $isHtml = isset($response['response_code'], $response['Content-Type'])
                && (int) $response['response_code'] === 200
                && mb_strpos($response['Content-Type'], 'html') !== false;
            if (!$isHtml) {
                $this['is_opengraph'] = 0;
                return;
            }

            // Charset announced in the Content-Type header, UTF-8 when none is
            // given. Surrounding quotes and whitespace are stripped.
            $currentEncoding = 'UTF-8';
            if (preg_match('/(?<=charset=)[^;]*/i', $response['Content-Type'], $match)) {
                $currentEncoding = trim($match[0], " \t\"'");
            }

            $context = get_default_http_stream_context($this['url']);
            stream_context_set_option($context, [
                'http' => [
                    'method' => 'GET',
                    'header' => sprintf("User-Agent: Stud.IP v%s OpenGraph Parser\r\n", $GLOBALS['SOFTWARE_VERSION']),
                ],
            ]);

            $content = @file_get_contents($this['url'], false, $context);

            if ($content === false) {
                // Download failed: leave the record as it is.
                return;
            }

            if ($content === '') {
                // An empty body cannot contain any meta tag, and
                // DOMDocument::loadHTML() rejects empty input (ValueError as of
                // PHP 8), so the result is recorded without parsing.
                $this['data'] = [];
                $this['is_opengraph'] = 0;
                return;
            }

            // Convert every non-ASCII character into a numeric entity so the
            // HTML parser does not have to guess the encoding. A charset that
            // mbstring does not know raises a ValueError on PHP 8 and yields a
            // non-string result on PHP 7; both cases fall back to UTF-8.
            $convmap = [0x80, 0xffff, 0, 0xffff];
            try {
                $encoded = mb_encode_numericentity($content, $convmap, $currentEncoding);
            } catch (ValueError $e) {
                $encoded = false;
            }
            if (!is_string($encoded)) {
                $encoded = mb_encode_numericentity($content, $convmap, 'UTF-8');
            }

            // Parse with libxml errors collected internally, then restore the
            // previous error handling mode.
            $old_libxml_error = libxml_use_internal_errors(true);
            $doc = new DOMDocument();
            $doc->loadHTML($encoded);
            libxml_use_internal_errors($old_libxml_error);

            $metatags = $doc->getElementsByTagName('meta');
            $reservedTags = ['url', 'chdate', 'mkdate', 'last_update', 'is_opengraph', 'data'];
            $ogTags = [];
            $data = [];
            $isOpenGraph = false;

            foreach ($metatags as $tag) {
                // The "og:" prefixed key is taken from the "property"
                // attribute, falling back to "name" when "property" yields
                // nothing usable.
                $key = false;
                foreach (['property', 'name'] as $attribute) {
                    $raw = $tag->getAttribute($attribute);
                    if (mb_strpos($raw, 'og:') === 0) {
                        $key = mb_strtolower(mb_substr($raw, 3));
                    }
                    if ($key) {
                        break;
                    }
                }

                if ($key) {
                    $value = $tag->getAttribute('content');
                    // $data keeps every tag in document order (repeated keys
                    // included); $ogTags keeps the last value per key.
                    $data[] = ['og:'.$key => $value];
                    $ogTags[$key] = $value;
                    $isOpenGraph = true;
                }
            }

            // Copy tags that match a column of this model, except for the
            // columns managed by the model itself.
            foreach ($ogTags as $key => $value) {
                if ($this->isField($key) && !in_array($key, $reservedTags, true)) {
                    $this[$key] = $value;
                }
            }

            // Fall back to the document's <title> when no title is set.
            if (empty($this['title']) && $isOpenGraph) {
                $titles = $doc->getElementsByTagName('title');
                if ($titles->length > 0) {
                    $this['title'] = $titles->item(0)->textContent;
                }
            }

            // Fall back to any meta tag whose name or property contains
            // "description"; the last matching tag wins.
            if (empty($this['description']) && $isOpenGraph) {
                foreach ($metatags as $tag) {
                    if (mb_stripos($tag->getAttribute('name'), "description") !== false
                        || mb_stripos($tag->getAttribute('property'), "description") !== false)
                    {
                        $this['description'] = $tag->getAttribute('content');
                    }
                }
            }

            $this['data'] = $data;
            $this['is_opengraph'] = (int) $isOpenGraph;
        }
    
        /**
         * Renders a small box with the information of the opengraph url. Used in
         * blubber and in the forum.
         *
         * @return string html output of the box.
         */
        public function render()
        {
            // Render nothing unless the feature is enabled and this record is
            // flagged as an opengraph node.
            $enabled = Config::get()->OPENGRAPH_ENABLE;
            if (!($enabled && $this->getValue('is_opengraph'))) {
                return '';
            }

            $box = $GLOBALS['template_factory']->open('shared/opengraphinfo_wide.php');
            $box->og = $this;

            return $box->render();
        }
    
        /**
         * Returns an array with all audiofiles that are provided by the opengraph-node.
         * Each array-entry is an array itself with the url as first parameter and the
         * content-type (important for <audio/> tags) as the second.
         *
         * @return array(array($url, $content_type), ...)
         */
        public function getAudioFiles()
        {
            // Delegates to the generic media lookup for "og:audio" tags.
            $audio_files = $this->getMediaFiles('audio');

            return $audio_files;
        }
    
        /**
         * Returns an array with all videofiles that are provided by the opengraph-node.
         * Each array-entry is an array itself with the url as first parameter and the
         * content-type (important for <video/> tags) as the second.
         *
         * @return array(array($url, $content_type), ...)
         */
        public function getVideoFiles()
        {
            // Delegates to the generic media lookup for "og:video" tags.
            $video_files = $this->getMediaFiles('video');

            return $video_files;
        }
    
        /**
         * Returns an array with all mediafiles that are provided by the opengraph-node.
         * Each array-entry is an array itself with the url as first parameter and the
         * content-type (important for <audio/> or <video/> tags) as the second.
         *
         * @param string $type "audio" or "video"
         * @return array(array($url, $content_type), ...)
         */
        protected function getMediaFiles($type)
        {
            $entries = $this['data'];
            if (!is_array($entries) && !($entries instanceof Traversable)) {
                // No iterable tag data available: there is nothing to report.
                return [];
            }

            $media = [];
            $secure_media = [];
            $media_types = [];
            // URL of the most recent plain "og:{type}" tag. "og:{type}:url" is
            // an alias of that tag, so an alias repeating the same URL must
            // not be collected a second time; otherwise the file is listed
            // twice and the index pairing with the content types shifts.
            $last_root_url = null;

            foreach ($entries as $meta) {
                foreach ($meta as $key => $value) {
                    if ($key === "og:{$type}") {
                        $media[] = $value;
                        $last_root_url = $value;
                    } elseif ($key === "og:{$type}:url") {
                        if ($value !== $last_root_url) {
                            $media[] = $value;
                        }
                        $last_root_url = null;
                    } elseif ($key === "og:{$type}:secure_url") {
                        $secure_media[] = $value;
                    } elseif ($key === "og:{$type}:type") {
                        $media_types[] = $value;
                    }
                }
            }

            // Use the secure URLs when the current request is flagged as HTTPS
            // and the page offers any; otherwise use the plain URLs.
            $is_https = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] === 'on';
            $urls = ($is_https && count($secure_media) > 0) ? $secure_media : $media;

            // URLs and content types are paired by position. A URL without a
            // matching type tag gets null instead of triggering an undefined
            // index warning.
            $files = [];
            foreach ($urls as $index => $url) {
                $files[] = [$url, isset($media_types[$index]) ? $media_types[$index] : null];
            }

            return $files;
        }
    }