Bläddra i källkod

Add new entity lexers

Daniel Supernault 7 år sedan
förälder
incheckning
7bb1f10d19

+ 771 - 0
app/Util/Lexer/Autolink.php

@@ -0,0 +1,771 @@
+<?php
+
+/**
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Mike Cochrane, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+use App\Util\Lexer\Regex;
+use App\Util\Lexer\Extractor;
+use App\Util\Lexer\StringUtils;
+
+/**
+ * Twitter Autolink Class
+ *
+ * Parses tweets and generates HTML anchor tags around URLs, usernames,
+ * username/list pairs and hashtags.
+ *
+ * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
+ * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
+ * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
+ *
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Mike Cochrane, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+class Autolink extends Regex
+{
+
+    /**
+     * CSS class for auto-linked URLs.
+     *
+     * Empty by default; linkToUrl() only emits a class attribute when this is
+     * non-empty.
+     *
+     * @var  string
+     */
+    protected $class_url = '';
+
+    /**
+     * CSS class for auto-linked username URLs.
+     *
+     * @var  string
+     */
+    protected $class_user = 'u-url mention';
+
+    /**
+     * CSS class for auto-linked list URLs.
+     *
+     * @var  string
+     */
+    protected $class_list = 'u-url list-slug';
+
+    /**
+     * CSS class for auto-linked hashtag URLs.
+     *
+     * @var  string
+     */
+    protected $class_hash = 'u-url hashtag';
+
+    /**
+     * CSS class for auto-linked cashtag URLs.
+     *
+     * @var  string
+     */
+    protected $class_cash = 'u-url cashtag';
+
+    /**
+     * URL base for username links (the username without the @ will be appended).
+     *
+     * Null until the constructor sets it from config('app.url').
+     *
+     * @var  string|null
+     */
+    protected $url_base_user = null;
+
+    /**
+     * URL base for list links (the username/list without the @ will be appended).
+     *
+     * Null until the constructor sets it from config('app.url').
+     *
+     * @var  string|null
+     */
+    protected $url_base_list = null;
+
+    /**
+     * URL base for hashtag links (the hashtag without the # will be appended).
+     *
+     * Null until the constructor sets it from config('app.url').
+     *
+     * @var  string|null
+     */
+    protected $url_base_hash = null;
+
+    /**
+     * URL base for cashtag links (the cashtag without the $ will be appended).
+     *
+     * Null until the constructor sets it from config('app.url').
+     *
+     * @var  string|null
+     */
+    protected $url_base_cash = null;
+
+    /**
+     * Whether to include the value 'nofollow' in the 'rel' attribute.
+     *
+     * @var  bool
+     */
+    protected $nofollow = true;
+
+    /**
+     * Whether to include the value 'noopener' in the 'rel' attribute.
+     *
+     * @var  bool
+     */
+    protected $noopener = true;
+
+    /**
+     * Whether to include the value 'external' in the 'rel' attribute.
+     *
+     * Often this is matched on in JavaScript for dynamically adding the
+     * 'target' attribute.  Note that linkToText() emits the 'target' attribute
+     * whenever $target is truthy, independently of this flag.
+     *
+     * @var  bool
+     */
+    protected $external = true; 
+
+    /**
+     * The scope to open the link in.
+     *
+     * Support for the 'target' attribute was deprecated in HTML 4.01 but has
+     * since been reinstated in HTML 5.  linkToText() outputs the attribute
+     * whenever this value is truthy; linkToHashtag() assigns false to it.
+     *
+     * @var  string|bool
+     */
+    protected $target = '_blank';
+
+    /**
+     * Raw attribute string placed on the spans that linkToUrl() uses to hide
+     * the parts of an expanded URL that are not part of the display URL.
+     *
+     * @var string
+     */
+    protected $invisibleTagAttrs = "style='position:absolute;left:-9999px;'";
+
+    /**
+     * Entity extractor used by the autoLink*() methods; created in the
+     * constructor via Extractor::create().
+     *
+     * @var Extractor|null
+     */
+    protected $extractor = null;
+
+    /**
+     * Static factory that enables fluent method chaining.
+     *
+     * The tweet is always escaped (the constructor's default); $full_encode
+     * only selects which escaping function the constructor applies.
+     *
+     * @param  string  $tweet        The tweet to be converted.
+     * @param  bool    $full_encode  Whether to encode all special characters.
+     *
+     * @see  __construct()
+     *
+     * @return  static
+     */
+    public static function create($tweet = null, $full_encode = false)
+    {
+        // The constructor signature is ($tweet, $escape, $full_encode).  Passing
+        // $full_encode in second position would land in $escape, turning
+        // escaping off by default and making full encoding unreachable.
+        return new static($tweet, true, $full_encode);
+    }
+
+    /**
+     * Stores the tweet (optionally HTML-escaped) and prepares link bases.
+     *
+     * When $escape is true and the tweet is non-empty, the text is escaped
+     * before being handed to the parent constructor: htmlentities() when
+     * $full_encode is set, htmlspecialchars() otherwise.  Existing entities
+     * are not double-encoded.  All URL bases are derived from the
+     * 'app.url' configuration value.
+     *
+     * @see  htmlspecialchars()
+     * @see  htmlentities()
+     *
+     * @param  string  $tweet        The tweet to be converted.
+     * @param  bool    $escape       Whether to escape the tweet (default: true).
+     * @param  bool    $full_encode  Whether to encode all special characters.
+     */
+    public function __construct($tweet = null, $escape = true, $full_encode = false)
+    {
+        $text = $tweet;
+        if ($escape && !empty($tweet)) {
+            $text = $full_encode
+                ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
+                : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
+        }
+        parent::__construct($text);
+
+        $this->extractor = Extractor::create();
+
+        $appUrl = config('app.url');
+        $this->url_base_user = $appUrl . '/';
+        $this->url_base_list = $appUrl . '/';
+        $this->url_base_hash = $appUrl . "/discover/tags/";
+        $this->url_base_cash = $appUrl . '/search?q=%24';
+    }
+
+    /**
+     * Returns the CSS class applied to auto-linked URLs.
+     *
+     * @return  string  CSS class for URL links (empty string when none is set).
+     */
+    public function getURLClass()
+    {
+        return $this->class_url;
+    }
+
+    /**
+     * Sets the CSS class applied to auto-linked URLs.
+     *
+     * Surrounding whitespace is trimmed before the value is stored.
+     *
+     * @param  string  $v  CSS class for URL links.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setURLClass($v)
+    {
+        $cssClass = trim($v);
+        $this->class_url = $cssClass;
+
+        return $this;
+    }
+
+    /**
+     * Returns the CSS class applied to auto-linked username URLs.
+     *
+     * @return  string  CSS class for username links.
+     */
+    public function getUsernameClass()
+    {
+        return $this->class_user;
+    }
+
+    /**
+     * Sets the CSS class applied to auto-linked username URLs.
+     *
+     * Surrounding whitespace is trimmed before the value is stored.
+     *
+     * @param  string  $v  CSS class for username links.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setUsernameClass($v)
+    {
+        $cssClass = trim($v);
+        $this->class_user = $cssClass;
+
+        return $this;
+    }
+
+    /**
+     * Returns the CSS class applied to auto-linked username/list URLs.
+     *
+     * @return  string  CSS class for username/list links.
+     */
+    public function getListClass()
+    {
+        return $this->class_list;
+    }
+
+    /**
+     * Sets the CSS class applied to auto-linked username/list URLs.
+     *
+     * Surrounding whitespace is trimmed before the value is stored.
+     *
+     * @param  string  $v  CSS class for username/list links.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setListClass($v)
+    {
+        $cssClass = trim($v);
+        $this->class_list = $cssClass;
+
+        return $this;
+    }
+
+    /**
+     * Returns the CSS class applied to auto-linked hashtag URLs.
+     *
+     * @return  string  CSS class for hashtag links.
+     */
+    public function getHashtagClass()
+    {
+        return $this->class_hash;
+    }
+
+    /**
+     * Sets the CSS class applied to auto-linked hashtag URLs.
+     *
+     * Surrounding whitespace is trimmed before the value is stored.
+     *
+     * @param  string  $v  CSS class for hashtag links.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setHashtagClass($v)
+    {
+        $cssClass = trim($v);
+        $this->class_hash = $cssClass;
+
+        return $this;
+    }
+
+    /**
+     * Returns the CSS class applied to auto-linked cashtag URLs.
+     *
+     * @return  string  CSS class for cashtag links.
+     */
+    public function getCashtagClass()
+    {
+        return $this->class_cash;
+    }
+
+    /**
+     * Sets the CSS class applied to auto-linked cashtag URLs.
+     *
+     * Surrounding whitespace is trimmed before the value is stored.
+     *
+     * @param  string  $v  CSS class for cashtag links.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setCashtagClass($v)
+    {
+        $cssClass = trim($v);
+        $this->class_cash = $cssClass;
+
+        return $this;
+    }
+
+    /**
+     * Reports whether 'nofollow' is included in the 'rel' attribute of links.
+     *
+     * @return  bool  Whether to add 'nofollow' to the 'rel' attribute.
+     */
+    public function getNoFollow()
+    {
+        return $this->nofollow;
+    }
+
+    /**
+     * Controls whether 'nofollow' is included in the 'rel' attribute of links.
+     *
+     * @param  bool  $v  Whether to add 'nofollow' to the 'rel' attribute.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setNoFollow($v)
+    {
+        $this->nofollow = $v;
+        return $this;
+    }
+
+    /**
+     * Reports whether 'external' is included in the 'rel' attribute of links.
+     *
+     * Often this value is matched on in JavaScript for dynamically adding the
+     * 'target' attribute.  In this class the flag only affects 'rel';
+     * linkToText() emits 'target' whenever the configured target is truthy,
+     * regardless of this setting.
+     *
+     * @return  bool  Whether to add 'external' to the 'rel' attribute.
+     */
+    public function getExternal()
+    {
+        return $this->external;
+    }
+
+    /**
+     * Controls whether 'external' is included in the 'rel' attribute of links.
+     *
+     * Often this value is matched on in JavaScript for dynamically adding the
+     * 'target' attribute.  In this class the flag only affects 'rel';
+     * linkToText() emits 'target' whenever the configured target is truthy,
+     * regardless of this setting.
+     *
+     * @param  bool  $v  Whether to add 'external' to the 'rel' attribute.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setExternal($v)
+    {
+        $this->external = $v;
+        return $this;
+    }
+
+    /**
+     * Returns the scope to open links in.
+     *
+     * Support for the 'target' attribute was deprecated in HTML 4.01 but has
+     * since been reinstated in HTML 5.  A falsy value means linkToText() emits
+     * no 'target' attribute.
+     *
+     * @return  string|bool  The value to add to the 'target' attribute.
+     */
+    public function getTarget()
+    {
+        return $this->target;
+    }
+
+    /**
+     * Sets the scope to open links in.
+     *
+     * Support for the 'target' attribute was deprecated in HTML 4.01 but has
+     * since been reinstated in HTML 5.  The value is trimmed before it is
+     * stored; an empty result suppresses the attribute in linkToText().
+     *
+     * @param  string  $v  The value to add to the 'target' attribute.
+     *
+     * @return  Autolink  Fluid method chaining.
+     */
+    public function setTarget($v)
+    {
+        $scope = trim($v);
+        $this->target = $scope;
+
+        return $this;
+    }
+
+    /**
+     * Rebuilds the tweet text with every given entity replaced by its link.
+     *
+     * Entities are processed in the order given; the text between consecutive
+     * entities is copied verbatim.  Each entity must carry an 'indices' pair
+     * (start, end) and one of the keys 'url', 'hashtag', 'screen_name' or
+     * 'cashtag', which selects the link builder.  Entities with none of these
+     * keys are dropped from the output.
+     *
+     * @param string     $tweet    Text to link; defaults to the stored tweet.
+     * @param array|null $entities Entities with indices; null is treated as
+     *                             an empty list.
+     * @return string The text with links inserted.
+     * @since 1.1.0
+     */
+    public function autoLinkEntities($tweet = null, $entities = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        // The parameter defaults to null, and iterating null raises a warning.
+        if (is_null($entities)) {
+            $entities = array();
+        }
+
+        $text = '';
+        $beginIndex = 0;
+        foreach ($entities as $entity) {
+            // Copy the plain text that precedes this entity.  For mentions one
+            // extra character is copied, so the character at indices[0]
+            // (presumably the "@" sigil) stays outside the anchor.
+            $gapLength = $entity['indices'][0] - $beginIndex;
+            if (isset($entity['screen_name'])) {
+                $gapLength++;
+            }
+            $text .= StringUtils::substr($tweet, $beginIndex, $gapLength);
+
+            if (isset($entity['url'])) {
+                $text .= $this->linkToUrl($entity);
+            } elseif (isset($entity['hashtag'])) {
+                $text .= $this->linkToHashtag($entity, $tweet);
+            } elseif (isset($entity['screen_name'])) {
+                $text .= $this->linkToMentionAndList($entity);
+            } elseif (isset($entity['cashtag'])) {
+                $text .= $this->linkToCashtag($entity, $tweet);
+            }
+            $beginIndex = $entity['indices'][1];
+        }
+        // Append whatever follows the last entity.
+        $text .= StringUtils::substr($tweet, $beginIndex, StringUtils::strlen($tweet));
+        return $text;
+    }
+
+    /**
+     * Auto-link hashtags, URLs, usernames and lists, with JSON entities.
+     *
+     * $json is expected to be a map of entity groups, each group being a list
+     * of entities.  Objects are converted to arrays first.  Groups that are
+     * not arrays are skipped.  An entity with a non-empty 'text' key gets
+     * that value copied to 'hashtag' so that autoLinkEntities() recognises it.
+     *
+     * @param  string $tweet The tweet to be converted
+     * @param  mixed  $json  The entities info (array or object)
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLinkWithJson($tweet = null, $json = null)
+    {
+        // concatenate entities
+        $entities = array();
+        if (is_object($json)) {
+            $json = $this->object2array($json);
+        }
+        if (is_array($json)) {
+            foreach ($json as $group) {
+                // array_merge() rejects non-array arguments, so scalar or
+                // null members of the payload are ignored.
+                if (is_array($group)) {
+                    $entities = array_merge($entities, $group);
+                }
+            }
+        }
+
+        // map JSON entity to twitter-text entity
+        foreach ($entities as $idx => $entity) {
+            if (!empty($entity['text'])) {
+                $entities[$idx]['hashtag'] = $entity['text'];
+            }
+        }
+
+        $entities = $this->extractor->removeOverlappingEntities($entities);
+        return $this->autoLinkEntities($tweet, $entities);
+    }
+
+    /**
+     * Recursively casts an object (or array) and everything nested inside it
+     * to plain arrays.
+     *
+     * @param mixed $obj Value to convert.
+     * @return array The converted structure.
+     */
+    protected function object2array($obj)
+    {
+        $result = (array) $obj;
+        foreach ($result as $name => $value) {
+            if (!is_object($value) && !is_array($value)) {
+                continue;
+            }
+            $result[$name] = $this->object2array($value);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Links every entity the extractor finds in the text: URLs, hashtags,
+     * mentions/lists and cashtags.
+     *
+     * The extractor is called with extractURLWithoutProtocol(false) first.
+     *
+     * @param  string $tweet The tweet to be converted; defaults to the stored tweet
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLink($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+
+        $extractor = $this->extractor->extractURLWithoutProtocol(false);
+        $found = $extractor->extractEntitiesWithIndices($source);
+
+        return $this->autoLinkEntities($source, $found);
+    }
+
+    /**
+     * Links only the @username and @username/list references in the text.
+     * Username links carry the username CSS class, list links the list CSS
+     * class.
+     *
+     * @param  string $tweet The tweet to be converted; defaults to the stored tweet
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLinkUsernamesAndLists($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+        $found = $this->extractor->extractMentionsOrListsWithIndices($source);
+
+        return $this->autoLinkEntities($source, $found);
+    }
+
+    /**
+     * Links only the #hashtag references in the text.  The links carry the
+     * hashtag CSS class.
+     *
+     * @param  string $tweet The tweet to be converted; defaults to the stored tweet
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLinkHashtags($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+        $found = $this->extractor->extractHashtagsWithIndices($source);
+
+        return $this->autoLinkEntities($source, $found);
+    }
+
+    /**
+     * Links only the URLs in the text.
+     *
+     * The extractor is called with extractURLWithoutProtocol(false) first, so
+     * only URLs with a protocol are auto-linked.
+     *
+     * @param  string $tweet The tweet to be converted; defaults to the stored tweet
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLinkURLs($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+
+        $extractor = $this->extractor->extractURLWithoutProtocol(false);
+        $found = $extractor->extractURLsWithIndices($source);
+
+        return $this->autoLinkEntities($source, $found);
+    }
+
+    /**
+     * Links only the $cashtag references in the text.  The links carry the
+     * cashtag CSS class.
+     *
+     * @param  string $tweet The tweet to be converted; defaults to the stored tweet
+     * @return string that auto-link HTML added
+     * @since 1.1.0
+     */
+    public function autoLinkCashtags($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+        $found = $this->extractor->extractCashtagsWithIndices($source);
+
+        return $this->autoLinkEntities($source, $found);
+    }
+
+    /**
+     * Builds the anchor for a URL entity.
+     *
+     * The link text is the escaped 'url' by default.  When the entity also
+     * has 'display_url' (and optionally 'expanded_url') the display form is
+     * shown instead; see the inline comment for the copy-and-paste markup.
+     * Every piece of entity data that ends up in the link text is
+     * HTML-escaped, because linkToText() inserts the text verbatim.
+     *
+     * @param array $entity URL entity; requires 'url', may contain
+     *                      'display_url' and 'expanded_url'.
+     * @return string The anchor element.
+     */
+    public function linkToUrl($entity)
+    {
+        $attributes = array();
+        if (!empty($this->class_url)) {
+            $attributes['class'] = $this->class_url;
+        }
+        $attributes['href'] = $entity['url'];
+        $linkText = $this->escapeHTML($entity['url']);
+
+        if (!empty($entity['display_url']) && !empty($entity['expanded_url'])) {
+            // Goal: If a user copies and pastes a tweet containing a shortened link, the resulting
+            // paste should contain the full original URL (expanded_url), not the display URL.
+            //
+            // Method: Whenever possible, we emit HTML that contains expanded_url, and wrap the parts
+            // that should not be displayed (because they are not part of display_url) in a span
+            // carrying $this->invisibleTagAttrs.  Those parts still get copied even though they are
+            // not visible.  Note that display:none doesn't work here: such elements don't get copied.
+            //
+            // Additionally, we want to *display* ellipses, but we don't want them copied.  To make this
+            // happen we wrap the ellipses in a tco-ellipsis class so that an onCopy handler can hide
+            // everything with that class.
+            //
+            // As an example: The user tweets "hi http://longdomainname.com/foo"
+            // This gets shortened, with display_url = "…nname.com/foo"
+            // This will get rendered as:
+            // <span class='tco-ellipsis'> <!-- displayed but not copied -->
+            //   …
+            //   <!-- The onCopy handler might not fire.  In case that happens an &nbsp; keeps the …
+            //        from bumping up against the URL.  It lives inside the tco-ellipsis span so that
+            //        when the handler *does* fire, no doubled space is copied. -->
+            //   <span (invisible)>&nbsp;</span>
+            // </span>
+            // <span (invisible)>  <!-- copied but not displayed -->
+            //   http://longdomai
+            // </span>
+            // <span class='js-display-url'> <!-- displayed *and* copied -->
+            //   nname.com/foo
+            // </span>
+            // <span class='tco-ellipsis'> <!-- displayed but not copied -->
+            //   <span (invisible)>&nbsp;</span>
+            //   …
+            // </span>
+            //
+            // Exception: when display_url is not a substring of expanded_url (e.g. picture links),
+            // nothing special is done to render the elided parts; the display URL is shown as is.
+            $displayURL = $entity['display_url'];
+            $expandedURL = $entity['expanded_url'];
+            $displayURLSansEllipses = preg_replace('/…/u', '', $displayURL);
+            $displayURLIndex = mb_strpos($expandedURL, $displayURLSansEllipses);
+
+            if ($displayURLIndex !== false) {
+                $beforeDisplayURL = mb_substr($expandedURL, 0, $displayURLIndex);
+                $afterDisplayURL = mb_substr($expandedURL, $displayURLIndex + mb_strlen($displayURLSansEllipses));
+                $precedingEllipsis = (preg_match('/\A…/u', $displayURL)) ? '…' : '';
+                $followingEllipsis = (preg_match('/…\z/u', $displayURL)) ? '…' : '';
+
+                $invisibleSpan = "<span {$this->invisibleTagAttrs}>";
+
+                $linkText = "<span class='tco-ellipsis'>{$precedingEllipsis}{$invisibleSpan}&nbsp;</span></span>";
+                $linkText .= $invisibleSpan . $this->escapeHTML($beforeDisplayURL) . '</span>';
+                $linkText .= "<span class='js-display-url'>" . $this->escapeHTML($displayURLSansEllipses) . '</span>';
+                $linkText .= $invisibleSpan . $this->escapeHTML($afterDisplayURL) . '</span>';
+                $linkText .= "<span class='tco-ellipsis'>{$invisibleSpan}&nbsp;</span>{$followingEllipsis}</span>";
+            } else {
+                // display_url is external data and goes into the markup as-is,
+                // so it must be escaped like every other fragment.
+                $linkText = $this->escapeHTML($displayURL);
+            }
+            $attributes['title'] = $expandedURL;
+        } elseif (!empty($entity['display_url'])) {
+            $linkText = $this->escapeHTML($entity['display_url']);
+        }
+
+        return $this->linkToText($entity, $linkText, $attributes);
+    }
+
+    /**
+     * Builds the anchor for a hashtag entity.
+     *
+     * The hash character is taken from the tweet itself at the entity's start
+     * index.  Hashtag links are emitted without a 'target' attribute; the
+     * configured target is suspended only for this one link and restored
+     * afterwards, so links built later keep the configured target.
+     *
+     * @param array  $entity Hashtag entity with 'hashtag' and 'indices'.
+     * @param string $tweet  Text the entity refers to; defaults to the stored tweet.
+     * @return string The anchor element.
+     * @since 1.1.0
+     */
+    public function linkToHashtag($entity, $tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        $attributes = array();
+        $class = array();
+        $hash = StringUtils::substr($tweet, $entity['indices'][0], 1);
+        $linkText = $hash . $entity['hashtag'];
+
+        $attributes['href'] = $this->url_base_hash . $entity['hashtag'] . '?src=hash';
+        $attributes['title'] = '#' . $entity['hashtag'];
+        if (!empty($this->class_hash)) {
+            $class[] = $this->class_hash;
+        }
+        if (preg_match(self::$patterns['rtl_chars'], $linkText)) {
+            $class[] = 'rtl';
+        }
+        if (!empty($class)) {
+            $attributes['class'] = join(' ', $class);
+        }
+
+        // Assigning false permanently would strip the target from every link
+        // generated after the first hashtag, making output depend on entity
+        // order.  Suspend it for this call only.
+        $configuredTarget = $this->target;
+        $this->target = false;
+        $link = $this->linkToText($entity, $linkText, $attributes);
+        $this->target = $configuredTarget;
+
+        return $link;
+    }
+
+    /**
+     * Builds the anchor for a mention or a username/list entity.
+     *
+     * With a non-empty 'list_slug' the link covers screen name plus slug and
+     * uses the list class and list URL base; otherwise it covers the screen
+     * name alone with the username class and username URL base.
+     *
+     * @param array  $entity Entity with 'screen_name' and optional 'list_slug'.
+     * @return string The anchor element.
+     * @since 1.1.0
+     */
+    public function linkToMentionAndList($entity)
+    {
+        if (empty($entity['list_slug'])) {
+            // Plain username mention.
+            $linkText = $entity['screen_name'];
+            $class = $this->class_user;
+            $base = $this->url_base_user;
+        } else {
+            // Username together with its list.
+            $linkText = $entity['screen_name'] . $entity['list_slug'];
+            $class = $this->class_list;
+            $base = $this->url_base_list;
+        }
+
+        $attributes = array();
+        if (!empty($class)) {
+            $attributes['class'] = $class;
+        }
+        $attributes['href'] = $base . $linkText;
+
+        return $this->linkToText($entity, $linkText, $attributes);
+    }
+
+    /**
+     * Builds the anchor for a cashtag entity.
+     *
+     * The leading symbol is taken from the tweet itself at the entity's start
+     * index and is used in both the link text and the title.
+     *
+     * @param array  $entity Cashtag entity with 'cashtag' and 'indices'.
+     * @param string $tweet  Text the entity refers to; defaults to the stored tweet.
+     * @return string The anchor element.
+     * @since 1.1.0
+     */
+    public function linkToCashtag($entity, $tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+
+        $sigil = StringUtils::substr($source, $entity['indices'][0], 1);
+        $label = $sigil . $entity['cashtag'];
+
+        $attributes = array(
+            'href' => $this->url_base_cash . $entity['cashtag'],
+            'title' => $label,
+        );
+        if (!empty($this->class_cash)) {
+            $attributes['class'] = $this->class_cash;
+        }
+
+        return $this->linkToText($entity, $label, $attributes);
+    }
+
+    /**
+     * Renders an anchor element around the given text.
+     *
+     * A 'rel' attribute is assembled from the enabled external / nofollow /
+     * noopener flags (in that order), and a 'target' attribute is added when a
+     * target is configured.  Attribute values are HTML-escaped; $text is
+     * inserted as-is, so callers must pass markup-safe text.
+     *
+     * @param array $entity     The entity being linked (not read here).
+     * @param string $text      Inner HTML of the anchor.
+     * @param array $attributes Attribute name => value pairs.
+     * @return string The anchor element.
+     * @since 1.1.0
+     */
+    public function linkToText(array $entity, $text, $attributes = array())
+    {
+        $relFlags = array(
+            'external' => $this->external,
+            'nofollow' => $this->nofollow,
+            'noopener' => $this->noopener,
+        );
+        $relTokens = array();
+        foreach ($relFlags as $token => $enabled) {
+            if ($enabled) {
+                $relTokens[] = $token;
+            }
+        }
+        if (!empty($relTokens)) {
+            $attributes['rel'] = implode(' ', $relTokens);
+        }
+        if ($this->target) {
+            $attributes['target'] = $this->target;
+        }
+
+        $html = '<a';
+        foreach ($attributes as $name => $value) {
+            $html .= ' ' . $name . '="' . $this->escapeHTML($value) . '"';
+        }
+
+        return $html . '>' . $text . '</a>';
+    }
+
+    /**
+     * HTML-escapes text for use in element content or attribute values.
+     *
+     * Both single and double quotes are converted (ENT_QUOTES), and entities
+     * that are already present are left alone (double_encode is false).
+     *
+     * @param string $text Text to escape.
+     * @return string The escaped text.
+     */
+    protected function escapeHTML($text)
+    {
+        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
+    }
+}

+ 548 - 0
app/Util/Lexer/Extractor.php

@@ -0,0 +1,548 @@
+<?php
+
+/**
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Mike Cochrane, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+use App\Util\Lexer\Regex;
+use App\Util\Lexer\StringUtils;
+
+/**
+ * Twitter Extractor Class
+ *
+ * Parses tweets and extracts URLs, usernames, username/list pairs and
+ * hashtags.
+ *
+ * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
+ * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
+ * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
+ *
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Mike Cochrane, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+class Extractor extends Regex
+{
+
+    /**
+     * Flag for extracting URLs that lack a protocol; enabled by default.
+     * NOTE(review): the code that reads and toggles this flag is outside this
+     * view; presumably it is set through extractURLWithoutProtocol().
+     *
+     * @var boolean
+     */
+    protected $extractURLWithoutProtocol = true;
+
+    /**
+     * Static factory that enables fluent method chaining.
+     *
+     * Instantiates via `new self`, so the result is always an Extractor even
+     * when called on a subclass.
+     *
+     * @param  string  $tweet        The tweet to extract from.
+     *
+     * @see  __construct()
+     *
+     * @return  Extractor
+     */
+    public static function create($tweet = null)
+    {
+        return new self($tweet);
+    }
+
+    /**
+     * Stores the tweet that the extract*() methods fall back to when no text
+     * is passed to them.
+     *
+     * The tweet is forwarded unchanged to the parent constructor.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     */
+    public function __construct($tweet = null)
+    {
+        parent::__construct($tweet);
+    }
+
+    /**
+     * Extracts all parts of a tweet and returns an associative array containing
+     * the extracted elements.
+     *
+     * Keys: 'hashtags', 'urls', 'mentions', 'replyto' (a single screen name or
+     * null) plus the '*_with_indices' variants of hashtags, urls and mentions.
+     *
+     * Note: the 'mentions' entry is produced by extractMentionedUsernames(),
+     * which also overwrites the stored tweet with $tweet.
+     *
+     * @param  string  $tweet  The tweet to extract; defaults to the stored tweet.
+     * @return  array  The elements in the tweet.
+     */
+    public function extract($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        return array(
+            'hashtags' => $this->extractHashtags($tweet),
+            'urls' => $this->extractURLs($tweet),
+            'mentions' => $this->extractMentionedUsernames($tweet),
+            // extractRepliedUsernames() takes no argument and always reads the
+            // stored tweet; call the real implementation with the text in hand
+            // instead of relying on the 'mentions' call having overwritten it.
+            'replyto' => $this->extractReplyScreenname($tweet),
+            'hashtags_with_indices' => $this->extractHashtagsWithIndices($tweet),
+            'urls_with_indices' => $this->extractURLsWithIndices($tweet),
+            'mentions_with_indices' => $this->extractMentionedUsernamesWithIndices($tweet),
+        );
+    }
+
+    /**
+     * Collects URLs, hashtags, mentions/lists and cashtags (in that order)
+     * from the text and removes entities that overlap.
+     *
+     * Hashtags are extracted with the URL-overlap check disabled, since the
+     * combined list is de-overlapped afterwards anyway.
+     *
+     * @param  string  $tweet  The tweet to extract; defaults to the stored tweet.
+     * @return array list of extracted entities
+     */
+    public function extractEntitiesWithIndices($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+
+        $found = array_merge(
+            $this->extractURLsWithIndices($source),
+            $this->extractHashtagsWithIndices($source, false),
+            $this->extractMentionsOrListsWithIndices($source),
+            $this->extractCashtagsWithIndices($source)
+        );
+
+        return $this->removeOverlappingEntities($found);
+    }
+
+    /**
+     * Returns just the hashtag strings found in the tweet, dropping the
+     * index information.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The hashtag elements in the tweet.
+     */
+    public function extractHashtags($tweet = null)
+    {
+        $names = array();
+        foreach ($this->extractHashtagsWithIndices($tweet) as $entity) {
+            $names[] = $entity['hashtag'];
+        }
+
+        return $names;
+    }
+
+    /**
+     * Returns just the cashtag strings found in the tweet, dropping the
+     * index information.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The cashtag elements in the tweet.
+     */
+    public function extractCashtags($tweet = null)
+    {
+        $symbols = array();
+        foreach ($this->extractCashtagsWithIndices($tweet) as $entity) {
+            $symbols[] = $entity['cashtag'];
+        }
+
+        return $symbols;
+    }
+
+    /**
+     * Returns just the URL strings found in the tweet, dropping the index
+     * information.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The URL elements in the tweet.
+     */
+    public function extractURLs($tweet = null)
+    {
+        $links = array();
+        foreach ($this->extractURLsWithIndices($tweet) as $entity) {
+            $links[] = $entity['url'];
+        }
+
+        return $links;
+    }
+
+    /**
+     * Returns the distinct screen names mentioned in the tweet.
+     *
+     * Names are lower-cased before comparison, so each name appears once
+     * regardless of the casing used in the text; empty names are skipped.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The usernames elements in the tweet.
+     */
+    public function extractMentionedScreennames($tweet = null)
+    {
+        $seen = array();
+        foreach ($this->extractMentionsOrListsWithIndices($tweet) as $mention) {
+            $name = mb_strtolower($mention['screen_name']);
+            if (!empty($name) && !in_array($name, $seen)) {
+                $seen[] = $name;
+            }
+        }
+
+        return $seen;
+    }
+
+    /**
+     * Extract all the usernames from the tweet.
+     *
+     * A mention is an occurrence of a username anywhere in a tweet.
+     *
+     * Side effect: the stored tweet is overwritten with $tweet before the
+     * call is delegated to extractMentionedScreennames().
+     *
+     * @param  string  $tweet  The tweet to extract (required).
+     * @return  array  The usernames elements in the tweet.
+     * @deprecated since version 1.1.0
+     */
+    public function extractMentionedUsernames($tweet)
+    {
+        $this->tweet = $tweet;
+        return $this->extractMentionedScreennames($tweet);
+    }
+
+    /**
+     * Extracts the screen name the tweet replies to.
+     *
+     * A reply is an occurrence of a username at the beginning of a tweet.
+     * The match is rejected when the text captured after the username matches
+     * the 'end_mention_match' pattern.
+     *
+     * @param  string  $tweet  The tweet to extract; defaults to the stored tweet.
+     * @return  string|null  The replied-to screen name, or null when there is none.
+     */
+    public function extractReplyScreenname($tweet = null)
+    {
+        $source = is_null($tweet) ? $this->tweet : $tweet;
+
+        if (!preg_match(self::$patterns['valid_reply'], $source, $matches)) {
+            return null;
+        }
+        // Check what follows the username.
+        if (preg_match(self::$patterns['end_mention_match'], $matches[2])) {
+            return null;
+        }
+
+        return $matches[1];
+    }
+
+    /**
+     * Extract the username replied to from the stored tweet.
+     *
+     * A reply is an occurrence of a username at the beginning of a tweet.
+     * Legacy alias that calls extractReplyScreenname() without arguments.
+     *
+     * @return  string|null  The username replied to, or null when the stored
+     *                       tweet is not a reply.
+     * @deprecated since version 1.1.0
+     */
+    public function extractRepliedUsernames()
+    {
+        return $this->extractReplyScreenname();
+    }
+
+    /**
+     * Extracts all the hashtags and the indices they occur at from the tweet.
+     *
+     * Each element holds the tag text under 'hashtag' and an 'indices' pair
+     * array(start, end) measured in characters (not bytes); the span covers
+     * the hash sign plus the tag text and the end index is exclusive.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @param boolean $checkUrlOverlap if true, check if extracted hashtags overlap URLs and remove overlapping ones
+     * @return  array  The hashtag elements in the tweet.
+     */
+    public function extractHashtagsWithIndices($tweet = null, $checkUrlOverlap = true)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+
+        // Cheap pre-check before running the full pattern. Accept both the
+        // ASCII "#" and the full-width number sign (U+FF03); the latter is
+        // written as an escape so it survives re-encoding of this file.
+        // NOTE(review): presumably mirrors the markers accepted by the
+        // 'valid_hashtag' pattern - confirm against the pattern definition.
+        if (!preg_match('/[#\x{FF03}]/u', $tweet)) {
+            return array();
+        }
+
+        preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        $tags = array();
+
+        foreach ($matches as $match) {
+            // Pad to the five slots destructured below so a match without the
+            // trailing group still yields a ('', 0) pair for $outer.
+            list($all, $before, $hash, $hashtag, $outer) = array_pad($match, 5, array('', 0));
+            // PCRE offsets are byte offsets: measure the prefix to convert the
+            // start to a character index.
+            $start_position = $hash[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $hash[1])) : $hash[1];
+            $end_position = $start_position + StringUtils::strlen($hash[0] . $hashtag[0]);
+
+            // Skip candidates whose following text matches the
+            // end-of-hashtag pattern.
+            if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
+                continue;
+            }
+
+            $tags[] = array(
+                'hashtag' => $hashtag[0],
+                'indices' => array($start_position, $end_position)
+            );
+        }
+
+        if (!$checkUrlOverlap) {
+            return $tags;
+        }
+
+        # check url overlap
+        $urls = $this->extractURLsWithIndices($tweet);
+        $entities = $this->removeOverlappingEntities(array_merge($tags, $urls));
+
+        // Only hashtag entities are returned; URL entities were merged in
+        // solely to detect overlaps.
+        $validTags = array();
+        foreach ($entities as $entity) {
+            if (empty($entity['hashtag'])) {
+                continue;
+            }
+            $validTags[] = $entity;
+        }
+
+        return $validTags;
+    }
+
+    /**
+     * Extracts all the cashtags and the indices they occur at from the tweet.
+     *
+     * Each element holds the tag text under 'cashtag' and an 'indices' pair
+     * array(start, end) measured in characters (not bytes); the span covers
+     * the dollar sign plus the tag text and the end index is exclusive.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The cashtag elements in the tweet.
+     */
+    public function extractCashtagsWithIndices($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+
+        // Cheap pre-check: without a dollar sign there can be no cashtag.
+        // A plain substring search is enough for a single literal character.
+        if (strpos($tweet, '$') === false) {
+            return array();
+        }
+
+        preg_match_all(self::$patterns['valid_cashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        $tags = array();
+
+        foreach ($matches as $match) {
+            // Pad to the five slots destructured below so a match without the
+            // trailing group still yields a ('', 0) pair for $outer.
+            list($all, $before, $dollar, $cash_text, $outer) = array_pad($match, 5, array('', 0));
+            // PCRE offsets are byte offsets: measure the prefix to convert the
+            // start to a character index.
+            $start_position = $dollar[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $dollar[1])) : $dollar[1];
+            $end_position = $start_position + StringUtils::strlen($dollar[0] . $cash_text[0]);
+
+            // NOTE(review): this reuses 'end_hashtag_match' while the loose
+            // auto-linker checks 'end_cashtag_match' - confirm which pattern
+            // is intended here.
+            if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
+                continue;
+            }
+
+            $tags[] = array(
+                'cashtag' => $cash_text[0],
+                'indices' => array($start_position, $end_position)
+            );
+        }
+
+        return $tags;
+    }
+
+    /**
+     * Extracts all the URLs and the indices they occur at from the tweet.
+     *
+     * Each returned element is an array with a 'url' string and an 'indices'
+     * pair array(start, end) measured in characters, not bytes.
+     *
+     * URLs without a protocol are only considered when the
+     * extractURLWithoutProtocol flag is enabled.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The URLs elements in the tweet.
+     */
+    public function extractURLsWithIndices($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+
+        // Cheap pre-check: when protocol-less URLs are allowed any candidate
+        // needs at least a dot, otherwise it needs at least a colon.
+        $needle = $this->extractURLWithoutProtocol() ? '.' : ':';
+        if (strpos($tweet, $needle) === false) {
+            return array();
+        }
+
+        $urls = array();
+        preg_match_all(self::$patterns['valid_url'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+
+        foreach ($matches as $match) {
+            // Trailing groups that did not take part in the match are padded
+            // so the destructuring below always has eight slots.
+            list($all, $before, $url, $protocol, $domain, $port, $path, $query) = array_pad($match, 8, array(''));
+            // PCRE offsets are byte offsets: measure the prefix to convert the
+            // start to a character index.
+            $start_position = $url[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $url[1])) : $url[1];
+            $end_position = $start_position + StringUtils::strlen($url[0]);
+
+            // From here on only the matched text is needed, not the offsets.
+            $all = $all[0];
+            $before = $before[0];
+            $url = $url[0];
+            $protocol = $protocol[0];
+            $domain = $domain[0];
+            $port = $port[0];
+            $path = $path[0];
+            $query = $query[0];
+
+            // If protocol is missing and domain contains non-ASCII characters,
+            // extract ASCII-only domains.
+            if (empty($protocol)) {
+                // Skip when protocol-less extraction is disabled or the text
+                // before the candidate matches the invalid-preceding pattern.
+                if (!$this->extractURLWithoutProtocol || preg_match(self::$patterns['invalid_url_without_protocol_preceding_chars'], $before)) {
+                    continue;
+                }
+
+                $last_url = null;
+                $ascii_end_position = 0;
+
+                if (preg_match(self::$patterns['valid_ascii_domain'], $domain, $asciiDomain)) {
+                    // Substitute the ASCII-only domain for the matched domain
+                    // inside the full URL text.
+                    $asciiDomain[0] = preg_replace('/' . preg_quote($domain, '/') . '/u', $asciiDomain[0], $url);
+                    $ascii_start_position = StringUtils::strpos($domain, $asciiDomain[0], $ascii_end_position);
+                    $ascii_end_position = $ascii_start_position + StringUtils::strlen($asciiDomain[0]);
+                    $last_url = array(
+                        'url' => $asciiDomain[0],
+                        'indices' => array($start_position + $ascii_start_position, $start_position + $ascii_end_position),
+                    );
+                    // Keep the candidate when it has a path, is a known
+                    // special short domain, or is not an invalid short domain.
+                    if (!empty($path)
+                        || preg_match(self::$patterns['valid_special_short_domain'], $asciiDomain[0])
+                        || !preg_match(self::$patterns['invalid_short_domain'], $asciiDomain[0])) {
+                        $urls[] = $last_url;
+                    }
+                }
+
+                // no ASCII-only domain found. Skip the entire URL
+                if (empty($last_url)) {
+                    continue;
+                }
+
+                // $last_url only contains domain. Need to add path and query if they exist.
+                // NOTE(review): $last_url is a local copy here; the entry
+                // appended to $urls above is not affected by the changes
+                // below and $last_url is not read again in this iteration -
+                // confirm whether the end index was meant to be updated on
+                // the stored entry.
+                if (!empty($path)) {
+                    // last_url was not added. Add it to urls here.
+                    $last_url['url'] = preg_replace('/' . preg_quote($domain, '/') . '/u', $last_url['url'], $url);
+                    $last_url['indices'][1] = $end_position;
+                }
+            } else {
+                // In the case of t.co URLs, don't allow additional path characters
+                if (preg_match(self::$patterns['valid_tco_url'], $url, $tcoUrlMatches)) {
+                    $url = $tcoUrlMatches[0];
+                    $end_position = $start_position + StringUtils::strlen($url);
+                }
+                $urls[] = array(
+                    'url' => $url,
+                    'indices' => array($start_position, $end_position),
+                );
+            }
+        }
+
+        return $urls;
+    }
+
+    /**
+     * Extracts all the usernames and the indices they occur at from the tweet.
+     *
+     * Works like extractMentionsOrListsWithIndices() but removes the
+     * 'list_slug' entry from every returned element.
+     *
+     * @param  string  $tweet  The tweet to extract; null uses the stored tweet.
+     * @return  array  The username elements in the tweet.
+     */
+    public function extractMentionedScreennamesWithIndices($tweet = null)
+    {
+        $text = is_null($tweet) ? $this->tweet : $tweet;
+
+        return array_map(
+            function ($mention) {
+                if (isset($mention['list_slug'])) {
+                    unset($mention['list_slug']);
+                }
+                return $mention;
+            },
+            $this->extractMentionsOrListsWithIndices($text)
+        );
+    }
+
+    /**
+     * Extracts all the usernames and the indices they occur at from the
+     * stored tweet.
+     *
+     * Legacy alias that calls extractMentionedScreennamesWithIndices()
+     * without arguments.
+     *
+     * @return  array  The username elements in the tweet.
+     * @deprecated since version 1.1.0
+     */
+    public function extractMentionedUsernamesWithIndices()
+    {
+        return $this->extractMentionedScreennamesWithIndices();
+    }
+
+    /**
+     * Extracts all the usernames (and list slugs) and the indices they occur
+     * at from the tweet.
+     *
+     * Each element holds 'screen_name', 'list_slug' (empty string when the
+     * mention is not a list) and an 'indices' pair array(start, end) measured
+     * in characters. The span starts at the at-sign and, for lists, extends
+     * over the slug as well.
+     *
+     * @param  string  $tweet  The tweet to extract.
+     * @return  array  The username elements in the tweet.
+     */
+    public function extractMentionsOrListsWithIndices($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+
+        // Cheap pre-check before running the full pattern. Accept both the
+        // ASCII "@" and the full-width commercial at (U+FF20); the latter is
+        // written as an escape so it survives re-encoding of this file.
+        // NOTE(review): presumably mirrors the markers accepted by the
+        // 'valid_mentions_or_lists' pattern - confirm against its definition.
+        if (!preg_match('/[@\x{FF20}]/u', $tweet)) {
+            return array();
+        }
+
+        preg_match_all(self::$patterns['valid_mentions_or_lists'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        $results = array();
+
+        foreach ($matches as $match) {
+            list($all, $before, $at, $username, $list_slug, $outer) = array_pad($match, 6, array('', 0));
+
+            // Skip candidates whose following text matches the
+            // end-of-mention pattern.
+            if (preg_match(self::$patterns['end_mention_match'], $outer[0])) {
+                continue;
+            }
+
+            // PCRE offsets are byte offsets: measure the prefix to convert the
+            // start to a character index.
+            $start_position = $at[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $at[1])) : $at[1];
+            $end_position = $start_position + StringUtils::strlen($at[0]) + StringUtils::strlen($username[0]);
+
+            // A list mention also spans its slug.
+            if (!empty($list_slug[0])) {
+                $end_position += StringUtils::strlen($list_slug[0]);
+            }
+
+            $results[] = array(
+                'screen_name' => $username[0],
+                'list_slug' => $list_slug[0],
+                'indices' => array($start_position, $end_position),
+            );
+        }
+
+        return $results;
+    }
+
+    /**
+     * Extracts all the usernames and lists, with the indices they occur at,
+     * from the stored tweet.
+     *
+     * Legacy alias that calls extractMentionsOrListsWithIndices() without
+     * arguments.
+     *
+     * @return  array  The username elements in the tweet.
+     * @deprecated since version 1.1.0
+     */
+    public function extractMentionedUsernamesOrListsWithIndices()
+    {
+        return $this->extractMentionsOrListsWithIndices();
+    }
+
+    /**
+     * Combined getter/setter for the extractURLWithoutProtocol flag.
+     *
+     * Called without an argument (or with null) it returns the current flag.
+     * Called with a value it stores that value cast to boolean and returns
+     * the object itself for chaining.
+     *
+     * @param boolean $flag  New flag value, or null to read the current one.
+     * @return boolean|Extractor  The current flag, or $this after setting.
+     */
+    public function extractURLWithoutProtocol($flag = null)
+    {
+        if (!is_null($flag)) {
+            $this->extractURLWithoutProtocol = (bool) $flag;
+            return $this;
+        }
+
+        return $this->extractURLWithoutProtocol;
+    }
+
+    /**
+     * Remove overlapping entities.
+     *
+     * Entities are ordered by start index; an entity is dropped when it
+     * starts before the end index of the most recently kept entity. The
+     * input array is not modified.
+     *
+     * @param array $entities  Entities carrying an 'indices' (start, end) pair.
+     * @return array  A new list without overlapping entities.
+     */
+    public function removeOverlappingEntities($entities)
+    {
+        usort($entities, array($this, 'sortEntites'));
+
+        $kept = array();
+        $previous = null;
+        foreach ($entities as $candidate) {
+            $overlaps = isset($previous) && $candidate['indices'][0] < $previous['indices'][1];
+            if (!$overlaps) {
+                $previous = $candidate;
+                $kept[] = $candidate;
+            }
+        }
+
+        return $kept;
+    }
+
+    /**
+     * usort() comparator ordering entities by ascending start index.
+     *
+     * @param array $a  Entity whose 'indices'[0] is its start index.
+     * @param array $b  Entity to compare against.
+     * @return int  0 when both start at the same index, -1 when $a starts
+     *              first, 1 otherwise.
+     */
+    protected function sortEntites($a, $b)
+    {
+        $left = $a['indices'][0];
+        $right = $b['indices'][0];
+
+        if ($left == $right) {
+            return 0;
+        }
+        if ($left < $right) {
+            return -1;
+        }
+        return 1;
+    }
+}

+ 202 - 0
app/Util/Lexer/HitHighlighter.php

@@ -0,0 +1,202 @@
+<?php
+
+/**
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+use App\Util\Lexer\Regex;
+use App\Util\Lexer\StringUtils;
+
+/**
+ * Twitter HitHighlighter Class
+ *
+ * Performs "hit highlighting" on tweets that have been auto-linked already.
+ * Useful with the results returned from the search API.
+ *
+ * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
+ * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
+ * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
+ *
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+class HitHighlighter extends Regex
+{
+
+    /**
+     * The tag to surround hits with.
+     *
+     * @var  string
+     */
+    protected $tag = 'em';
+
+    /**
+     * Provides fluent method chaining.
+     *
+     * With the default arguments the tweet is handed to the constructor
+     * unescaped; passing a true $full_encode escapes it with full entity
+     * encoding.
+     *
+     * @param  string  $tweet        The tweet to be hit highlighted.
+     * @param  bool    $full_encode  Whether to encode all special characters.
+     *
+     * @see  __construct()
+     *
+     * @return  HitHighlighter
+     */
+    public static function create($tweet = null, $full_encode = false)
+    {
+        // The constructor signature is ($tweet, $escape, $full_encode), so
+        // $full_encode must go in the third slot to have any effect.
+        // Escaping is switched on only together with $full_encode, which
+        // keeps the default call passing the text through untouched.
+        return new self($tweet, (bool) $full_encode, $full_encode);
+    }
+
+    /**
+     * Reads in a tweet to be parsed and hit highlighted.
+     *
+     * A non-empty tweet is escaped before being handed to the parent
+     * constructor unless $escape is false. Existing entities are not encoded
+     * a second time.
+     *
+     * @see  htmlspecialchars()
+     * @see  htmlentities()
+     *
+     * @param  string  $tweet        The tweet to be hit highlighted.
+     * @param  bool    $escape       Whether to escape the tweet (default: true).
+     * @param  bool    $full_encode  Whether to encode all special characters.
+     */
+    public function __construct($tweet = null, $escape = true, $full_encode = false)
+    {
+        $prepared = $tweet;
+
+        if ($escape && !empty($tweet)) {
+            $prepared = $full_encode
+                ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
+                : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
+        }
+
+        parent::__construct($prepared);
+    }
+
+    /**
+     * Get the highlighting tag that surrounds hits.  The default tag is 'em'.
+     *
+     * @return  string  The tag name, without angle brackets.
+     */
+    public function getTag()
+    {
+        return $this->tag;
+    }
+
+    /**
+     * Set the highlighting tag to surround hits with.  The default tag is 'em'.
+     *
+     * The value is stored as given; highlight() wraps it in angle brackets
+     * to build the opening and closing tags.
+     *
+     * @param  string  $v  The tag name, without angle brackets.
+     *
+     * @return  HitHighlighter  Fluid method chaining.
+     */
+    public function setTag($v)
+    {
+        $this->tag = $v;
+        return $this;
+    }
+
+    /**
+     * Hit highlights the tweet.
+     *
+     * Wraps every hit range in the configured tag. Text without any '<' is
+     * handled by direct insertion. Text that already contains markup is
+     * split on angle brackets, and hit offsets are counted over the text
+     * chunks only, so existing tags do not shift them.
+     *
+     * @param string $tweet The tweet to be hit highlighted; null uses the
+     *                        stored tweet.
+     * @param array  $hits  An array containing the start and end index pairs
+     *                        for the highlighting.
+     *
+     * @return  string  The hit highlighted tweet; the tweet unchanged when
+     *                  $hits is empty.
+     */
+    public function highlight($tweet = null, array $hits = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        if (empty($hits)) {
+            return $tweet;
+        }
+        $highlightTweet = '';
+        // Opening and closing tag built from the configured tag name.
+        $tags = array('<' . $this->tag . '>', '</' . $this->tag . '>');
+        # Check whether we can simply replace or whether we need to chunk...
+        if (strpos($tweet, '<') === false) {
+            // No markup present: insert the tags directly. $ti accumulates the
+            // length of the tags already inserted, so this relies on the hits
+            // being supplied in ascending order.
+            $ti = 0; // tag increment (for added tags)
+            $highlightTweet = $tweet;
+            foreach ($hits as $hit) {
+                $highlightTweet = StringUtils::substrReplace($highlightTweet, $tags[0], $hit[0] + $ti, 0);
+                $ti += StringUtils::strlen($tags[0]);
+                $highlightTweet = StringUtils::substrReplace($highlightTweet, $tags[1], $hit[1] + $ti, 0);
+                $ti += StringUtils::strlen($tags[1]);
+            }
+        } else {
+            // Markup present: after splitting on '<' and '>' the even-indexed
+            // chunks are treated as text and the odd-indexed ones as tag
+            // contents, which are re-emitted between angle brackets.
+            $chunks = preg_split('/[<>]/iu', $tweet);
+            $chunk = $chunks[0];
+            $chunk_index = 0;
+            $chunk_cursor = 0; // position inside the current text chunk already emitted
+            $offset = 0; // total length of the text chunks consumed so far
+            $start_in_chunk = false;
+            # Flatten the multidimensional hits array:
+            $hits_flat = array();
+            foreach ($hits as $hit) {
+                $hits_flat = array_merge($hits_flat, $hit);
+            }
+            # Loop over the hit indices:
+            for ($index = 0; $index < count($hits_flat); $index++) {
+                $hit = $hits_flat[$index];
+                // Even positions in the flat list are starts, odd ones ends.
+                $tag = $tags[$index % 2];
+                $placed = false;
+                // Emit whole chunks until the one containing this hit is current.
+                while ($chunk !== null && $hit >= ($i = $offset + StringUtils::strlen($chunk))) {
+                    $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor);
+                    // A hit that ends exactly at the chunk boundary is closed
+                    // before the following tag is emitted.
+                    if ($start_in_chunk && $hit === $i) {
+                        $highlightTweet .= $tag;
+                        $placed = true;
+                    }
+                    if (isset($chunks[$chunk_index + 1])) {
+                        $highlightTweet .= '<' . $chunks[$chunk_index + 1] . '>';
+                    }
+                    $offset += StringUtils::strlen($chunk);
+                    $chunk_cursor = 0;
+                    // Skip over the tag chunk to the next text chunk.
+                    $chunk_index += 2;
+                    $chunk = (isset($chunks[$chunk_index]) ? $chunks[$chunk_index] : null);
+                    $start_in_chunk = false;
+                }
+                if (!$placed && $chunk !== null) {
+                    // The hit falls inside the current chunk: emit the text up
+                    // to it, then the tag.
+                    $hit_spot = $hit - $offset;
+                    $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor, $hit_spot - $chunk_cursor) . $tag;
+                    $chunk_cursor = $hit_spot;
+                    $start_in_chunk = ($index % 2 === 0);
+                    $placed = true;
+                }
+                # Ultimate fallback - hits that run off the end get a closing tag:
+                if (!$placed) {
+                    $highlightTweet .= $tag;
+                }
+            }
+            // Flush whatever remains after the last hit.
+            if ($chunk !== null) {
+                if ($chunk_cursor < StringUtils::strlen($chunk)) {
+                    $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor);
+                }
+                for ($index = $chunk_index + 1; $index < count($chunks); $index++) {
+                    $highlightTweet .= ($index % 2 === 0 ? $chunks[$index] : '<' . $chunks[$index] . '>');
+                }
+            }
+        }
+        return $highlightTweet;
+    }
+
+    /**
+     * Hit highlights the stored tweet.
+     *
+     * Legacy alias that calls highlight() with the stored tweet.
+     *
+     * @param  array  $hits  An array containing the start and end index pairs
+     *                       for the highlighting.
+     *
+     * @return  string  The hit highlighted tweet.
+     * @deprecated since version 1.1.0
+     */
+    public function addHitHighlighting(array $hits)
+    {
+        return $this->highlight($this->tweet, $hits);
+    }
+}

+ 348 - 0
app/Util/Lexer/LooseAutolink.php

@@ -0,0 +1,348 @@
+<?php
+
+/**
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @author     Takashi Nojima
+ * @copyright  Copyright 2014 Mike Cochrane, Nick Pope, Takashi Nojima
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+use App\Util\Lexer\Autolink;
+
+/**
+ * Twitter LooseAutolink Class
+ *
+ * Parses tweets and generates HTML anchor tags around URLs, usernames,
+ * username/list pairs and hashtags.
+ *
+ * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
+ * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
+ * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
+ *
+ * @author     Mike Cochrane <mikec@mikenz.geek.nz>
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @author     Takashi Nojima
+ * @copyright  Copyright 2014 Mike Cochrane, Nick Pope, Takashi Nojima
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ * @since      1.8.0
+ * @deprecated since version 1.9.0
+ */
+class LooseAutolink extends Autolink
+{
+
+    /**
+     * Auto-link hashtags, URLs, usernames and lists.
+     *
+     * A non-null argument replaces the stored tweet before linking.
+     *
+     * @param  string  $tweet  The tweet to be converted; null keeps the stored tweet.
+     * @return string  The tweet with auto-link HTML added.
+     * @deprecated since version 1.9.0
+     */
+    public function autoLink($tweet = null)
+    {
+        if ($tweet !== null) {
+            $this->tweet = $tweet;
+        }
+
+        return $this->addLinks();
+    }
+
+    /**
+     * Auto-link the @username and @username/list references in the text.
+     *
+     * Username links receive the user CSS class and username/list links the
+     * list CSS class. A non-null argument replaces the stored tweet first.
+     *
+     * @param  string  $tweet  The tweet to be converted; null keeps the stored tweet.
+     * @return string  The tweet with auto-link HTML added.
+     */
+    public function autoLinkUsernamesAndLists($tweet = null)
+    {
+        if ($tweet !== null) {
+            $this->tweet = $tweet;
+        }
+
+        return $this->addLinksToUsernamesAndLists();
+    }
+
+    /**
+     * Auto-link #hashtag references in the text.
+     *
+     * Hashtag links receive the hashtag CSS class. A non-null argument
+     * replaces the stored tweet first.
+     *
+     * @param  string  $tweet  The tweet to be converted; null keeps the stored tweet.
+     * @return string  The tweet with auto-link HTML added.
+     */
+    public function autoLinkHashtags($tweet = null)
+    {
+        if ($tweet !== null) {
+            $this->tweet = $tweet;
+        }
+
+        return $this->addLinksToHashtags();
+    }
+
+    /**
+     * Auto-link URLs in the text.
+     *
+     * Only URLs with a protocol are linked. A non-null argument replaces the
+     * stored tweet first.
+     *
+     * @param  string  $tweet  The tweet to be converted; null keeps the stored tweet.
+     * @return string  The tweet with auto-link HTML added.
+     */
+    public function autoLinkURLs($tweet = null)
+    {
+        if ($tweet !== null) {
+            $this->tweet = $tweet;
+        }
+
+        return $this->addLinksToURLs();
+    }
+
+    /**
+     * Auto-link $cashtag references in the text.
+     *
+     * Cashtag links receive the cashtag CSS class. A non-null argument
+     * replaces the stored tweet first.
+     *
+     * @param  string  $tweet  The tweet to be converted; null keeps the stored tweet.
+     * @return string  The tweet with auto-link HTML added.
+     */
+    public function autoLinkCashtags($tweet = null)
+    {
+        if ($tweet !== null) {
+            $this->tweet = $tweet;
+        }
+
+        return $this->addLinksToCashtags();
+    }
+
+    /**
+     * Adds links to all elements in the tweet.
+     *
+     * Runs the URL, hashtag, cashtag and username/list passes in that order,
+     * each working on the output of the previous one, and restores the
+     * stored tweet afterwards.
+     *
+     * @return  string  The modified tweet.
+     * @deprecated since version 1.9.0
+     */
+    public function addLinks()
+    {
+        $original = $this->tweet;
+
+        // Each pass reads $this->tweet, so its result is stored back before
+        // the next pass runs.
+        $passes = array(
+            'addLinksToURLs',
+            'addLinksToHashtags',
+            'addLinksToCashtags',
+            'addLinksToUsernamesAndLists',
+        );
+        foreach ($passes as $pass) {
+            $this->tweet = $this->$pass();
+        }
+
+        $modified = $this->tweet;
+        $this->tweet = $original;
+
+        return $modified;
+    }
+
+    /**
+     * Adds links to hashtag elements in the stored tweet.
+     *
+     * Every match of the 'valid_hashtag' pattern is passed to
+     * _addLinksToHashtags() for replacement.
+     *
+     * @return  string  The modified tweet.
+     */
+    public function addLinksToHashtags()
+    {
+        $callback = array($this, '_addLinksToHashtags');
+
+        return preg_replace_callback(self::$patterns['valid_hashtag'], $callback, $this->tweet);
+    }
+
+    /**
+     * Adds links to cashtag elements in the stored tweet.
+     *
+     * Every match of the 'valid_cashtag' pattern is passed to
+     * _addLinksToCashtags() for replacement.
+     *
+     * @return  string  The modified tweet.
+     */
+    public function addLinksToCashtags()
+    {
+        $callback = array($this, '_addLinksToCashtags');
+
+        return preg_replace_callback(self::$patterns['valid_cashtag'], $callback, $this->tweet);
+    }
+
+    /**
+     * Adds links to URL elements in the stored tweet.
+     *
+     * Every match of the 'valid_url' pattern is passed to _addLinksToURLs()
+     * for replacement.
+     *
+     * @return  string  The modified tweet
+     */
+    public function addLinksToURLs()
+    {
+        $callback = array($this, '_addLinksToURLs');
+
+        return preg_replace_callback(self::$patterns['valid_url'], $callback, $this->tweet);
+    }
+
+    /**
+     * Adds links to username/list elements in the stored tweet.
+     *
+     * Every match of the 'valid_mentions_or_lists' pattern is passed to
+     * _addLinksToUsernamesAndLists() for replacement.
+     *
+     * @return  string  The modified tweet.
+     */
+    public function addLinksToUsernamesAndLists()
+    {
+        $callback = array($this, '_addLinksToUsernamesAndLists');
+
+        return preg_replace_callback(self::$patterns['valid_mentions_or_lists'], $callback, $this->tweet);
+    }
+
+    /**
+     * Wraps a tweet element in an HTML anchor tag using the provided URL.
+     *
+     * Attributes are emitted in the order class, href, rel, target; class,
+     * rel and target are left out when they have no value. None of the
+     * arguments is escaped here.
+     *
+     * @param  string  $url      The URL to use as the href.
+     * @param  string  $class    The CSS class(es) to apply (space separated).
+     * @param  string  $element  The tweet element to wrap.
+     *
+     * @return  string  The tweet element with a link applied.
+     * @deprecated since version 1.1.0
+     */
+    protected function wrap($url, $class, $element)
+    {
+        $attributes = array();
+        if ($class) {
+            $attributes[] = 'class="' . $class . '"';
+        }
+        $attributes[] = 'href="' . $url . '"';
+
+        // rel lists whichever of the two link relations are switched on.
+        $relations = array();
+        if ($this->external) {
+            $relations[] = 'external';
+        }
+        if ($this->nofollow) {
+            $relations[] = 'nofollow';
+        }
+        if (!empty($relations)) {
+            $attributes[] = 'rel="' . implode(' ', $relations) . '"';
+        }
+
+        if ($this->target) {
+            $attributes[] = 'target="' . $this->target . '"';
+        }
+
+        return '<a ' . implode(' ', $attributes) . '>' . $element . '</a>';
+    }
+
+    /**
+     * Wraps a tweet element in an HTML anchor tag using the provided URL.
+     *
+     * This is a helper function to perform the generation of the hashtag
+     * link. Unlike wrap(), it also emits a title attribute holding the
+     * element text, with a full-width number sign normalised to ASCII "#".
+     * Attributes are emitted in the order href, title, class, rel, target.
+     *
+     * @param  string  $url      The URL to use as the href.
+     * @param  string  $class    The CSS class(es) to apply (space separated).
+     * @param  string  $element  The tweet element to wrap.
+     *
+     * @return  string  The tweet element with a link applied.
+     */
+    protected function wrapHash($url, $class, $element)
+    {
+        // Normalise the full-width number sign (U+FF03) to "#". It is written
+        // as an escape so the pattern survives re-encoding of this file; a
+        // literal "#" on both sides would replace nothing.
+        $title = preg_replace('/\x{FF03}/u', '#', $element);
+        $link = '<a';
+        $link .= ' href="' . $url . '"';
+        $link .= ' title="' . $title . '"';
+        if ($class) {
+            $link .= ' class="' . $class . '"';
+        }
+        // rel lists whichever of the two link relations are switched on.
+        $rel = array();
+        if ($this->external) {
+            $rel[] = 'external';
+        }
+        if ($this->nofollow) {
+            $rel[] = 'nofollow';
+        }
+        if (!empty($rel)) {
+            $link .= ' rel="' . implode(' ', $rel) . '"';
+        }
+        if ($this->target) {
+            $link .= ' target="' . $this->target . '"';
+        }
+        $link .= '>' . $element . '</a>';
+        return $link;
+    }
+
+    /**
+     * Callback used by the method that adds links to hashtags.
+     *
+     * Returns the match untouched when the text after it matches the
+     * end-of-hashtag pattern, starts with a quote that the text before does
+     * not start with, or starts with a closing tag.
+     *
+     * @see  addLinksToHashtags()
+     * @param  array  $matches  The regular expression matches.
+     * @return  string  The link-wrapped hashtag.
+     */
+    protected function _addLinksToHashtags($matches)
+    {
+        list($all, $before, $hash, $tag, $after) = array_pad($matches, 5, '');
+
+        if (preg_match(self::$patterns['end_hashtag_match'], $after)) {
+            return $all;
+        }
+        if (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) {
+            return $all;
+        }
+        if (preg_match('!\A</!', $after)) {
+            return $all;
+        }
+
+        $element = $hash . $tag;
+        // Tags matching the right-to-left pattern get an extra class.
+        $cssClass = $this->class_hash;
+        if (preg_match(self::$patterns['rtl_chars'], $element)) {
+            $cssClass .= ' rtl';
+        }
+
+        return $before . $this->wrapHash($this->url_base_hash . $tag, $cssClass, $element);
+    }
+
+    /**
+     * Callback used by the method that adds links to cashtags.
+     *
+     * Returns the match untouched when the text after it matches the
+     * end-of-cashtag pattern, starts with a quote that the text before does
+     * not start with, or starts with a closing tag.
+     *
+     * @see  addLinksToCashtags()
+     * @param  array  $matches  The regular expression matches.
+     * @return  string  The link-wrapped cashtag.
+     */
+    protected function _addLinksToCashtags($matches)
+    {
+        list($all, $before, $cash, $tag, $after) = array_pad($matches, 5, '');
+
+        if (preg_match(self::$patterns['end_cashtag_match'], $after)) {
+            return $all;
+        }
+        if (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) {
+            return $all;
+        }
+        if (preg_match('!\A</!', $after)) {
+            return $all;
+        }
+
+        return $before . $this->wrapHash($this->url_base_cash . $tag, $this->class_cash, $cash . $tag);
+    }
+
+    /**
+     * Callback used by the method that adds links to URLs.
+     *
+     * Matches without a protocol are returned untouched. Otherwise the URL
+     * is HTML-escaped (existing entities are kept) and used both as the href
+     * and as the link text.
+     *
+     * @see  addLinksToURLs()
+     * @param  array  $matches  The regular expression matches.
+     * @return  string  The link-wrapped URL.
+     */
+    protected function _addLinksToURLs($matches)
+    {
+        // Slots used: 0 = whole match, 1 = preceding text, 2 = URL,
+        // 3 = protocol.
+        $parts = array_pad($matches, 7, '');
+
+        if (!$parts[3]) {
+            return $parts[0];
+        }
+
+        $href = htmlspecialchars($parts[2], ENT_QUOTES, 'UTF-8', false);
+
+        return $parts[1] . $this->wrap($href, $this->class_url, $href);
+    }
+
+    /**
+     * Callback used by the method that adds links to username/list pairs.
+     *
+     * A match with a list part is linked as a list. A plain username is
+     * linked as a user unless the text after it matches the end-of-mention
+     * pattern, in which case the match is returned untouched.
+     *
+     * @see  addLinksToUsernamesAndLists()
+     * @param  array  $matches  The regular expression matches.
+     * @return  string  The link-wrapped username/list pair.
+     */
+    protected function _addLinksToUsernamesAndLists($matches)
+    {
+        list($all, $before, $at, $username, $slash_listname, $after) = array_pad($matches, 6, '');
+
+        $isList = !empty($slash_listname);
+        if (!$isList && preg_match(self::$patterns['end_mention_match'], $after)) {
+            return $all;
+        }
+
+        if ($isList) {
+            $element = $username . $slash_listname;
+            $class = $this->class_list;
+            $url = $this->url_base_list . $element;
+        } else {
+            $element = $username;
+            $class = $this->class_user;
+            $url = $this->url_base_user . $element;
+        }
+
+        # XXX: $after comes from a look-ahead and is not part of the replaced
+        #      match (preg_replace_callback() only replaces the match itself),
+        #      so it MUST NOT be appended here.
+        return $before . $at . $this->wrap($url, $class, $element);
+    }
+}

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 179 - 0
app/Util/Lexer/Regex.php


+ 104 - 0
app/Util/Lexer/StringUtils.php

@@ -0,0 +1,104 @@
+<?php
+
+/**
+ * @author     Takashi Nojima
+ * @copyright  Copyright 2014, Takashi Nojima
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+/**
+ * String utility
+ *
+ * @author     Takashi Nojima
+ * @copyright  Copyright 2014, Takashi Nojima
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter
+ */
+class StringUtils
+{
+
+    /**
+     * Multibyte-aware substring extraction (thin wrapper around mb_substr).
+     *
+     * @param string $str       The string to cut from.
+     * @param integer $start    Character offset at which to start.
+     * @param integer $length   Number of characters to return; null means "up to the end".
+     * @param string $encoding  Character encoding of $str.
+     * @return string
+     */
+    public static function substr($str, $start, $length = null, $encoding = 'UTF-8')
+    {
+        // PHP <= 5.4.7 does not treat a null length as "to the end", so the
+        // full character count is substituted explicitly.
+        $count = ($length === null) ? mb_strlen($str, $encoding) : $length;
+
+        return mb_substr($str, $start, $count, $encoding);
+    }
+
+    /**
+     * Multibyte-aware string length (thin wrapper around mb_strlen).
+     *
+     * @param string $str       The string to measure.
+     * @param string $encoding  Character encoding of $str.
+     * @return integer
+     */
+    public static function strlen($str, $encoding = 'UTF-8')
+    {
+        return mb_strlen($str, $encoding);
+    }
+
+    /**
+     * Multibyte-aware substring search (thin wrapper around mb_strpos).
+     *
+     * @param string $haystack  The string to search in.
+     * @param string $needle    The string to search for.
+     * @param integer $offset   Character offset at which the search starts.
+     * @param string $encoding  Character encoding of the strings.
+     * @return integer|false  Whatever mb_strpos returns: the position, or false when not found.
+     */
+    public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8')
+    {
+        return mb_strpos($haystack, $needle, $offset, $encoding);
+    }
+
+    /**
+     * Replaces a span of a string, counting characters rather than bytes.
+     *
+     * Negative $start and $length values are counted from the end of the
+     * string, and out-of-range values are clamped to the string bounds.
+     *
+     * @param  string  $string       The string to modify.
+     * @param  string  $replacement  The replacement string.
+     * @param  int     $start        The start of the replacement.
+     * @param  int     $length       The number of characters to replace (null: through to the end).
+     * @param  string  $encoding     The encoding of the string.
+     *
+     * @return  string  The modified string.
+     *
+     * @see http://www.php.net/manual/en/function.substr-replace.php#90146
+     */
+    public static function substrReplace($string, $replacement, $start, $length = null, $encoding = 'UTF-8')
+    {
+        // Without mbstring, fall back to the byte-oriented built-in.
+        if (extension_loaded('mbstring') !== true) {
+            if ($length === null) {
+                return substr_replace($string, $replacement, $start);
+            }
+            return substr_replace($string, $replacement, $start, $length);
+        }
+
+        $total = static::strlen($string, $encoding);
+
+        // Resolve the start offset into the range [0, $total].
+        if ($start < 0) {
+            $start = max(0, $total + $start);
+        } elseif ($start > $total) {
+            $start = $total;
+        }
+
+        // Resolve the length: a negative value stops that many characters
+        // before the end; a missing or oversized value means "everything".
+        if ($length < 0) {
+            $length = max(0, $total - $start + $length);
+        } elseif ($length === null || $length > $total) {
+            $length = $total;
+        }
+
+        // Never let the replaced span run past the end of the string.
+        $length = min($length, $total - $start);
+
+        $head = static::substr($string, 0, $start, $encoding);
+        $tail = static::substr($string, $start + $length, $total - $start - $length, $encoding);
+
+        return $head . $replacement . $tail;
+    }
+}

+ 388 - 0
app/Util/Lexer/Validator.php

@@ -0,0 +1,388 @@
+<?php
+
+/**
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace App\Util\Lexer;
+
+use App\Util\Lexer\Regex;
+use App\Util\Lexer\Extractor;
+use App\Util\Lexer\StringUtils;
+
+/**
+ * Twitter Validator Class
+ *
+ * Performs "validation" on tweets.
+ *
+ * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
+ * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
+ * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
+ *
+ * @author     Nick Pope <nick@nickpope.me.uk>
+ * @copyright  Copyright © 2010, Nick Pope
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+class Validator extends Regex
+{
+
+    /**
+     * The maximum length of a tweet.
+     *
+     * @var  int
+     */
+    const MAX_LENGTH = 140;
+
+    /**
+     * The length of a short URL beginning with http:
+     *
+     * @var  int
+     */
+    protected $short_url_length = 23;
+
+    /**
+     * The length of a short URL beginning with https:
+     *
+     * @var  int
+     */
+    protected $short_url_length_https = 23;
+
+    /**
+     * Extractor used to pull mentions, hashtags and URLs out of the text
+     * being validated.
+     *
+     * @var Extractor
+     */
+    protected $extractor = null;
+
+    /**
+     * Provides fluent method chaining.
+     *
+     * @param  string  $tweet  The tweet to be validated.
+     * @param  mixed   $config Setup short URL length from Twitter API /help/configuration response.
+     *
+     * @see  __construct()
+     *
+     * @return  Validator
+     */
+    public static function create($tweet = null, $config = null)
+    {
+        return new self($tweet, $config);
+    }
+
+    /**
+     * Reads in a tweet to be parsed and validates it.
+     *
+     * @param  string  $tweet   The tweet to validate.
+     * @param  mixed   $config  Optional array or object carrying short URL lengths;
+     *                          applied through setConfiguration() when not empty.
+     */
+    public function __construct($tweet = null, $config = null)
+    {
+        parent::__construct($tweet);
+        if (!empty($config)) {
+            $this->setConfiguration($config);
+        }
+        $this->extractor = Extractor::create();
+    }
+
+    /**
+     * Setup short URL length from Twitter API /help/configuration response
+     *
+     * Reads `short_url_length` and `short_url_length_https` from either an
+     * array or an object; any other kind of value is ignored.
+     *
+     * @param mixed $config
+     * @return Validator
+     * @link https://dev.twitter.com/docs/api/1/get/help/configuration
+     */
+    public function setConfiguration($config)
+    {
+        if (is_array($config)) {
+            // setup from array
+            if (isset($config['short_url_length'])) {
+                $this->setShortUrlLength($config['short_url_length']);
+            }
+            if (isset($config['short_url_length_https'])) {
+                $this->setShortUrlLengthHttps($config['short_url_length_https']);
+            }
+        } elseif (is_object($config)) {
+            // setup from object
+            if (isset($config->short_url_length)) {
+                $this->setShortUrlLength($config->short_url_length);
+            }
+            if (isset($config->short_url_length_https)) {
+                $this->setShortUrlLengthHttps($config->short_url_length_https);
+            }
+        }
+
+        return $this;
+    }
+
+    /**
+     * Set the length of a short URL beginning with http:
+     *
+     * @param mixed $length  Converted to an integer with intval().
+     * @return Validator
+     */
+    public function setShortUrlLength($length)
+    {
+        $this->short_url_length = intval($length);
+        return $this;
+    }
+
+    /**
+     * Get the length of a short URL beginning with http:
+     *
+     * @return int
+     */
+    public function getShortUrlLength()
+    {
+        return $this->short_url_length;
+    }
+
+    /**
+     * Set the length of a short URL beginning with https:
+     *
+     * @param mixed $length  Converted to an integer with intval().
+     * @return Validator
+     */
+    public function setShortUrlLengthHttps($length)
+    {
+        $this->short_url_length_https = intval($length);
+        return $this;
+    }
+
+    /**
+     * Get the length of a short URL beginning with https:
+     *
+     * @return int
+     */
+    public function getShortUrlLengthHttps()
+    {
+        return $this->short_url_length_https;
+    }
+
+    /**
+     * Check whether a tweet is valid.
+     *
+     * A tweet is valid when it is not empty, its length (with URLs counted
+     * at their shortened length) does not exceed MAX_LENGTH, and it does not
+     * match the `invalid_characters` pattern.
+     *
+     * @param string $tweet The tweet to validate (defaults to the stored tweet).
+     * @return  boolean  Whether the tweet is valid.
+     */
+    public function isValidTweetText($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        $length = $this->getTweetLength($tweet);
+        # The string '0' is falsy in PHP but is a legitimate one-character
+        # tweet, so it must not be mistaken for an empty tweet.
+        if ((!$tweet && $tweet !== '0') || !$length) {
+            return false;
+        }
+        if ($length > self::MAX_LENGTH) {
+            return false;
+        }
+        if (preg_match(self::$patterns['invalid_characters'], $tweet)) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Check whether a tweet is valid.
+     *
+     * @return  boolean  Whether the tweet is valid.
+     * @deprecated since version 1.1.0
+     */
+    public function validateTweet()
+    {
+        return $this->isValidTweetText();
+    }
+
+    /**
+     * Check whether a username is valid.
+     *
+     * The input must consist of exactly one mention: the extractor has to
+     * find a single screen name, and that name must equal the input with
+     * its first character (the at-sign) removed.
+     *
+     * @param string $username The username to validate (defaults to the stored tweet).
+     * @return  boolean  Whether the username is valid.
+     */
+    public function isValidUsername($username = null)
+    {
+        if (is_null($username)) {
+            $username = $this->tweet;
+        }
+        $length = StringUtils::strlen($username);
+        if (empty($username) || !$length) {
+            return false;
+        }
+        $extracted = $this->extractor->extractMentionedScreennames($username);
+        # Drop the leading sigil by character, not by byte: a full-width
+        # at-sign occupies several bytes in UTF-8 and a byte-wise substr()
+        # would leave part of it behind.
+        return count($extracted) === 1 && $extracted[0] === StringUtils::substr($username, 1);
+    }
+
+    /**
+     * Check whether a username is valid.
+     *
+     * @return  boolean  Whether the username is valid.
+     * @deprecated since version 1.1.0
+     */
+    public function validateUsername()
+    {
+        return $this->isValidUsername();
+    }
+
+    /**
+     * Check whether a list is valid.
+     *
+     * @param string $list The list name to validate (defaults to the stored tweet).
+     * @return  boolean  Whether the list is valid.
+     */
+    public function isValidList($list = null)
+    {
+        if (is_null($list)) {
+            $list = $this->tweet;
+        }
+        $length = StringUtils::strlen($list);
+        if (empty($list) || !$length) {
+            return false;
+        }
+        preg_match(self::$patterns['valid_mentions_or_lists'], $list, $matches);
+        # Index 5 is read below, so six entries (0-5) are needed; padding to
+        # only five left that index undefined.
+        # NOTE(review): presumably group 1 is the text before the mention,
+        # group 4 the "/list" suffix and group 5 the text after, mirroring the
+        # destructuring in Autolink::_addLinksToUsernamesAndLists() - confirm
+        # against the pattern definition in Regex.
+        $matches = array_pad($matches, 6, '');
+        return $matches[1] === '' && !empty($matches[4]) && $matches[5] === '';
+    }
+
+    /**
+     * Check whether a list is valid.
+     *
+     * @return  boolean  Whether the list is valid.
+     * @deprecated since version 1.1.0
+     */
+    public function validateList()
+    {
+        return $this->isValidList();
+    }
+
+    /**
+     * Check whether a hashtag is valid.
+     *
+     * The input must consist of exactly one hashtag: the extractor has to
+     * find a single tag, and that tag must equal the input with its first
+     * character (the hash sign) removed.
+     *
+     * @param string $hashtag The hashtag to validate (defaults to the stored tweet).
+     * @return  boolean  Whether the hashtag is valid.
+     */
+    public function isValidHashtag($hashtag = null)
+    {
+        if (is_null($hashtag)) {
+            $hashtag = $this->tweet;
+        }
+        $length = StringUtils::strlen($hashtag);
+        if (empty($hashtag) || !$length) {
+            return false;
+        }
+        $extracted = $this->extractor->extractHashtags($hashtag);
+        # Drop the leading sigil by character, not by byte: a full-width
+        # hash sign occupies several bytes in UTF-8 and a byte-wise substr()
+        # would leave part of it behind.
+        return count($extracted) === 1 && $extracted[0] === StringUtils::substr($hashtag, 1);
+    }
+
+    /**
+     * Check whether a hashtag is valid.
+     *
+     * @return  boolean  Whether the hashtag is valid.
+     * @deprecated since version 1.1.0
+     */
+    public function validateHashtag()
+    {
+        return $this->isValidHashtag();
+    }
+
+    /**
+     * Check whether a URL is valid.
+     *
+     * The URL is first split by the `validate_url_unencoded` pattern, which
+     * must consume the whole input; each component is then checked against
+     * its own pattern.
+     *
+     * @param  string   $url               The url to validate (defaults to the stored tweet).
+     * @param  boolean  $unicode_domains   Consider the domain to be unicode.
+     * @param  boolean  $require_protocol  Require a protocol for valid domain?
+     *
+     * @return  boolean  Whether the URL is valid.
+     */
+    public function isValidURL($url = null, $unicode_domains = true, $require_protocol = true)
+    {
+        if (is_null($url)) {
+            $url = $this->tweet;
+        }
+        $length = StringUtils::strlen($url);
+        if (empty($url) || !$length) {
+            return false;
+        }
+        preg_match(self::$patterns['validate_url_unencoded'], $url, $matches);
+        # Take the full match off the front; what remains are the components.
+        $match = array_shift($matches);
+        if (!$matches || $match !== $url) {
+            return false;
+        }
+        list($scheme, $authority, $path, $query, $fragment) = array_pad($matches, 5, '');
+        # Check scheme, path, query, fragment:
+        # (when a protocol is required, the scheme must also be http or https)
+        if (($require_protocol && !(
+            self::isValidMatch($scheme, self::$patterns['validate_url_scheme']) && preg_match('/^https?$/i', $scheme))
+            ) || !self::isValidMatch($path, self::$patterns['validate_url_path']) || !self::isValidMatch($query, self::$patterns['validate_url_query'], true)
+            || !self::isValidMatch($fragment, self::$patterns['validate_url_fragment'], true)) {
+            return false;
+        }
+        # Check authority:
+        $authority_pattern = $unicode_domains ? 'validate_url_unicode_authority' : 'validate_url_authority';
+        return self::isValidMatch($authority, self::$patterns[$authority_pattern]);
+    }
+
+    /**
+     * Check whether a URL is valid.
+     *
+     * @param  boolean  $unicode_domains   Consider the domain to be unicode.
+     * @param  boolean  $require_protocol  Require a protocol for valid domain?
+     *
+     * @return  boolean  Whether the URL is valid.
+     * @deprecated since version 1.1.0
+     */
+    public function validateURL($unicode_domains = true, $require_protocol = true)
+    {
+        return $this->isValidURL(null, $unicode_domains, $require_protocol);
+    }
+
+    /**
+     * Determines the length of a tweet.  Takes shortening of URLs into account.
+     *
+     * Every extracted URL is counted at the configured short URL length
+     * (https or http variant) instead of its literal length.
+     *
+     * @param string $tweet The tweet to measure (defaults to the stored tweet).
+     * @return  int  the length of a tweet.
+     */
+    public function getTweetLength($tweet = null)
+    {
+        if (is_null($tweet)) {
+            $tweet = $this->tweet;
+        }
+        $length = StringUtils::strlen($tweet);
+        $urls_with_indices = $this->extractor->extractURLsWithIndices($tweet);
+        foreach ($urls_with_indices as $x) {
+            # Subtract the literal URL length (start index minus end index) ...
+            $length += $x['indices'][0] - $x['indices'][1];
+            # ... and add the shortened length for its scheme.
+            $length += stripos($x['url'], 'https://') === 0 ? $this->short_url_length_https : $this->short_url_length;
+        }
+        return $length;
+    }
+
+    /**
+     * Determines the length of a tweet.  Takes shortening of URLs into account.
+     *
+     * @return  int  the length of a tweet.
+     * @deprecated since version 1.1.0
+     */
+    public function getLength()
+    {
+        return $this->getTweetLength();
+    }
+
+    /**
+     * A helper function to check for a valid match.  Used in URL validation.
+     *
+     * A match counts only when the pattern's full match equals the whole
+     * subject string.
+     *
+     * @param  string   $string    The subject string to test.
+     * @param  string   $pattern   The pattern to match against.
+     * @param  boolean  $optional  Whether a match is compulsory or not.
+     *
+     * @return  boolean  Whether an exact match was found.
+     */
+    protected static function isValidMatch($string, $pattern, $optional = false)
+    {
+        $found = preg_match($pattern, $string, $matches);
+        if (!$optional) {
+            return (($string || $string === '') && $found && $matches[0] === $string);
+        } else {
+            return !(($string || $string === '') && (!$found || $matches[0] !== $string));
+        }
+    }
+}

Vissa filer visades inte eftersom för många filer har ändrats