Validator.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. <?php
  2. /**
  3. * @author Nick Pope <nick@nickpope.me.uk>
  4. * @copyright Copyright © 2010, Nick Pope
  5. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
  6. */
  7. namespace App\Util\Lexer;
  8. /**
  9. * Twitter Validator Class.
  10. *
  11. * Performs "validation" on tweets.
  12. *
  13. * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
  14. * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
  15. * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
  16. *
  17. * @author Nick Pope <nick@nickpope.me.uk>
  18. * @copyright Copyright © 2010, Nick Pope
  19. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
  20. */
  class Validator extends Regex
  {
      /**
       * The maximum length of a tweet.
       *
       * @var int
       */
      const MAX_LENGTH = 140;

      /**
       * The length of a short URL beginning with http:.
       *
       * @var int
       */
      protected $short_url_length = 23;

      /**
       * The length of a short URL beginning with https:.
       *
       * @var int
       */
      protected $short_url_length_https = 23;

      /**
       * Extractor used to find URLs, mentions and hashtags in the text.
       *
       * @var Extractor
       */
      protected $extractor = null;

      /**
       * Provides fluent method chaining.
       *
       * @param string $tweet  The tweet to be validated.
       * @param mixed  $config Setup short URL length from Twitter API /help/configuration response.
       *
       * @see __construct()
       *
       * @return Validator
       */
      public static function create($tweet = null, $config = null)
      {
          return new self($tweet, $config);
      }

      /**
       * Reads in a tweet to be parsed and validates it.
       *
       * @param string $tweet  The tweet to validate.
       * @param mixed  $config Optional array or object carrying short URL lengths;
       *                       only applied when it is not empty.
       */
      public function __construct($tweet = null, $config = null)
      {
          parent::__construct($tweet);
          if (!empty($config)) {
              $this->setConfiguration($config);
          }
          $this->extractor = Extractor::create();
      }

      /**
       * Setup short URL length from Twitter API /help/configuration response.
       *
       * Reads "short_url_length" and "short_url_length_https" from either an
       * array (as keys) or an object (as properties). Entries that are not set
       * leave the current value untouched; any other type of $config is ignored.
       *
       * @param mixed $config
       *
       * @return Validator
       *
       * @link https://dev.twitter.com/docs/api/1/get/help/configuration
       */
      public function setConfiguration($config)
      {
          if (is_array($config)) {
              // setup from array
              if (isset($config['short_url_length'])) {
                  $this->setShortUrlLength($config['short_url_length']);
              }
              if (isset($config['short_url_length_https'])) {
                  $this->setShortUrlLengthHttps($config['short_url_length_https']);
              }
          } elseif (is_object($config)) {
              // setup from object
              if (isset($config->short_url_length)) {
                  $this->setShortUrlLength($config->short_url_length);
              }
              if (isset($config->short_url_length_https)) {
                  $this->setShortUrlLengthHttps($config->short_url_length_https);
              }
          }

          return $this;
      }

      /**
       * Set the length of a short URL beginning with http:.
       *
       * @param mixed $length Converted to an integer with intval().
       *
       * @return Validator
       */
      public function setShortUrlLength($length)
      {
          $this->short_url_length = intval($length);

          return $this;
      }

      /**
       * Get the length of a short URL beginning with http:.
       *
       * @return int
       */
      public function getShortUrlLength()
      {
          return $this->short_url_length;
      }

      /**
       * Set the length of a short URL beginning with https:.
       *
       * @param mixed $length Converted to an integer with intval().
       *
       * @return Validator
       */
      public function setShortUrlLengthHttps($length)
      {
          $this->short_url_length_https = intval($length);

          return $this;
      }

      /**
       * Get the length of a short URL beginning with https:.
       *
       * @return int
       */
      public function getShortUrlLengthHttps()
      {
          return $this->short_url_length_https;
      }

      /**
       * Check whether a tweet is valid.
       *
       * A tweet is rejected when it is blank, when its computed length (see
       * getTweetLength()) is zero or exceeds MAX_LENGTH, or when it matches the
       * "invalid_characters" pattern.
       *
       * @param string $tweet The tweet to validate; defaults to the tweet held by this instance.
       *
       * @return bool Whether the tweet is valid.
       */
      public function isValidTweetText($tweet = null)
      {
          if (is_null($tweet)) {
              $tweet = $this->tweet;
          }
          $length = $this->getTweetLength($tweet);
          // PHP treats the string "0" as falsy, but it is a real one-character
          // tweet, so it must not be caught by the blank check.
          $blank = !$tweet && $tweet !== '0';
          if ($blank || !$length) {
              return false;
          }
          if ($length > self::MAX_LENGTH) {
              return false;
          }
          if (preg_match(self::$patterns['invalid_characters'], $tweet)) {
              return false;
          }

          return true;
      }

      /**
       * Check whether a tweet is valid.
       *
       * @return bool Whether the tweet is valid.
       *
       * @deprecated since version 1.1.0
       */
      public function validateTweet()
      {
          return $this->isValidTweetText();
      }

      /**
       * Check whether a username is valid.
       *
       * The text is valid when the extractor finds exactly one mentioned screen
       * name in it and that name equals the text without its first character.
       *
       * @param string $username The username to validate; defaults to the tweet held by this instance.
       *
       * @return bool Whether the username is valid.
       */
      public function isValidUsername($username = null)
      {
          if (is_null($username)) {
              $username = $this->tweet;
          }
          $length = StringUtils::strlen($username);
          if (empty($username) || !$length) {
              return false;
          }
          $extracted = $this->extractor->extractMentionedScreennames($username);

          // The leading sigil is removed as a character, not a byte, so that a
          // multibyte sigil (e.g. fullwidth "＠" -- presumably accepted by the
          // extractor; verify against the Regex patterns) is stripped cleanly.
          return count($extracted) === 1 && $extracted[0] === self::dropFirstCharacter($username);
      }

      /**
       * Check whether a username is valid.
       *
       * @return bool Whether the username is valid.
       *
       * @deprecated since version 1.1.0
       */
      public function validateUsername()
      {
          return $this->isValidUsername();
      }

      /**
       * Check whether a list is valid.
       *
       * Matches the text against the "valid_mentions_or_lists" pattern and
       * requires capture group 1 to be empty, group 4 to be non-empty and
       * group 5 to be captured as an empty string.
       * NOTE(review): groups 1/4/5 look like "text before", "list slug" and
       * "text after" -- confirm against the pattern defined in Regex.
       *
       * @param string $list The list name to validate; defaults to the tweet held by this instance.
       *
       * @return bool Whether the list is valid.
       */
      public function isValidList($list = null)
      {
          if (is_null($list)) {
              $list = $this->tweet;
          }
          $length = StringUtils::strlen($list);
          if (empty($list) || !$length) {
              return false;
          }
          if (!preg_match(self::$patterns['valid_mentions_or_lists'], $list, $matches)) {
              return false;
          }
          // preg_match() omits trailing groups that did not take part in the
          // match, so every index is checked before it is read.
          $group1 = isset($matches[1]) ? $matches[1] : '';
          $group4 = isset($matches[4]) ? $matches[4] : '';

          return $group1 === ''
              && !empty($group4)
              && isset($matches[5])
              && $matches[5] === '';
      }

      /**
       * Check whether a list is valid.
       *
       * @return bool Whether the list is valid.
       *
       * @deprecated since version 1.1.0
       */
      public function validateList()
      {
          return $this->isValidList();
      }

      /**
       * Check whether a hashtag is valid.
       *
       * The text is valid when the extractor finds exactly one hashtag in it
       * and that hashtag equals the text without its first character.
       *
       * @param string $hashtag The hashtag to validate; defaults to the tweet held by this instance.
       *
       * @return bool Whether the hashtag is valid.
       */
      public function isValidHashtag($hashtag = null)
      {
          if (is_null($hashtag)) {
              $hashtag = $this->tweet;
          }
          $length = StringUtils::strlen($hashtag);
          if (empty($hashtag) || !$length) {
              return false;
          }
          $extracted = $this->extractor->extractHashtags($hashtag);

          // The leading sigil is removed as a character, not a byte, so that a
          // multibyte sigil (e.g. fullwidth "＃" -- presumably accepted by the
          // extractor; verify against the Regex patterns) is stripped cleanly.
          return count($extracted) === 1 && $extracted[0] === self::dropFirstCharacter($hashtag);
      }

      /**
       * Check whether a hashtag is valid.
       *
       * @return bool Whether the hashtag is valid.
       *
       * @deprecated since version 1.1.0
       */
      public function validateHashtag()
      {
          return $this->isValidHashtag();
      }

      /**
       * Check whether a URL is valid.
       *
       * The whole text must match the "validate_url_unencoded" pattern; its
       * captured scheme, authority, path, query and fragment are then each
       * checked against their own pattern. Query and fragment are optional.
       *
       * @param string $url              The url to validate; defaults to the tweet held by this instance.
       * @param bool   $unicode_domains  Consider the domain to be unicode.
       * @param bool   $require_protocol Require a protocol for valid domain?
       *
       * @return bool Whether the URL is valid.
       */
      public function isValidURL($url = null, $unicode_domains = true, $require_protocol = true)
      {
          if (is_null($url)) {
              $url = $this->tweet;
          }
          $length = StringUtils::strlen($url);
          if (empty($url) || !$length) {
              return false;
          }
          preg_match(self::$patterns['validate_url_unencoded'], $url, $matches);
          // Element 0 is the full match; the remainder are the URL components.
          $match = array_shift($matches);
          if (!$matches || $match !== $url) {
              return false;
          }
          list($scheme, $authority, $path, $query, $fragment) = array_pad($matches, 5, '');

          // Check scheme (only http/https are accepted when a protocol is required):
          if ($require_protocol) {
              $scheme_ok = self::isValidMatch($scheme, self::$patterns['validate_url_scheme'])
                  && preg_match('/^https?$/i', $scheme);
              if (!$scheme_ok) {
                  return false;
              }
          }
          // Check path, query, fragment:
          if (!self::isValidMatch($path, self::$patterns['validate_url_path'])) {
              return false;
          }
          if (!self::isValidMatch($query, self::$patterns['validate_url_query'], true)) {
              return false;
          }
          if (!self::isValidMatch($fragment, self::$patterns['validate_url_fragment'], true)) {
              return false;
          }

          // Check authority:
          $authority_pattern = $unicode_domains ? 'validate_url_unicode_authority' : 'validate_url_authority';

          return self::isValidMatch($authority, self::$patterns[$authority_pattern]);
      }

      /**
       * Check whether a URL is valid.
       *
       * @param bool $unicode_domains  Consider the domain to be unicode.
       * @param bool $require_protocol Require a protocol for valid domain?
       *
       * @return bool Whether the URL is valid.
       *
       * @deprecated since version 1.1.0
       */
      public function validateURL($unicode_domains = true, $require_protocol = true)
      {
          return $this->isValidURL(null, $unicode_domains, $require_protocol);
      }

      /**
       * Determines the length of a tweet. Takes shortening of URLs into account.
       *
       * Starts from the string length of the text and, for every URL reported
       * by the extractor, swaps the URL's own span for the configured short URL
       * length (the https value when the URL starts with "https://",
       * case-insensitively; the http value otherwise).
       *
       * @param string $tweet The tweet to measure; defaults to the tweet held by this instance.
       *
       * @return int the length of a tweet.
       */
      public function getTweetLength($tweet = null)
      {
          if (is_null($tweet)) {
              $tweet = $this->tweet;
          }
          $length = StringUtils::strlen($tweet);
          $urls_with_indices = $this->extractor->extractURLsWithIndices($tweet);
          foreach ($urls_with_indices as $x) {
              // Remove the span the URL occupies in the text (start - end is negative)...
              $length += $x['indices'][0] - $x['indices'][1];
              // ...and count it as a shortened URL instead.
              $length += stripos($x['url'], 'https://') === 0 ? $this->short_url_length_https : $this->short_url_length;
          }

          return $length;
      }

      /**
       * Determines the length of a tweet. Takes shortening of URLs into account.
       *
       * @return int the length of a tweet.
       *
       * @deprecated since version 1.1.0
       */
      public function getLength()
      {
          return $this->getTweetLength();
      }

      /**
       * A helper function to check for a valid match. Used in URL validation.
       *
       * A subject counts as present when it is any string (including '' and
       * '0') or any other truthy value. An absent subject fails a compulsory
       * check and passes an optional one; a present subject must be matched by
       * the pattern in its entirety in both modes.
       *
       * @param string $string   The subject string to test.
       * @param string $pattern  The pattern to match against.
       * @param bool   $optional Whether a match is compulsory or not.
       *
       * @return bool Whether an exact match was found.
       */
      protected static function isValidMatch($string, $pattern, $optional = false)
      {
          // Plain truthiness would treat the string "0" as absent.
          $present = is_string($string) || $string;
          if (!$present) {
              // Nothing to match: acceptable only when the component is optional.
              return (bool) $optional;
          }
          $found = preg_match($pattern, $string, $matches);

          return $found && $matches[0] === $string;
      }

      /**
       * Removes the first character (not the first byte) from a UTF-8 string.
       *
       * Falls back to dropping a single byte when the text is not valid UTF-8,
       * because preg_replace() cannot process it in that case.
       *
       * @param string $text The text to shorten.
       *
       * @return string|false The remainder of the text; the byte-wise fallback
       *                      may yield false for very short input on old PHP versions.
       */
      private static function dropFirstCharacter($text)
      {
          $rest = preg_replace('/^./su', '', $text, 1);

          return is_string($rest) ? $rest : substr($text, 1);
      }
  }