HitHighlighter.php 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. <?php
  2. /**
  3. * @author Nick Pope <nick@nickpope.me.uk>
  4. * @copyright Copyright © 2010, Nick Pope
  5. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
  6. */
  7. namespace App\Util\Lexer;
  8. /**
  9. * Twitter HitHighlighter Class.
  10. *
  11. * Performs "hit highlighting" on tweets that have been auto-linked already.
  12. * Useful with the results returned from the search API.
  13. *
  14. * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
  15. * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
  16. * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
  17. *
  18. * @author Nick Pope <nick@nickpope.me.uk>
  19. * @copyright Copyright © 2010, Nick Pope
  20. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
  21. */
  22. class HitHighlighter extends Regex
  23. {
  24. /**
  25. * The tag to surround hits with.
  26. *
  27. * @var string
  28. */
  29. protected $tag = 'em';
  30. /**
  31. * Provides fluent method chaining.
  32. *
  33. * @param string $tweet The tweet to be hit highlighted.
  34. * @param bool $full_encode Whether to encode all special characters.
  35. *
  36. * @see __construct()
  37. *
  38. * @return HitHighlighter
  39. */
  40. public static function create($tweet = null, $full_encode = false)
  41. {
  42. return new self($tweet, $full_encode);
  43. }
  44. /**
  45. * Reads in a tweet to be parsed and hit highlighted.
  46. *
  47. * We take this opportunity to ensure that we escape user input.
  48. *
  49. * @see htmlspecialchars()
  50. *
  51. * @param string $tweet The tweet to be hit highlighted.
  52. * @param bool $escape Whether to escape the tweet (default: true).
  53. * @param bool $full_encode Whether to encode all special characters.
  54. */
  55. public function __construct($tweet = null, $escape = true, $full_encode = false)
  56. {
  57. if (!empty($tweet) && $escape) {
  58. if ($full_encode) {
  59. parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false));
  60. } else {
  61. parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false));
  62. }
  63. } else {
  64. parent::__construct($tweet);
  65. }
  66. }
  67. /**
  68. * Set the highlighting tag to surround hits with. The default tag is 'em'.
  69. *
  70. * @return string The tag name.
  71. */
  72. public function getTag()
  73. {
  74. return $this->tag;
  75. }
  76. /**
  77. * Set the highlighting tag to surround hits with. The default tag is 'em'.
  78. *
  79. * @param string $v The tag name.
  80. *
  81. * @return HitHighlighter Fluid method chaining.
  82. */
  83. public function setTag($v)
  84. {
  85. $this->tag = $v;
  86. return $this;
  87. }
  88. /**
  89. * Hit highlights the tweet.
  90. *
  91. * @param string $tweet The tweet to be hit highlighted.
  92. * @param array $hits An array containing the start and end index pairs
  93. * for the highlighting.
  94. * @param bool $escape Whether to escape the tweet (default: true).
  95. * @param bool $full_encode Whether to encode all special characters.
  96. *
  97. * @return string The hit highlighted tweet.
  98. */
  99. public function highlight($tweet = null, array $hits = null)
  100. {
  101. if (is_null($tweet)) {
  102. $tweet = $this->tweet;
  103. }
  104. if (empty($hits)) {
  105. return $tweet;
  106. }
  107. $highlightTweet = '';
  108. $tags = ['<'.$this->tag.'>', '</'.$this->tag.'>'];
  109. // Check whether we can simply replace or whether we need to chunk...
  110. if (strpos($tweet, '<') === false) {
  111. $ti = 0; // tag increment (for added tags)
  112. $highlightTweet = $tweet;
  113. foreach ($hits as $hit) {
  114. $highlightTweet = StringUtils::substrReplace($highlightTweet, $tags[0], $hit[0] + $ti, 0);
  115. $ti += StringUtils::strlen($tags[0]);
  116. $highlightTweet = StringUtils::substrReplace($highlightTweet, $tags[1], $hit[1] + $ti, 0);
  117. $ti += StringUtils::strlen($tags[1]);
  118. }
  119. } else {
  120. $chunks = preg_split('/[<>]/iu', $tweet);
  121. $chunk = $chunks[0];
  122. $chunk_index = 0;
  123. $chunk_cursor = 0;
  124. $offset = 0;
  125. $start_in_chunk = false;
  126. // Flatten the multidimensional hits array:
  127. $hits_flat = [];
  128. foreach ($hits as $hit) {
  129. $hits_flat = array_merge($hits_flat, $hit);
  130. }
  131. // Loop over the hit indices:
  132. for ($index = 0; $index < count($hits_flat); $index++) {
  133. $hit = $hits_flat[$index];
  134. $tag = $tags[$index % 2];
  135. $placed = false;
  136. while ($chunk !== null && $hit >= ($i = $offset + StringUtils::strlen($chunk))) {
  137. $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor);
  138. if ($start_in_chunk && $hit === $i) {
  139. $highlightTweet .= $tag;
  140. $placed = true;
  141. }
  142. if (isset($chunks[$chunk_index + 1])) {
  143. $highlightTweet .= '<'.$chunks[$chunk_index + 1].'>';
  144. }
  145. $offset += StringUtils::strlen($chunk);
  146. $chunk_cursor = 0;
  147. $chunk_index += 2;
  148. $chunk = (isset($chunks[$chunk_index]) ? $chunks[$chunk_index] : null);
  149. $start_in_chunk = false;
  150. }
  151. if (!$placed && $chunk !== null) {
  152. $hit_spot = $hit - $offset;
  153. $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor, $hit_spot - $chunk_cursor).$tag;
  154. $chunk_cursor = $hit_spot;
  155. $start_in_chunk = ($index % 2 === 0);
  156. $placed = true;
  157. }
  158. // Ultimate fallback - hits that run off the end get a closing tag:
  159. if (!$placed) {
  160. $highlightTweet .= $tag;
  161. }
  162. }
  163. if ($chunk !== null) {
  164. if ($chunk_cursor < StringUtils::strlen($chunk)) {
  165. $highlightTweet .= StringUtils::substr($chunk, $chunk_cursor);
  166. }
  167. for ($index = $chunk_index + 1; $index < count($chunks); $index++) {
  168. $highlightTweet .= ($index % 2 === 0 ? $chunks[$index] : '<'.$chunks[$index].'>');
  169. }
  170. }
  171. }
  172. return $highlightTweet;
  173. }
  174. /**
  175. * Hit highlights the tweet.
  176. *
  177. * @param array $hits An array containing the start and end index pairs
  178. * for the highlighting.
  179. *
  180. * @return string The hit highlighted tweet.
  181. *
  182. * @deprecated since version 1.1.0
  183. */
  184. public function addHitHighlighting(array $hits)
  185. {
  186. return $this->highlight($this->tweet, $hits);
  187. }
  188. }