Explorar o código

Update lexer/extractor to handle banned hashtags

Daniel Supernault hai 2 anos
pai
achega
909a8a5a9b

+ 9 - 0
app/Jobs/StatusPipeline/StatusTagsPipeline.php

@@ -15,6 +15,7 @@ use App\Mention;
 use App\Services\AccountService;
 use App\Hashtag;
 use App\StatusHashtag;
+use App\Services\TrendingHashtagService;
 
 class StatusTagsPipeline implements ShouldQueue
 {
@@ -61,6 +62,14 @@ class StatusTagsPipeline implements ShouldQueue
 			$name = substr($tag['name'], 0, 1) == '#' ?
 				substr($tag['name'], 1) : $tag['name'];
 
+			$banned = TrendingHashtagService::getBannedHashtagNames();
+
+			if(count($banned)) {
+                if(in_array(strtolower($name), array_map('strtolower', $banned))) {
+                    continue;
+                }
+            }
+
 			$hashtag = Hashtag::firstOrCreate([
 				'slug' => str_slug($name)
 			], [

+ 9 - 2
app/Services/TrendingHashtagService.php

@@ -16,13 +16,20 @@ class TrendingHashtagService
         return self::CACHE_KEY . $k;
     }
 
-    public static function getBlockedHashtags()
+    public static function getBannedHashtags()
     {
         return Cache::remember(self::key(':is_banned'), 1209600, function() {
             return Hashtag::whereIsBanned(true)->pluck('id')->toArray();
         });
     }
 
+    public static function getBannedHashtagNames()
+    {
+        return Cache::remember(self::key(':is_banned:names'), 1209600, function() {
+            return Hashtag::find(self::getBannedHashtags())->pluck('name')->toArray();
+        });
+    }
+
     public static function getNonTrendingHashtags()
     {
         return Cache::remember(self::key(':can_trend'), 1209600, function() {
@@ -52,7 +59,7 @@ class TrendingHashtagService
     {
         $minId = self::getMinRecentId();
 
-        $skipIds = array_merge(self::getBlockedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags());
+        $skipIds = array_merge(self::getBannedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags());
 
         return Cache::remember(self::CACHE_KEY, config('trending.hashtags.ttl'), function() use($minId, $skipIds) {
             return StatusHashtag::select('hashtag_id', \DB::raw('count(*) as total'))

+ 9 - 1
app/Util/Lexer/Extractor.php

@@ -12,6 +12,7 @@ namespace App\Util\Lexer;
 use Illuminate\Support\Str;
 use App\Status;
 use App\Services\AutolinkService;
+use App\Services\TrendingHashtagService;
 
 /**
  * Twitter Extractor Class.
@@ -267,6 +268,8 @@ class Extractor extends Regex
             return [];
         }
 
+        $bannedTags = TrendingHashtagService::getBannedHashtagNames();
+
         preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
         $tags = [];
 
@@ -278,7 +281,12 @@ class Extractor extends Regex
             if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
                 continue;
             }
-            if(mb_strlen($hashtag[0]) > 124) {
+            if (count($bannedTags)) {
+                if(in_array(strtolower($hashtag[0]), array_map('strtolower', $bannedTags))) {
+                    continue;
+                }
+            }
+            if (mb_strlen($hashtag[0]) > 124) {
                 continue;
             }
             $tags[] = [