Преглед изворни кода

Update CustomFilter model, add custom limits for max_content_scan_limit, max_filters_per_user, max_keywords_per_filter, max_keyword_length, max_pattern_length, max_reported_matches, max_create_per_hour, max_updates_per_hour so admins have more control over this feature

Daniel Supernault пре 2 месеци
родитељ
комит
77044ba193
2 измењених фајлова са 182 додато и 38 уклоњено
  1. 108 38
      app/Models/CustomFilter.php
  2. 74 0
      config/instance.php

+ 108 - 38
app/Models/CustomFilter.php

@@ -31,20 +31,8 @@ class CustomFilter extends Model
         'account',
     ];
 
-    const MAX_LIMIT = 20;
-
-    const MAX_KEYWORDS_PER_FILTER = 10;
-
     const MAX_STATUSES_PER_FILTER = 10;
 
-    const MAX_CONTENT_SCAN_LEN = 1000;
-
-    const MAX_KEYWORD_LEN = 40;
-
-    const MAX_PER_HOUR = 40;
-
-    const MAX_UPDATES_PER_HOUR = 40;
-
     const EXPIRATION_DURATIONS = [
         1800,   // 30 minutes
         3600,   // 1 hour
@@ -60,6 +48,20 @@ class CustomFilter extends Model
 
     const ACTION_BLUR = 2;
 
+    protected static ?int $maxContentScanLimit = null; // per-process memo for getMaxContentScanLimit(); null until first read
+
+    protected static ?int $maxFiltersPerUser = null; // per-process memo for getMaxFiltersPerUser(); null until first read
+
+    protected static ?int $maxKeywordsPerFilter = null; // per-process memo for getMaxKeywordsPerFilter(); null until first read
+
+    protected static ?int $maxKeywordsLength = null; // memo for getMaxKeywordLength(); NOTE(review): plural "Keywords" differs from the getter name and the max_keyword_length config key
+
+    protected static ?int $maxPatternLength = null; // per-process memo for getMaxPatternLength(); null until first read
+
+    protected static ?int $maxCreatePerHour = null; // per-process memo for getMaxCreatePerHour(); null until first read
+
+    protected static ?int $maxUpdatesPerHour = null; // per-process memo for getMaxUpdatesPerHour(); null until first read
+
     public function account()
     {
         return $this->belongsTo(Profile::class, 'profile_id');
@@ -166,6 +168,11 @@ class CustomFilter extends Model
             $model->shouldInvalidateCache = true;
         });
 
+        static::updating(function ($model) {
+            $model->prepareContextForStorage();
+            $model->shouldInvalidateCache = true;
+        });
+
         static::deleting(function ($model) {
             $model->shouldInvalidateCache = true;
         });
@@ -197,6 +204,69 @@ class CustomFilter extends Model
         Cache::forget("filters:v3:{$this->profile_id}");
     }
 
+    /**
+     * Maximum number of characters of a status's text that is scanned for keyword matches.
+     *
+     * Reads `instance.custom_filters.max_content_scan_limit` (fallback 2500) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxContentScanLimit(): int
+    {
+        return self::$maxContentScanLimit ??= (int) config('instance.custom_filters.max_content_scan_limit', 2500);
+    }
+
+    /**
+     * Maximum number of filters a single user may have.
+     *
+     * Reads `instance.custom_filters.max_filters_per_user` (fallback 20) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxFiltersPerUser(): int
+    {
+        return self::$maxFiltersPerUser ??= (int) config('instance.custom_filters.max_filters_per_user', 20);
+    }
+
+    /**
+     * Maximum number of keywords that can be attached to one filter.
+     *
+     * Reads `instance.custom_filters.max_keywords_per_filter` (fallback 10) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxKeywordsPerFilter(): int
+    {
+        return self::$maxKeywordsPerFilter ??= (int) config('instance.custom_filters.max_keywords_per_filter', 10);
+    }
+
+    /**
+     * Maximum length allowed for a single filter keyword.
+     *
+     * Reads `instance.custom_filters.max_keyword_length` (fallback 40) on the first
+     * call and memoizes the result in the static `$maxKeywordsLength` property.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxKeywordLength(): int
+    {
+        return self::$maxKeywordsLength ??= (int) config('instance.custom_filters.max_keyword_length', 40);
+    }
+
+    /**
+     * Maximum length of the combined regex pattern built from a filter's keywords.
+     *
+     * Reads `instance.custom_filters.max_pattern_length` (fallback 10000) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxPatternLength(): int
+    {
+        return self::$maxPatternLength ??= (int) config('instance.custom_filters.max_pattern_length', 10000);
+    }
+
+    /**
+     * Maximum number of filter creations allowed per hour.
+     *
+     * Reads `instance.custom_filters.max_create_per_hour` (fallback 20) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxCreatePerHour(): int
+    {
+        return self::$maxCreatePerHour ??= (int) config('instance.custom_filters.max_create_per_hour', 20);
+    }
+
+    /**
+     * Maximum number of filter updates allowed per hour.
+     *
+     * Reads `instance.custom_filters.max_updates_per_hour` (fallback 40) on the first
+     * call and memoizes the result in a static property for later calls.
+     *
+     * The value is cast to int because env()-backed config values may arrive as strings,
+     * which would otherwise be rejected by the `?int` property / `int` return type when
+     * they are empty or non-numeric.
+     */
+    public static function getMaxUpdatesPerHour(): int
+    {
+        return self::$maxUpdatesPerHour ??= (int) config('instance.custom_filters.max_updates_per_hour', 40);
+    }
+
     /**
      * Get cached filters for an account with simplified, secure approach
      *
@@ -219,7 +289,7 @@ class CustomFilter extends Model
                     return;
                 }
 
-                $maxPatternsPerFilter = self::MAX_KEYWORDS_PER_FILTER;
+                // Cap how many keywords are compiled into this filter's pattern. This must be the
+                // per-filter keyword limit (it replaces MAX_KEYWORDS_PER_FILTER), not the
+                // per-user filter-count limit.
+                $maxPatternsPerFilter = self::getMaxKeywordsPerFilter();
                 $keywordsToProcess = $keywords->take($maxPatternsPerFilter);
 
                 $regexPatterns = $keywordsToProcess->map(function ($keyword) {
@@ -237,7 +307,7 @@ class CustomFilter extends Model
                 }
 
                 $combinedPattern = implode('|', $regexPatterns);
-                $maxPatternLength = self::MAX_KEYWORD_LEN;
+                $maxPatternLength = self::getMaxPatternLength();
                 if (strlen($combinedPattern) > $maxPatternLength) {
                     $combinedPattern = substr($combinedPattern, 0, $maxPatternLength);
                 }
@@ -248,24 +318,24 @@ class CustomFilter extends Model
                 ];
             });
 
-            $statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
-                $query->unexpired()->where('profile_id', $profileId);
-            }])->get();
+            // $statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
+            //     $query->unexpired()->where('profile_id', $profileId);
+            // }])->get();
 
-            $statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
-                $filter = $statuses->first()->customFilter;
+            // $statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
+            //     $filter = $statuses->first()->customFilter;
 
-                if (! $filter) {
-                    return;
-                }
+            //     if (! $filter) {
+            //         return;
+            //     }
 
-                if (! isset($filtersHash[$filterId])) {
-                    $filtersHash[$filterId] = ['filter' => $filter];
-                }
+            //     if (! isset($filtersHash[$filterId])) {
+            //         $filtersHash[$filterId] = ['filter' => $filter];
+            //     }
 
-                $maxStatusIds = self::MAX_STATUSES_PER_FILTER;
-                $filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
-            });
+            //     $maxStatusIds = self::MAX_STATUSES_PER_FILTER;
+            //     $filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
+            // });
 
             return array_map(function ($item) {
                 $filter = $item['filter'];
@@ -300,7 +370,7 @@ class CustomFilter extends Model
             if (isset($rules['keywords'])) {
                 $text = strip_tags($status['content']);
 
-                $maxContentLength = self::MAX_CONTENT_SCAN_LEN;
+                $maxContentLength = self::getMaxContentScanLimit();
                 if (mb_strlen($text) > $maxContentLength) {
                     $text = mb_substr($text, 0, $maxContentLength);
                 }
@@ -308,7 +378,7 @@ class CustomFilter extends Model
                 try {
                     preg_match_all($rules['keywords'], $text, $matches, PREG_PATTERN_ORDER, 0);
                     if (! empty($matches[0])) {
-                        $maxReportedMatches = 10;
+                        $maxReportedMatches = (int) config('instance.custom_filters.max_reported_matches', 10);
                         $keywordMatches = array_slice($matches[0], 0, $maxReportedMatches);
                     }
                 } catch (\Throwable $e) {
@@ -318,15 +388,15 @@ class CustomFilter extends Model
                 }
             }
 
-            if (isset($rules['status_ids'])) {
-                $statusId = $status->id;
-                $reblogId = $status->reblog_of_id ?? null;
+            // if (isset($rules['status_ids'])) {
+            //     $statusId = $status->id;
+            //     $reblogId = $status->reblog_of_id ?? null;
 
-                $matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
-                if (! empty($matchingIds)) {
-                    $statusMatches = $matchingIds;
-                }
-            }
+            //     $matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
+            //     if (! empty($matchingIds)) {
+            //         $statusMatches = $matchingIds;
+            //     }
+            // }
 
             if (! empty($keywordMatches) || ! empty($statusMatches)) {
                 $results[] = [

+ 74 - 0
config/instance.php

@@ -190,4 +190,78 @@ return [
     'allow_new_account_dms' => env('INSTANCE_ALLOW_NEW_DMS', true),
 
     'total_count_estimate' => env('INSTANCE_TOTAL_POSTS_COUNT_ESTIMATE', false),
+
+    'custom_filters' => [
+        /*
+         * The maximum number of characters from a status that will be scanned
+         * for filter matching. Scanning too many characters can hurt performance,
+         * so this limit ensures that only the most relevant portion of a status is processed.
+         *
+         * For remote statuses, you might want to increase this value if you expect
+         * important content to appear later in long posts.
+         *
+         * Env: PF_CF_CONTENT_SCAN_LIMIT (default: 2500).
+         *
+         * NOTE(review): values supplied through the environment may arrive as strings;
+         * readers of the keys in this section should cast to int — confirm for each consumer.
+         */
+        'max_content_scan_limit' => env('PF_CF_CONTENT_SCAN_LIMIT', 2500),
+
+        /*
+         * The maximum number of filters a single user can create.
+         * Limiting the number of filters per user helps prevent abuse and
+         * ensures that the filtering system remains performant.
+         *
+         * Env: PF_CF_MAX_FILTERS_PER_USER (default: 20).
+         */
+        'max_filters_per_user' => env('PF_CF_MAX_FILTERS_PER_USER', 20),
+
+        /*
+         * The maximum number of keywords that can be associated with a single filter.
+         * This limit helps control the complexity of the generated regular expressions
+         * and protects against potential performance issues during content scanning.
+         *
+         * Env: PF_CF_MAX_KEYWORDS_PER_FILTER (default: 10).
+         */
+        'max_keywords_per_filter' => env('PF_CF_MAX_KEYWORDS_PER_FILTER', 10),
+
+        /*
+         * The maximum length allowed for each keyword in a filter.
+         * Limiting keyword length not only curtails the size of the regex patterns created,
+         * but also guards against potential abuse where excessively long keywords might
+         * negatively impact matching performance or lead to unintended behavior.
+         *
+         * Env: PF_CF_MAX_KEYWORD_LENGTH (default: 40).
+         */
+        'max_keyword_length' => env('PF_CF_MAX_KEYWORD_LENGTH', 40),
+
+        /*
+         * The maximum allowed length for the combined regex pattern.
+         * When constructing a regex that matches multiple filter keywords, each keyword
+         * (after escaping and adding boundaries) contributes to the total pattern length.
+         *
+         * This value is set to 10000 by default. If you increase either the number of keywords
+         * per filter or the maximum length allowed for each keyword, consider increasing this
+         * limit accordingly so that the final regex pattern can accommodate the additional length
+         * without being truncated or causing performance issues.
+         *
+         * Env: PF_CF_MAX_PATTERN_LENGTH (default: 10000).
+         */
+        'max_pattern_length' => env('PF_CF_MAX_PATTERN_LENGTH', 10000),
+
+        /*
+         * The maximum number of keyword matches to report for a given status.
+         * When a filter is applied to a status, the matching process may find multiple occurrences
+         * of a keyword. This value limits the number of matches that are reported back,
+         * which helps manage output volume and processing overhead.
+         *
+         * The default is set to 10, but you can adjust this value through your environment configuration.
+         *
+         * Env: PF_CF_MAX_REPORTED_MATCHES (default: 10).
+         */
+        'max_reported_matches' => env('PF_CF_MAX_REPORTED_MATCHES', 10),
+
+        /*
+         * The maximum number of filter creation operations allowed per hour for a non-admin user.
+         * This rate limit prevents abuse by restricting how many filters a normal user can create
+         * within one hour. Admin users are exempt from this limit.
+         *
+         * Default is 20 creations per hour.
+         *
+         * Env: PF_CF_MAX_CREATE_PER_HOUR (default: 20).
+         */
+        'max_create_per_hour' => env('PF_CF_MAX_CREATE_PER_HOUR', 20),
+
+        /*
+         * The maximum number of filter update operations allowed per hour for a non-admin user.
+         * This rate limit is designed to prevent abuse by limiting how many times a normal user
+         * can update their filters within one hour. Admin users are not subject to these limits.
+         *
+         * Default is 40 updates per hour.
+         *
+         * Env: PF_CF_MAX_UPDATES_PER_HOUR (default: 40).
+         */
+        'max_updates_per_hour' => env('PF_CF_MAX_UPDATES_PER_HOUR', 40),
+    ],
 ];