From 4972afb7569ccf53dc23cb256bc8f5acc9417b0a Mon Sep 17 00:00:00 2001 From: kokopi Date: Wed, 11 Mar 2026 02:20:24 +0900 Subject: [PATCH] update:filters --- backend/src/middleware/contentFilter.ts | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/backend/src/middleware/contentFilter.ts b/backend/src/middleware/contentFilter.ts index 8c4072d..46296c8 100644 --- a/backend/src/middleware/contentFilter.ts +++ b/backend/src/middleware/contentFilter.ts @@ -32,6 +32,34 @@ const PII_PATTERNS: Array<{ pattern: RegExp; label: string }> = [ label: "[REDACTED_SSN]", pattern: /\b\d{3}[ -.]\d{2}[ -.]\d{4}\b/g, }, + { + // Explicit URLs: http://, https://, ftp://, // protocol-relative + // Captures the full URL including path, query string, and fragment. + label: "[REDACTED_URL]", + pattern: /(?:https?:\/\/|ftp:\/\/|\/\/)[\w\-._~:/?#[\]@!$&'()*+,;=%]+/gi, + }, + { + // Domains — both literal and separator-obfuscated variants. + // + // The separator group (SEP) matches any of: + // - a literal dot: \. + // - a comma (with optional spaces): \s*,\s* + // - the word "dot" in plain/bracket/paren form: \s*(?:dot|\[dot\]|\(dot\))\s* + // - Unicode dot substitutes: · (U+00B7) 。(U+3002) ｡ (U+FF61) + // + // This covers: example.com example,com example dot com + // example (dot) com example·com example。com + // + // A recognised TLD must follow the final separator. The TLD must then be + // at a word boundary or followed by / ? whitespace or end-of-string so + // that file extensions like .json / .ts and version strings like 1.2.3 + // are not caught. + // + // The negative lookbehind on [REDACTED_ prevents double-processing tokens + // that were already replaced by an earlier pattern. + label: "[REDACTED_URL]", + pattern: /(?)\],]|$)/gi, + }, ]; /** @@ -80,10 +108,7 @@ export function filterBody(body: string): ContentFilterOutcome { * Runs subject and description through the content filter. 
* * - Profanity in either field → rejected, ticket is not saved - * - PII in description → silently redacted before saving - * - * Subject is not PII-redacted because it is short, user-facing in table views, - * and unlikely to contain structured PII like card numbers or SSNs. + * - PII / URLs in either field → silently redacted before saving */ export function filterContent( subject: string, @@ -109,7 +134,7 @@ export function filterContent( return { ok: true, - subject, + subject: redactPII(subject), description: description ? redactPII(description) : description, }; }