update:filters

This commit is contained in:
2026-03-11 02:20:24 +09:00
parent 6eb3c7c425
commit 4972afb756

View File

@@ -32,6 +32,34 @@ const PII_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
label: "[REDACTED_SSN]", label: "[REDACTED_SSN]",
pattern: /\b\d{3}[ -.]\d{2}[ -.]\d{4}\b/g, pattern: /\b\d{3}[ -.]\d{2}[ -.]\d{4}\b/g,
}, },
{
// Explicit URLs: http://, https://, ftp://, // protocol-relative
// Captures the full URL including path, query string, and fragment.
label: "[REDACTED_URL]",
pattern: /(?:https?:\/\/|ftp:\/\/|\/\/)[\w\-._~:/?#[\]@!$&'()*+,;=%]+/gi,
},
{
// Domains — both literal and separator-obfuscated variants.
//
// The separator group (SEP) matches any of:
// - a literal dot: \.
// - a comma (with optional spaces): \s*,\s*
// - the word "dot" in plain/bracket/paren form: \s*(?:dot|\[dot\]|\(dot\))\s*
// - Unicode dot substitutes: · (U+00B7) 。(U+3002) 。 (U+FF61)
//
// This covers: example.com example,com example dot com
// example (dot) com example·com example。com
//
// A recognised TLD must follow the final separator. The TLD must then be
// at a word boundary or followed by / ? whitespace or end-of-string so
// that file extensions like .json / .ts and version strings like 1.2.3
// are not caught.
//
// The negative lookbehind on [REDACTED_ prevents double-processing tokens
// that were already replaced by an earlier pattern.
label: "[REDACTED_URL]",
pattern: /(?<!\[REDACTED_)\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.|,|\s*,\s*|\s*(?:dot|\[dot\]|\(dot\))\s*|[·。。]))+(?:com|net|org|io|dev|app|co|uk|ca|au|de|fr|jp|cn|ru|br|in|gov|edu|mil|int|info|biz|me|tv|fm|gg|ai|xyz|online|site|tech|store|shop|cloud|ly|gl|to|link|click|live|pro|cc|us|nz|ie|nl|se|no|fi|dk|be|ch|at|pl|cz|hu|ro|bg|hr|sk|si|ee|lv|lt|pt|es|it|gr|tr|il|za|mx|ar|cl|pe|ve|ng|ke|eg|ph|id|my|sg|th|vn|pk|bd|lk|mm|kh|la|mn)\b(?=[\/\?\s"'>)\],]|$)/gi,
},
]; ];
/** /**
@@ -80,10 +108,7 @@ export function filterBody(body: string): ContentFilterOutcome {
* Runs subject and description through the content filter. * Runs subject and description through the content filter.
* *
* - Profanity in either field → rejected, ticket is not saved * - Profanity in either field → rejected, ticket is not saved
* - PII in description → silently redacted before saving * - PII / URLs in either field → silently redacted before saving
*
* Subject is not PII-redacted because it is short, user-facing in table views,
* and unlikely to contain structured PII like card numbers or SSNs.
*/ */
export function filterContent( export function filterContent(
subject: string, subject: string,
@@ -109,7 +134,7 @@ export function filterContent(
return { return {
ok: true, ok: true,
subject, subject: redactPII(subject),
description: description ? redactPII(description) : description, description: description ? redactPII(description) : description,
}; };
} }