update:filters
This commit is contained in:
@@ -32,6 +32,34 @@ const PII_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
|
||||
label: "[REDACTED_SSN]",
|
||||
pattern: /\b\d{3}[ -.]\d{2}[ -.]\d{4}\b/g,
|
||||
},
|
||||
{
|
||||
// Explicit URLs: http://, https://, ftp://, and protocol-relative (leading "//").
|
||||
// Captures the full URL including path, query string, and fragment.
|
||||
label: "[REDACTED_URL]",
|
||||
pattern: /(?:https?:\/\/|ftp:\/\/|\/\/)[\w\-._~:/?#[\]@!$&'()*+,;=%]+/gi,
|
||||
},
|
||||
{
|
||||
// Domains — both literal and separator-obfuscated variants.
|
||||
//
|
||||
// The separator group (SEP) matches any of:
|
||||
// - a literal dot: \.
|
||||
// - a comma (with optional spaces): \s*,\s*
|
||||
// - the word "dot" in plain/bracket/paren form: \s*(?:dot|\[dot\]|\(dot\))\s*
|
||||
// - Unicode dot substitutes: · (U+00B7) 。 (U+3002) ｡ (U+FF61)
|
||||
//
|
||||
// This covers: example.com example,com example dot com
|
||||
// example (dot) com example·com example。com
|
||||
//
|
||||
// A recognised TLD must follow the final separator. The TLD must then be
|
||||
// at a word boundary and followed by / ? whitespace, a quote, > ) ] , or end-of-string so
|
||||
// that file extensions like .json / .ts and version strings like 1.2.3
|
||||
// are not caught.
|
||||
//
|
||||
// The negative lookbehind on [REDACTED_ prevents double-processing tokens
|
||||
// that were already replaced by an earlier pattern.
|
||||
label: "[REDACTED_URL]",
|
||||
pattern: /(?<!\[REDACTED_)\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.|,|\s*,\s*|\s*(?:dot|\[dot\]|\(dot\))\s*|[·。。]))+(?:com|net|org|io|dev|app|co|uk|ca|au|de|fr|jp|cn|ru|br|in|gov|edu|mil|int|info|biz|me|tv|fm|gg|ai|xyz|online|site|tech|store|shop|cloud|ly|gl|to|link|click|live|pro|cc|us|nz|ie|nl|se|no|fi|dk|be|ch|at|pl|cz|hu|ro|bg|hr|sk|si|ee|lv|lt|pt|es|it|gr|tr|il|za|mx|ar|cl|pe|ve|ng|ke|eg|ph|id|my|sg|th|vn|pk|bd|lk|mm|kh|la|mn)\b(?=[\/\?\s"'>)\],]|$)/gi,
|
||||
},
|
||||
];
|
||||
|
||||
/**
|
||||
@@ -80,10 +108,7 @@ export function filterBody(body: string): ContentFilterOutcome {
|
||||
* Runs subject and description through the content filter.
|
||||
*
|
||||
* - Profanity in either field → rejected, ticket is not saved
|
||||
* - PII in description → silently redacted before saving
|
||||
*
|
||||
* Subject is not PII-redacted because it is short, user-facing in table views,
|
||||
* and unlikely to contain structured PII like card numbers or SSNs.
|
||||
* - PII / URLs in either field → silently redacted before saving
|
||||
*/
|
||||
export function filterContent(
|
||||
subject: string,
|
||||
@@ -109,7 +134,7 @@ export function filterContent(
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
subject,
|
||||
subject: redactPII(subject),
|
||||
description: description ? redactPII(description) : description,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user