Coverage for src / mcp_server_langgraph / core / security.py: 100%
26 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 00:43 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 00:43 +0000
1"""Security utilities for preventing common web vulnerabilities.
3This module provides functions to sanitize sensitive data and prevent:
4- CWE-200/CWE-532: Information Exposure Through Log Files
5- CWE-113: HTTP Response Splitting
6- CWE-20: Improper Input Validation
8All functions are designed to be defense-in-depth security controls.
9"""
11from typing import Any
# Names of fields whose values must never be written to logs.
# The set is assembled from one frozenset per category so that each group can
# be reviewed and extended on its own.
SENSITIVE_FIELDS: frozenset[str] = (
    # Authentication tokens
    frozenset(("token", "access_token", "refresh_token", "bearer_token", "auth_token", "jwt"))
    # Session identifiers
    | frozenset(("session_id", "session_token"))
    # Credentials and secrets
    | frozenset(
        (
            "password",
            "secret",
            "secret_key",
            "client_secret",
            "api_key",
            "api_key_value",
            "private_key",
            "credentials",
            "credential",
        )
    )
    # User PII (GDPR/CCPA/HIPAA)
    | frozenset(("user_id", "username"))
    # HTTP headers containing credentials
    | frozenset(("authorization",))
)
# Free-text fields: their values are kept in logs but cut to a maximum length
# instead of being replaced by a redaction marker.
TRUNCATABLE_FIELDS: frozenset[str] = frozenset(("message", "query"))
def sanitize_for_logging(arguments: dict[str, Any], max_length: int = 500) -> dict[str, Any]:
    """Return a copy of ``arguments`` that is safe to write to logs.

    Mitigates CWE-200 (Information Exposure) and CWE-532 (Information Exposure
    Through Log Files) by redacting credentials, tokens, session identifiers
    and PII, and by truncating large free-text fields.

    Transformations applied to the top-level keys of a shallow copy:
    - Keys whose lower-cased name is in SENSITIVE_FIELDS have any non-None
      value replaced with "[REDACTED]". Matching is case-insensitive so that
      spellings such as "Authorization" or "API_KEY" are caught as well.
    - Keys whose lower-cased name is in TRUNCATABLE_FIELDS and whose value is
      a string longer than ``max_length`` are cut to ``max_length`` characters
      followed by "...".
    - Every other entry (including non-string keys and nested containers) is
      carried over unchanged; nested dicts are NOT inspected.

    Args:
        arguments: Dictionary of tool call arguments or log context.
        max_length: Maximum length for text fields before truncation (default: 500).

    Returns:
        A new dict with the same keys as ``arguments``; the input dict itself
        is never modified.

    Example:
        >>> args = {"token": "secret_jwt", "Authorization": "Bearer abc", "limit": 5}
        >>> sanitize_for_logging(args)
        {'token': '[REDACTED]', 'Authorization': '[REDACTED]', 'limit': 5}
        >>> args["token"]  # Original unchanged
        'secret_jwt'

    Security Context:
        - JWT tokens in logs can be extracted and replayed to impersonate users
        - Session IDs in logs enable session hijacking if logs are compromised
        - API keys, passwords and client secrets grant direct access when leaked
        - Usernames/user IDs are PII subject to GDPR/CCPA/HIPAA protection
        - Large prompts may contain sensitive user data, hence the truncation
    """
    # Work on a shallow copy so the caller's dict is left untouched.
    sanitized = arguments.copy()

    for key, value in arguments.items():
        # Only string keys can name a sensitive or truncatable field.
        if not isinstance(key, str):
            continue

        # HTTP header names and user-supplied argument names arrive in
        # arbitrary case; compare against the lower-case field sets.
        normalized = key.lower()

        if normalized in SENSITIVE_FIELDS:
            # None carries no secret; keeping it shows the field was unset.
            if value is not None:
                sanitized[key] = "[REDACTED]"
        elif normalized in TRUNCATABLE_FIELDS and isinstance(value, str) and len(value) > max_length:
            sanitized[key] = value[:max_length] + "..."

    return sanitized
# Translation table for header sanitization: TAB becomes a space (keeps words
# separated), every other ASCII control character (0x00-0x1F and DEL) is deleted.
_HEADER_CONTROL_TABLE = str.maketrans(
    "\t",
    " ",
    "".join(chr(code) for code in (*range(0x20), 0x7F) if chr(code) != "\t"),
)


def sanitize_header_value(value: str, max_length: int = 100) -> str:
    """Sanitize a user-controlled string for safe use in an HTTP header value.

    Mitigates CWE-113 (HTTP Response Splitting) by removing the characters an
    attacker needs to terminate a header line and start a new one. Intended
    for values such as Content-Disposition filenames that embed user-controlled
    data (e.g. a username taken from a JWT).

    Steps, in order:
    1. Delete all ASCII control characters (CR, LF, NUL, VT, FF, ESC, DEL, ...);
       TAB is replaced by a single space instead of being deleted.
    2. Remove "../" and "..\\" path traversal sequences, repeating until none
       remain so that inputs like "....//" cannot collapse into "../".
    3. Truncate to ``max_length`` characters.
    4. Strip leading and trailing whitespace.

    Args:
        value: User-controlled string to sanitize (e.g., username).
        max_length: Maximum allowed length (default: 100).

    Returns:
        The sanitized string, or "" when ``value`` is empty or None.

    Example:
        >>> sanitize_header_value("alice\\r\\nX-Evil: injected")
        'aliceX-Evil: injected'
        >>> sanitize_header_value("....//etc/passwd")
        'etc/passwd'

    Security Context:
        HTTP Response Splitting Attack:
        1. Attacker registers username: "alice\\r\\nSet-Cookie: session=hacked"
        2. Server builds: Content-Disposition: attachment; filename="user_data_alice<CR><LF>Set-Cookie: session=hacked_20250103.json"
        3. The client parses this as TWO headers (response split)
        4. The injected Set-Cookie header is accepted

        Removing CR/LF (and the other control characters) keeps the value on
        a single header line.

    Related Vulnerabilities:
        - CWE-113: Improper Neutralization of CRLF Sequences in HTTP Headers
        - SANS Top 25: Improper Input Validation
        - OWASP: HTTP Response Splitting
    """
    if not value:
        return ""

    # One pass over the string: drop control characters, turn TAB into a space.
    sanitized = value.translate(_HEADER_CONTROL_TABLE)

    # A single replace pass is bypassable ("....//" -> "../"), so repeat until
    # the string is stable. Each change shortens the string, so this terminates.
    previous = None
    while previous != sanitized:
        previous = sanitized
        sanitized = sanitized.replace("../", "").replace("..\\", "")

    # Enforce maximum length to bound header size.
    if len(sanitized) > max_length:
        sanitized = sanitized[:max_length]

    # Strip last so truncation cannot leave trailing whitespace behind.
    return sanitized.strip()