Coverage for src/mcp_server_langgraph/core/security.py: 100%

"""Security utilities for preventing common web vulnerabilities.

This module provides functions to sanitize sensitive data and prevent:
- CWE-200/CWE-532: Information Exposure Through Log Files
- CWE-113: HTTP Response Splitting
- CWE-20: Improper Input Validation

All functions are designed to be defense-in-depth security controls.
"""

11from typing import Any

# Field names whose values must never reach a log line. Entries are stored in
# lower case and grouped by category so additions land in the right place.
SENSITIVE_FIELDS: frozenset[str] = frozenset(
    {
        # Authentication tokens
        "token",
        "access_token",
        "refresh_token",
        "bearer_token",
        "auth_token",
        "jwt",
        # Session identifiers
        "session_id",
        "session_token",
        # Credentials and secrets
        "password",
        "secret",
        "secret_key",
        "client_secret",
        "api_key",
        "api_key_value",
        "private_key",
        "credentials",
        "credential",
        # User PII (GDPR/CCPA/HIPAA)
        "user_id",
        "username",
        # HTTP headers containing credentials
        "authorization",
    }
)

# Free-text fields that are kept in logs but shortened when they grow too
# long, rather than being redacted outright.
TRUNCATABLE_FIELDS: frozenset[str] = frozenset(
    {
        "message",
        "query",
    }
)

def sanitize_for_logging(arguments: dict[str, Any], max_length: int = 500) -> dict[str, Any]:
    """Return a copy of ``arguments`` that is safe to write to logs.

    Mitigates CWE-200 (Information Exposure) and CWE-532 (Information
    Exposure Through Log Files) by redacting credentials, tokens, session
    identifiers and PII, and by shortening large free-text fields.

    Transformations applied to the top-level keys of the copy:
    - Keys whose lower-cased name is in ``SENSITIVE_FIELDS`` have any
      non-``None`` value replaced with ``"[REDACTED]"``. Matching is
      case-insensitive so that ``"Authorization"`` or ``"Password"`` are
      caught as well as their lower-case spellings.
    - Keys whose lower-cased name is in ``TRUNCATABLE_FIELDS`` and whose
      value is a string longer than ``max_length`` are cut to
      ``max_length`` characters followed by ``"..."``.
    - Every other entry is carried over unchanged.

    The copy is shallow: nested dicts and lists are shared with the input
    and are NOT inspected, so secrets nested below the top level are not
    redacted by this function.

    Args:
        arguments: Dictionary of tool call arguments or log context.
        max_length: Maximum length for text fields before truncation
            (default: 500). Negative values are treated as 0.

    Returns:
        A new dict of the same type as ``arguments`` (via ``.copy()``);
        the input mapping itself is never modified.

    Example:
        >>> args = {"token": "secret_jwt", "Authorization": "Bearer abc", "limit": 5}
        >>> sanitize_for_logging(args)
        {'token': '[REDACTED]', 'Authorization': '[REDACTED]', 'limit': 5}
        >>> args["token"]  # original is unchanged
        'secret_jwt'

    Security Context:
        - JWT tokens in logs can be extracted and replayed to impersonate users
        - Session IDs in logs enable session hijacking if logs are compromised
        - API keys allow unauthorized access to external services
        - Passwords enable account compromise
        - Client secrets compromise OAuth flows
        - Usernames/user IDs are PII subject to GDPR/CCPA/HIPAA protection
        - Large prompts may contain sensitive user data (PII, credentials, secrets)
    """
    # Shallow copy so the caller's mapping is never mutated.
    sanitized = arguments.copy()

    # A negative limit would make the slice below count from the end of the
    # string instead of truncating it, so clamp to zero.
    limit = max(max_length, 0)

    for key, value in arguments.items():
        if not isinstance(key, str):
            # Non-string keys cannot name a sensitive field; leave as-is.
            continue

        normalized = key.lower()
        if normalized in SENSITIVE_FIELDS:
            # None carries no secret and is useful to see in logs.
            if value is not None:
                sanitized[key] = "[REDACTED]"
        elif normalized in TRUNCATABLE_FIELDS and isinstance(value, str) and len(value) > limit:
            sanitized[key] = value[:limit] + "..."

    return sanitized

111

112

# Translation table for ``sanitize_header_value``: drops every C0 control
# character (0x00-0x1F) and DEL (0x7F), except TAB which becomes a space.
_HEADER_CONTROL_CHARS = {code: None for code in (*range(0x20), 0x7F)}
_HEADER_CONTROL_CHARS[0x09] = " "


def sanitize_header_value(value: str, max_length: int = 100) -> str:
    """Sanitize a user-controlled string for safe use in an HTTP header value.

    Mitigates CWE-113 (HTTP Response Splitting) by removing the characters
    an attacker needs to terminate the current header and start a new one.
    Intended for values such as Content-Disposition filenames that embed
    user-controlled data (e.g., a username taken from a JWT).

    Steps, in order:
    1. Falsy input (``""`` or ``None``) returns ``""``.
    2. All C0 control characters and DEL are removed (CR, LF and NUL
       included); TAB is replaced by a single space.
    3. ``../`` and ``..\\`` sequences are removed repeatedly until none
       remain, so nested forms such as ``....//`` cannot collapse back
       into a traversal sequence.
    4. The result is cut to ``max_length`` characters.
    5. Leading and trailing whitespace is stripped.

    Args:
        value: User-controlled string to sanitize (e.g., username).
        max_length: Maximum allowed length (default: 100). Negative values
            are treated as 0.

    Returns:
        The sanitized string; never longer than ``max_length``.

    Example:
        >>> sanitize_header_value("alice\\r\\nX-Evil: injected")
        'aliceX-Evil: injected'
        >>> sanitize_header_value("....//etc/passwd")
        'etc/passwd'

    Security Context:
        HTTP Response Splitting Attack:
        1. Attacker registers username: "alice\\r\\nSet-Cookie: session=hacked"
        2. Server builds the header
           Content-Disposition: attachment; filename="user_data_alice
           Set-Cookie: session=hacked_20250103.json"
        3. The client parses this as TWO headers (response split)
        4. The attacker-supplied Set-Cookie header is accepted

        Removing CR/LF keeps the value on a single header line.

    Related Vulnerabilities:
        - CWE-113: Improper Neutralization of CRLF Sequences in HTTP Headers
        - SANS Top 25: Improper Input Validation
        - OWASP: HTTP Response Splitting
    """
    if not value:
        return ""

    # One pass over the string: strip control characters, TAB -> space.
    sanitized = value.translate(_HEADER_CONTROL_CHARS)

    # Remove traversal sequences until the string stops changing; a single
    # pass would turn "....//" into "../". Each change shortens the string,
    # so the loop terminates.
    previous = None
    while previous != sanitized:
        previous = sanitized
        sanitized = sanitized.replace("../", "").replace("..\\", "")

    # Enforce the length cap to bound header size; clamp so a negative
    # limit cannot slice from the end of the string.
    sanitized = sanitized[: max(max_length, 0)]

    # Strip last so truncation cannot leave trailing whitespace behind.
    return sanitized.strip()

Coverage for src/mcp_server_langgraph/core/security.py: 100%

26 statements