Coverage for src / mcp_server_langgraph / core / security.py: 100%

26 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 00:43 +0000

1"""Security utilities for preventing common web vulnerabilities. 

2 

3This module provides functions to sanitize sensitive data and prevent: 

4- CWE-200/CWE-532: Information Exposure Through Log Files 

5- CWE-113: HTTP Response Splitting 

6- CWE-20: Improper Input Validation 

7 

8All functions are designed to be defense-in-depth security controls. 

9""" 

10 

11from typing import Any 

12 

13 

# Names of fields whose values must never be written to logs.
# Each category is kept in its own tuple so additions are easy to review;
# the union of all categories is what callers consume.
SENSITIVE_FIELDS: frozenset[str] = frozenset().union(
    # Authentication tokens
    (
        "token",
        "access_token",
        "refresh_token",
        "bearer_token",
        "auth_token",
        "jwt",
    ),
    # Session identifiers
    (
        "session_id",
        "session_token",
    ),
    # Credentials and secrets
    (
        "password",
        "secret",
        "secret_key",
        "client_secret",
        "api_key",
        "api_key_value",
        "private_key",
        "credentials",
        "credential",
    ),
    # User PII (GDPR/CCPA/HIPAA)
    (
        "user_id",
        "username",
    ),
    # HTTP headers containing credentials
    ("authorization",),
)

45 

# Free-text fields that stay in the logs but are cut down to a maximum
# length instead of being replaced with a redaction marker.
TRUNCATABLE_FIELDS: frozenset[str] = frozenset(("message", "query"))

53 

54 

def sanitize_for_logging(arguments: dict[str, Any], max_length: int = 500) -> dict[str, Any]:
    """Return a copy of ``arguments`` that is safe to write to logs.

    Mitigates CWE-200 (Information Exposure) and CWE-532 (Information
    Exposure Through Log Files) by redacting credentials, tokens, session
    identifiers and PII, and by truncating long free-text fields.

    Transformations applied to the shallow copy:
    - Keys matching SENSITIVE_FIELDS have their value replaced with
      ``"[REDACTED]"`` (``None`` values are left as ``None``).
    - Keys matching TRUNCATABLE_FIELDS whose value is a string longer than
      ``max_length`` are cut to ``max_length`` characters plus ``"..."``.
    - Every other entry is carried over unchanged (same object, not a deep
      copy); nested containers are not inspected.

    String keys are matched case-insensitively, so ``"Authorization"``,
    ``"Password"`` or ``"API_KEY"`` are redacted just like their lowercase
    spellings. The original key spelling is preserved in the result.

    Args:
        arguments: Dictionary of tool call arguments or log context.
        max_length: Maximum length for truncatable text fields
            (default: 500). Negative values are treated as 0.

    Returns:
        Sanitized shallow copy of ``arguments``; the input dict itself is
        never modified.

    Example:
        >>> args = {"token": "secret_jwt", "Authorization": "Bearer abc", "n": 1}
        >>> sanitize_for_logging(args)
        {'token': '[REDACTED]', 'Authorization': '[REDACTED]', 'n': 1}
        >>> args["token"]  # Original unchanged
        'secret_jwt'
    """
    # A negative limit would slice from the end of the string instead of
    # truncating it, so clamp to zero.
    limit = max(max_length, 0)

    # Work on a shallow copy so the caller's dict is left untouched.
    sanitized = arguments.copy()

    for key, value in arguments.items():
        # HTTP header names and hand-written field names vary in case;
        # normalise string keys so "Authorization" matches "authorization".
        lookup = key.lower() if isinstance(key, str) else key

        if lookup in SENSITIVE_FIELDS:
            # None carries no secret and is useful in logs ("not provided").
            if value is not None:
                sanitized[key] = "[REDACTED]"
        elif lookup in TRUNCATABLE_FIELDS and isinstance(value, str) and len(value) > limit:
            sanitized[key] = value[:limit] + "..."

    return sanitized

111 

112 

# Translation table for header values: every C0 control character and DEL is
# dropped, except TAB, which becomes a single space to keep text readable.
_HEADER_CONTROL_CHAR_TABLE = str.maketrans(
    {**dict.fromkeys(range(0x20)), 0x7F: None, 0x09: " "}
)

# Path traversal fragments stripped from header values (POSIX and Windows).
_PATH_TRAVERSAL_SEQUENCES = ("../", "..\\")


def sanitize_header_value(value: str, max_length: int = 100) -> str:
    """Sanitize a user-controlled string for safe use in an HTTP header value.

    Mitigates CWE-113 (HTTP Response Splitting) by removing control
    characters that could inject additional headers or alter the response.
    Intended for values such as Content-Disposition filenames that embed
    user-controlled data (e.g., a username).

    Processing steps, in order:
    1. Drop all ASCII control characters (0x00-0x1F and 0x7F), including
       CR, LF and NUL; TAB is replaced with a space.
    2. Remove ``../`` and ``..\\`` repeatedly until none remain, so nested
       input such as ``....//`` cannot collapse back into ``../``.
    3. Truncate to ``max_length`` characters.
    4. Strip leading and trailing whitespace.

    Args:
        value: User-controlled string to sanitize (e.g., username).
        max_length: Maximum allowed length (default: 100). Negative values
            are treated as 0.

    Returns:
        Sanitized string; ``""`` when ``value`` is empty or falsy.

    Example:
        >>> sanitize_header_value("alice\\r\\nX-Evil: injected")
        'aliceX-Evil: injected'
        >>> sanitize_header_value("....//etc/passwd")
        'etc/passwd'

    Security Context:
        HTTP Response Splitting Attack:
        1. Attacker registers username: "alice\\r\\nSet-Cookie: session=hacked"
        2. Server builds a Content-Disposition header containing that name
        3. The embedded CR/LF ends the header early, so the client sees an
           extra, attacker-chosen Set-Cookie header
        Removing CR/LF (and other control characters) prevents the split.

    Related Vulnerabilities:
        - CWE-113: Improper Neutralization of CRLF Sequences in HTTP Headers
        - SANS Top 25: Improper Input Validation
        - OWASP: HTTP Response Splitting
    """
    if not value:
        return ""

    # One pass removes every control character and maps TAB to a space.
    sanitized = value.translate(_HEADER_CONTROL_CHAR_TABLE)

    # A single replace() pass can be bypassed: removing the inner "../" from
    # "....//" leaves a fresh "../". Repeat until the string stops changing;
    # each changing pass shortens the string, so the loop always terminates.
    while True:
        collapsed = sanitized
        for sequence in _PATH_TRAVERSAL_SEQUENCES:
            collapsed = collapsed.replace(sequence, "")
        if collapsed == sanitized:
            break
        sanitized = collapsed

    # Bound the length to prevent oversized headers; clamp so a negative
    # limit cannot turn the slice into "drop the last N characters".
    sanitized = sanitized[: max(max_length, 0)]

    # Strip last so truncation cannot leave trailing whitespace behind.
    return sanitized.strip()