Coverage for src / mcp_server_langgraph / tools / code_execution_tools.py: 65%

68 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 00:43 +0000

1""" 

2Code execution tools for MCP server 

3 

4Provides secure Python code execution in sandboxed environments. 

5Integrates CodeValidator and Sandbox backends (Docker, Kubernetes). 

6""" 

7 

8import logging 

9 

10from langchain_core.tools import tool 

11from pydantic import BaseModel, Field 

12 

13from mcp_server_langgraph.core.config import settings 

14from mcp_server_langgraph.execution import ( 

15 CodeValidator, 

16 DockerSandbox, 

17 ExecutionResult, 

18 KubernetesSandbox, 

19 ResourceLimits, 

20 Sandbox, 

21 SandboxError, 

22) 

23 

logger = logging.getLogger(__name__)

# Cap on the text handed back to the caller, to prevent memory exhaustion.
# _truncate_output compares this against len(text), so the unit is characters
# (about 10KB for plain ASCII output).
MAX_OUTPUT_SIZE = 10_000

28 

29 

class ExecutePythonInput(BaseModel):
    """Input schema for execute_python tool"""

    # Source text that will be validated and then run inside the sandbox.
    code: str = Field(
        description="Python code to execute in sandboxed environment",
    )
    # Per-call timeout; None means "use the configured default".
    timeout: int | None = Field(
        default=None,
        description="Optional timeout in seconds (overrides default)",
    )

35 

36 

def _is_execution_enabled() -> bool:
    """Return the current value of the settings.enable_code_execution flag."""
    enabled: bool = settings.enable_code_execution
    return enabled

40 

41 

def _get_sandbox(timeout: int | None = None) -> Sandbox:
    """
    Create sandbox instance based on configuration.

    Args:
        timeout: Optional per-call timeout in seconds. When None, the
            configured settings.code_execution_timeout is used.

    Returns:
        Sandbox instance (Docker or Kubernetes)

    Raises:
        SandboxError: If the configured backend is not supported
    """
    # A per-call timeout takes precedence over the configured default;
    # every other limit always comes from settings.
    timeout_seconds = settings.code_execution_timeout if timeout is None else timeout

    limits = ResourceLimits(
        timeout_seconds=timeout_seconds,
        memory_limit_mb=settings.code_execution_memory_limit_mb,
        cpu_quota=settings.code_execution_cpu_quota,
        disk_quota_mb=settings.code_execution_disk_quota_mb,
        max_processes=settings.code_execution_max_processes,
        network_mode=settings.code_execution_network_mode,  # type: ignore
        allowed_domains=tuple(settings.code_execution_allowed_domains),
    )

    # Select backend
    backend = settings.code_execution_backend

    if backend == "docker-engine":
        return DockerSandbox(
            limits=limits,
            image=settings.code_execution_docker_image,
            socket_path=settings.code_execution_docker_socket,
        )
    if backend == "kubernetes":
        return KubernetesSandbox(
            limits=limits,
            namespace=settings.code_execution_k8s_namespace,
            image=settings.code_execution_docker_image,  # Same image for both
            job_ttl=settings.code_execution_k8s_job_ttl,
        )

    msg = f"Unsupported backend: {backend}"
    raise SandboxError(msg)


def _truncate_output(text: str, max_size: int = MAX_OUTPUT_SIZE) -> str:
    """
    Truncate output if it exceeds maximum size.

    Args:
        text: Text to truncate
        max_size: Maximum size in characters

    Returns:
        The text unchanged when it fits, otherwise its first max_size
        characters followed by a note giving the original length
    """
    if len(text) <= max_size:
        return text

    truncated = text[:max_size]
    return f"{truncated}\n\n... (output truncated, {len(text)} total characters)"


def _format_execution_result(result: ExecutionResult) -> str:
    """Render an ExecutionResult as the human-readable string returned by execute_python."""
    if result.success:
        output = _truncate_output(result.stdout)
        exec_time = f"{result.execution_time:.2f}s"

        response = f"Execution successful (took {exec_time}):\n"
        response += f"\nOutput:\n{output}" if output else "\n(no output)"

        # Memory usage is only reported when the result carries a truthy value.
        if result.memory_used_mb:
            response += f"\n\nMemory used: {result.memory_used_mb:.1f}MB"

        return response

    # Execution failed
    stderr = _truncate_output(result.stderr)
    exec_time = f"{result.execution_time:.2f}s"

    if result.timed_out:
        return f"Execution timed out after {exec_time}:\n{stderr}"

    response = f"Execution failed (exit code {result.exit_code}, took {exec_time}):\n"
    if stderr:
        response += f"\nError:\n{stderr}"
    elif result.error_message:
        response += f"\nError: {result.error_message}"
    else:
        response += "\n(no error details available)"

    return response


@tool
def execute_python(code: str, timeout: int | None = None) -> str:
    """
    Execute Python code in a secure sandboxed environment.

    This tool provides isolated code execution with resource limits and security controls.
    Code is validated before execution to prevent dangerous operations.

    Security Features:
    - Import whitelist (only approved modules allowed)
    - No eval/exec/compile
    - No file system access
    - Configurable network isolation
    - Resource limits (CPU, memory, timeout)
    - Automatic cleanup

    Args:
        code: Python code to execute
        timeout: Optional timeout in seconds (overrides default); must be positive

    Returns:
        Execution result with output or error message

    Example:
        >>> execute_python.invoke({"code": "print(2 + 2)"})
        "Execution successful (took 0.05s):\\n\\nOutput:\\n4"
    """
    # Note: Code execution enablement is controlled at the MCP server level.
    # The execute_python tool is only added to the tool list when settings.enable_code_execution is True.
    # This provides access control without needing runtime checks here.
    # Previous _is_execution_enabled() check removed due to settings caching issues in tests.

    # Validate input
    if not code or not code.strip():
        return "Error: Empty code provided"

    # Reject meaningless timeouts up front instead of letting the sandbox fail later.
    # NOTE(review): the override is honoured as documented, so a caller may also
    # request more time than the configured default - confirm whether a ceiling is wanted.
    if timeout is not None and timeout <= 0:
        return "Error: Timeout must be a positive number of seconds"

    try:
        # Validate code
        validator = CodeValidator(allowed_imports=settings.code_execution_allowed_imports)
        validation_result = validator.validate(code)

        if not validation_result.is_valid:
            errors = "\n- ".join(validation_result.errors)
            return f"Code validation failed:\n- {errors}"

        # Warnings do not block execution; they are only logged.
        if validation_result.warnings:
            warning_lines = "\n- ".join(validation_result.warnings)
            logger.warning(f"Code validation warnings:\n- {warning_lines}")

        # Build the sandbox with the per-call timeout (previously the argument was ignored).
        sandbox = _get_sandbox(timeout=timeout)

        # Execute code
        result: ExecutionResult = sandbox.execute(code)

        # Formatting stays inside the try so malformed results are reported, not raised.
        return _format_execution_result(result)

    except SandboxError as e:
        logger.error(f"Sandbox error: {e}", exc_info=True)
        return f"Sandbox error: {e}"
    except Exception as e:
        # Tool boundary: always hand a string back to the caller rather than raising.
        logger.error(f"Unexpected error during code execution: {e}", exc_info=True)
        return f"Unexpected error: {e}"