Coverage for src / mcp_server_langgraph / execution / docker_sandbox.py: 40%

141 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 00:43 +0000

1""" 

2Docker-based sandbox for code execution 

3 

4Provides secure isolated Python code execution using Docker containers. 

5Supports resource limits, network isolation, and automatic cleanup. 

6""" 

7 

8from __future__ import annotations 

9 

10import logging 

11import time 

12from typing import TYPE_CHECKING 

13 

14# Docker is an optional dependency - gracefully handle missing docker package 

15try: 

16 import docker 

17 from docker.errors import ImageNotFound, NotFound 

18 from docker.models.containers import Container 

19 

20 DOCKER_AVAILABLE = True 

21except ImportError: 

22 DOCKER_AVAILABLE = False 

23 if TYPE_CHECKING: 

24 import docker 

25 from docker.errors import ImageNotFound, NotFound 

26 from docker.models.containers import Container 

27 

28from mcp_server_langgraph.execution.resource_limits import ResourceLimits 

29from mcp_server_langgraph.execution.sandbox import ExecutionResult, Sandbox, SandboxError 

30import contextlib 

31 

32logger = logging.getLogger(__name__) 

33 

34 

class DockerSandbox(Sandbox):
    """
    Docker-based sandbox for executing Python code in isolated containers.

    Features:
    - Ephemeral containers (created and destroyed per execution)
    - Resource limits (CPU, memory, process count, timeout)
    - Network isolation ("none" and "unrestricted"; "allowlist" and any
      unrecognized mode fail closed to no network access)
    - Read-only root filesystem with size-limited tmpfs scratch directories
    - No privilege escalation, all capabilities dropped, non-root user
    - Automatic cleanup

    Example:
        >>> limits = ResourceLimits(timeout_seconds=30, memory_limit_mb=512)
        >>> sandbox = DockerSandbox(limits=limits)
        >>> result = sandbox.execute("print('Hello')")
        >>> assert result.success
        >>> assert "Hello" in result.stdout
    """

    def __init__(
        self,
        limits: ResourceLimits,
        image: str = "python:3.12-slim",
        socket_path: str = "/var/run/docker.sock",
    ) -> None:
        """
        Initialize Docker sandbox.

        Connects to the Docker daemon over the given Unix socket, verifies the
        connection with a ping, and makes sure the image is present locally.

        Args:
            limits: Resource limits to enforce
            image: Docker image to use (default: python:3.12-slim)
            socket_path: Path to Docker socket

        Raises:
            SandboxError: If the docker package is missing, the daemon cannot
                be reached, or the image cannot be pulled
        """
        super().__init__(limits)

        # The docker package is optional; fail with an actionable message.
        if not DOCKER_AVAILABLE:
            msg = "Docker package not installed. Install it with: pip install docker or uv add docker"
            raise SandboxError(msg)

        self.image = image
        self.socket_path = socket_path

        try:
            self.client = docker.DockerClient(base_url=f"unix://{socket_path}")  # type: ignore[attr-defined]
            # Test connection
            self.client.ping()
        except Exception as e:
            msg = f"Docker not available: {e}"
            raise SandboxError(msg) from e

        # Ensure image exists
        self._ensure_image()

    def _ensure_image(self) -> None:
        """
        Ensure the Docker image is available locally, pulling it if necessary.

        Raises:
            SandboxError: If the image is missing and the pull fails
        """
        try:
            self.client.images.get(self.image)
            logger.debug("Docker image %s already available", self.image)
        except ImageNotFound:
            logger.info("Pulling Docker image %s...", self.image)
            try:
                self.client.images.pull(self.image)
                logger.info("Successfully pulled %s", self.image)
            except Exception as e:
                msg = f"Failed to pull Docker image {self.image}: {e}"
                raise SandboxError(msg) from e

    def execute(self, code: str) -> ExecutionResult:
        """
        Execute Python code in a Docker container.

        A fresh container is created for the call and is always removed
        afterwards, whether execution succeeds, fails, or times out.
        stdout and stderr are retrieved from Docker as separate streams.

        Args:
            code: Python source code to execute

        Returns:
            ExecutionResult with execution status and outputs

        Raises:
            SandboxError: If container creation or execution fails
        """
        if not code or not code.strip():
            return self._create_failure_result(
                stdout="",
                stderr="Error: Empty code provided",
                exit_code=1,
                execution_time=0.0,
                error_message="Empty code provided",
            )

        container = None
        start_time = time.time()

        try:
            # Create container with resource limits, then run it
            container = self._create_container(code)
            container.start()

            exit_code, timed_out = self._wait_for_exit(container)
            execution_time = self._measure_time(start_time)

            stdout, stderr = self._collect_output(container)
            memory_used_mb = self._read_memory_usage(container)

            if timed_out:
                return self._create_failure_result(
                    stdout=stdout,
                    stderr=stderr or f"Execution timed out after {self.limits.timeout_seconds}s",
                    exit_code=exit_code,
                    execution_time=execution_time,
                    timed_out=True,
                    error_message=f"Timeout after {self.limits.timeout_seconds}s",
                )
            if exit_code == 0:
                return self._create_success_result(
                    stdout=stdout,
                    stderr=stderr,
                    execution_time=execution_time,
                    memory_used_mb=memory_used_mb,
                )
            return self._create_failure_result(
                stdout=stdout,
                stderr=stderr,
                exit_code=exit_code,
                execution_time=execution_time,
                error_message=f"Process exited with code {exit_code}",
            )

        except Exception as e:
            logger.error("Docker execution failed: %s", e, exc_info=True)
            msg = f"Docker execution failed: {e}"
            raise SandboxError(msg) from e
        finally:
            # Single cleanup point for every exit path (return or raise).
            if container is not None:
                self._cleanup_container(container)

    def _wait_for_exit(self, container: Container) -> tuple[int, bool]:
        """
        Wait for the container to finish, bounded by the configured timeout.

        Args:
            container: Started container to wait on

        Returns:
            ``(exit_code, timed_out)``. On timeout the container is stopped
            (killed if the stop fails) and the exit code is 124.
        """
        try:
            status = container.wait(timeout=self.limits.timeout_seconds)
        except Exception:
            # Any failure of wait() is treated as a timeout.
            # NOTE(review): this also classifies daemon connection errors as
            # timeouts; narrow the exception type once the exact exceptions
            # raised by the installed docker/requests versions are confirmed.
            try:
                container.stop(timeout=1)
            except Exception:
                container.kill()
            return 124, True  # 124 is the conventional timeout exit code

        if isinstance(status, dict):
            # wait() reports a dict; a missing StatusCode counts as failure.
            status = status.get("StatusCode", 1)
        return status, False

    def _collect_output(self, container: Container) -> tuple[str, str]:
        """
        Fetch the container's stdout and stderr as separate strings.

        Undecodable bytes are replaced rather than discarding the whole
        output.

        Args:
            container: Container whose logs should be read

        Returns:
            ``(stdout, stderr)``. If the logs cannot be retrieved, stdout is
            empty and stderr describes the retrieval error.
        """
        try:
            stdout = container.logs(stdout=True, stderr=False).decode("utf-8", errors="replace")
            stderr = container.logs(stdout=False, stderr=True).decode("utf-8", errors="replace")
        except Exception as e:
            return "", f"Error retrieving logs: {e}"
        return stdout, stderr

    def _read_memory_usage(self, container: Container) -> float | None:
        """
        Read peak memory usage of the container in MiB, if Docker reports it.

        Args:
            container: Container to query

        Returns:
            Peak usage in MiB, or None when the statistic is unavailable.
            Memory stats are informational, so every error is swallowed.
        """
        try:
            stats = container.stats(stream=False)
            return stats["memory_stats"]["max_usage"] / (1024 * 1024)
        except Exception:
            return None

    def _create_container(self, code: str) -> Container:
        """
        Create Docker container with resource limits and security settings.

        Args:
            code: Python code to execute

        Returns:
            Docker container (not started)

        Raises:
            SandboxError: If container creation fails
        """
        try:
            # Configure resource limits
            mem_limit = f"{self.limits.memory_limit_mb}m"
            nano_cpus = int(self.limits.cpu_quota * 1_000_000_000)  # Convert to nano CPUs

            # Configure network
            network_mode = self._get_network_mode()

            # Note: We don't use auto_remove=True because we need to get logs after execution
            return self.client.containers.create(
                image=self.image,
                command=["python", "-c", code],
                detach=True,
                user="nobody",  # Run as non-root user (security best practice)
                mem_limit=mem_limit,
                memswap_limit=mem_limit,  # Same as mem_limit, so no extra swap
                nano_cpus=nano_cpus,
                network_mode=network_mode,
                network_disabled=(network_mode == "none"),
                read_only=True,  # Read-only root FS for security (OpenAI Codex Finding #4)
                security_opt=["no-new-privileges"],  # Prevent privilege escalation
                cap_drop=["ALL"],  # Drop all capabilities
                pids_limit=self.limits.max_processes,
                # Tmpfs for writable directories (ephemeral, in-memory)
                # Python needs /tmp and /var/tmp for tempfile module
                tmpfs={  # nosec B108 - tmpfs is ephemeral in-memory, not persistent storage
                    "/tmp": f"size={self.limits.disk_quota_mb}m,uid=65534,gid=65534",  # nosec B108 - nobody user
                    "/var/tmp": f"size={self.limits.disk_quota_mb}m,uid=65534,gid=65534",  # nosec B108 - nobody user
                },
            )

        except Exception as e:
            logger.error("Failed to create Docker container: %s", e, exc_info=True)
            msg = f"Failed to create Docker container: {e}"
            raise SandboxError(msg) from e

    def _get_network_mode(self) -> str:
        """
        Get Docker network mode from resource limits.

        Only "unrestricted" grants network access (Docker's "bridge" network).
        "none", the not-yet-implemented "allowlist" mode, and any unrecognized
        value all resolve to "none" so the sandbox fails closed.

        Returns:
            Docker network mode string ("bridge" or "none")
        """
        mode = self.limits.network_mode

        if mode == "unrestricted":
            return "bridge"  # Default Docker network

        if mode == "allowlist":
            # SECURITY FIX (OpenAI Codex Finding #3): Network allowlist mode is NOT fully implemented!
            # FAIL CLOSED: Always return "none" until proper allowlist filtering is implemented.
            #
            # For proper security implementation, this would require:
            # 1. Docker network policies or firewall rules (iptables/nftables)
            # 2. DNS filtering to resolve allowed domains to IPs
            # 3. Egress filtering to block unlisted destinations
            #
            # Until implemented, we fail closed (deny all network access) rather than
            # fall back to unrestricted bridge network, which would create a false sense of security.
            logger.warning(
                "Network allowlist mode requested but not implemented. "
                "Failing closed with network_mode='none' (no network access). "
                "Requested allowlist domains: %s. "
                "To enable network access, use network_mode='unrestricted' explicitly.",
                self.limits.allowed_domains,
            )
        elif mode != "none":
            # Previously this case returned None, which let Docker attach its
            # default network. An unknown mode must never grant network access.
            logger.warning(
                "Unknown network mode %r requested. Failing closed with network_mode='none' (no network access).",
                mode,
            )

        return "none"  # Fail closed - deny all network access for security

    def _cleanup_container(self, container: Container) -> None:
        """
        Clean up Docker container.

        Best effort: stops the container if it is still running, then force
        removes it. Failures are logged and never raised to the caller.

        Args:
            container: Container to remove
        """
        try:
            # Stop container if still running
            try:
                container.reload()  # Refresh container state
                if container.status == "running":
                    container.stop(timeout=1)
            except NotFound:
                pass  # Already removed
            except Exception:
                # Force kill if reload/stop fails
                with contextlib.suppress(Exception):
                    container.kill()

            # Remove container
            try:
                container.remove(force=True)
                logger.debug("Removed container %s", container.id[:12])
            except NotFound:
                pass  # Already removed
            except Exception as e:
                logger.warning("Failed to remove container %s: %s", container.id[:12], e)

        except Exception as e:
            logger.warning("Error during container cleanup: %s", e)

    def __del__(self) -> None:
        """Close the Docker client on garbage collection (best effort)."""
        # __init__ may have raised before self.client was assigned, and
        # finalizers must never raise, so every error is suppressed.
        with contextlib.suppress(Exception):
            client = getattr(self, "client", None)
            if client is not None:
                client.close()