Coverage for src / mcp_server_langgraph / execution / docker_sandbox.py: 40%
141 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 00:43 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 00:43 +0000
1"""
2Docker-based sandbox for code execution
4Provides secure isolated Python code execution using Docker containers.
5Supports resource limits, network isolation, and automatic cleanup.
6"""
8from __future__ import annotations
10import logging
11import time
12from typing import TYPE_CHECKING
14# Docker is an optional dependency - gracefully handle missing docker package
15try:
16 import docker
17 from docker.errors import ImageNotFound, NotFound
18 from docker.models.containers import Container
20 DOCKER_AVAILABLE = True
21except ImportError:
22 DOCKER_AVAILABLE = False
23 if TYPE_CHECKING:
24 import docker
25 from docker.errors import ImageNotFound, NotFound
26 from docker.models.containers import Container
28from mcp_server_langgraph.execution.resource_limits import ResourceLimits
29from mcp_server_langgraph.execution.sandbox import ExecutionResult, Sandbox, SandboxError
30import contextlib
32logger = logging.getLogger(__name__)
35class DockerSandbox(Sandbox):
36 """
37 Docker-based sandbox for executing Python code in isolated containers.
39 Features:
40 - Ephemeral containers (created and destroyed per execution)
41 - Resource limits (CPU, memory, timeout)
42 - Network isolation (none/allowlist/unrestricted)
43 - Read-only root filesystem
44 - No privilege escalation
45 - Automatic cleanup
47 Example:
48 >>> limits = ResourceLimits(timeout_seconds=30, memory_limit_mb=512)
49 >>> sandbox = DockerSandbox(limits=limits)
50 >>> result = sandbox.execute("print('Hello')")
51 >>> assert result.success
52 >>> assert "Hello" in result.stdout
53 """
def __init__(
    self,
    limits: ResourceLimits,
    image: str = "python:3.12-slim",
    socket_path: str = "/var/run/docker.sock",
) -> None:
    """
    Initialize Docker sandbox.

    Connects to the Docker daemon over the given Unix socket, verifies the
    connection with a ping, and then makes sure the execution image is
    available.

    Args:
        limits: Resource limits to enforce
        image: Docker image to use (default: python:3.12-slim)
        socket_path: Path to Docker socket

    Raises:
        SandboxError: If the docker package is not installed or the Docker
            daemon cannot be reached
    """
    super().__init__(limits)

    # Check if docker package is available (it is an optional dependency)
    if not DOCKER_AVAILABLE:
        msg = "Docker package not installed. Install it with: pip install docker or uv add docker"
        raise SandboxError(msg)

    self.image = image
    self.socket_path = socket_path

    try:
        self.client = docker.DockerClient(base_url=f"unix://{socket_path}")  # type: ignore[attr-defined]
        # Test connection
        self.client.ping()
    except Exception as e:
        msg = f"Docker not available: {e}"
        # Chain explicitly so the traceback shows the underlying Docker
        # error as the direct cause of the SandboxError.
        raise SandboxError(msg) from e

    # Ensure image exists
    self._ensure_image()
def _ensure_image(self) -> None:
    """
    Ensure Docker image is available, pull if necessary.

    Raises:
        SandboxError: If the image is missing locally and pulling it fails
    """
    try:
        self.client.images.get(self.image)
        logger.debug(f"Docker image {self.image} already available")
    except ImageNotFound:
        logger.info(f"Pulling Docker image {self.image}...")
        try:
            self.client.images.pull(self.image)
            logger.info(f"Successfully pulled {self.image}")
        except Exception as e:
            msg = f"Failed to pull Docker image {self.image}: {e}"
            # Chain explicitly so the pull failure is reported as the
            # direct cause rather than as an incidental context.
            raise SandboxError(msg) from e
def execute(self, code: str) -> ExecutionResult:
    """
    Execute Python code in a Docker container.

    A fresh container is created for every call and is removed afterwards
    on every exit path (success, failure, timeout, or interruption).

    Args:
        code: Python source code to execute

    Returns:
        ExecutionResult with execution status and outputs. Empty or
        whitespace-only code yields a failure result without creating a
        container; a timeout yields a failure result with exit code 124.

    Raises:
        SandboxError: If container creation or execution fails
    """
    if not code or not code.strip():
        return self._create_failure_result(
            stdout="",
            stderr="Error: Empty code provided",
            exit_code=1,
            execution_time=0.0,
            error_message="Empty code provided",
        )

    container = None
    start_time = time.time()

    try:
        # Create container with resource limits
        container = self._create_container(code)

        # Start container
        container.start()

        # Wait for completion with timeout
        timed_out = False
        try:
            exit_code = container.wait(timeout=self.limits.timeout_seconds)
            if isinstance(exit_code, dict):
                exit_code = exit_code.get("StatusCode", 1)
        except Exception:
            # NOTE(review): any failure of wait() is treated as a timeout;
            # confirm whether non-timeout API errors should be reported
            # differently.
            timed_out = True
            exit_code = 124  # Standard timeout exit code

            # Stop container, escalating to kill if a graceful stop fails
            try:
                container.stop(timeout=1)
            except Exception:
                container.kill()

        execution_time = self._measure_time(start_time)

        # Get logs (stdout and stderr are fetched as separate streams)
        stdout, stderr = self._read_container_output(container)

        # Get memory usage (if available)
        memory_used_mb = None
        try:
            stats = container.stats(stream=False)
            if "memory_stats" in stats and "max_usage" in stats["memory_stats"]:
                memory_used_mb = stats["memory_stats"]["max_usage"] / (1024 * 1024)
        except Exception:
            # Memory stats are not critical; record why they are missing
            logger.debug("Could not retrieve container memory stats", exc_info=True)

        # Create result
        if timed_out:
            return self._create_failure_result(
                stdout=stdout,
                stderr=stderr or f"Execution timed out after {self.limits.timeout_seconds}s",
                exit_code=exit_code,
                execution_time=execution_time,
                timed_out=True,
                error_message=f"Timeout after {self.limits.timeout_seconds}s",
            )
        if exit_code == 0:
            return self._create_success_result(
                stdout=stdout,
                stderr=stderr,
                execution_time=execution_time,
                memory_used_mb=memory_used_mb,
            )
        return self._create_failure_result(
            stdout=stdout,
            stderr=stderr,
            exit_code=exit_code,
            execution_time=execution_time,
            error_message=f"Process exited with code {exit_code}",
        )

    except Exception as e:
        logger.error(f"Docker execution failed: {e}", exc_info=True)
        msg = f"Docker execution failed: {e}"
        raise SandboxError(msg) from e

    finally:
        # Cleanup container on every exit path, including KeyboardInterrupt
        # and SystemExit, which `except Exception` does not catch.
        if container is not None:
            self._cleanup_container(container)

def _read_container_output(self, container: Container) -> tuple[str, str]:
    """
    Fetch a container's stdout and stderr as two separate strings.

    Each stream is requested on its own so output does not have to be
    attributed by guessing from its content. Bytes that are not valid
    UTF-8 are replaced instead of discarding the whole output.

    Args:
        container: Container whose logs should be read

    Returns:
        Tuple of (stdout, stderr). If the logs cannot be retrieved, stdout
        is empty and stderr carries the retrieval error message.
    """
    try:
        stdout = container.logs(stdout=True, stderr=False).decode("utf-8", errors="replace")
        stderr = container.logs(stdout=False, stderr=True).decode("utf-8", errors="replace")
    except Exception as e:
        return "", f"Error retrieving logs: {e}"
    return stdout, stderr
def _create_container(self, code: str) -> Container:
    """
    Create Docker container with resource limits and security settings.

    The container runs `python -c <code>` as the `nobody` user with a
    read-only root filesystem, all capabilities dropped, privilege
    escalation disabled, and in-memory tmpfs mounts for /tmp and /var/tmp.

    Args:
        code: Python code to execute

    Returns:
        Docker container (not started)

    Raises:
        SandboxError: If container creation fails
    """
    try:
        # Configure resource limits
        mem_limit = f"{self.limits.memory_limit_mb}m"
        nano_cpus = int(self.limits.cpu_quota * 1_000_000_000)  # Convert to nano CPUs

        # Configure network
        network_mode = self._get_network_mode()

        # Create container
        # Note: We don't use auto_remove=True because we need to get logs after execution
        container = self.client.containers.create(
            image=self.image,
            command=["python", "-c", code],
            detach=True,
            user="nobody",  # Run as non-root user (security best practice)
            mem_limit=mem_limit,
            memswap_limit=mem_limit,  # Disable swap
            nano_cpus=nano_cpus,
            network_mode=network_mode,
            network_disabled=(network_mode == "none"),
            read_only=True,  # Read-only root FS for security (OpenAI Codex Finding #4)
            security_opt=["no-new-privileges"],  # Prevent privilege escalation
            cap_drop=["ALL"],  # Drop all capabilities
            pids_limit=self.limits.max_processes,
            # Tmpfs for writable directories (ephemeral, in-memory)
            # Python needs /tmp and /var/tmp for tempfile module
            tmpfs={  # nosec B108 - tmpfs is ephemeral in-memory, not persistent storage
                "/tmp": f"size={self.limits.disk_quota_mb}m,uid=65534,gid=65534",  # nosec B108 - nobody user
                "/var/tmp": f"size={self.limits.disk_quota_mb}m,uid=65534,gid=65534",  # nosec B108 - nobody user
            },
        )

        return container

    except Exception as e:
        logger.error(f"Failed to create Docker container: {e}", exc_info=True)
        msg = f"Failed to create Docker container: {e}"
        # Chain explicitly so the Docker error is the direct cause.
        raise SandboxError(msg) from e
def _get_network_mode(self) -> str:
    """
    Get Docker network mode from resource limits.

    Only an explicit "unrestricted" setting grants network access. Every
    other value, including the not-yet-implemented "allowlist" mode and any
    unrecognized mode, fails closed to "none".

    Returns:
        Docker network mode string ("none" or "bridge")
    """
    requested_mode = self.limits.network_mode

    if requested_mode == "none":
        return "none"

    if requested_mode == "unrestricted":
        return "bridge"  # Default Docker network

    if requested_mode == "allowlist":
        # SECURITY FIX (OpenAI Codex Finding #3): Network allowlist mode is NOT fully implemented!
        # FAIL CLOSED: Always return "none" until proper allowlist filtering is implemented.
        #
        # For proper security implementation, this would require:
        # 1. Docker network policies or firewall rules (iptables/nftables)
        # 2. DNS filtering to resolve allowed domains to IPs
        # 3. Egress filtering to block unlisted destinations
        #
        # Until implemented, we fail closed (deny all network access) rather than
        # fall back to unrestricted bridge network, which would create a false sense of security.
        logger.warning(
            "Network allowlist mode requested but not implemented. "
            "Failing closed with network_mode='none' (no network access). "
            f"Requested allowlist domains: {self.limits.allowed_domains}. "
            "To enable network access, use network_mode='unrestricted' explicitly."
        )
    else:
        # Previously an unrecognized mode fell through and returned None,
        # which left the container on Docker's default network. Deny
        # network access instead.
        logger.warning(
            f"Unknown network mode {requested_mode!r}. "
            "Failing closed with network_mode='none' (no network access)."
        )

    return "none"  # Fail closed - deny all network access for security
def _cleanup_container(self, container: Container) -> None:
    """
    Stop a container if it is still running, then remove it.

    Exceptions raised along the way are suppressed or logged as warnings
    rather than propagated to the caller.

    Args:
        container: Container to remove
    """
    try:
        # Phase 1: make sure the container is no longer running.
        try:
            container.reload()  # Refresh container state
            is_running = container.status == "running"
            if is_running:
                container.stop(timeout=1)
        except Exception as stop_error:
            # A missing container needs no further action; for anything
            # else, fall back to a best-effort kill.
            if not isinstance(stop_error, NotFound):
                with contextlib.suppress(Exception):
                    container.kill()

        # Phase 2: delete the container.
        try:
            container.remove(force=True)
            logger.debug(f"Removed container {container.id[:12]}")
        except Exception as remove_error:
            # A container that is already gone is not worth a warning.
            if not isinstance(remove_error, NotFound):
                logger.warning(f"Failed to remove container {container.id[:12]}: {remove_error}")

    except Exception as cleanup_error:
        logger.warning(f"Error during container cleanup: {cleanup_error}")
def __del__(self) -> None:
    """Close the Docker client, if one was created, on garbage collection."""
    try:
        if not hasattr(self, "client"):
            # No client was ever attached to this instance; nothing to close.
            return
        self.client.close()
    except Exception:
        # Errors while closing are deliberately ignored in the finalizer.
        pass