Coverage for src / mcp_server_langgraph / monitoring / pricing.py: 71%
14 statements
« prev ^ index » next — coverage.py v7.12.0, created at 2025-12-03 00:43 +0000
1"""
2LLM Pricing Table and Cost Calculation
4Provides pricing data for all supported LLM providers and accurate cost calculation
5based on token usage.
7Pricing is updated monthly based on provider pricing pages:
8- Anthropic: https://www.anthropic.com/pricing
9- OpenAI: https://openai.com/pricing
10- Google: https://ai.google.dev/pricing
12Example:
13 >>> from mcp_server_langgraph.monitoring.pricing import calculate_cost
14 >>> cost = calculate_cost(
15 ... model="claude-sonnet-4-5-20250929",
16 ... provider="anthropic",
17 ... prompt_tokens=1000,
18 ... completion_tokens=500
19 ... )
20 >>> print(f"${cost}")
21 $0.0105
22"""
24from decimal import Decimal
26# ==============================================================================
27# Pricing Table - Updated: 2025-12-01
28# ==============================================================================
30PRICING_TABLE: dict[str, dict[str, dict[str, Decimal]]] = {
31 "anthropic": {
32 # Latest Claude 4.5 models (2025)
33 "claude-sonnet-4-5-20250929": {
34 "input": Decimal("0.003"), # $3.00 per 1M tokens
35 "output": Decimal("0.015"), # $15.00 per 1M tokens
36 },
37 "claude-haiku-4-5-20251001": {
38 "input": Decimal("0.001"), # $1.00 per 1M tokens
39 "output": Decimal("0.005"), # $5.00 per 1M tokens
40 },
41 "claude-opus-4-5-20251101": {
42 "input": Decimal("0.005"), # $5.00 per 1M tokens (66% reduction from Opus 4.1)
43 "output": Decimal("0.025"), # $25.00 per 1M tokens
44 },
45 "claude-opus-4-1-20250805": {
46 "input": Decimal("0.015"), # $15.00 per 1M tokens
47 "output": Decimal("0.075"), # $75.00 per 1M tokens
48 },
49 },
50 "vertex_ai": {
51 # Anthropic Claude models via Vertex AI (same pricing as direct API)
52 "claude-sonnet-4-5@20250929": {
53 "input": Decimal("0.003"), # $3.00 per 1M tokens
54 "output": Decimal("0.015"), # $15.00 per 1M tokens
55 },
56 "claude-haiku-4-5@20251001": {
57 "input": Decimal("0.001"), # $1.00 per 1M tokens
58 "output": Decimal("0.005"), # $5.00 per 1M tokens
59 },
60 "claude-opus-4-5@20251101": {
61 "input": Decimal("0.005"), # $5.00 per 1M tokens
62 "output": Decimal("0.025"), # $25.00 per 1M tokens
63 },
64 "claude-opus-4-1@20250805": {
65 "input": Decimal("0.015"), # $15.00 per 1M tokens
66 "output": Decimal("0.075"), # $75.00 per 1M tokens
67 },
68 # Google Gemini models via Vertex AI
69 "gemini-3-pro-preview": {
70 "input": Decimal("0.002"), # $2.00 per 1M tokens
71 "output": Decimal("0.012"), # $12.00 per 1M tokens
72 },
73 "gemini-2.5-flash": {
74 "input": Decimal("0.0003"), # $0.30 per 1M tokens
75 "output": Decimal("0.0025"), # $2.50 per 1M tokens
76 },
77 "gemini-2.5-flash-lite": {
78 "input": Decimal("0.0001"), # $0.10 per 1M tokens
79 "output": Decimal("0.0004"), # $0.40 per 1M tokens
80 },
81 "gemini-2.5-pro": {
82 "input": Decimal("0.00125"), # $1.25 per 1M tokens (≤200K)
83 "output": Decimal("0.010"), # $10.00 per 1M tokens (≤200K)
84 },
85 },
86 "openai": {
87 # Latest GPT-5.x models (Nov 2025)
88 "gpt-5.1": {
89 "input": Decimal("0.00125"), # $1.25 per 1M tokens
90 "output": Decimal("0.01"), # $10.00 per 1M tokens
91 },
92 "gpt-5.1-thinking": {
93 "input": Decimal("0.0025"), # $2.50 per 1M tokens
94 "output": Decimal("0.015"), # $15.00 per 1M tokens
95 },
96 "gpt-5-mini": {
97 "input": Decimal("0.00025"), # $0.25 per 1M tokens
98 "output": Decimal("0.002"), # $2.00 per 1M tokens
99 },
100 "gpt-5-nano": {
101 "input": Decimal("0.00005"), # $0.05 per 1M tokens
102 "output": Decimal("0.0004"), # $0.40 per 1M tokens
103 },
104 },
105 "google": {
106 # Latest Gemini 3.0 (Nov 2025)
107 "gemini-3-pro-preview": {
108 "input": Decimal("0.002"), # $2.00 per 1M tokens
109 "output": Decimal("0.012"), # $12.00 per 1M tokens
110 },
111 # Gemini 2.5 (current stable)
112 "gemini-2.5-pro": {
113 "input": Decimal("0.00125"), # $1.25 per 1M tokens (≤200K)
114 "output": Decimal("0.010"), # $10.00 per 1M tokens (≤200K)
115 },
116 "gemini-2.5-flash": {
117 "input": Decimal("0.0003"), # $0.30 per 1M tokens
118 "output": Decimal("0.0025"), # $2.50 per 1M tokens
119 },
120 "gemini-2.5-flash-lite": {
121 "input": Decimal("0.0001"), # $0.10 per 1M tokens
122 "output": Decimal("0.0004"), # $0.40 per 1M tokens
123 },
124 },
125}
128def calculate_cost(
129 model: str,
130 provider: str,
131 prompt_tokens: int,
132 completion_tokens: int,
133) -> Decimal:
134 """
135 Calculate cost for an LLM API call based on token usage.
137 Args:
138 model: Model name (e.g., "claude-sonnet-4-5-20250929")
139 provider: Provider name ("anthropic", "openai", "google")
140 prompt_tokens: Number of input/prompt tokens
141 completion_tokens: Number of output/completion tokens
143 Returns:
144 Total cost in USD as Decimal
146 Raises:
147 KeyError: If provider or model not found in pricing table
149 Example:
150 >>> cost = calculate_cost(
151 ... model="claude-sonnet-4-5-20250929",
152 ... provider="anthropic",
153 ... prompt_tokens=1000,
154 ... completion_tokens=500
155 ... )
156 >>> cost
157 Decimal('0.0105')
158 """
159 # Get pricing for model
160 pricing = PRICING_TABLE[provider][model]
162 # Calculate input cost (per 1K tokens)
163 input_cost = (Decimal(prompt_tokens) / 1000) * pricing["input"]
165 # Calculate output cost (per 1K tokens)
166 output_cost = (Decimal(completion_tokens) / 1000) * pricing["output"]
168 # Return total cost
169 return input_cost + output_cost
172def get_all_models() -> dict[str, list[str]]:
173 """
174 Get all available models grouped by provider.
176 Returns:
177 Dict mapping provider names to lists of model names
179 Example:
180 >>> models = get_all_models()
181 >>> models["anthropic"]
182 ['claude-sonnet-4-5-20250929', 'claude-haiku-4-5-20251001', ...]
183 """
184 return {provider: list(models.keys()) for provider, models in PRICING_TABLE.items()}
187def get_model_pricing(provider: str, model: str) -> dict[str, Decimal]:
188 """
189 Get pricing information for a specific model.
191 Args:
192 provider: Provider name
193 model: Model name
195 Returns:
196 Dict with "input" and "output" pricing per 1K tokens
198 Raises:
199 KeyError: If provider or model not found
201 Example:
202 >>> pricing = get_model_pricing("anthropic", "claude-sonnet-4-5-20250929")
203 >>> pricing["input"]
204 Decimal('0.003')
205 """
206 return PRICING_TABLE[provider][model].copy()
209def estimate_cost_from_text(
210 model: str,
211 provider: str,
212 input_text: str,
213 estimated_output_tokens: int = 500,
214 chars_per_token: int = 4,
215) -> Decimal:
216 """
217 Estimate cost from input text before making API call.
219 Uses rough approximation of ~4 characters per token.
221 Args:
222 model: Model name
223 provider: Provider name
224 input_text: Input text to send to model
225 estimated_output_tokens: Expected output length (default: 500)
226 chars_per_token: Character to token ratio (default: 4)
228 Returns:
229 Estimated cost in USD
231 Example:
232 >>> text = "Explain quantum computing in simple terms."
233 >>> cost = estimate_cost_from_text(
234 ... model="claude-sonnet-4-5-20250929",
235 ... provider="anthropic",
236 ... input_text=text,
237 ... estimated_output_tokens=300
238 ... )
239 """
240 # Estimate input tokens
241 estimated_input_tokens = len(input_text) // chars_per_token
243 # Calculate cost
244 return calculate_cost(
245 model=model,
246 provider=provider,
247 prompt_tokens=estimated_input_tokens,
248 completion_tokens=estimated_output_tokens,
249 )