Coverage for src / mcp_server_langgraph / monitoring / pricing.py: 71%

14 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 00:43 +0000

"""
LLM Pricing Table and Cost Calculation

Provides pricing data for all supported LLM providers and accurate cost calculation
based on token usage.

Pricing is updated monthly based on provider pricing pages:
- Anthropic: https://www.anthropic.com/pricing
- OpenAI: https://openai.com/pricing
- Google: https://ai.google.dev/pricing

Example:
    >>> from mcp_server_langgraph.monitoring.pricing import calculate_cost
    >>> cost = calculate_cost(
    ...     model="claude-sonnet-4-5-20250929",
    ...     provider="anthropic",
    ...     prompt_tokens=1000,
    ...     completion_tokens=500
    ... )
    >>> print(f"${cost}")
    $0.0105
"""

# Decimal is used throughout so cost arithmetic is exact (no float rounding
# drift when summing many small per-call costs).
from decimal import Decimal

25 

# ==============================================================================
# Pricing Table - Updated: 2025-12-01
# ==============================================================================
# Structure: provider -> model -> {"input": Decimal, "output": Decimal}.
# All prices are USD per 1K tokens (the per-1M list price divided by 1000),
# matching the per-1K math in calculate_cost().

PRICING_TABLE: dict[str, dict[str, dict[str, Decimal]]] = {
    "anthropic": {
        # Latest Claude 4.5 models (2025)
        "claude-sonnet-4-5-20250929": {
            "input": Decimal("0.003"),  # $3.00 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "claude-haiku-4-5-20251001": {
            "input": Decimal("0.001"),  # $1.00 per 1M tokens
            "output": Decimal("0.005"),  # $5.00 per 1M tokens
        },
        "claude-opus-4-5-20251101": {
            "input": Decimal("0.005"),  # $5.00 per 1M tokens (66% reduction from Opus 4.1)
            "output": Decimal("0.025"),  # $25.00 per 1M tokens
        },
        "claude-opus-4-1-20250805": {
            "input": Decimal("0.015"),  # $15.00 per 1M tokens
            "output": Decimal("0.075"),  # $75.00 per 1M tokens
        },
    },
    "vertex_ai": {
        # Anthropic Claude models via Vertex AI (same pricing as direct API).
        # NOTE: Vertex AI model IDs use "@" before the version date, not "-".
        "claude-sonnet-4-5@20250929": {
            "input": Decimal("0.003"),  # $3.00 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "claude-haiku-4-5@20251001": {
            "input": Decimal("0.001"),  # $1.00 per 1M tokens
            "output": Decimal("0.005"),  # $5.00 per 1M tokens
        },
        "claude-opus-4-5@20251101": {
            "input": Decimal("0.005"),  # $5.00 per 1M tokens
            "output": Decimal("0.025"),  # $25.00 per 1M tokens
        },
        "claude-opus-4-1@20250805": {
            "input": Decimal("0.015"),  # $15.00 per 1M tokens
            "output": Decimal("0.075"),  # $75.00 per 1M tokens
        },
        # Google Gemini models via Vertex AI
        "gemini-3-pro-preview": {
            "input": Decimal("0.002"),  # $2.00 per 1M tokens
            "output": Decimal("0.012"),  # $12.00 per 1M tokens
        },
        "gemini-2.5-flash": {
            "input": Decimal("0.0003"),  # $0.30 per 1M tokens
            "output": Decimal("0.0025"),  # $2.50 per 1M tokens
        },
        "gemini-2.5-flash-lite": {
            "input": Decimal("0.0001"),  # $0.10 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
        "gemini-2.5-pro": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens (≤200K)
            "output": Decimal("0.010"),  # $10.00 per 1M tokens (≤200K)
        },
    },
    "openai": {
        # Latest GPT-5.x models (Nov 2025)
        "gpt-5.1": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens
            "output": Decimal("0.01"),  # $10.00 per 1M tokens
        },
        "gpt-5.1-thinking": {
            "input": Decimal("0.0025"),  # $2.50 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "gpt-5-mini": {
            "input": Decimal("0.00025"),  # $0.25 per 1M tokens
            "output": Decimal("0.002"),  # $2.00 per 1M tokens
        },
        "gpt-5-nano": {
            "input": Decimal("0.00005"),  # $0.05 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
    },
    "google": {
        # Latest Gemini 3.0 (Nov 2025)
        "gemini-3-pro-preview": {
            "input": Decimal("0.002"),  # $2.00 per 1M tokens
            "output": Decimal("0.012"),  # $12.00 per 1M tokens
        },
        # Gemini 2.5 (current stable)
        "gemini-2.5-pro": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens (≤200K)
            "output": Decimal("0.010"),  # $10.00 per 1M tokens (≤200K)
        },
        "gemini-2.5-flash": {
            "input": Decimal("0.0003"),  # $0.30 per 1M tokens
            "output": Decimal("0.0025"),  # $2.50 per 1M tokens
        },
        "gemini-2.5-flash-lite": {
            "input": Decimal("0.0001"),  # $0.10 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
    },
}

126 

127 

def calculate_cost(
    model: str,
    provider: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> Decimal:
    """
    Calculate cost for an LLM API call based on token usage.

    Args:
        model: Model name (e.g., "claude-sonnet-4-5-20250929")
        provider: Provider name ("anthropic", "vertex_ai", "openai", "google")
        prompt_tokens: Number of input/prompt tokens
        completion_tokens: Number of output/completion tokens

    Returns:
        Total cost in USD as Decimal

    Raises:
        KeyError: If provider or model not found in pricing table. The
            message names the unknown key and lists the valid choices.

    Example:
        >>> cost = calculate_cost(
        ...     model="claude-sonnet-4-5-20250929",
        ...     provider="anthropic",
        ...     prompt_tokens=1000,
        ...     completion_tokens=500
        ... )
        >>> cost
        Decimal('0.0105')
    """
    # Look up pricing with explicit, contextual errors instead of a bare
    # KeyError that only carries the missing key string. The exception type
    # stays KeyError, so existing callers' except-clauses still match.
    try:
        provider_models = PRICING_TABLE[provider]
    except KeyError:
        raise KeyError(
            f"Unknown provider {provider!r}; expected one of {sorted(PRICING_TABLE)}"
        ) from None
    try:
        pricing = provider_models[model]
    except KeyError:
        raise KeyError(
            f"Unknown model {model!r} for provider {provider!r}; "
            f"expected one of {sorted(provider_models)}"
        ) from None

    # Table prices are USD per 1K tokens, so scale token counts by 1000.
    input_cost = (Decimal(prompt_tokens) / 1000) * pricing["input"]
    output_cost = (Decimal(completion_tokens) / 1000) * pricing["output"]

    return input_cost + output_cost

170 

171 

def get_all_models() -> dict[str, list[str]]:
    """
    Get all available models grouped by provider.

    Returns:
        Dict mapping provider names to lists of model names

    Example:
        >>> models = get_all_models()
        >>> models["anthropic"]
        ['claude-sonnet-4-5-20250929', 'claude-haiku-4-5-20251001', ...]
    """
    catalog: dict[str, list[str]] = {}
    for provider_name, model_table in PRICING_TABLE.items():
        # Iterating a dict yields its keys in insertion order, so the
        # model lists mirror the pricing table's ordering.
        catalog[provider_name] = list(model_table)
    return catalog

185 

186 

def get_model_pricing(provider: str, model: str) -> dict[str, Decimal]:
    """
    Get pricing information for a specific model.

    Args:
        provider: Provider name
        model: Model name

    Returns:
        Dict with "input" and "output" pricing per 1K tokens

    Raises:
        KeyError: If provider or model not found

    Example:
        >>> pricing = get_model_pricing("anthropic", "claude-sonnet-4-5-20250929")
        >>> pricing["input"]
        Decimal('0.003')
    """
    entry = PRICING_TABLE[provider][model]
    # Hand back a fresh dict so callers cannot mutate the shared table.
    return dict(entry)

207 

208 

def estimate_cost_from_text(
    model: str,
    provider: str,
    input_text: str,
    estimated_output_tokens: int = 500,
    chars_per_token: int = 4,
) -> Decimal:
    """
    Estimate cost from input text before making API call.

    Uses a rough approximation of ~4 characters per token. Actual token
    counts depend on the provider's tokenizer, so treat the result as an
    estimate only.

    Args:
        model: Model name
        provider: Provider name
        input_text: Input text to send to model
        estimated_output_tokens: Expected output length (default: 500)
        chars_per_token: Character to token ratio (default: 4); must be >= 1

    Returns:
        Estimated cost in USD as Decimal

    Raises:
        ValueError: If chars_per_token is less than 1
        KeyError: If provider or model not found in pricing table

    Example:
        >>> text = "Explain quantum computing in simple terms."
        >>> cost = estimate_cost_from_text(
        ...     model="claude-sonnet-4-5-20250929",
        ...     provider="anthropic",
        ...     input_text=text,
        ...     estimated_output_tokens=300
        ... )
    """
    # Guard the ratio explicitly: 0 would surface as an opaque
    # ZeroDivisionError and negative values would produce a negative
    # (meaningless) token count.
    if chars_per_token < 1:
        raise ValueError(f"chars_per_token must be >= 1, got {chars_per_token}")

    # Rough token estimate from character count (floor division).
    estimated_input_tokens = len(input_text) // chars_per_token

    return calculate_cost(
        model=model,
        provider=provider,
        prompt_tokens=estimated_input_tokens,
        completion_tokens=estimated_output_tokens,
    )