Coverage for src / mcp_server_langgraph / monitoring / pricing.py: 71%

14 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 00:43 +0000

"""
LLM Pricing Table and Cost Calculation

Provides pricing data for all supported LLM providers and accurate cost calculation
based on token usage.

Pricing is updated monthly based on provider pricing pages:
- Anthropic: https://www.anthropic.com/pricing
- OpenAI: https://openai.com/pricing
- Google: https://ai.google.dev/pricing

Example:
    >>> from mcp_server_langgraph.monitoring.pricing import calculate_cost
    >>> cost = calculate_cost(
    ...     model="claude-sonnet-4-5-20250929",
    ...     provider="anthropic",
    ...     prompt_tokens=1000,
    ...     completion_tokens=500
    ... )
    >>> print(f"${cost}")
    $0.0105
"""

# Decimal is used throughout so cost arithmetic is exact (no float rounding
# drift when summing many small per-call costs).
from decimal import Decimal

25 

# ==============================================================================
# Pricing Table - Updated: 2025-12-01
# ==============================================================================
# Structure: provider -> model -> {"input": Decimal, "output": Decimal}.
# All prices are USD per 1K tokens (the per-1M list price divided by 1000),
# matching the per-1K math in calculate_cost().

PRICING_TABLE: dict[str, dict[str, dict[str, Decimal]]] = {
    "anthropic": {
        # Latest Claude 4.5 models (2025)
        "claude-sonnet-4-5-20250929": {
            "input": Decimal("0.003"),  # $3.00 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "claude-haiku-4-5-20251001": {
            "input": Decimal("0.001"),  # $1.00 per 1M tokens
            "output": Decimal("0.005"),  # $5.00 per 1M tokens
        },
        "claude-opus-4-5-20251101": {
            "input": Decimal("0.005"),  # $5.00 per 1M tokens (66% reduction from Opus 4.1)
            "output": Decimal("0.025"),  # $25.00 per 1M tokens
        },
        "claude-opus-4-1-20250805": {
            "input": Decimal("0.015"),  # $15.00 per 1M tokens
            "output": Decimal("0.075"),  # $75.00 per 1M tokens
        },
    },
    "vertex_ai": {
        # Anthropic Claude models via Vertex AI (same pricing as direct API).
        # NOTE: Vertex AI model IDs use "@" before the version date, not "-".
        "claude-sonnet-4-5@20250929": {
            "input": Decimal("0.003"),  # $3.00 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "claude-haiku-4-5@20251001": {
            "input": Decimal("0.001"),  # $1.00 per 1M tokens
            "output": Decimal("0.005"),  # $5.00 per 1M tokens
        },
        "claude-opus-4-5@20251101": {
            "input": Decimal("0.005"),  # $5.00 per 1M tokens
            "output": Decimal("0.025"),  # $25.00 per 1M tokens
        },
        "claude-opus-4-1@20250805": {
            "input": Decimal("0.015"),  # $15.00 per 1M tokens
            "output": Decimal("0.075"),  # $75.00 per 1M tokens
        },
        # Google Gemini models via Vertex AI
        "gemini-3-pro-preview": {
            "input": Decimal("0.002"),  # $2.00 per 1M tokens
            "output": Decimal("0.012"),  # $12.00 per 1M tokens
        },
        "gemini-2.5-flash": {
            "input": Decimal("0.0003"),  # $0.30 per 1M tokens
            "output": Decimal("0.0025"),  # $2.50 per 1M tokens
        },
        "gemini-2.5-flash-lite": {
            "input": Decimal("0.0001"),  # $0.10 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
        "gemini-2.5-pro": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens (≤200K)
            "output": Decimal("0.010"),  # $10.00 per 1M tokens (≤200K)
        },
    },
    "openai": {
        # Latest GPT-5.x models (Nov 2025)
        "gpt-5.1": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens
            "output": Decimal("0.01"),  # $10.00 per 1M tokens
        },
        "gpt-5.1-thinking": {
            "input": Decimal("0.0025"),  # $2.50 per 1M tokens
            "output": Decimal("0.015"),  # $15.00 per 1M tokens
        },
        "gpt-5-mini": {
            "input": Decimal("0.00025"),  # $0.25 per 1M tokens
            "output": Decimal("0.002"),  # $2.00 per 1M tokens
        },
        "gpt-5-nano": {
            "input": Decimal("0.00005"),  # $0.05 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
    },
    "google": {
        # Latest Gemini 3.0 (Nov 2025)
        "gemini-3-pro-preview": {
            "input": Decimal("0.002"),  # $2.00 per 1M tokens
            "output": Decimal("0.012"),  # $12.00 per 1M tokens
        },
        # Gemini 2.5 (current stable)
        "gemini-2.5-pro": {
            "input": Decimal("0.00125"),  # $1.25 per 1M tokens (≤200K)
            "output": Decimal("0.010"),  # $10.00 per 1M tokens (≤200K)
        },
        "gemini-2.5-flash": {
            "input": Decimal("0.0003"),  # $0.30 per 1M tokens
            "output": Decimal("0.0025"),  # $2.50 per 1M tokens
        },
        "gemini-2.5-flash-lite": {
            "input": Decimal("0.0001"),  # $0.10 per 1M tokens
            "output": Decimal("0.0004"),  # $0.40 per 1M tokens
        },
    },
}

126 

127 

def calculate_cost(
    model: str,
    provider: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> Decimal:
    """
    Calculate cost for an LLM API call based on token usage.

    Args:
        model: Model name (e.g., "claude-sonnet-4-5-20250929")
        provider: Provider name ("anthropic", "vertex_ai", "openai", "google")
        prompt_tokens: Number of input/prompt tokens
        completion_tokens: Number of output/completion tokens

    Returns:
        Total cost in USD as Decimal

    Raises:
        KeyError: If provider or model not found in pricing table. The
            message names the unknown key and lists the valid choices.

    Example:
        >>> cost = calculate_cost(
        ...     model="claude-sonnet-4-5-20250929",
        ...     provider="anthropic",
        ...     prompt_tokens=1000,
        ...     completion_tokens=500
        ... )
        >>> cost
        Decimal('0.0105')
    """
    # Look up pricing with explicit, contextual errors instead of a bare
    # KeyError that only carries the missing key string. The exception type
    # stays KeyError, so existing callers' except-clauses still match.
    try:
        provider_models = PRICING_TABLE[provider]
    except KeyError:
        raise KeyError(
            f"Unknown provider {provider!r}; expected one of {sorted(PRICING_TABLE)}"
        ) from None
    try:
        pricing = provider_models[model]
    except KeyError:
        raise KeyError(
            f"Unknown model {model!r} for provider {provider!r}; "
            f"expected one of {sorted(provider_models)}"
        ) from None

    # Table prices are USD per 1K tokens, so scale token counts by 1000.
    input_cost = (Decimal(prompt_tokens) / 1000) * pricing["input"]
    output_cost = (Decimal(completion_tokens) / 1000) * pricing["output"]

    return input_cost + output_cost

170 

171 

def get_all_models() -> dict[str, list[str]]:
    """
    Get all available models grouped by provider.

    Returns:
        Dict mapping provider names to lists of model names

    Example:
        >>> models = get_all_models()
        >>> models["anthropic"]
        ['claude-sonnet-4-5-20250929', 'claude-haiku-4-5-20251001', ...]
    """
    catalog: dict[str, list[str]] = {}
    for provider_name, model_table in PRICING_TABLE.items():
        # Iterating a dict yields its keys in insertion order, so the
        # model lists mirror the pricing table's ordering.
        catalog[provider_name] = list(model_table)
    return catalog

185 

186 

def get_model_pricing(provider: str, model: str) -> dict[str, Decimal]:
    """
    Get pricing information for a specific model.

    Args:
        provider: Provider name
        model: Model name

    Returns:
        Dict with "input" and "output" pricing per 1K tokens

    Raises:
        KeyError: If provider or model not found

    Example:
        >>> pricing = get_model_pricing("anthropic", "claude-sonnet-4-5-20250929")
        >>> pricing["input"]
        Decimal('0.003')
    """
    entry = PRICING_TABLE[provider][model]
    # Hand back a fresh dict so callers cannot mutate the shared table.
    return dict(entry)

207 

208 

def estimate_cost_from_text(
    model: str,
    provider: str,
    input_text: str,
    estimated_output_tokens: int = 500,
    chars_per_token: int = 4,
) -> Decimal:
    """
    Estimate cost from input text before making API call.

    Uses a rough approximation of ~4 characters per token. Actual token
    counts depend on the provider's tokenizer, so treat the result as an
    estimate only.

    Args:
        model: Model name
        provider: Provider name
        input_text: Input text to send to model
        estimated_output_tokens: Expected output length (default: 500)
        chars_per_token: Character to token ratio (default: 4); must be >= 1

    Returns:
        Estimated cost in USD as Decimal

    Raises:
        ValueError: If chars_per_token is less than 1
        KeyError: If provider or model not found in pricing table

    Example:
        >>> text = "Explain quantum computing in simple terms."
        >>> cost = estimate_cost_from_text(
        ...     model="claude-sonnet-4-5-20250929",
        ...     provider="anthropic",
        ...     input_text=text,
        ...     estimated_output_tokens=300
        ... )
    """
    # Guard the ratio explicitly: 0 would surface as an opaque
    # ZeroDivisionError and negative values would produce a negative
    # (meaningless) token count.
    if chars_per_token < 1:
        raise ValueError(f"chars_per_token must be >= 1, got {chars_per_token}")

    # Rough token estimate from character count (floor division).
    estimated_input_tokens = len(input_text) // chars_per_token

    return calculate_cost(
        model=model,
        provider=provider,
        prompt_tokens=estimated_input_tokens,
        completion_tokens=estimated_output_tokens,
    )