{
  "benchmark": "State of LLM Pricing for Finance — Q3 2026",
  "as_of_date": "2026-06-21",
  "publisher": "AI Fin Hub Research",
  "license": "CC BY 4.0",
  "note": "All figures pulled from vendor pricing pages on 2026-06-21. Vendors may change rates at any time. Refresh before material spend decisions. Null fields indicate the vendor does not publish that figure on its pricing page. Prices are USD per 1M tokens (text input/output) unless stated.",
  "sources": {
    "anthropic": "https://platform.claude.com/docs/en/about-claude/pricing",
    "anthropic_models": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "openai": "https://developers.openai.com/api/docs/pricing",
    "google": "https://ai.google.dev/gemini-api/docs/pricing",
    "xai": "https://docs.x.ai/developers/models",
    "deepseek": "https://api-docs.deepseek.com/quick_start/pricing",
    "mistral": "https://mistral.ai/pricing"
  },
  "models": [
    {
      "provider": "Anthropic",
      "model_name": "Claude Opus",
      "model_version": "4.8",
      "input_usd_per_1m_tokens": 5.00,
      "output_usd_per_1m_tokens": 25.00,
      "cache_read_usd_per_1m_tokens": 0.50,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 6.25,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 10.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Current Anthropic flagship as of 2026-06-21. Full 1M context at standard rates (no long-context premium). Premium fast-mode research preview at $10/$50. Batch API supports up to 300K output via beta header.",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Fable",
      "model_version": "5",
      "input_usd_per_1m_tokens": 10.00,
      "output_usd_per_1m_tokens": 50.00,
      "cache_read_usd_per_1m_tokens": 1.00,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 12.50,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 20.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Anthropic top-of-stack GA model (GA June 9 2026). Roughly 2x Opus 4.8 per token. 1M context / 128K max output at standard rates.",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Sonnet",
      "model_version": "4.6",
      "input_usd_per_1m_tokens": 3.00,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.30,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 3.75,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 6.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 64000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Full 1M context at standard pricing; no long-context premium.",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Haiku",
      "model_version": "4.5",
      "input_usd_per_1m_tokens": 1.00,
      "output_usd_per_1m_tokens": 5.00,
      "cache_read_usd_per_1m_tokens": 0.10,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 1.25,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 2.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 200000,
      "max_output_tokens": 64000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Cheapest Claude tier; 200K context window.",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.5",
      "model_version": "current",
      "input_usd_per_1m_tokens": 5.00,
      "output_usd_per_1m_tokens": 30.00,
      "cache_read_usd_per_1m_tokens": 0.50,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Current OpenAI flagship. Prompts exceeding 272K input tokens priced at 2x input / 1.5x output ($10/$45) for the full session. Reasoning effort selectable.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.5 (prompts > 272K)",
      "model_version": "current",
      "input_usd_per_1m_tokens": 10.00,
      "output_usd_per_1m_tokens": 45.00,
      "cache_read_usd_per_1m_tokens": 1.00,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Long-context tier: applies to any session whose input exceeds 272,000 tokens, for the full session.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.5 Pro",
      "model_version": "current",
      "input_usd_per_1m_tokens": 30.00,
      "output_usd_per_1m_tokens": 180.00,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Maximum-reasoning variant. No separate >272K tier published for -pro.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4",
      "model_version": "current",
      "input_usd_per_1m_tokens": 2.50,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.25,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Prior-generation flagship still on the price page. Same >272K premium structure ($5/$22.50).",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4 mini",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.75,
      "output_usd_per_1m_tokens": 4.50,
      "cache_read_usd_per_1m_tokens": 0.075,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 400000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Mini/nano tier lives under GPT-5.4 — no GPT-5.5 mini/nano exists as of 2026-06-21.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4 nano",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.20,
      "output_usd_per_1m_tokens": 1.25,
      "cache_read_usd_per_1m_tokens": 0.02,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 400000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Cheapest OpenAI tier; 400K context window.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 3.5 Flash",
      "model_version": "current",
      "input_usd_per_1m_tokens": 1.50,
      "output_usd_per_1m_tokens": 9.00,
      "cache_read_usd_per_1m_tokens": 0.15,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "GA agent-tier Flash (not a budget tier). Context cache storage billed separately at $1.00 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Pro (prompts <= 200K)",
      "model_version": "current",
      "input_usd_per_1m_tokens": 1.25,
      "output_usd_per_1m_tokens": 10.00,
      "cache_read_usd_per_1m_tokens": 0.125,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Applies to prompts with input tokens <= 200,000. Context cache storage billed at $4.50 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Pro (prompts > 200K)",
      "model_version": "current",
      "input_usd_per_1m_tokens": 2.50,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.25,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Applies to prompts with more than 200,000 input tokens.",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Flash",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.30,
      "output_usd_per_1m_tokens": 2.50,
      "cache_read_usd_per_1m_tokens": 0.03,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Text/image/video input at $0.30; audio input priced higher ($1.00). Context cache storage $1.00 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Flash-Lite",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.10,
      "output_usd_per_1m_tokens": 0.40,
      "cache_read_usd_per_1m_tokens": 0.01,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Cheapest tier across all providers on per-token input. Text/image/video input at $0.10; audio input $0.30.",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "xAI",
      "model_name": "Grok 4.3",
      "model_version": "current",
      "input_usd_per_1m_tokens": 1.25,
      "output_usd_per_1m_tokens": 2.50,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": 1000000,
      "max_output_tokens": null,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Current xAI flagship (launched 2026-04-30). Aliases grok-4.3-latest / grok-latest. Flat 1M-context pricing — the older context-tier premium is gone. Notably low output rate. Max output, batch discount, and cached-input rate not published. Live-search and tool calls billed separately per 1,000 calls.",
      "source_url": "https://docs.x.ai/developers/models",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "DeepSeek",
      "model_name": "DeepSeek V4 Flash",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.14,
      "output_usd_per_1m_tokens": 0.28,
      "cache_read_usd_per_1m_tokens": 0.0028,
      "cache_read_multiplier": 0.02,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": 1000000,
      "max_output_tokens": 384000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Input split into cache-miss ($0.14) vs cache-hit ($0.0028). Off-peak discount not documented for V4 (the legacy V3/R1 off-peak window is not republished). deepseek-chat / deepseek-reasoner names deprecate 2026-07-24 and alias V4 Flash modes.",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "DeepSeek",
      "model_name": "DeepSeek V4 Pro",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.435,
      "output_usd_per_1m_tokens": 0.87,
      "cache_read_usd_per_1m_tokens": 0.003625,
      "cache_read_multiplier": 0.0083,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": 1000000,
      "max_output_tokens": 384000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Standard list price (the launch discount was made permanent ~2026-05-22). Cache-miss input $0.435, cache-hit $0.003625.",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Mistral",
      "model_name": "Mistral Medium 3.5",
      "model_version": "current",
      "input_usd_per_1m_tokens": 1.50,
      "output_usd_per_1m_tokens": 7.50,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 128000,
      "max_output_tokens": null,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Current Mistral premier flagship. Max output not published.",
      "source_url": "https://mistral.ai/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Mistral",
      "model_name": "Mistral Large 3",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.50,
      "output_usd_per_1m_tokens": 1.50,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 128000,
      "max_output_tokens": null,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Open-weight large tier; priced below Medium 3.5.",
      "source_url": "https://mistral.ai/pricing",
      "as_of_date": "2026-06-21"
    },
    {
      "provider": "Mistral",
      "model_name": "Mistral Small 4",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.15,
      "output_usd_per_1m_tokens": 0.60,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 128000,
      "max_output_tokens": null,
      "supports_thinking": false,
      "thinking_token_rate": null,
      "notes": "Cheapest Mistral tier. Verified $0.15/$0.60 (a $0.10/$0.30 aggregator reading is a JS-render parse artifact).",
      "source_url": "https://mistral.ai/pricing",
      "as_of_date": "2026-06-21"
    }
  ]
}
