{
  "benchmark": "State of LLM Pricing for Finance — Q2 2026",
  "as_of_date": "2026-04-23",
  "publisher": "AI Fin Hub Research",
  "license": "CC BY 4.0",
  "note": "All figures pulled from vendor pricing pages on 2026-04-23. Vendors may change rates at any time. Refresh before material spend decisions. Null fields indicate the vendor does not publish that figure on its pricing page.",
  "sources": {
    "anthropic": "https://docs.anthropic.com/en/docs/about-claude/pricing",
    "anthropic_models": "https://docs.anthropic.com/en/docs/about-claude/models/overview",
    "openai": "https://developers.openai.com/api/docs/pricing",
    "google": "https://ai.google.dev/pricing",
    "google_vertex_flash": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash",
    "google_vertex_pro": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro",
    "google_vertex_flash_lite": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-lite"
  },
  "models": [
    {
      "provider": "Anthropic",
      "model_name": "Claude Haiku",
      "model_version": "4.5",
      "input_usd_per_1m_tokens": 1.00,
      "output_usd_per_1m_tokens": 5.00,
      "cache_read_usd_per_1m_tokens": 0.10,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 1.25,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 2.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 200000,
      "max_output_tokens": 64000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Extended thinking supported; adaptive thinking not supported.",
      "source_url": "https://docs.anthropic.com/en/docs/about-claude/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Sonnet",
      "model_version": "4.6",
      "input_usd_per_1m_tokens": 3.00,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.30,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 3.75,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 6.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 64000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Extended and adaptive thinking supported; full 1M context at standard pricing.",
      "source_url": "https://docs.anthropic.com/en/docs/about-claude/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Opus",
      "model_version": "4.6",
      "input_usd_per_1m_tokens": 5.00,
      "output_usd_per_1m_tokens": 25.00,
      "cache_read_usd_per_1m_tokens": 0.50,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 6.25,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 10.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Extended thinking supported. Fast mode beta at 6x rates available on Opus 4.6 only.",
      "source_url": "https://docs.anthropic.com/en/docs/about-claude/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Anthropic",
      "model_name": "Claude Opus",
      "model_version": "4.7",
      "input_usd_per_1m_tokens": 5.00,
      "output_usd_per_1m_tokens": 25.00,
      "cache_read_usd_per_1m_tokens": 0.50,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": 6.25,
      "cache_write_5m_multiplier": 1.25,
      "cache_write_1h_usd_per_1m_tokens": 10.00,
      "cache_write_1h_multiplier": 2.00,
      "batch_discount_percent": 50,
      "context_window_tokens": 1000000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Adaptive thinking supported; extended thinking not supported. New tokenizer may consume up to 35 percent more tokens than prior models for the same text.",
      "source_url": "https://docs.anthropic.com/en/docs/about-claude/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4",
      "model_version": "current",
      "input_usd_per_1m_tokens": 2.50,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.25,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Prompts exceeding 272K input tokens priced at 2x input and 1.5x output for the full session. Reasoning effort selectable (none, low, medium, high, xhigh).",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4 mini",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.75,
      "output_usd_per_1m_tokens": 4.50,
      "cache_read_usd_per_1m_tokens": 0.075,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 400000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Reasoning token support; 400K context window.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5.4 nano",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.20,
      "output_usd_per_1m_tokens": 1.25,
      "cache_read_usd_per_1m_tokens": 0.02,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 400000,
      "max_output_tokens": 128000,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Reasoning token support; 400K context window.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "GPT-5",
      "model_version": "legacy",
      "input_usd_per_1m_tokens": null,
      "output_usd_per_1m_tokens": null,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": null,
      "max_output_tokens": null,
      "supports_thinking": null,
      "thinking_token_rate": null,
      "notes": "Not listed on the main OpenAI pricing page as of 2026-04-23. Superseded by GPT-5.4 family. Historical rates ($1.25 input, $10.00 output) are not republished.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "o3",
      "model_version": "legacy",
      "input_usd_per_1m_tokens": null,
      "output_usd_per_1m_tokens": null,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": null,
      "max_output_tokens": null,
      "supports_thinking": null,
      "thinking_token_rate": null,
      "notes": "Base o3 not listed on the main OpenAI pricing page as of 2026-04-23. Only the o3-deep-research variant appears in the specialized (batch-only) section at $5.00 input / $20.00 output per 1M tokens.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "OpenAI",
      "model_name": "o4-mini",
      "model_version": "legacy",
      "input_usd_per_1m_tokens": null,
      "output_usd_per_1m_tokens": null,
      "cache_read_usd_per_1m_tokens": null,
      "cache_read_multiplier": null,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": null,
      "context_window_tokens": null,
      "max_output_tokens": null,
      "supports_thinking": null,
      "thinking_token_rate": null,
      "notes": "Base o4-mini not listed on the main OpenAI pricing page as of 2026-04-23. The o4-mini-deep-research variant appears in the specialized (batch-only) section at $1.00 input / $4.00 output per 1M tokens.",
      "source_url": "https://developers.openai.com/api/docs/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Flash",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.30,
      "output_usd_per_1m_tokens": 2.50,
      "cache_read_usd_per_1m_tokens": 0.03,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65535,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Text / image / video input at $0.30; audio input at $1.00. Context cache storage billed separately at $1.00 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Pro (prompts <= 200K)",
      "model_version": "current",
      "input_usd_per_1m_tokens": 1.25,
      "output_usd_per_1m_tokens": 10.00,
      "cache_read_usd_per_1m_tokens": 0.125,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65535,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Applies to prompts with input tokens less than or equal to 200,000. Context cache storage billed at $4.50 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Pro (prompts > 200K)",
      "model_version": "current",
      "input_usd_per_1m_tokens": 2.50,
      "output_usd_per_1m_tokens": 15.00,
      "cache_read_usd_per_1m_tokens": 0.25,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65535,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Applies to prompts with more than 200,000 input tokens.",
      "source_url": "https://ai.google.dev/pricing",
      "as_of_date": "2026-04-23"
    },
    {
      "provider": "Google",
      "model_name": "Gemini 2.5 Flash-Lite",
      "model_version": "current",
      "input_usd_per_1m_tokens": 0.10,
      "output_usd_per_1m_tokens": 0.40,
      "cache_read_usd_per_1m_tokens": 0.01,
      "cache_read_multiplier": 0.10,
      "cache_write_5m_usd_per_1m_tokens": null,
      "cache_write_5m_multiplier": null,
      "cache_write_1h_usd_per_1m_tokens": null,
      "cache_write_1h_multiplier": null,
      "batch_discount_percent": 50,
      "context_window_tokens": 1048576,
      "max_output_tokens": 65535,
      "supports_thinking": true,
      "thinking_token_rate": "billed_at_output_rate",
      "notes": "Text / image / video input at $0.10; audio input at $0.30. Context cache storage billed at $1.00 per 1M tokens per hour.",
      "source_url": "https://ai.google.dev/pricing",
      "as_of_date": "2026-04-23"
    }
  ]
}
