{
  "asOf": "2026-06-28",
  "rows": [
    {
      "name": "OpenAI",
      "slug": "openai",
      "category": "first-party-lab",
      "region": "US",
      "summary": "First-party model lab; the default frontier API for GPT-5 family text, image, audio and embeddings.",
      "offering": "Frontier proprietary API (GPT-5.5, GPT-5.4/Mini/Nano, GPT-5.x Pro, GPT-5.3 Codex), plus Batch and Realtime APIs.",
      "pricingModel": "per-token",
      "representativePricing": "GPT-5.5 $5/$30, GPT-5.4 $2.50/$15, Nano $0.20/$1.25 per Mtok. Batch 50% off; cached input up to 90% off.",
      "standout": "The default frontier benchmark. Note: winding down its fine-tuning platform (closed to new users as of May 2026).",
      "flagshipModel": "GPT-5.5",
      "flagshipOutputPerMtok": 30,
      "openAICompatible": true,
      "freeTier": "$5 in free credits for new accounts (expire in 3 months).",
      "homeUrl": "https://openai.com",
      "pricingUrl": "https://developers.openai.com/api/docs/pricing",
      "docsUrl": "https://platform.openai.com",
      "sourceUrl": "https://developers.openai.com/api/docs/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Anthropic",
      "slug": "anthropic",
      "category": "first-party-lab",
      "region": "US",
      "summary": "First-party model lab focused on safety, coding and agentic work; the Claude family.",
      "offering": "Frontier proprietary API: Claude Opus 4.8, Sonnet 5, Haiku 4.5, with Fable 5 and Mythos 5 above Opus. All at 1M-token context.",
      "pricingModel": "per-token",
      "representativePricing": "Opus 4.8 $5/$25, Sonnet 5 $3/$15 (intro $2/$10 through Aug 31 2026), Haiku 4.5 $1/$5 per Mtok. Batch 50% off; prompt caching up to 90% off.",
      "standout": "Output costs 5x input across the line; pricing has held steady across generations. Run-rate revenue passed $30B in early 2026.",
      "flagshipModel": "Claude Opus 4.8",
      "flagshipOutputPerMtok": 25,
      "openAICompatible": false,
      "freeTier": "",
      "homeUrl": "https://anthropic.com",
      "pricingUrl": "https://claude.com/pricing",
      "docsUrl": "https://platform.claude.com/docs",
      "sourceUrl": "https://claude.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "US-only inference adds a 1.1x multiplier."
    },
    {
      "name": "Google",
      "slug": "google",
      "category": "first-party-lab",
      "region": "US",
      "summary": "Hyperscaler and first-party lab; Gemini via the Gemini API, AI Studio and Vertex AI.",
      "offering": "Gemini 3.x API (3.1 Pro, 3.5 Flash, 3.1 Flash-Lite). AI Studio is free for prototyping; Vertex AI adds enterprise SLAs and compliance.",
      "pricingModel": "per-token",
      "representativePricing": "Gemini 3.1 Pro $2/$12 (to 200K ctx), 3.5 Flash $1.50/$9, Flash-Lite $0.25/$1.50 per Mtok. 90% context-caching discount.",
      "standout": "Among the largest production context windows (2M tokens) and one of the cheapest Tier-1 budget models (Flash-Lite). Pro models are paid-only as of April 1, 2026.",
      "flagshipModel": "Gemini 3.1 Pro",
      "flagshipOutputPerMtok": 12,
      "openAICompatible": true,
      "freeTier": "AI Studio free for prototyping; Flash retains a free tier.",
      "homeUrl": "https://ai.google.dev",
      "pricingUrl": "https://ai.google.dev/gemini-api/docs/pricing",
      "docsUrl": "https://cloud.google.com/vertex-ai",
      "sourceUrl": "https://ai.google.dev/gemini-api/docs/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "xAI (Grok)",
      "slug": "xai-grok",
      "category": "first-party-lab",
      "region": "US",
      "summary": "First-party model lab; Grok, the only frontier model with live grounding to X posts.",
      "offering": "Frontier proprietary API: Grok 4.3 (flagship, ~1M ctx), Grok 4.20 (2M ctx long-context), Grok 4.1 Fast (cheap workhorse).",
      "pricingModel": "per-token",
      "representativePricing": "Grok 4.3 $1.25/$2.50, Grok 4.1 Fast $0.20/$0.50 per Mtok. Batch 50% off; cached input ~90% off.",
      "standout": "Only frontier model with live grounding to X posts; aggressive pricing undercuts GPT-5.4. API is independent of X subscriptions.",
      "flagshipModel": "Grok 4.3",
      "flagshipOutputPerMtok": 2.5,
      "openAICompatible": true,
      "freeTier": "Free developer credits via data-sharing program (~$150-175/mo reported).",
      "homeUrl": "https://x.ai",
      "pricingUrl": "https://x.ai/api",
      "docsUrl": "https://docs.x.ai",
      "sourceUrl": "https://x.ai/api",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Mistral",
      "slug": "mistral",
      "category": "first-party-lab",
      "region": "EU",
      "summary": "European (French) first-party lab; open-weight friendly with EU data residency.",
      "offering": "Mistral Large 3, Medium 3.5, Small 3, Codestral (code), Ministral 3B/8B (edge), Pixtral (vision), OCR. Many under Apache 2.0.",
      "pricingModel": "per-token",
      "representativePricing": "Large 2 tier $2/$6, Small 3 $0.10/$0.30, Ministral 3B ~$0.04/$0.04 per Mtok.",
      "standout": "Among the cheapest flagship-tier output pricing, plus EU data residency and genuine open weights for self-hosting.",
      "flagshipModel": "Mistral Large",
      "flagshipOutputPerMtok": 6,
      "openAICompatible": true,
      "freeTier": "Free experimentation tier via la Plateforme.",
      "homeUrl": "https://mistral.ai",
      "pricingUrl": "https://mistral.ai/pricing",
      "docsUrl": "https://console.mistral.ai",
      "sourceUrl": "https://mistral.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "DeepSeek",
      "slug": "deepseek",
      "category": "first-party-lab",
      "region": "China",
      "summary": "Chinese first-party lab (open-weight); among the cheapest frontier-class APIs in the market.",
      "offering": "V4 Flash (cheapest frontier-class API) and V4 Pro. Both 1M ctx, 384K max output. Open weights with 10M+ downloads.",
      "pricingModel": "per-token",
      "representativePricing": "V4 Flash $0.14/$0.28, V4 Pro $1.74/$3.48 per Mtok. Cache hits at 1/10 standard input.",
      "standout": "Roughly 90-95% cheaper than comparable Western models, with open weights. Owned by hedge fund High-Flyer.",
      "flagshipModel": "DeepSeek V4 Pro",
      "flagshipOutputPerMtok": 3.48,
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://deepseek.com",
      "pricingUrl": "https://api-docs.deepseek.com/quick_start/pricing",
      "docsUrl": "https://api-docs.deepseek.com",
      "sourceUrl": "https://api-docs.deepseek.com/quick_start/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Some hosts serve at aggressive quantization. Legacy R1/V3.2 aliases are scheduled to be retired on July 24, 2026."
    },
    {
      "name": "Cohere",
      "slug": "cohere",
      "category": "first-party-lab",
      "region": "US",
      "summary": "Enterprise-focused first-party lab specializing in RAG and retrieval.",
      "offering": "Command (generation: Command A/R+/R/R7B), Embed (vectors), Rerank (neural reranking). Strong on data sovereignty and on-prem.",
      "pricingModel": "per-token",
      "representativePricing": "Command R+ / Command A $2.50/$10, Command R $0.15/$0.60, Command R7B $0.0375/$0.15 per Mtok. Embed v3 $0.10/M.",
      "standout": "Best-in-class Embed plus Rerank stack for retrieval; strong on data sovereignty, VPC and on-prem deployment.",
      "flagshipModel": "Command A",
      "flagshipOutputPerMtok": 10,
      "openAICompatible": false,
      "freeTier": "Free trial key (1,000 calls/month, not for production).",
      "homeUrl": "https://cohere.com",
      "pricingUrl": "https://cohere.com/pricing",
      "docsUrl": "https://docs.cohere.com",
      "sourceUrl": "https://cohere.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "MiniMax",
      "slug": "minimax",
      "category": "first-party-lab",
      "region": "China",
      "summary": "Chinese first-party lab; multimodal across text, voice and Hailuo video.",
      "offering": "M-series: M2.7, M3, plus legacy abab6.5 and MiniMax-01 (1M ctx). A faster highspeed variant at 2x.",
      "pricingModel": "per-token",
      "representativePricing": "M2.7 $0.30/$1.20 per Mtok (official), cache reads $0.06/M.",
      "standout": "Frontier-class coding and agentic quality at ~5-10% of Claude Opus output pricing. M2.7 restricts commercial use (M2 was MIT).",
      "flagshipModel": "MiniMax M2.7",
      "flagshipOutputPerMtok": 1.2,
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://minimax.io",
      "pricingUrl": "https://platform.minimax.io/docs/guides/pricing-paygo",
      "docsUrl": "https://platform.minimax.io/docs",
      "sourceUrl": "https://platform.minimax.io/docs/guides/pricing-paygo",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "StepFun",
      "slug": "stepfun",
      "category": "first-party-lab",
      "region": "China",
      "summary": "Chinese first-party lab; multimodal MoE models, mostly open-weight.",
      "offering": "Step 3.7 Flash (196B/~11B active, 256K ctx, native image and video input), Step 3.5 Flash, Step3.",
      "pricingModel": "per-token",
      "representativePricing": "Step 3.7 Flash $0.20/$1.15, Step 3.5 Flash $0.09/$0.30, Step3 $0.57/$1.42 per Mtok.",
      "standout": "Disproportionately strong agentic benchmark scores relative to its price tier.",
      "flagshipModel": "Step3",
      "flagshipOutputPerMtok": 1.42,
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://stepfun.ai",
      "pricingUrl": "https://platform.stepfun.ai/docs/en/guides/pricing/details",
      "docsUrl": "https://platform.stepfun.ai/docs",
      "sourceUrl": "https://platform.stepfun.ai/docs/en/guides/pricing/details",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Reka AI",
      "slug": "reka-ai",
      "category": "first-party-lab",
      "region": "US",
      "summary": "First-party natively-multimodal lab across text, image, video and audio.",
      "offering": "Reka Core (top reasoning), Flash/Flash 3 (21B), Edge (7B vision-language), Spark. Deployable cloud, on-prem or on-device.",
      "pricingModel": "per-token",
      "representativePricing": "Edge $0.10/$0.10, Flash 3 $0.10/$0.20 per Mtok (via OpenRouter); Core is most expensive.",
      "standout": "Flexible deployment down to edge and device; Reka Edge uses only 64 tokens per image tile for low-latency robotics and AR.",
      "freeTier": "",
      "homeUrl": "https://reka.ai",
      "pricingUrl": "https://docs.reka.ai/pricing",
      "docsUrl": "https://docs.reka.ai",
      "sourceUrl": "https://docs.reka.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Pricing via OpenRouter; official rates may differ modestly."
    },
    {
      "name": "Upstage",
      "slug": "upstage",
      "category": "first-party-lab",
      "region": "Korea",
      "summary": "South Korean first-party lab; also offers document AI.",
      "offering": "Solar Pro 3 (102B total/12B active, 128K ctx, tuned for Korean/English/Japanese), Solar Pro 2, Document Parse/Extract.",
      "pricingModel": "per-token",
      "representativePricing": "Solar Pro 3 ~$0.15/$0.60 per Mtok (via OpenRouter); Document Parse ~$0.01/page. Prices exclude 10% VAT.",
      "standout": "Best positioned for Korean-language, structured-document and instruction-following tasks.",
      "flagshipModel": "Solar Pro 3",
      "flagshipOutputPerMtok": 0.6,
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://upstage.ai",
      "pricingUrl": "https://upstage.ai/pricing/api",
      "docsUrl": "https://console.upstage.ai/docs",
      "sourceUrl": "https://upstage.ai/pricing/api",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Kimi (Moonshot AI)",
      "slug": "kimi-moonshot",
      "category": "first-party-lab",
      "region": "China",
      "summary": "Chinese first-party lab; long-context and agentic specialist, open-weight (modified MIT).",
      "offering": "Kimi K2.6 (1T-param MoE/32B active, 256K ctx, multimodal), K2.5 (cheaper), K2.7-Code (coding). Agent Swarm up to 300 subagents.",
      "pricingModel": "per-token",
      "representativePricing": "K2.6 $0.95/$4.00, K2.5 $0.60/$3.00 per Mtok; cached input $0.10-0.16/M. Batch API 40% off.",
      "standout": "Roughly 8-10x cheaper than Claude Opus at frontier-adjacent quality.",
      "flagshipModel": "Kimi K2.6",
      "flagshipOutputPerMtok": 4,
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://moonshot.ai",
      "pricingUrl": "https://platform.moonshot.ai/docs/pricing",
      "docsUrl": "https://platform.moonshot.ai",
      "sourceUrl": "https://platform.moonshot.ai/docs/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Sarvam (Sarvam AI)",
      "slug": "sarvam-ai",
      "category": "first-party-lab",
      "region": "India",
      "summary": "Indian first-party full-stack sovereign AI lab, IndiaAI Mission-backed.",
      "offering": "Sarvam-30B and Sarvam-105B (MoE, trained on Indian compute, 128K ctx, open-weight), Bulbul (TTS), Saaras (STT), translation (22 languages), OCR.",
      "pricingModel": "per-token",
      "representativePricing": "Chat completion ~Rs 4 input / Rs 16 output per Mtok (105B tier); STT Rs 45/hour. Free credits on signup.",
      "standout": "INR-denominated pricing avoids USD plus GST overhead; data hosted in India; best-in-class Indic-language and OCR performance.",
      "openAICompatible": true,
      "freeTier": "Free credits on signup.",
      "homeUrl": "https://sarvam.ai",
      "pricingUrl": "https://sarvam.ai/api-pricing",
      "docsUrl": "https://docs.sarvam.ai",
      "sourceUrl": "https://sarvam.ai/api-pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Raised a $234M first close of a $300M Series B at a $1.5B valuation (June 15, 2026), led by HCLTech."
    },
    {
      "name": "Inception (Inception Labs)",
      "slug": "inception-labs",
      "category": "first-party-lab",
      "region": "US",
      "summary": "First-party lab pioneering diffusion LLMs (dLLMs) that generate tokens in parallel via denoising.",
      "offering": "Mercury 2 (reasoning dLLM, 128K ctx, >1,000 tok/s on Blackwell), Mercury Coder, Mercury Edit 2. Default in Continue and Zed.",
      "pricingModel": "per-token",
      "representativePricing": "Mercury 2 $0.25/$0.75 per Mtok (cached input $0.025/M).",
      "standout": "First commercially available diffusion LLM, offering ~5-10x faster, cheaper inference than one-token-at-a-time models.",
      "flagshipModel": "Mercury 2",
      "flagshipOutputPerMtok": 0.75,
      "openAICompatible": true,
      "freeTier": "10M free tokens per new account.",
      "homeUrl": "https://inceptionlabs.ai",
      "pricingUrl": "https://inceptionlabs.ai/models",
      "docsUrl": "https://inceptionlabs.ai/models",
      "sourceUrl": "https://inceptionlabs.ai/models",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Stanford spinout, ~$50M raised."
    },
    {
      "name": "Microsoft Azure",
      "slug": "azure",
      "category": "hyperscaler-marketplace",
      "region": "US",
      "summary": "Cloud hyperscaler with an 11,000+ model marketplace (Azure OpenAI / AI Foundry).",
      "offering": "Hosts OpenAI (GPT-5 family, Sora, image), plus DeepSeek, Grok, Llama, Mistral, FLUX, and managed GPU compute.",
      "pricingModel": "mixed",
      "representativePricing": "Token pricing matches OpenAI direct (GPT-5 $1.25/$10 per Mtok). PTUs for sustained load from ~$2,448/mo.",
      "standout": "Enterprise governance, compliance and Azure integration. Real bills often run 15-40% above raw token cost (support, networking, search).",
      "openAICompatible": true,
      "freeTier": "$200 free credit for 30 days.",
      "homeUrl": "https://azure.microsoft.com",
      "pricingUrl": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/",
      "docsUrl": "https://learn.microsoft.com/azure/ai-services/openai/",
      "sourceUrl": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Amazon Bedrock",
      "slug": "amazon-bedrock",
      "category": "hyperscaler-marketplace",
      "region": "US",
      "summary": "Cloud hyperscaler managed model service with a 100+ model marketplace.",
      "offering": "Foundation models from Anthropic, Meta, Mistral, Cohere, AI21, Amazon (Nova/Titan), Stability, and now OpenAI. AgentCore for agents.",
      "pricingModel": "mixed",
      "representativePricing": "Per-token, matching providers (Claude Sonnet 5 $3/$15, Nova Micro $0.035/$0.14 per Mtok). Batch 50% off; caching up to 90% off.",
      "standout": "Deep AWS integration, FedRAMP and HIPAA compliance. Watch hidden costs (OpenSearch Serverless ~$345/mo for Knowledge Bases).",
      "openAICompatible": false,
      "freeTier": "",
      "homeUrl": "https://aws.amazon.com/bedrock",
      "pricingUrl": "https://aws.amazon.com/bedrock/pricing",
      "docsUrl": "https://docs.aws.amazon.com/bedrock/",
      "sourceUrl": "https://aws.amazon.com/bedrock/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Five billing modes: on-demand, batch, provisioned throughput, prompt caching, customization."
    },
    {
      "name": "Snowflake (Cortex)",
      "slug": "snowflake-cortex",
      "category": "hyperscaler-marketplace",
      "region": "US",
      "summary": "AI inference layer inside the Snowflake Data Cloud; data never leaves the perimeter.",
      "offering": "Pre-integrated Arctic, Llama, Mistral, Reka, Google, plus OpenAI, Anthropic, DeepSeek. AISQL, Cortex Search, Analyst and Agents.",
      "pricingModel": "consumption",
      "representativePricing": "Consumption and credit-based, token-metered, roughly $0.12-5.10 per Mtok depending on model; warehouse compute billed separately.",
      "standout": "AI runs where the data lives, no egress; Snowflake does not train on customer data.",
      "openAICompatible": false,
      "freeTier": "No dedicated free tier (trial credits only).",
      "homeUrl": "https://www.snowflake.com/en/product/features/cortex/",
      "pricingUrl": "https://docs.snowflake.com/en/user-guide/snowflake-cortex",
      "docsUrl": "https://docs.snowflake.com/en/user-guide/snowflake-cortex",
      "sourceUrl": "https://docs.snowflake.com/en/user-guide/snowflake-cortex",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Databricks (Mosaic AI)",
      "slug": "databricks-mosaic-ai",
      "category": "hyperscaler-marketplace",
      "region": "US",
      "summary": "Data and AI platform with model serving on the lakehouse, under unified governance.",
      "offering": "Open foundation models (Llama, DBRX) plus external models (OpenAI, Anthropic, Cohere). Tight Unity Catalog governance.",
      "pricingModel": "consumption",
      "representativePricing": "Consumption via DBUs from ~$0.07/DBU; pay-per-token, provisioned throughput (Llama 3.3 70B from $6/hr per band), and batch.",
      "standout": "Tight integration with Unity Catalog governance and data pipelines; OpenAI-compatible API.",
      "openAICompatible": true,
      "freeTier": "14-day free trial; Free Edition available.",
      "homeUrl": "https://databricks.com",
      "pricingUrl": "https://databricks.com/product/pricing/foundation-model-serving",
      "docsUrl": "https://docs.databricks.com/en/machine-learning/model-serving/",
      "sourceUrl": "https://databricks.com/product/pricing/foundation-model-serving",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Together AI",
      "slug": "together-ai",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Neutral open-model inference cloud hosting 200+ open models.",
      "offering": "Serverless per-token (DeepSeek, Llama, Qwen, Kimi, GLM, MiniMax, Mixtral), plus fine-tuning, dedicated endpoints and GPU clusters.",
      "pricingModel": "mixed",
      "representativePricing": "DeepSeek V3.1 $0.60/$1.70, GPT-OSS 20B $0.05/$0.20 per Mtok; H100 reserved ~$3.99/hr. $5 minimum credit.",
      "standout": "Broad catalog, research-driven optimization (FlashAttention lineage), full-stack from serverless to clusters.",
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://together.ai",
      "pricingUrl": "https://together.ai/pricing",
      "docsUrl": "https://docs.together.ai",
      "sourceUrl": "https://together.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Fireworks AI",
      "slug": "fireworks-ai",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Neutral open-model inference platform from the creators of PyTorch; speed and throughput focus.",
      "offering": "Serverless per-token (DeepSeek/Kimi/GLM/MiniMax catalog), on-demand GPUs, fine-tuning, reserved capacity. 30T+ tokens/day.",
      "pricingModel": "mixed",
      "representativePricing": "8B-class ~$0.20/M, 70B-class ~$0.90/M; H100/H200 $6/hr, B200 $9/hr. Batch 50% off. Often 20-40% below Together.",
      "standout": "Day-zero model support and rapid growth: $315M annualized revenue as of February 2026; in talks at a $15B valuation in mid-2026.",
      "openAICompatible": true,
      "freeTier": "$1 free starter credit.",
      "homeUrl": "https://fireworks.ai",
      "pricingUrl": "https://fireworks.ai/pricing",
      "docsUrl": "https://docs.fireworks.ai",
      "sourceUrl": "https://fireworks.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "DeepInfra",
      "slug": "deepinfra",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Low-cost serverless inference for open-weight models; runs its own US data centers.",
      "offering": "190+ models (Llama, Qwen, DeepSeek, GLM, Gemma, Mistral, Nemotron, Kimi) plus embeddings, TTS, image. Dedicated GPUs by the hour.",
      "pricingModel": "mixed",
      "representativePricing": "From ~$0.06/M for small models; DeepSeek V4 Flash $0.10/$0.20 per Mtok. ~5T tokens/week.",
      "standout": "Among the cheapest serverless options; runs its own US data centers, including Blackwell B200 GPUs.",
      "openAICompatible": true,
      "freeTier": "No standing free tier.",
      "homeUrl": "https://deepinfra.com",
      "pricingUrl": "https://deepinfra.com/pricing",
      "docsUrl": "https://deepinfra.com/docs",
      "sourceUrl": "https://deepinfra.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Community reports flag FP4 quantization on some models."
    },
    {
      "name": "Replicate",
      "slug": "replicate",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Developer-friendly model-hosting marketplace (acquired by Cloudflare, late 2025).",
      "offering": "50,000+ community models plus ~100 curated Official Models (Claude, DeepSeek, FLUX, Veo, Kling). Deploy custom models via Cog.",
      "pricingModel": "mixed",
      "representativePricing": "Hardware-per-second (CPU $0.000025/s up to H100 ~$0.001525/s) and output-based (per token/image/video).",
      "standout": "Largest open model catalog and easiest experimentation. Cold starts and unpredictable per-second billing are the main drawbacks.",
      "openAICompatible": false,
      "freeTier": "",
      "homeUrl": "https://replicate.com",
      "pricingUrl": "https://replicate.com/pricing",
      "docsUrl": "https://replicate.com/docs",
      "sourceUrl": "https://replicate.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "Now owned by Cloudflare, which may affect the roadmap over time."
    },
    {
      "name": "SiliconFlow",
      "slug": "siliconflow",
      "category": "neutral-inference-platform",
      "region": "China",
      "summary": "Chinese model-as-a-service aggregator; notable for Huawei Ascend-based DeepSeek serving.",
      "offering": "200+ models across text/image/video/audio (DeepSeek, Qwen, GLM, Kimi, MiniMax, Step, FLUX). Backed by Alibaba Cloud.",
      "pricingModel": "mixed",
      "representativePricing": "Pay-as-you-go per-token (DeepSeek V4 Flash $0.14/$0.28 per Mtok); reserved GPU ~CNY 2.73/hr.",
      "standout": "6M+ users, 100B+ daily tokens, fastest to handle DeepSeek traffic.",
      "openAICompatible": true,
      "freeTier": "$1 credits on signup; some smaller models permanently free.",
      "homeUrl": "https://siliconflow.com",
      "pricingUrl": "https://siliconflow.com/pricing",
      "docsUrl": "https://docs.siliconflow.com",
      "sourceUrl": "https://siliconflow.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Hyperbolic",
      "slug": "hyperbolic",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Decentralized open-access AI cloud: a GPU marketplace plus serverless inference.",
      "offering": "25+ open models plus a GPU marketplace that aggregates idle GPUs. Pay by card or crypto. Serves Llama-3.1-405B-Base in BF16.",
      "pricingModel": "mixed",
      "representativePricing": "GPU/hr: RTX 4090 $0.50, A100 ~$1.60-1.80, H100 PCIe $3.00, H100 SXM $3.20. Llama 3.3 70B $0.40/M.",
      "standout": "Up to 75% savings vs hyperscalers by pooling idle GPUs; only platform serving Llama-3.1-405B-Base in BF16.",
      "openAICompatible": true,
      "freeTier": "$1 promo credit (not for GPU rental).",
      "homeUrl": "https://hyperbolic.ai",
      "pricingUrl": "https://docs.hyperbolic.xyz/docs/hyperbolic-pricing",
      "docsUrl": "https://docs.hyperbolic.xyz",
      "sourceUrl": "https://docs.hyperbolic.xyz/docs/hyperbolic-pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Nebius",
      "slug": "nebius",
      "category": "neutral-inference-platform",
      "region": "EU",
      "summary": "Full-stack European AI cloud (successor to Yandex N.V.); Token Factory plus raw GPU rental.",
      "offering": "Token Factory: 60+ open-source models in Fast/Base tiers, OpenAI-compatible. Also H100/H200/B200/B300 rental with per-second billing.",
      "pricingModel": "mixed",
      "representativePricing": "Cheapest ~$0.06-0.08/M input (Nemotron 3 Nano $0.08 blended) up to ~$1.93/M for DeepSeek V4 Pro. Reserve discounts up to 35%.",
      "standout": "EU data residency, full-stack managed compute plus inference, and a ~$2B NVIDIA deal with early Rubin access.",
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://nebius.com",
      "pricingUrl": "https://nebius.com/token-factory/prices",
      "docsUrl": "https://docs.nebius.com",
      "sourceUrl": "https://nebius.com/token-factory/prices",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "The NVIDIA Rubin 10x cost-per-token claim is projected for H2 2026, not yet delivered."
    },
    {
      "name": "Parasail",
      "slug": "parasail",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Inference-only AI Supercloud aggregating global GPU supply (40 data centers, 15 countries).",
      "offering": "Serverless per-token (GLM-5.2, Kimi, DeepSeek V4, MiniMax, Qwen, gpt-oss, Llama), dedicated endpoints and batch. Deploy any HF model in ~5 lines.",
      "pricingModel": "mixed",
      "representativePricing": "~$0.09/M for MiMo-V2.5 up to ~$0.90/M for GLM-5.1. 500B tokens/day.",
      "standout": "Lossless by default, no hidden quantization; pay-per-token with no GPU contracts. Founded by an ex-Groq exec; raised $32M Series A.",
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://parasail.io",
      "pricingUrl": "https://saas.parasail.io/pricing",
      "docsUrl": "https://docs.parasail.io",
      "sourceUrl": "https://saas.parasail.io/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "FriendliAI",
      "slug": "friendliai",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Fast-serving inference platform with custom kernels and speculative decoding.",
      "offering": "Serverless Endpoints (OpenAI-compatible), Dedicated Endpoints (per GPU-hour), Container (on-prem). DeepSeek, Qwen, Kimi, GLM, Llama, EXAONE.",
      "pricingModel": "mixed",
      "representativePricing": "Pay-per-token serverless plus per GPU-hour dedicated. Claims 50-90% cost savings vs vLLM.",
      "standout": "Claims up to 3x faster than vLLM via custom kernels, speculative decoding, continuous batching. SOC 2 Type II plus HIPAA, 99.99% uptime SLA.",
      "openAICompatible": true,
      "freeTier": "$5 free credits.",
      "homeUrl": "https://friendli.ai",
      "pricingUrl": "https://friendli.ai/pricing",
      "docsUrl": "https://friendli.ai/docs",
      "sourceUrl": "https://friendli.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Baseten",
      "slug": "baseten",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "Enterprise model-inference platform; deploy custom/open/fine-tuned models via the Truss framework.",
      "offering": "Per-minute dedicated GPU billing (T4 ~$0.01/min up to B200 ~$0.166/min) with scale-to-zero, plus a per-token Model APIs catalog.",
      "pricingModel": "mixed",
      "representativePricing": "Model APIs median ~$0.60/$2.20 per Mtok; dedicated GPU T4 ~$0.01/min up to B200 ~$0.166/min.",
      "standout": "Multi-cloud routing across ~18-20 providers, 1B+ inference calls/day. Closed a $1.5B Series F at a $13B valuation (June 22, 2026).",
      "openAICompatible": true,
      "freeTier": "New-account credits.",
      "homeUrl": "https://baseten.co",
      "pricingUrl": "https://baseten.co/pricing",
      "docsUrl": "https://docs.baseten.co",
      "sourceUrl": "https://baseten.co/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "GMI (GMI Cloud)",
      "slug": "gmi-cloud",
      "category": "neutral-inference-platform",
      "region": "US",
      "summary": "AI-native GPU and inference cloud (NVIDIA Preferred Partner); bare-metal pricing with no forced upsell.",
      "offering": "Bare-metal plus serverless plus dedicated clusters. Inference Engine (auto-scaling), Cluster Engine, and Model-as-a-Service.",
      "pricingModel": "per-gpu-hour",
      "representativePricing": "On-demand GPU/hr: H100 from $2.00, H200 from $2.60, B200 from $4.00, GB200 from $8.00. Reserved cuts 30-50%.",
      "standout": "Claims 40-70% savings vs hyperscalers; pure bare-metal rates with no forced CPU or networking upsell.",
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://gmicloud.ai",
      "pricingUrl": "https://gmicloud.ai/en/pricing",
      "docsUrl": "https://docs.gmicloud.ai",
      "sourceUrl": "https://gmicloud.ai/en/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Groq",
      "slug": "groq",
      "category": "custom-silicon",
      "region": "US",
      "summary": "Custom-silicon (LPU) inference cloud for open models; among the fastest inference available.",
      "offering": "Runs Llama, Qwen, Kimi, GPT-OSS, DeepSeek distills and Whisper at 500-1,000+ tok/s. Catalog is open-source only.",
      "pricingModel": "per-token",
      "representativePricing": "Llama 3.1 8B $0.05/$0.08, Llama 3.3 70B $0.59/$0.79, Kimi K2 $1/$3 per Mtok. Batch and caching each cut 50%.",
      "standout": "Among the fastest inference available. NVIDIA agreed to pay ~$20B for a perpetual license to Groq's LPU patents (finalized December 24, 2025); GroqCloud continues operating.",
      "openAICompatible": true,
      "freeTier": "Free developer tier (no credit card).",
      "homeUrl": "https://groq.com",
      "pricingUrl": "https://groq.com/pricing",
      "docsUrl": "https://console.groq.com/docs",
      "sourceUrl": "https://groq.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "Cerebras",
      "slug": "cerebras",
      "category": "custom-silicon",
      "region": "US",
      "summary": "Wafer-scale (WSE-3) inference cloud; the speed champion at 1,800-3,000+ tok/s on open models.",
      "offering": "Llama, Qwen, DeepSeek distills, GPT-OSS. No proprietary frontier models and no custom-model uploads.",
      "pricingModel": "per-token",
      "representativePricing": "~$0.10-6 per Mtok depending on model ($0.35/M cheapest input). Pay-as-you-go and enterprise tiers.",
      "standout": "Fastest inference benchmarked by Artificial Analysis.",
      "openAICompatible": true,
      "freeTier": "1M tokens/day, no credit card.",
      "homeUrl": "https://cerebras.ai",
      "pricingUrl": "https://cerebras.ai/pricing",
      "docsUrl": "https://inference-docs.cerebras.ai",
      "sourceUrl": "https://cerebras.ai/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "SambaNova",
      "slug": "sambanova",
      "category": "custom-silicon",
      "region": "US",
      "summary": "Custom-silicon (RDU) inference platform (SambaCloud) purpose-built for agentic inference.",
      "offering": "Fast serving of large open models (Llama, DeepSeek 671B, Qwen, MiniMax) with a three-tier memory architecture.",
      "pricingModel": "per-token",
      "representativePricing": "Pay-per-token; rates listed on the SambaCloud plans page.",
      "standout": "Benchmarked by Artificial Analysis as among the fastest for large models (MiniMax M2.7 at 435 tok/s). Strong sovereign-AI and on-prem story.",
      "openAICompatible": true,
      "freeTier": "",
      "homeUrl": "https://sambanova.ai",
      "pricingUrl": "https://cloud.sambanova.ai/plans",
      "docsUrl": "https://docs.sambanova.ai",
      "sourceUrl": "https://cloud.sambanova.ai/plans",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "CoreWeave",
      "slug": "coreweave",
      "category": "gpu-compression-niche",
      "region": "US",
      "summary": "Specialized GPU hyperscaler (neocloud), Kubernetes-native, for AI training and inference at scale.",
      "offering": "Rents NVIDIA A100, H100, H200, GB200/B200, GB300. Per-second billing, no egress fees; spot and reserved available.",
      "pricingModel": "per-gpu-hour",
      "representativePricing": "8x H100 node ~$49.24/hr (~$6.16/GPU/hr); single GPUs from ~$1.19/hr (A100 PCIe) to $10.50/hr (B200 NVL).",
      "standout": "Roughly 40-60% cheaper than hyperscalers for equivalent GPUs; customers include OpenAI, Mistral, Jane Street. Often 8-GPU minimums.",
      "openAICompatible": false,
      "freeTier": "",
      "homeUrl": "https://coreweave.com",
      "pricingUrl": "https://coreweave.com/pricing",
      "docsUrl": "https://docs.coreweave.com",
      "sourceUrl": "https://coreweave.com/pricing",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": ""
    },
    {
      "name": "CompactifAI (Multiverse Computing)",
      "slug": "compactifai",
      "category": "gpu-compression-niche",
      "region": "Spain",
      "summary": "Quantum-inspired model-compression company that shrinks open LLMs up to 95% via tensor networks.",
      "offering": "Serves compressed Slim models via API and the AWS/Azure marketplaces. HyperNova 60B (from gpt-oss-120b), compressed Llama/DeepSeek/Mistral.",
      "pricingModel": "per-token",
      "representativePricing": "HyperNova 60B $0.04/$0.14 per Mtok; Llama 3.3 70B Slim ~$0.15/$0.31 per Mtok.",
      "standout": "Claims compressed models beat their base models on speed and cost, runnable on edge devices down to Raspberry Pi. Raised a 189M euro Series B (June 12, 2025).",
      "openAICompatible": false,
      "freeTier": "",
      "homeUrl": "https://multiversecomputing.com/compactifai",
      "pricingUrl": "https://multiversecomputing.com/compactifai/api",
      "docsUrl": "https://multiversecomputing.com/compactifai/api",
      "sourceUrl": "https://multiversecomputing.com/compactifai/api",
      "verifiedDate": "2026-06-28",
      "status": "representative",
      "note": "States up to 95% compression with only 2-3% precision loss (vendor claim)."
    }
  ]
}