{"ok":true,"source":"tensorfeed.ai","lastUpdated":"2026-05-24","tracked_providers":["Together AI","Fireworks","DeepInfra","Groq","OpenRouter","Replicate","Anyscale","DeepSeek","GitHub Models"],"count":9,"models":[{"modelId":"llama-4-maverick","modelName":"Llama 4 Maverick","family":"Meta","paramsB":400,"license":"Llama 4 Community License","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"meta-llama/Llama-4-Maverick-Instruct","inputPrice":0.5,"outputPrice":1.5,"blendedPrice":1,"contextWindow":1000000,"outputTPS":145,"features":["function-calling","json-mode","vision"],"url":"https://www.together.ai/pricing","note":""},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/llama4-maverick","inputPrice":0.55,"outputPrice":1.65,"blendedPrice":1.1,"contextWindow":1000000,"outputTPS":130,"features":["function-calling","json-mode","vision"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"meta-llama/Llama-4-Maverick","inputPrice":0.45,"outputPrice":1.4,"blendedPrice":0.925,"contextWindow":1000000,"outputTPS":110,"features":["function-calling","vision"],"url":"https://deepinfra.com/pricing","note":""},{"provider":"Groq","providerModelId":"llama-4-maverick","inputPrice":0.59,"outputPrice":1.79,"blendedPrice":1.19,"contextWindow":128000,"outputTPS":720,"features":["function-calling","json-mode","vision"],"url":"https://groq.com/pricing","note":"Highest TPS in the matrix; 128k context cap"},{"provider":"OpenRouter","providerModelId":"meta-llama/llama-4-maverick","inputPrice":0.49,"outputPrice":1.49,"blendedPrice":0.99,"contextWindow":1000000,"outputTPS":null,"features":["function-calling","vision"],"url":"https://openrouter.ai/meta-llama/llama-4-maverick","note":"Routes across multiple providers automatically"}]},{"modelId":"llama-4-scout","modelName":"Llama 4 Scout","family":"Meta","paramsB":109,"license":"Llama 4 Community License","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"meta-llama/Llama-4-Scout-Instruct","inputPrice":0.18,"outputPrice":0.59,"blendedPrice":0.385,"contextWindow":10000000,"outputTPS":195,"features":["function-calling","json-mode","vision"],"url":"https://www.together.ai/pricing","note":""},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/llama4-scout","inputPrice":0.2,"outputPrice":0.6,"blendedPrice":0.4,"contextWindow":10000000,"outputTPS":180,"features":["function-calling","json-mode","vision"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"meta-llama/Llama-4-Scout","inputPrice":0.16,"outputPrice":0.55,"blendedPrice":0.355,"contextWindow":10000000,"outputTPS":170,"features":["function-calling","vision"],"url":"https://deepinfra.com/pricing","note":""},{"provider":"Groq","providerModelId":"llama-4-scout","inputPrice":0.18,"outputPrice":0.59,"blendedPrice":0.385,"contextWindow":128000,"outputTPS":950,"features":["function-calling","json-mode","vision"],"url":"https://groq.com/pricing","note":"Highest TPS in the matrix; 128k context cap"},{"provider":"OpenRouter","providerModelId":"meta-llama/llama-4-scout","inputPrice":0.18,"outputPrice":0.59,"blendedPrice":0.385,"contextWindow":10000000,"outputTPS":null,"features":["function-calling","vision"],"url":"https://openrouter.ai/meta-llama/llama-4-scout","note":""},{"provider":"Replicate","providerModelId":"meta/llama-4-scout","inputPrice":0.2,"outputPrice":0.65,"blendedPrice":0.425,"contextWindow":10000000,"outputTPS":95,"features":["function-calling"],"url":"https://replicate.com/meta/llama-4-scout","note":""}]},{"modelId":"llama-3.1-405b","modelName":"Llama 3.1 405B Instruct","family":"Meta","paramsB":405,"license":"Llama 3.1 Community License","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo","inputPrice":3.5,"outputPrice":3.5,"blendedPrice":3.5,"contextWindow":130000,"outputTPS":70,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":"Turbo (FP8) variant"},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/llama-v3p1-405b-instruct","inputPrice":3,"outputPrice":3,"blendedPrice":3,"contextWindow":130000,"outputTPS":65,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"meta-llama/Meta-Llama-3.1-405B-Instruct","inputPrice":1.79,"outputPrice":1.79,"blendedPrice":1.79,"contextWindow":130000,"outputTPS":50,"features":["function-calling"],"url":"https://deepinfra.com/pricing","note":"Cheapest 405B host"},{"provider":"OpenRouter","providerModelId":"meta-llama/llama-3.1-405b-instruct","inputPrice":3,"outputPrice":3,"blendedPrice":3,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/meta-llama/llama-3.1-405b-instruct","note":""}]},{"modelId":"llama-3.1-70b","modelName":"Llama 3.1 70B Instruct","family":"Meta","paramsB":70,"license":"Llama 3.1 Community License","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo","inputPrice":0.88,"outputPrice":0.88,"blendedPrice":0.88,"contextWindow":130000,"outputTPS":165,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":"Turbo (FP8) variant"},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/llama-v3p1-70b-instruct","inputPrice":0.9,"outputPrice":0.9,"blendedPrice":0.9,"contextWindow":130000,"outputTPS":145,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"meta-llama/Meta-Llama-3.1-70B-Instruct","inputPrice":0.35,"outputPrice":0.4,"blendedPrice":0.375,"contextWindow":130000,"outputTPS":95,"features":["function-calling"],"url":"https://deepinfra.com/pricing","note":"Cheapest 70B host"},{"provider":"Groq","providerModelId":"llama-3.1-70b-versatile","inputPrice":0.59,"outputPrice":0.79,"blendedPrice":0.69,"contextWindow":130000,"outputTPS":280,"features":["function-calling","json-mode"],"url":"https://groq.com/pricing","note":""},{"provider":"OpenRouter","providerModelId":"meta-llama/llama-3.1-70b-instruct","inputPrice":0.4,"outputPrice":0.4,"blendedPrice":0.4,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/meta-llama/llama-3.1-70b-instruct","note":""},{"provider":"Anyscale","providerModelId":"meta-llama/Meta-Llama-3.1-70B-Instruct","inputPrice":1,"outputPrice":1,"blendedPrice":1,"contextWindow":130000,"outputTPS":110,"features":["function-calling"],"url":"https://www.anyscale.com/pricing","note":""},{"provider":"GitHub Models","providerModelId":"Meta-Llama-3.1-70B-Instruct","inputPrice":0,"outputPrice":0,"blendedPrice":0,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://github.com/marketplace/models/azureml-meta/Meta-Llama-3-1-70B-Instruct","note":"Free tier rate-limited (prototyping); paid via Azure AI Foundry"}]},{"modelId":"deepseek-v4-pro","modelName":"DeepSeek V4 Pro","family":"DeepSeek","paramsB":1600,"license":"MIT","openWeights":true,"offers":[{"provider":"DeepSeek","providerModelId":"deepseek-chat","inputPrice":0.14,"outputPrice":0.28,"blendedPrice":0.21,"contextWindow":1000000,"outputTPS":110,"features":["function-calling","json-mode"],"url":"https://api-docs.deepseek.com/quick_start/pricing","note":"First-party API; cheapest path"},{"provider":"Together AI","providerModelId":"deepseek-ai/DeepSeek-V4-Pro","inputPrice":0.27,"outputPrice":1.1,"blendedPrice":0.685,"contextWindow":1000000,"outputTPS":90,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":""},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/deepseek-v4-pro","inputPrice":0.3,"outputPrice":1.2,"blendedPrice":0.75,"contextWindow":1000000,"outputTPS":85,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"OpenRouter","providerModelId":"deepseek/deepseek-chat","inputPrice":0.14,"outputPrice":0.28,"blendedPrice":0.21,"contextWindow":1000000,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/deepseek/deepseek-chat","note":"Routes to first-party DeepSeek"},{"provider":"GitHub Models","providerModelId":"DeepSeek-V4-Pro","inputPrice":0,"outputPrice":0,"blendedPrice":0,"contextWindow":1000000,"outputTPS":null,"features":["function-calling"],"url":"https://github.com/marketplace/models","note":"Free tier rate-limited (prototyping); paid via Azure AI Foundry"}]},{"modelId":"deepseek-v4-flash","modelName":"DeepSeek V4 Flash","family":"DeepSeek","paramsB":70,"license":"MIT","openWeights":true,"offers":[{"provider":"DeepSeek","providerModelId":"deepseek-flash","inputPrice":0.04,"outputPrice":0.08,"blendedPrice":0.06,"contextWindow":130000,"outputTPS":165,"features":["function-calling","json-mode"],"url":"https://api-docs.deepseek.com/quick_start/pricing","note":"Cheapest hosted inference of any frontier-class model in 2026"},{"provider":"Together AI","providerModelId":"deepseek-ai/DeepSeek-V4-Flash","inputPrice":0.1,"outputPrice":0.3,"blendedPrice":0.2,"contextWindow":130000,"outputTPS":145,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":""},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/deepseek-v4-flash","inputPrice":0.12,"outputPrice":0.36,"blendedPrice":0.24,"contextWindow":130000,"outputTPS":130,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"OpenRouter","providerModelId":"deepseek/deepseek-flash","inputPrice":0.04,"outputPrice":0.08,"blendedPrice":0.06,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/deepseek/deepseek-flash","note":""}]},{"modelId":"mixtral-8x22b","modelName":"Mixtral 8x22B Instruct","family":"Mistral","paramsB":141,"license":"Apache-2.0","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"mistralai/Mixtral-8x22B-Instruct-v0.1","inputPrice":1.2,"outputPrice":1.2,"blendedPrice":1.2,"contextWindow":65536,"outputTPS":90,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":""},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/mixtral-8x22b-instruct","inputPrice":1.2,"outputPrice":1.2,"blendedPrice":1.2,"contextWindow":65536,"outputTPS":80,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"mistralai/Mixtral-8x22B-Instruct-v0.1","inputPrice":0.65,"outputPrice":0.65,"blendedPrice":0.65,"contextWindow":65536,"outputTPS":60,"features":["function-calling"],"url":"https://deepinfra.com/pricing","note":"Cheapest Mixtral 8x22B host"},{"provider":"OpenRouter","providerModelId":"mistralai/mixtral-8x22b-instruct","inputPrice":0.65,"outputPrice":0.65,"blendedPrice":0.65,"contextWindow":65536,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/mistralai/mixtral-8x22b-instruct","note":""}]},{"modelId":"phi-4","modelName":"Phi-4","family":"Microsoft","paramsB":14,"license":"MIT","openWeights":true,"offers":[{"provider":"GitHub Models","providerModelId":"Phi-4","inputPrice":0,"outputPrice":0,"blendedPrice":0,"contextWindow":16384,"outputTPS":null,"features":["function-calling"],"url":"https://github.com/marketplace/models/azureml/Phi-4","note":"Free tier rate-limited (prototyping); paid via Azure AI Foundry"},{"provider":"DeepInfra","providerModelId":"microsoft/phi-4","inputPrice":0.07,"outputPrice":0.14,"blendedPrice":0.105,"contextWindow":16384,"outputTPS":220,"features":["function-calling"],"url":"https://deepinfra.com/pricing","note":""},{"provider":"Together AI","providerModelId":"microsoft/phi-4","inputPrice":0.08,"outputPrice":0.16,"blendedPrice":0.12,"contextWindow":16384,"outputTPS":200,"features":["function-calling"],"url":"https://www.together.ai/pricing","note":""},{"provider":"OpenRouter","providerModelId":"microsoft/phi-4","inputPrice":0.07,"outputPrice":0.14,"blendedPrice":0.105,"contextWindow":16384,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/microsoft/phi-4","note":""}]},{"modelId":"qwen-2.5-72b","modelName":"Qwen 2.5 72B Instruct","family":"Alibaba","paramsB":72,"license":"Qwen License","openWeights":true,"offers":[{"provider":"Together AI","providerModelId":"Qwen/Qwen2.5-72B-Instruct-Turbo","inputPrice":0.9,"outputPrice":0.9,"blendedPrice":0.9,"contextWindow":130000,"outputTPS":130,"features":["function-calling","json-mode"],"url":"https://www.together.ai/pricing","note":"Turbo (FP8) variant"},{"provider":"Fireworks","providerModelId":"accounts/fireworks/models/qwen2p5-72b-instruct","inputPrice":0.9,"outputPrice":0.9,"blendedPrice":0.9,"contextWindow":130000,"outputTPS":110,"features":["function-calling","json-mode"],"url":"https://fireworks.ai/pricing","note":""},{"provider":"DeepInfra","providerModelId":"Qwen/Qwen2.5-72B-Instruct","inputPrice":0.35,"outputPrice":0.4,"blendedPrice":0.375,"contextWindow":130000,"outputTPS":80,"features":["function-calling"],"url":"https://deepinfra.com/pricing","note":"Cheapest Qwen 72B host"},{"provider":"OpenRouter","providerModelId":"qwen/qwen-2.5-72b-instruct","inputPrice":0.35,"outputPrice":0.4,"blendedPrice":0.375,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://openrouter.ai/qwen/qwen-2.5-72b-instruct","note":""},{"provider":"GitHub Models","providerModelId":"qwen2.5-72b-instruct","inputPrice":0,"outputPrice":0,"blendedPrice":0,"contextWindow":130000,"outputTPS":null,"features":["function-calling"],"url":"https://github.com/marketplace/models","note":"Free tier rate-limited (prototyping); paid via Azure AI Foundry"}]}]}