{"ok":true,"source":"tensorfeed.ai","lastUpdated":"2026-04-30","count":24,"models":[{"id":"midjourney-v7","name":"Midjourney v7","provider":"Midjourney","modality":"image","pricingUnit":"per_subscription_month","pricingAmount":10,"pricingNote":"Subscription only ($10 Basic up to $120 Mega/month). No public API; access via Discord or web app.","released":"2025-04-04","apiAvailable":false,"url":"https://www.midjourney.com","notes":"Best aesthetic for stylized work. Slow turn-around. The lack of an API limits creative-tool agents to scraping or unofficial wrappers.","maxOutput":"2048x2048 upscaled","features":["style references","image-to-image","inpainting"]},{"id":"flux-1.1-pro-ultra","name":"FLUX 1.1 Pro Ultra","provider":"Black Forest Labs","modality":"image","pricingUnit":"per_image","pricingAmount":0.06,"pricingNote":"$0.06 per 4MP image via Black Forest Labs API. Replicate, Together, FAL also host with their own markup.","released":"2024-11-04","apiAvailable":true,"url":"https://blackforestlabs.ai/flux-1-1-ultra/","notes":"Highest-fidelity FLUX tier. 4MP native output, no upscaler. State-of-the-art prompt adherence.","maxOutput":"4MP (2048x2048)","features":["photorealism","long prompts","raw mode"]},{"id":"flux-1.1-pro","name":"FLUX 1.1 Pro","provider":"Black Forest Labs","modality":"image","pricingUnit":"per_image","pricingAmount":0.04,"pricingNote":"$0.04 per 1MP image via Black Forest Labs API. Cheaper through Replicate at ~$0.025.","released":"2024-10-01","apiAvailable":true,"url":"https://blackforestlabs.ai/flux-1-1-pro/","notes":"Production workhorse. 6x faster than FLUX Pro 1.0 with the same quality. The default for image-gen agents that need API access.","maxOutput":"2MP (1408x1408)","features":["fast","json mode","image-to-image"]},{"id":"flux-1-schnell","name":"FLUX 1 Schnell","provider":"Black Forest Labs","modality":"image","pricingUnit":"per_image","pricingAmount":0.003,"pricingNote":"$0.003 per image via Replicate or Together. 
Apache-2.0 licensed; free to self-host on a GPU.","released":"2024-08-01","apiAvailable":true,"url":"https://huggingface.co/black-forest-labs/FLUX.1-schnell","notes":"Open-weights distilled FLUX. 1-4 step generation. The cheapest production-grade image model.","maxOutput":"1MP (1024x1024)","features":["open weights","apache-2.0","1-4 step"]},{"id":"dall-e-3","name":"DALL-E 3","provider":"OpenAI","modality":"image","pricingUnit":"per_image","pricingAmount":0.04,"pricingNote":"$0.04 per standard 1024x1024. $0.08 for HD. $0.12 per 1024x1792 HD.","released":"2023-10-19","apiAvailable":true,"url":"https://platform.openai.com/docs/models/dall-e-3","notes":"Strong instruction following and on-image text rendering. Now older than FLUX 1.1; some prompts produce better results in FLUX.","maxOutput":"1024x1792","features":["instruction following","text rendering","json mode"]},{"id":"imagen-4","name":"Imagen 4","provider":"Google","modality":"image","pricingUnit":"per_image","pricingAmount":0.03,"pricingNote":"$0.03 per image via Vertex AI. Imagen 4 Ultra at $0.06 for higher fidelity.","released":"2025-12-11","apiAvailable":true,"url":"https://cloud.google.com/vertex-ai/generative-ai/docs/image/overview","notes":"Google flagship image model. Strong on text rendering and prompt adherence. Vertex AI only.","maxOutput":"2048x2048","features":["text rendering","safety filters","watermarking"]},{"id":"recraft-v3","name":"Recraft v3","provider":"Recraft","modality":"image","pricingUnit":"per_image","pricingAmount":0.04,"pricingNote":"$0.04 per image. Volume tiers via subscription.","released":"2024-10-30","apiAvailable":true,"url":"https://www.recraft.ai","notes":"Best-in-class on-image typography. Vector output option. 
Top of LMSYS image arena for several months.","maxOutput":"2048x2048","features":["typography","vector output","brand styles"]},{"id":"ideogram-2","name":"Ideogram 2.0","provider":"Ideogram","modality":"image","pricingUnit":"per_image","pricingAmount":0.08,"pricingNote":"$0.08 per image via API. Cheaper through subscription.","released":"2024-08-22","apiAvailable":true,"url":"https://ideogram.ai","notes":"Strong text rendering and design layouts. Good complement to FLUX for ad creative agents.","maxOutput":"2048x2048","features":["typography","design styles","image-to-image"]},{"id":"sora-2","name":"Sora 2","provider":"OpenAI","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.5,"pricingNote":"$0.50 per second of generated video at 1080p. Higher tiers for 4K. ChatGPT Pro includes a quota.","released":"2025-12-09","apiAvailable":true,"url":"https://openai.com/sora","notes":"OpenAI flagship. 30-second max with strong physics consistency. API gated through Sora 2 partner program; full public availability rolling out.","maxOutput":"30s @ 1080p","features":["physics consistency","image-to-video","remix"]},{"id":"veo-3","name":"Veo 3","provider":"Google","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.5,"pricingNote":"$0.50 per second via Vertex AI. Native audio generation included (no separate TTS pass).","released":"2025-05-20","apiAvailable":true,"url":"https://deepmind.google/technologies/veo/","notes":"Google flagship with built-in synced audio (dialogue, SFX, music). The first major video model that ships speech in-frame.","maxOutput":"8s @ 1080p with audio","features":["native audio","lip-sync","image-to-video"]},{"id":"kling-2.0","name":"Kling 2.0","provider":"Kuaishou","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.28,"pricingNote":"$0.28 per second on the Kling Pro tier. $0.05/sec on Standard. 
Subscription tiers at flat rates.","released":"2025-04-15","apiAvailable":true,"url":"https://klingai.com","notes":"Strong physics, excellent for character consistency across frames. Fal and Replicate host with their own markup.","maxOutput":"10s @ 1080p","features":["character consistency","image-to-video","lip-sync"]},{"id":"happyhorse-1.0","name":"HappyHorse 1.0","provider":"Alibaba","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.2,"pricingNote":"$0.20 per second via Alibaba Cloud. 15B parameters; current Artificial Analysis Video Arena leader.","released":"2026-04-29","apiAvailable":true,"url":"https://www.alibabacloud.com/product/happyhorse","notes":"Open weights expected. As of late April 2026 it leads the Artificial Analysis Video Arena by 115 Elo. Would be the first frontier-class open-weights video model once the weights are released.","maxOutput":"8s @ 1080p","features":["leaderboard #1","image-to-video","open weights expected"]},{"id":"runway-gen-4","name":"Runway Gen-4","provider":"Runway","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.3,"pricingNote":"$0.30 per second on the Runway API. Subscription tiers offer cheaper bulk pricing.","released":"2025-09-12","apiAvailable":true,"url":"https://runwayml.com","notes":"Best-in-class control: motion brush, frame interpolation, frame-by-frame consistency. The model creative tools standardize on.","maxOutput":"10s @ 1080p","features":["motion brush","frame controls","image-to-video"]},{"id":"luma-dream-machine-2","name":"Luma Dream Machine 2","provider":"Luma AI","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.18,"pricingNote":"$0.18 per second via Luma API. Free tier on Dream Machine app.","released":"2025-06-10","apiAvailable":true,"url":"https://lumalabs.ai/dream-machine","notes":"Strong on cinematic motion and camera control. 
Cheaper than Runway with comparable quality on simpler scenes.","maxOutput":"10s @ 1080p","features":["camera control","image-to-video","extend"]},{"id":"pika-2.2","name":"Pika 2.2","provider":"Pika Labs","modality":"video","pricingUnit":"per_second_video","pricingAmount":0.15,"pricingNote":"$0.15 per second via Pika API. Subscription-first product with metered overage.","released":"2025-02-04","apiAvailable":true,"url":"https://pika.art","notes":"Effects-first product (Pikaffects, Inflate, Crush). Lower fidelity than Sora/Veo but the cheapest brand-friendly option.","maxOutput":"10s @ 1080p","features":["video effects","image-to-video","lip-sync"]},{"id":"elevenlabs-v3","name":"Eleven v3 (alpha)","provider":"ElevenLabs","modality":"tts","pricingUnit":"per_1k_chars","pricingAmount":0.18,"pricingNote":"$0.18 per 1k characters on the Pro tier (Creator $0.30, Enterprise volume-discounted).","released":"2025-06-04","apiAvailable":true,"url":"https://elevenlabs.io/voice-cloning","notes":"Most expressive tier. Supports audio tags ([whisper], [laughs]). 70+ languages. The default for premium voice agents.","maxOutput":"70+ languages, expressive","features":["voice cloning","audio tags","streaming","real-time"]},{"id":"cartesia-sonic-2","name":"Cartesia Sonic 2","provider":"Cartesia","modality":"tts","pricingUnit":"per_1k_chars","pricingAmount":0.05,"pricingNote":"$0.05 per 1k chars on Cartesia API. About 72% cheaper than ElevenLabs Pro ($0.18 per 1k chars) at comparable quality.","released":"2025-03-17","apiAvailable":true,"url":"https://cartesia.ai","notes":"Mamba-architecture TTS. Lowest latency in production: <90ms time-to-first-byte. The default for real-time voice agents that need sub-second response.","maxOutput":"15+ languages, ultra-low latency","features":["90ms TTFB","voice cloning","streaming"]},{"id":"openai-tts-hd","name":"OpenAI TTS-1-HD","provider":"OpenAI","modality":"tts","pricingUnit":"per_1k_chars","pricingAmount":0.03,"pricingNote":"$0.030 per 1k chars (TTS-1-HD). 
$0.015 per 1k chars (TTS-1). 6 voices, no cloning.","released":"2023-11-06","apiAvailable":true,"url":"https://platform.openai.com/docs/guides/text-to-speech","notes":"Cheap and reliable. No voice cloning. 6 fixed voices. Strong default for non-branded TTS where voice is not a differentiator.","maxOutput":"6 voices, ~50 languages","features":["6 fixed voices","streaming","json mode"]},{"id":"deepgram-aura-2","name":"Deepgram Aura-2","provider":"Deepgram","modality":"tts","pricingUnit":"per_1k_chars","pricingAmount":0.03,"pricingNote":"$0.030 per 1k chars. Volume discounts at $5k/mo.","released":"2025-02-11","apiAvailable":true,"url":"https://deepgram.com/product/text-to-speech","notes":"Real-time TTS with sub-200ms time-to-first-byte. Good integration with Deepgram Nova STT for full voice-agent loop.","maxOutput":"40+ voices, English-focused","features":["low latency","streaming","real-time"]},{"id":"deepgram-nova-3","name":"Deepgram Nova-3","provider":"Deepgram","modality":"stt","pricingUnit":"per_minute_audio","pricingAmount":0.0043,"pricingNote":"$0.0043 per minute streaming. $0.0058/min batch. Real-time tier $0.0077/min.","released":"2025-01-23","apiAvailable":true,"url":"https://deepgram.com/product/speech-to-text","notes":"Fastest production STT. Multilingual real-time with code-switching. Word error rate competitive with Whisper while being 10-30x faster.","maxOutput":"WER 6.84% (English), 36 languages","features":["streaming","real-time","diarization","code-switching"]},{"id":"gpt-4o-transcribe","name":"GPT-4o Transcribe","provider":"OpenAI","modality":"stt","pricingUnit":"per_minute_audio","pricingAmount":0.006,"pricingNote":"$0.006 per minute. Streaming variant available; gpt-4o-mini-transcribe at $0.003/min.","released":"2025-03-20","apiAvailable":true,"url":"https://platform.openai.com/docs/guides/speech-to-text","notes":"Replaced whisper-1 as the OpenAI flagship STT. 
Lower hallucination rate than Whisper especially on silence and noise.","maxOutput":"WER 6.7% (English), 100+ languages","features":["streaming","low hallucination","multilingual"]},{"id":"whisper-large-v3","name":"Whisper Large v3","provider":"OpenAI","modality":"stt","pricingUnit":"per_minute_audio","pricingAmount":0,"pricingNote":"Open weights, free to self-host. Hosted via OpenAI API at $0.006/min (whisper-1) or via Groq at $0.04/hour ($0.000667/min).","released":"2023-11-06","apiAvailable":true,"url":"https://github.com/openai/whisper","notes":"Open-weights workhorse. Groq hosting on LPU silicon is the cheapest production STT in 2026 at ~$0.0007/min. MIT licensed.","maxOutput":"WER 7.4% (English), 99 languages","features":["open weights","mit","multilingual"]},{"id":"assemblyai-universal-2","name":"AssemblyAI Universal-2","provider":"AssemblyAI","modality":"stt","pricingUnit":"per_minute_audio","pricingAmount":0.0062,"pricingNote":"$0.0062 per minute (Universal-2 batch). $0.0094/min for streaming.","released":"2025-01-15","apiAvailable":true,"url":"https://www.assemblyai.com","notes":"Strong on long-form content (calls, podcasts) with built-in summarization, sentiment, and topic detection.","maxOutput":"WER 5.6% (English), 99 languages","features":["summarization","sentiment","topics","streaming"]},{"id":"google-chirp-2","name":"Chirp 2","provider":"Google","modality":"stt","pricingUnit":"per_minute_audio","pricingAmount":0.024,"pricingNote":"$0.024 per minute (V2 dynamic batching). Free tier of 60 min/month.","released":"2024-11-08","apiAvailable":true,"url":"https://cloud.google.com/speech-to-text/v2/docs/chirp_2-model","notes":"Google Cloud Speech-to-Text v2. Strongest multilingual coverage (125+ languages). Higher price reflects enterprise-grade latency SLAs.","maxOutput":"125+ languages","features":["multilingual","streaming","diarization","enterprise SLA"]}]}