|
71 | 71 | "DeepSeek-V3": {
|
72 | 72 | "description": "DeepSeek-V3 is a MoE model developed in-house by DeepSeek. Its performance surpasses that of other open-source models such as Qwen2.5-72B and Llama-3.1-405B in multiple assessments, and it stands on par with the world's top proprietary models like GPT-4o and Claude-3.5-Sonnet."
|
73 | 73 | },
|
74 |
| - "Doubao-1.5-thinking-pro-m": { |
75 |
| - "description": "Doubao-1.5 is a new deep thinking model (the m version comes with native multimodal deep reasoning capabilities) that excels in specialized fields such as mathematics, programming, scientific reasoning, and general tasks like creative writing, achieving or nearing top-tier performance in authoritative benchmarks such as AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output." |
76 |
| - }, |
77 |
| - "Doubao-1.5-thinking-vision-pro": { |
78 |
| - "description": "A brand new visual deep thinking model, equipped with stronger general multimodal understanding and reasoning capabilities, achieving SOTA performance in 37 out of 59 public evaluation benchmarks." |
79 |
| - }, |
80 |
| - "Doubao-1.5-vision-pro": { |
81 |
| - "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail comprehension, and instruction following capabilities." |
82 |
| - }, |
83 |
| - "Doubao-1.5-vision-pro-32k": { |
84 |
| - "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail understanding, and instruction-following capabilities." |
85 |
| - }, |
86 | 74 | "Doubao-lite-128k": {
|
87 |
| - "description": "Doubao-lite provides extreme response speed and better cost-effectiveness, offering flexible options for various customer scenarios. It supports inference and fine-tuning with a 128k context window." |
| 75 | + "description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 128k context window." |
88 | 76 | },
|
89 | 77 | "Doubao-lite-32k": {
|
90 |
| - "description": "Doubao-lite offers extreme response speed and better cost-effectiveness, providing flexible options for various customer scenarios. It supports inference and fine-tuning with a 32k context window." |
| 78 | + "description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 32k context window." |
91 | 79 | },
|
92 | 80 | "Doubao-lite-4k": {
|
93 |
| - "description": "Doubao-lite boasts extreme response speed and better cost-effectiveness, providing flexible options for various customer scenarios. It supports inference and fine-tuning with a 4k context window." |
| 81 | + "description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 4k context window." |
94 | 82 | },
|
95 | 83 | "Doubao-pro-128k": {
|
96 |
| - "description": "The best-performing primary model designed to handle complex tasks, achieving strong performance in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 128k context window." |
97 |
| - }, |
98 |
| - "Doubao-pro-256k": { |
99 |
| - "description": "The best-performing flagship model, suitable for handling complex tasks, with excellent results in reference Q&A, summarization, creative writing, text classification, role-playing, and more. It supports reasoning and fine-tuning with a 256k context window." |
| 84 | + "description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 128k context window." |
100 | 85 | },
|
101 | 86 | "Doubao-pro-32k": {
|
102 |
| - "description": "The best-performing primary model suited for complex tasks, showing great results in reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 32k context window." |
| 87 | + "description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 32k context window." |
103 | 88 | },
|
104 | 89 | "Doubao-pro-4k": {
|
105 |
| - "description": "The best-performing primary model suitable for handling complex tasks, demonstrating excellent performance in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 4k context window." |
106 |
| - }, |
107 |
| - "Doubao-vision-lite-32k": { |
108 |
| - "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities, as well as precise instruction comprehension. The model has demonstrated strong performance in image-text information extraction and image-based reasoning tasks, making it applicable to more complex and broader visual question-answering tasks." |
109 |
| - }, |
110 |
| - "Doubao-vision-pro-32k": { |
111 |
| - "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities, as well as precise instruction comprehension. The model has demonstrated strong performance in image-text information extraction and image-based reasoning tasks, making it applicable to more complex and broader visual question-answering tasks." |
| 90 | + "description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 4k context window." |
112 | 91 | },
|
113 | 92 | "ERNIE-3.5-128K": {
|
114 | 93 | "description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
|
|
914 | 893 | "doubao-1.5-thinking-pro": {
|
915 | 894 | "description": "Doubao-1.5 is a brand new deep thinking model that excels in specialized fields such as mathematics, programming, and scientific reasoning, as well as in general tasks like creative writing. It has achieved or is close to the top tier of industry standards in several authoritative benchmarks, including AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output."
|
916 | 895 | },
|
| 896 | + "doubao-1.5-thinking-pro-m": { |
| 897 | + "description": "Doubao-1.5 is a brand-new deep thinking model (the m version comes with native multimodal deep reasoning capabilities). It excels in specialized fields such as mathematics, programming, and scientific reasoning, as well as in general tasks like creative writing. It achieves or approaches top-tier industry performance on authoritative benchmarks such as AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output." |
| 898 | + }, |
| 899 | + "doubao-1.5-thinking-vision-pro": { |
| 900 | + "description": "A new visual deep thinking model with enhanced general multimodal understanding and reasoning capabilities, achieving state-of-the-art (SOTA) results on 37 out of 59 public evaluation benchmarks." |
| 901 | + }, |
| 902 | + "doubao-1.5-ui-tars": { |
| 903 | + "description": "Doubao-1.5-UI-TARS is a native agent model designed for graphical user interface (GUI) interaction. It seamlessly interacts with GUIs through human-like abilities such as perception, reasoning, and action." |
| 904 | + }, |
917 | 905 | "doubao-1.5-vision-lite": {
|
918 | 906 | "description": "Doubao-1.5-vision-lite is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail comprehension, and instruction following capabilities. It supports a context window of 128k and an output length of up to 16k tokens."
|
919 | 907 | },
|
| 908 | + "doubao-1.5-vision-pro": { |
| 909 | + "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model supporting image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detail comprehension, and instruction following." |
| 910 | + }, |
| 911 | + "doubao-1.5-vision-pro-32k": { |
| 912 | + "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model supporting image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detail comprehension, and instruction following." |
| 913 | + }, |
| 914 | + "doubao-lite-128k": { |
| 915 | + "description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 128k context window." |
| 916 | + }, |
| 917 | + "doubao-lite-32k": { |
| 918 | + "description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 32k context window." |
| 919 | + }, |
| 920 | + "doubao-lite-4k": { |
| 921 | + "description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 4k context window." |
| 922 | + }, |
| 923 | + "doubao-pro-256k": { |
| 924 | + "description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 256k context window." |
| 925 | + }, |
| 926 | + "doubao-pro-32k": { |
| 927 | + "description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 32k context window." |
| 928 | + }, |
920 | 929 | "doubao-seed-1.6": {
|
921 | 930 | "description": "Doubao-Seed-1.6 is a brand-new multimodal deep thinking model supporting auto, thinking, and non-thinking modes. In non-thinking mode, its performance significantly surpasses Doubao-1.5-pro/250115. It supports a 256k context window and output lengths up to 16k tokens."
|
922 | 931 | },
|
|
926 | 935 | "doubao-seed-1.6-thinking": {
|
927 | 936 | "description": "Doubao-Seed-1.6-thinking features greatly enhanced thinking capabilities. Compared to Doubao-1.5-thinking-pro, it further improves foundational skills such as coding, math, and logical reasoning, and supports visual understanding. It supports a 256k context window and output lengths up to 16k tokens."
|
928 | 937 | },
|
| 938 | + "doubao-vision-lite-32k": { |
| 939 | + "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities along with precise instruction comprehension. It demonstrates strong performance in image-text information extraction and image-based reasoning tasks, applicable to more complex and diverse visual question answering scenarios." |
| 940 | + }, |
| 941 | + "doubao-vision-pro-32k": { |
| 942 | + "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities along with precise instruction comprehension. It demonstrates strong performance in image-text information extraction and image-based reasoning tasks, applicable to more complex and diverse visual question answering scenarios." |
| 943 | + }, |
929 | 944 | "emohaa": {
|
930 | 945 | "description": "Emohaa is a psychological model with professional counseling capabilities, helping users understand emotional issues."
|
931 | 946 | },
|
|
1313 | 1328 | "gpt-4o-mini-search-preview": {
|
1314 | 1329 | "description": "GPT-4o mini Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
|
1315 | 1330 | },
|
| 1331 | + "gpt-4o-mini-transcribe": { |
| 1332 | + "description": "GPT-4o mini Transcribe is a speech-to-text model that uses GPT-4o mini to transcribe audio. Compared to the original Whisper models, it offers a lower word error rate and better language recognition and accuracy. Use it for more accurate transcriptions." |
| 1333 | + }, |
1316 | 1334 | "gpt-4o-mini-tts": {
|
1317 | 1335 | "description": "GPT-4o mini TTS is a text-to-speech model based on GPT-4o mini, providing high-quality speech generation at a lower cost."
|
1318 | 1336 | },
|
|
1322 | 1340 | "gpt-4o-realtime-preview-2024-10-01": {
|
1323 | 1341 | "description": "GPT-4o real-time version, supporting real-time audio and text input and output."
|
1324 | 1342 | },
|
1325 |
| - "gpt-4o-realtime-preview-2024-12-17": { |
1326 |
| - "description": "GPT-4o real-time version, supporting real-time audio and text input and output." |
| 1343 | + "gpt-4o-realtime-preview-2025-06-03": { |
| 1344 | + "description": "GPT-4o real-time version supporting real-time audio and text input and output." |
1327 | 1345 | },
|
1328 | 1346 | "gpt-4o-search-preview": {
|
1329 | 1347 | "description": "GPT-4o Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
|
1330 | 1348 | },
|
| 1349 | + "gpt-4o-transcribe": { |
| 1350 | + "description": "GPT-4o Transcribe is a speech-to-text model that uses GPT-4o to transcribe audio. Compared to the original Whisper models, it offers a lower word error rate and better language recognition and accuracy. Use it for more accurate transcriptions." |
| 1351 | + }, |
1331 | 1352 | "grok-2-1212": {
|
1332 | 1353 | "description": "This model has improved in accuracy, instruction adherence, and multilingual capabilities."
|
1333 | 1354 | },
|
|
1460 | 1481 | "jina-deepsearch-v1": {
|
1461 | 1482 | "description": "DeepSearch combines web search, reading, and reasoning for comprehensive investigations. You can think of it as an agent that takes on your research tasks—it conducts extensive searches and iterates multiple times before providing answers. This process involves ongoing research, reasoning, and problem-solving from various angles. This fundamentally differs from standard large models that generate answers directly from pre-trained data and traditional RAG systems that rely on one-time surface searches."
|
1462 | 1483 | },
|
| 1484 | + "kimi-k2-0711-preview": { |
| 1485 | + "description": "kimi-k2 is a MoE architecture base model with powerful coding and agent capabilities, featuring 1 trillion total parameters and 32 billion active parameters. In benchmark tests across general knowledge reasoning, programming, mathematics, and agent tasks, the K2 model outperforms other mainstream open-source models." |
| 1486 | + }, |
1463 | 1487 | "kimi-latest": {
|
1464 | 1488 | "description": "The Kimi Smart Assistant product uses the latest Kimi large model, which may include features that are not yet stable. It supports image understanding and will automatically select the 8k/32k/128k model as the billing model based on the length of the request context."
|
1465 | 1489 | },
|
|
2352 | 2376 | "description": "The v0-1.5-md model is suitable for everyday tasks and user interface (UI) generation."
|
2353 | 2377 | },
|
2354 | 2378 | "whisper-1": {
|
2355 |
| - "description": "A universal speech recognition model that supports multilingual speech recognition, speech translation, and language identification." |
| 2379 | + "description": "A general-purpose speech recognition model supporting multilingual speech recognition, speech translation, and language identification." |
2356 | 2380 | },
|
2357 | 2381 | "wizardlm2": {
|
2358 | 2382 | "description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant applications."
|
|
0 commit comments