{"data":[{"id":"deepseek/deepseek-v3-2","name":"DeepSeek V3.2","canonical_slug":"deepseek/deepseek-v3-2","description":"DeepSeek V3.2 — frontier open-source general-purpose model.","created":1775000000,"context_length":163840,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000004000","completion":"0.0000006000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":163840,"completion_tokens":16384},"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-v3-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-V3.2","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"deepseek-v3-2","name":"DeepSeek V3.2","canonical_slug":"deepseek/deepseek-v3-2","description":"DeepSeek V3.2 — frontier open-source general-purpose model.","created":1775000000,"context_length":163840,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000004000","completion":"0.0000006000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":163840,"completion_tokens":16384},"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-v3-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-V3.2","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"deepseek/deepseek-r1","name":"DeepSeek R1","canonical_slug":"deepseek/deepseek-r1","description":"DeepSeek R1 — 
o1-class reasoning model with transparent chain-of-thought.","created":1775000000,"context_length":163840,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000005000","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":163840,"completion_tokens":32768},"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-r1"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-R1","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"deepseek-r1","name":"DeepSeek R1","canonical_slug":"deepseek/deepseek-r1","description":"DeepSeek R1 — o1-class reasoning model with transparent chain-of-thought.","created":1775000000,"context_length":163840,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000005000","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":163840,"completion_tokens":32768},"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-r1"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-R1","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"meta/llama-3.3-70b","name":"Llama 3.3 70B Instruct","canonical_slug":"meta/llama-3.3-70b","description":"Meta Llama 3.3 70B Instruct — strong general-purpose with tool 
use.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000003000","completion":"0.0000004000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-3.3-70b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-3.3-70B-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"llama-3.3-70b","name":"Llama 3.3 70B Instruct","canonical_slug":"meta/llama-3.3-70b","description":"Meta Llama 3.3 70B Instruct — strong general-purpose with tool use.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000003000","completion":"0.0000004000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-3.3-70b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-3.3-70B-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"meta/llama-3.1-8b","name":"Llama 3.1 8B Instruct","canonical_slug":"meta/llama-3.1-8b","description":"Meta Llama 3.1 8B Instruct — fast, capable, 
cheap.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":4096},"top_provider":{"context_length":131072,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-3.1-8b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-3.1-8B-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"llama-3.1-8b","name":"Llama 3.1 8B Instruct","canonical_slug":"meta/llama-3.1-8b","description":"Meta Llama 3.1 8B Instruct — fast, capable, cheap.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":4096},"top_provider":{"context_length":131072,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-3.1-8b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-3.1-8B-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"mistralai/mistral-large-3","name":"Mistral Large 3","canonical_slug":"mistralai/mistral-large-3","description":"Mistral Large 3 — flagship European model, instruction 
following.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000004000","completion":"0.0000012000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-large-3"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"mistralai/Mistral-Large-3","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"mistral-large-3","name":"Mistral Large 3","canonical_slug":"mistralai/mistral-large-3","description":"Mistral Large 3 — flagship European model, instruction following.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000004000","completion":"0.0000012000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-large-3"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"mistralai/Mistral-Large-3","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"openai/gpt-oss-120b","name":"GPT-OSS 120B","canonical_slug":"openai/gpt-oss-120b","description":"OpenAI GPT-OSS 120B — open-weight reasoning 
model.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":16384},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-oss-120b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"gpt-oss-120b","name":"GPT-OSS 120B","canonical_slug":"openai/gpt-oss-120b","description":"OpenAI GPT-OSS 120B — open-weight reasoning model.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":16384},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-oss-120b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"deepseek/deepseek-v3-1","name":"DeepSeek V3.1","canonical_slug":"deepseek/deepseek-v3-1","description":"DeepSeek V3.1 — adds tool calling on top of the V3 
family.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000002700","completion":"0.0000011000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-v3-1"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-V3.1","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"deepseek-v3-1","name":"DeepSeek V3.1","canonical_slug":"deepseek/deepseek-v3-1","description":"DeepSeek V3.1 — adds tool calling on top of the V3 family.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek"},"pricing":{"prompt":"0.0000002700","completion":"0.0000011000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/deepseek-v3-1"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"deepseek-ai/DeepSeek-V3.1","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"meta/llama-4-maverick","name":"Llama 4 Maverick 17B","canonical_slug":"meta/llama-4-maverick","description":"Meta Llama 4 Maverick — first Llama 4 generation, 1M context, 12 languages, 
image+text.","created":1775000000,"context_length":1000000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":"llama4"},"pricing":{"prompt":"0.0000005000","completion":"0.0000015000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1000000,"completion_tokens":8192},"top_provider":{"context_length":1000000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-4-maverick"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-4-Maverick-17B-128E-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"llama-4-maverick","name":"Llama 4 Maverick 17B","canonical_slug":"meta/llama-4-maverick","description":"Meta Llama 4 Maverick — first Llama 4 generation, 1M context, 12 languages, image+text.","created":1775000000,"context_length":1000000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":"llama4"},"pricing":{"prompt":"0.0000005000","completion":"0.0000015000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1000000,"completion_tokens":8192},"top_provider":{"context_length":1000000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/llama-4-maverick"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"meta-llama/Llama-4-Maverick-17B-128E-Instruct","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"cohere/cohere-command-a","name":"Cohere Command-A","canonical_slug":"cohere/cohere-command-a","description":"Cohere Command-A — multilingual (10 languages), strong RAG and 
tool-calling.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":"command"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/cohere-command-a"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"cohere-command-a","name":"Cohere Command-A","canonical_slug":"cohere/cohere-command-a","description":"Cohere Command-A — multilingual (10 languages), strong RAG and tool-calling.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":"command"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/cohere-command-a"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"mistralai/mistral-medium","name":"Mistral Medium 2505","canonical_slug":"mistralai/mistral-medium","description":"Mistral Medium 2505 — mid-tier between Nemo and Large 3, 
image+text.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000004000","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-medium"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"mistral-medium","name":"Mistral Medium 2505","canonical_slug":"mistralai/mistral-medium","description":"Mistral Medium 2505 — mid-tier between Nemo and Large 3, image+text.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000004000","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":8192},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-medium"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"x-ai/grok-4-1-fast","name":"xAI Grok 4.1 Fast","canonical_slug":"x-ai/grok-4-1-fast","description":"xAI Grok 4.1 Fast (reasoning) — fast Grok variant with reasoning + tool 
use.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000002000","completion":"0.0000005000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/grok-4-1-fast"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"grok-4-1-fast","name":"xAI Grok 4.1 Fast","canonical_slug":"x-ai/grok-4-1-fast","description":"xAI Grok 4.1 Fast (reasoning) — fast Grok variant with reasoning + tool use.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000002000","completion":"0.0000005000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/grok-4-1-fast"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"microsoft/phi-4-mini-reasoning","name":"Phi-4 Mini Reasoning","canonical_slug":"microsoft/phi-4-mini-reasoning","description":"Microsoft Phi-4 Mini — small, cheap reasoning model with thinking 
traces.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000001000","completion":"0.0000003000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/phi-4-mini-reasoning"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"microsoft/Phi-4-mini-reasoning","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"phi-4-mini-reasoning","name":"Phi-4 Mini Reasoning","canonical_slug":"microsoft/phi-4-mini-reasoning","description":"Microsoft Phi-4 Mini — small, cheap reasoning model with thinking traces.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000001000","completion":"0.0000003000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/phi-4-mini-reasoning"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"microsoft/Phi-4-mini-reasoning","powered_by":null,"hosting":"Azure AI Foundry"},{"id":"openai/gpt-5","name":"GPT-5","canonical_slug":"openai/gpt-5","description":"OpenAI GPT-5 — flagship proprietary 
model.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":16384},"top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-5","name":"GPT-5","canonical_slug":"openai/gpt-5","description":"OpenAI GPT-5 — flagship proprietary model.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":16384},"top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-5.2","name":"GPT-5.2","canonical_slug":"openai/gpt-5.2","description":"OpenAI GPT-5.2 — improved reasoning and 
speed.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":16384},"top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-5.2","name":"GPT-5.2","canonical_slug":"openai/gpt-5.2","description":"OpenAI GPT-5.2 — improved reasoning and speed.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":16384},"top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-5.5","name":"GPT-5.5","canonical_slug":"openai/gpt-5.5","description":"OpenAI GPT-5.5 — flagship, 1.05M context, image+text 
reasoning.","created":1775000000,"context_length":1050000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000150000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1050000,"completion_tokens":128000},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-5.5","name":"GPT-5.5","canonical_slug":"openai/gpt-5.5","description":"OpenAI GPT-5.5 — flagship, 1.05M context, image+text reasoning.","created":1775000000,"context_length":1050000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000150000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1050000,"completion_tokens":128000},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-5.4","name":"GPT-5.4","canonical_slug":"openai/gpt-5.4","description":"OpenAI GPT-5.4 — production workhorse, 1.05M context, strong agentic + 
long-doc.","created":1775000000,"context_length":1050000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000150000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1050000,"completion_tokens":128000},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.4"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-5.4","name":"GPT-5.4","canonical_slug":"openai/gpt-5.4","description":"OpenAI GPT-5.4 — production workhorse, 1.05M context, strong agentic + long-doc.","created":1775000000,"context_length":1050000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000025000","completion":"0.0000150000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":1050000,"completion_tokens":128000},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.4"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-5.4-mini","name":"GPT-5.4 Mini","canonical_slug":"openai/gpt-5.4-mini","description":"OpenAI GPT-5.4 Mini — drop-in mid-tier, 400K context, 
reasoning.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":128000},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.4-mini"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-5.4-mini","name":"GPT-5.4 Mini","canonical_slug":"openai/gpt-5.4-mini","description":"OpenAI GPT-5.4 Mini — drop-in mid-tier, 400K context, reasoning.","created":1775000000,"context_length":400000,"architecture":{"input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":400000,"completion_tokens":128000},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed","frequency_penalty","presence_penalty"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-5.4-mini"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"moonshotai/kimi-k2.5","name":"Kimi K2.5","canonical_slug":"moonshotai/kimi-k2.5","description":"Moonshot AI Kimi K2.5 — strong reasoning model with thinking 
traces.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000006000","completion":"0.0000024000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kimi-k2.5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"kimi-k2.5","name":"Kimi K2.5","canonical_slug":"moonshotai/kimi-k2.5","description":"Moonshot AI Kimi K2.5 — strong reasoning model with thinking traces.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000006000","completion":"0.0000024000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kimi-k2.5"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"moonshotai/kimi-k2.6","name":"Kimi K2.6","canonical_slug":"moonshotai/kimi-k2.6","description":"Moonshot AI Kimi K2.6 — latest frontier reasoning model with thinking 
traces.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000006000","completion":"0.0000024000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kimi-k2.6"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"kimi-k2.6","name":"Kimi K2.6","canonical_slug":"moonshotai/kimi-k2.6","description":"Moonshot AI Kimi K2.6 — latest frontier reasoning model with thinking traces.","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000006000","completion":"0.0000024000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":16384},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kimi-k2.6"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"openai/gpt-image-2","name":"GPT-Image-2","canonical_slug":"openai/gpt-image-2","description":"OpenAI GPT-Image-2 — flagship image generation model, photoreal + 
editorial.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size","quality","style","response_format"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-image-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-image-2","name":"GPT-Image-2","canonical_slug":"openai/gpt-image-2","description":"OpenAI GPT-Image-2 — flagship image generation model, photoreal + editorial.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"GPT-5","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size","quality","style","response_format"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-image-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-image-1-mini","name":"GPT-Image-1 Mini","canonical_slug":"openai/gpt-image-1-mini","description":"OpenAI GPT-Image-1 Mini — cheap image generation, good for thumbnails and 
batch.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size","quality","response_format"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-image-1-mini"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-image-1-mini","name":"GPT-Image-1 Mini","canonical_slug":"openai/gpt-image-1-mini","description":"OpenAI GPT-Image-1 Mini — cheap image generation, good for thumbnails and batch.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size","quality","response_format"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-image-1-mini"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"black-forest-labs/flux-2-pro","name":"FLUX.2 Pro","canonical_slug":"black-forest-labs/flux-2-pro","description":"Black Forest Labs FLUX.2 Pro — open frontier image gen, strong typography + 
composition.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/flux-2-pro"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"flux-2-pro","name":"FLUX.2 Pro","canonical_slug":"black-forest-labs/flux-2-pro","description":"Black Forest Labs FLUX.2 Pro — open frontier image gen, strong typography + composition.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["image"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["n","size"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/flux-2-pro"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure AI Foundry"},{"id":"openai/whisper-large-v3-turbo","name":"Whisper Large V3 Turbo","canonical_slug":"openai/whisper-large-v3-turbo","description":"OpenAI Whisper Large V3 Turbo — V3 distilled to ~6× faster decoding with near-identical WER. 
Self-hosted on our own GPU fleet.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"Whisper","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/whisper-large-v3-turbo"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"openai/whisper-large-v3-turbo","powered_by":null,"hosting":null},{"id":"whisper-large-v3-turbo","name":"Whisper Large V3 Turbo","canonical_slug":"openai/whisper-large-v3-turbo","description":"OpenAI Whisper Large V3 Turbo — V3 distilled to ~6× faster decoding with near-identical WER. Self-hosted on our own GPU fleet.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"Whisper","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/whisper-large-v3-turbo"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"openai/whisper-large-v3-turbo","powered_by":null,"hosting":null},{"id":"openai/whisper-large-v3-turbo-stream","name":"Whisper Large V3 Turbo (streaming)","canonical_slug":"openai/whisper-large-v3-turbo-stream","description":"Real-time streaming ASR over WebSocket. Deepgram-shape protocol at /v1/listen. 
Same Whisper Turbo model, VAD-segmented, partial + final transcripts. Self-hosted on our own GPU fleet, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"Whisper","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","encoding","sample_rate","interim_results","endpointing"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/whisper-large-v3-turbo-stream"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"openai/whisper-large-v3-turbo","powered_by":null,"hosting":null},{"id":"whisper-large-v3-turbo-stream","name":"Whisper Large V3 Turbo (streaming)","canonical_slug":"openai/whisper-large-v3-turbo-stream","description":"Real-time streaming ASR over WebSocket. Deepgram-shape protocol at /v1/listen. Same Whisper Turbo model, VAD-segmented, partial + final transcripts. 
Self-hosted on our own GPU fleet, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"Whisper","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","encoding","sample_rate","interim_results","endpointing"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/whisper-large-v3-turbo-stream"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"openai/whisper-large-v3-turbo","powered_by":null,"hosting":null},{"id":"nvidia/parakeet-tdt-0-6b-stream","name":"Parakeet TDT 0.6B v2 (streaming)","canonical_slug":"nvidia/parakeet-tdt-0-6b-stream","description":"Real-time streaming ASR over WebSocket. Deepgram-shape protocol at /v1/listen. NVIDIA NeMo Parakeet-TDT-0.6B v2 — English-only, SOTA Open-ASR-Leaderboard WER, VAD-segmented chunked decode. 
Self-hosted on our own GPU fleet, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"SentencePiece","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","encoding","sample_rate","interim_results","endpointing"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/parakeet-tdt-0-6b-stream"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"nvidia/parakeet-tdt-0.6b-v2","powered_by":null,"hosting":null},{"id":"parakeet-tdt-0-6b-stream","name":"Parakeet TDT 0.6B v2 (streaming)","canonical_slug":"nvidia/parakeet-tdt-0-6b-stream","description":"Real-time streaming ASR over WebSocket. Deepgram-shape protocol at /v1/listen. NVIDIA NeMo Parakeet-TDT-0.6B v2 — English-only, SOTA Open-ASR-Leaderboard WER, VAD-segmented chunked decode. 
Self-hosted on our own GPU fleet, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"SentencePiece","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","encoding","sample_rate","interim_results","endpointing"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/parakeet-tdt-0-6b-stream"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"nvidia/parakeet-tdt-0.6b-v2","powered_by":null,"hosting":null},{"id":"duguetlabs/piper-tts","name":"Piper TTS (multilingual)","canonical_slug":"duguetlabs/piper-tts","description":"Neural text-to-speech in 10 languages (EN, DE, FR, ES, NL, SV, DA, IT, FI, RU). Streams PCM16 audio; powers the speech output of /v1/realtime. Self-hosted, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["audio"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","voice","sample_rate"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/piper-tts"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"rhasspy/piper-voices","powered_by":null,"hosting":null},{"id":"piper-tts","name":"Piper TTS (multilingual)","canonical_slug":"duguetlabs/piper-tts","description":"Neural text-to-speech in 10 languages (EN, DE, FR, ES, NL, SV, DA, IT, FI, RU). Streams PCM16 audio; powers the speech output of /v1/realtime. 
Self-hosted, EU-resident.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["audio"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","voice","sample_rate"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/piper-tts"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"rhasspy/piper-voices","powered_by":null,"hosting":null},{"id":"duguetlabs/kataleptic-realtime","name":"Kataleptic Realtime (speech-to-speech)","canonical_slug":"duguetlabs/kataleptic-realtime","description":"OpenAI Realtime API-compatible voice agent endpoint: WebSocket at /v1/realtime. Cascade of streaming Whisper STT, any Kataleptic chat model (pick via ?model=), and Piper TTS — first reply audio in roughly 250 ms after turn detection. Server-side VAD with barge-in, per-utterance language auto-detection across 10 languages, transcript events both directions (incl. detected language). Billed per audio minute (STT in + TTS out) plus chat tokens at the selected model's rate. Self-hosted, EU-resident. 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":4096},"top_provider":{"context_length":131072,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["model","instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kataleptic-realtime"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"kataleptic-realtime","name":"Kataleptic Realtime (speech-to-speech)","canonical_slug":"duguetlabs/kataleptic-realtime","description":"OpenAI Realtime API-compatible voice agent endpoint: WebSocket at /v1/realtime. Cascade of streaming Whisper STT, any Kataleptic chat model (pick via ?model=), and Piper TTS — first reply audio in roughly 250 ms after turn detection. Server-side VAD with barge-in, per-utterance language auto-detection across 10 languages, transcript events both directions (incl. detected language). Billed per audio minute (STT in + TTS out) plus chat tokens at the selected model's rate. Self-hosted, EU-resident. 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":131072,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":131072,"completion_tokens":4096},"top_provider":{"context_length":131072,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["model","instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kataleptic-realtime"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"duguetlabs/kataleptic-realtime-hd","name":"Kataleptic Realtime HD (Azure Voice Live)","canonical_slug":"duguetlabs/kataleptic-realtime-hd","description":"Premium tier of /v1/realtime: same OpenAI Realtime-compatible WebSocket, served by Azure Voice Live in Sweden Central — studio-grade HD neural voices (600+), deep noise suppression and echo cancellation, semantic turn detection. Accepts G.711 μ-law/A-law for telephony alongside PCM16. EU-resident processing, exact transcripts. Higher latency than the standard cascade (~1.2 s to first audio vs ~0.25 s). Billed per token at Azure rates with a service margin (≈$0.03/min typical). 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000009900","completion":"0.0000039600","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":4096},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kataleptic-realtime-hd"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"kataleptic-realtime-hd","name":"Kataleptic Realtime HD (Azure Voice Live)","canonical_slug":"duguetlabs/kataleptic-realtime-hd","description":"Premium tier of /v1/realtime: same OpenAI Realtime-compatible WebSocket, served by Azure Voice Live in Sweden Central — studio-grade HD neural voices (600+), deep noise suppression and echo cancellation, semantic turn detection. Accepts G.711 μ-law/A-law for telephony alongside PCM16. EU-resident processing, exact transcripts. Higher latency than the standard cascade (~1.2 s to first audio vs ~0.25 s). Billed per token at Azure rates with a service margin (≈$0.03/min typical). 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"none","instruct_type":"none"},"pricing":{"prompt":"0.0000009900","completion":"0.0000039600","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":4096},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/kataleptic-realtime-hd"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"openai/gpt-realtime-2","name":"GPT Realtime 2 (native speech-to-speech)","canonical_slug":"openai/gpt-realtime-2","description":"Native speech-to-speech tier of /v1/realtime: OpenAI gpt-realtime-2 served from Azure AI Foundry behind the same WebSocket protocol. Best-in-class prosody and expressiveness — the model hears tone, not just words. Accepts G.711 μ-law/A-law for telephony alongside PCM16. Notes: Global inference routing (not EU-pinned); transcripts are model approximations, and caller transcripts require enabling input_audio_transcription in session.update. For EU residency or exact call logs use the standard or HD tiers. Billed per text + audio token (≈$0.07/min typical). 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":32000,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"GPT","instruct_type":"none"},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32000,"completion_tokens":4096},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-realtime-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"gpt-realtime-2","name":"GPT Realtime 2 (native speech-to-speech)","canonical_slug":"openai/gpt-realtime-2","description":"Native speech-to-speech tier of /v1/realtime: OpenAI gpt-realtime-2 served from Azure AI Foundry behind the same WebSocket protocol. Best-in-class prosody and expressiveness — the model hears tone, not just words. Accepts G.711 μ-law/A-law for telephony alongside PCM16. Notes: Global inference routing (not EU-pinned); transcripts are model approximations, and caller transcripts require enabling input_audio_transcription in session.update. For EU residency or exact call logs use the standard or HD tiers. Billed per text + audio token (≈$0.07/min typical). 
Docs: kataleptic.com/docs/realtime/","created":1775000000,"context_length":32000,"architecture":{"input_modalities":["audio","text"],"output_modalities":["audio","text"],"tokenizer":"GPT","instruct_type":"none"},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32000,"completion_tokens":4096},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["instructions","voice","turn_detection","temperature","max_output_tokens"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-realtime-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":null},{"id":"openai/gpt-4o-transcribe","name":"GPT-4o Transcribe","canonical_slug":"openai/gpt-4o-transcribe","description":"OpenAI GPT-4o Transcribe — higher-accuracy ASR with semantic context.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"GPT-4o","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-4o-transcribe"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-4o-transcribe","name":"GPT-4o Transcribe","canonical_slug":"openai/gpt-4o-transcribe","description":"OpenAI GPT-4o Transcribe — higher-accuracy ASR with semantic 
context.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"GPT-4o","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-4o-transcribe"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/gpt-4o-transcribe-diarize","name":"GPT-4o Transcribe (diarized)","canonical_slug":"openai/gpt-4o-transcribe-diarize","description":"OpenAI GPT-4o Transcribe with speaker diarization — multi-speaker meetings.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"GPT-4o","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-4o-transcribe-diarize"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"gpt-4o-transcribe-diarize","name":"GPT-4o Transcribe (diarized)","canonical_slug":"openai/gpt-4o-transcribe-diarize","description":"OpenAI GPT-4o Transcribe with speaker diarization — multi-speaker 
meetings.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["audio"],"output_modalities":["text"],"tokenizer":"GPT-4o","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["language","prompt","response_format","temperature"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gpt-4o-transcribe-diarize"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"openai/sora-2","name":"Sora 2","canonical_slug":"openai/sora-2","description":"OpenAI Sora 2 — text-to-video. Async: POST /v1/videos, poll GET /v1/videos/{id} until status=completed, then GET /v1/videos/{id}/content for the MP4. Sizes: 720x1280, 1280x720, 1024x1792, 1792x1024.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["video"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["prompt","size","seconds","model"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/sora-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"sora-2","name":"Sora 2","canonical_slug":"openai/sora-2","description":"OpenAI Sora 2 — text-to-video. Async: POST /v1/videos, poll GET /v1/videos/{id} until status=completed, then GET /v1/videos/{id}/content for the MP4. 
Sizes: 720x1280, 1280x720, 1024x1792, 1792x1024.","created":1775000000,"context_length":0,"architecture":{"input_modalities":["text"],"output_modalities":["video"],"tokenizer":"Other","instruct_type":"none"},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":0,"completion_tokens":0},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["prompt","size","seconds","model"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/sora-2"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":null,"powered_by":null,"hosting":"Azure OpenAI"},{"id":"qwen/qwen3-8b","name":"Qwen 3 8B","canonical_slug":"qwen/qwen3-8b","description":"Qwen 3 8B — fast multilingual model, self-hosted on our own GPU fleet.","created":1775000000,"context_length":32768,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32768,"completion_tokens":8192},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/qwen3-8b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"Qwen/Qwen3-8B","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"qwen3-8b","name":"Qwen 3 8B","canonical_slug":"qwen/qwen3-8b","description":"Qwen 3 8B — fast multilingual model, self-hosted on our own GPU 
fleet.","created":1775000000,"context_length":32768,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32768,"completion_tokens":8192},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/qwen3-8b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"Qwen/Qwen3-8B","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"qwen/qwen2.5-coder-7b","name":"Qwen 2.5 Coder 7B","canonical_slug":"qwen/qwen2.5-coder-7b","description":"Qwen 2.5 Coder 7B — code-specialised self-hosted model.","created":1775000000,"context_length":32768,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32768,"completion_tokens":8192},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/qwen2.5-coder-7b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"Qwen/Qwen2.5-Coder-7B-Instruct","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"qwen2.5-coder-7b","name":"Qwen 2.5 Coder 7B","canonical_slug":"qwen/qwen2.5-coder-7b","description":"Qwen 2.5 Coder 7B — code-specialised self-hosted 
model.","created":1775000000,"context_length":32768,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":32768,"completion_tokens":8192},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/qwen2.5-coder-7b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"Qwen/Qwen2.5-Coder-7B-Instruct","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"mistralai/mistral-nemo-12b","name":"Mistral Nemo 12B","canonical_slug":"mistralai/mistral-nemo-12b","description":"Mistral Nemo 12B — strong reasoning, self-hosted on our own GPU fleet.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000001000","completion":"0.0000001500","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-nemo-12b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"mistralai/Mistral-Nemo-Instruct-2407","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"mistral-nemo-12b","name":"Mistral Nemo 12B","canonical_slug":"mistralai/mistral-nemo-12b","description":"Mistral Nemo 12B — strong reasoning, self-hosted on our own GPU 
fleet.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000001000","completion":"0.0000001500","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/mistral-nemo-12b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"mistralai/Mistral-Nemo-Instruct-2407","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"google/gemma3-27b","name":"Gemma 3 27B","canonical_slug":"google/gemma3-27b","description":"Google Gemma 3 27B — strong vision + text model, self-hosted on our own GPU fleet.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":"gemma"},"pricing":{"prompt":"0.0000001500","completion":"0.0000002000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gemma3-27b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"google/gemma-3-27b-it","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"gemma3-27b","name":"Gemma 3 27B","canonical_slug":"google/gemma3-27b","description":"Google Gemma 3 27B — strong vision + text model, self-hosted on our own GPU 
fleet.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":"gemma"},"pricing":{"prompt":"0.0000001500","completion":"0.0000002000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":8192},"top_provider":{"context_length":128000,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/gemma3-27b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"google/gemma-3-27b-it","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"thudm/glm4-9b","name":"GLM-4 9B","canonical_slug":"thudm/glm4-9b","description":"Zhipu AI GLM-4 9B — bilingual (EN/ZH) chat model, self-hosted.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GLM","instruct_type":"chatglm"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":4096},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/glm4-9b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"THUDM/glm-4-9b-chat","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"glm4-9b","name":"GLM-4 9B","canonical_slug":"thudm/glm4-9b","description":"Zhipu AI GLM-4 9B — bilingual (EN/ZH) chat model, 
self-hosted.","created":1775000000,"context_length":128000,"architecture":{"input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GLM","instruct_type":"chatglm"},"pricing":{"prompt":"0.0000000500","completion":"0.0000000800","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":128000,"completion_tokens":4096},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","top_p","stop","max_tokens","seed"],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/glm4-9b"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"THUDM/glm-4-9b-chat","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"nomic-ai/nomic-embed","name":"Nomic Embed Text","canonical_slug":"nomic-ai/nomic-embed","description":"High-quality text embeddings, self-hosted.","created":1775000000,"context_length":8192,"architecture":{"input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000200","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":8192,"completion_tokens":0},"top_provider":{"context_length":8192,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/nomic-embed"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"nomic-ai/nomic-embed-text-v1.5","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"},{"id":"nomic-embed","name":"Nomic Embed Text","canonical_slug":"nomic-ai/nomic-embed","description":"High-quality text embeddings, 
self-hosted.","created":1775000000,"context_length":8192,"architecture":{"input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000200","completion":"0.0000000000","image":"0","request":"0"},"per_request_limits":{"prompt_tokens":8192,"completion_tokens":0},"top_provider":{"context_length":8192,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"default_parameters":{},"links":{"details":"https://duguetlabs.com/models/nomic-embed"},"knowledge_cutoff":null,"expiration_date":null,"hugging_face_id":"nomic-ai/nomic-embed-text-v1.5","powered_by":"xinity-ai · github.com/xinity-ai","hosting":"self-hosted (sovereign fleet, orchestrated by xinity-ai)"}]}