# Hugging Face Inference Providers API
# Unified proxy layer providing access to 15+ inference partners through a single OpenAI-compatible endpoint.
# Document header: the OpenAPI version this description targets, followed by
# the API's descriptive metadata (title, summary, version, legal and contact
# links).
openapi: "3.1.0"
info:
  title: "Hugging Face Inference Providers API"
  # Folded scalar: the wrapped lines below are joined into a single paragraph.
  description: >-
    Unified proxy layer providing access to 15+ inference partners
    through a single OpenAI-compatible endpoint. Route requests to
    providers like AWS, Google, Azure, Together, Fireworks, and more
    through a consistent API with automatic model routing and provider
    selection.
  # Quoted so the version is always read as a string.
  version: "1.0.0"
  termsOfService: "https://huggingface.co/terms-of-service"
  contact:
    name: "Hugging Face Support"
    url: "https://huggingface.co/support"
  license:
    name: "Apache 2.0"
    url: "https://www.apache.org/licenses/LICENSE-2.0"
# The one server entry: the base URL that every path in this document is
# resolved against.
servers:
  - url: "https://router.huggingface.co"
    description: "Hugging Face Inference Providers router"

# Document-level security requirement: bearerAuth with no scopes.
security:
  - bearerAuth: []

# Tag catalogue used to group the operations declared under `paths`.
tags:
  - {name: "Chat Completions", description: "OpenAI-compatible chat completion endpoints"}
  - {name: "Text Generation", description: "Text generation endpoints"}
  - {name: "Embeddings", description: "Text embedding endpoints"}
  - {name: "Image Generation", description: "Text-to-image generation"}
  - {name: "Audio", description: "Speech-to-text and text-to-speech"}
  - {name: "Models", description: "Model listing and information"}
paths:
# POST /v1/chat/completions
# Request body: ChatCompletionRequest (JSON).
# Responses: 200 as JSON (ChatCompletionResponse) or as an event stream
# (ChatCompletionStreamResponse); 400, 401, 404, 429 and 502 return Error.
# The example payloads are kept valid against the referenced schemas so that
# mock servers and contract tests driven by them do not emit invalid data.
/v1/chat/completions:
  post:
    summary: Create Chat Completion
    description: >-
      Create a chat completion using an OpenAI-compatible API. Supports
      conversational LLMs and Vision-Language Models (VLMs). Requests are
      routed to the optimal inference provider automatically.
    operationId: createChatCompletion
    tags:
      - Chat Completions
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ChatCompletionRequest'
          examples:
            CreatechatcompletionRequestExample:
              summary: Default createChatCompletion request
              x-microcks-default: true
              value:
                model: example_value
                messages:
                  - role: system
                    content: example_value
                    name: Example Title
                  # Each tool_calls entry must carry id, type and
                  # function.name (required by the request schema).
                  - role: assistant
                    tool_calls:
                      - id: '500123'
                        type: function
                        function:
                          name: Example Title
                          arguments: '{}'
                  # Tool result that answers the call above via tool_call_id.
                  - role: tool
                    content: example_value
                    tool_call_id: '500123'
                # Penalties are bounded to [-2.0, 2.0] by the schema.
                frequency_penalty: 0.5
                logprobs: true
                max_tokens: 10
                presence_penalty: 0.5
                reasoning_effort: low
                # response_format is an object discriminated by `type`.
                response_format:
                  type: text
                seed: 10
                stop:
                  - example_value
                stream: true
                stream_options:
                  include_usage: true
                # temperature is bounded to [0, 2] by the schema.
                temperature: 0.7
                # tool_choice is auto | none | required, or a function object.
                tool_choice: auto
                tool_prompt: example_value
                tools:
                  - type: function
                    function:
                      name: Example Title
                      description: A sample description.
                      # parameters must be an object (a JSON Schema).
                      parameters:
                        type: object
                        properties: {}
                # top_logprobs is bounded to [0, 5]; top_p to [0, 1].
                top_logprobs: 5
                top_p: 0.9
    responses:
      '200':
        description: Chat completion response
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionResponse'
            examples:
              Createchatcompletion200Example:
                summary: Default createChatCompletion 200 response
                x-microcks-default: true
                value:
                  id: abc123
                  # `object` is a const in ChatCompletionResponse.
                  object: chat.completion
                  created: 10
                  model: example_value
                  system_fingerprint: example_value
                  choices:
                    - index: 10
                      message:
                        role: assistant
                        content: example_value
                        # Declared as an array in the schema.
                        tool_calls: []
                      finish_reason: stop
                      logprobs:
                        # Declared as an array in the schema.
                        content: []
                  usage:
                    prompt_tokens: 10
                    completion_tokens: 10
                    total_tokens: 10
          text/event-stream:
            schema:
              $ref: '#/components/schemas/ChatCompletionStreamResponse'
            examples:
              Createchatcompletion200Example:
                summary: Default createChatCompletion 200 response
                x-microcks-default: true
                # NOTE(review): ChatCompletionStreamResponse is defined
                # outside the visible part of this file. `object` follows the
                # OpenAI chunk convention and the collection fields mirror the
                # non-streaming example; confirm against the schema.
                value:
                  id: abc123
                  object: chat.completion.chunk
                  created: 10
                  model: example_value
                  system_fingerprint: example_value
                  choices:
                    - index: 10
                      delta:
                        role: assistant
                        content: example_value
                        tool_calls: []
                      finish_reason: stop
                      logprobs:
                        content: []
                  usage:
                    prompt_tokens: 10
                    completion_tokens: 10
                    total_tokens: 10
      '400':
        description: Bad request - invalid parameters
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createchatcompletion400Example:
                summary: Default createChatCompletion 400 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
      '401':
        description: Unauthorized - invalid or missing API token
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createchatcompletion401Example:
                summary: Default createChatCompletion 401 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
      '404':
        description: Model not found or not available through any provider
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createchatcompletion404Example:
                summary: Default createChatCompletion 404 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
      '429':
        description: Rate limit exceeded
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createchatcompletion429Example:
                summary: Default createChatCompletion 429 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
      '502':
        description: Provider error
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createchatcompletion502Example:
                summary: Default createChatCompletion 502 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# POST /v1/completions
# Request body: CompletionRequest (JSON).
# Responses: 200 as JSON (CompletionResponse) or as an event stream
# (CompletionStreamResponse); 400 returns Error.
# NOTE(review): the three completion schemas are defined outside the visible
# part of this file. The example values below follow the OpenAI completion
# format this operation says it is compatible with, and use the same
# temperature/top_p ranges this file declares for chat completions; confirm
# against the actual schemas.
/v1/completions:
  post:
    summary: Create Text Completion
    description: >-
      Create a text completion for a given prompt. Supports standard
      completion parameters compatible with the OpenAI API format.
    operationId: createCompletion
    tags:
      - Text Generation
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CompletionRequest'
          examples:
            CreatecompletionRequestExample:
              summary: Default createCompletion request
              x-microcks-default: true
              value:
                model: example_value
                prompt: example_value
                max_tokens: 10
                # Previously 42.5, far outside any sampling range.
                temperature: 0.7
                top_p: 0.9
                stop:
                  - example_value
                stream: true
                seed: 10
    responses:
      '200':
        description: Text completion response
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionResponse'
            examples:
              Createcompletion200Example:
                summary: Default createCompletion 200 response
                x-microcks-default: true
                value:
                  id: abc123
                  object: text_completion
                  created: 10
                  model: example_value
                  choices:
                    - text: example_value
                      index: 10
                      finish_reason: stop
                  usage:
                    prompt_tokens: 10
                    completion_tokens: 10
                    total_tokens: 10
          text/event-stream:
            schema:
              $ref: '#/components/schemas/CompletionStreamResponse'
            examples:
              Createcompletion200Example:
                summary: Default createCompletion 200 response
                x-microcks-default: true
                value:
                  id: abc123
                  object: text_completion
                  created: 10
                  model: example_value
                  choices:
                    - text: example_value
                      index: 10
                      finish_reason: stop
      '400':
        description: Bad request
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createcompletion400Example:
                summary: Default createCompletion 400 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# POST /v1/embeddings
# Request body: EmbeddingRequest (JSON).
# Responses: 200 returns EmbeddingResponse; 400 returns Error.
# NOTE(review): EmbeddingRequest/EmbeddingResponse are defined outside the
# visible part of this file. The 200 example now lists numbers in `embedding`
# (it previously held an empty object) and uses OpenAI-style `object` values;
# confirm against the schema.
/v1/embeddings:
  post:
    summary: Create Embeddings
    description: >-
      Create embedding vectors for input text. Returns dense vector
      representations useful for semantic search, clustering, and
      classification tasks.
    operationId: createEmbeddings
    tags:
      - Embeddings
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/EmbeddingRequest'
          examples:
            CreateembeddingsRequestExample:
              summary: Default createEmbeddings request
              x-microcks-default: true
              value:
                model: example_value
                input: example_value
                encoding_format: float
    responses:
      '200':
        description: Embedding response
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingResponse'
            examples:
              Createembeddings200Example:
                summary: Default createEmbeddings 200 response
                x-microcks-default: true
                value:
                  object: list
                  data:
                    - object: embedding
                      index: 10
                      # A vector of floats, matching encoding_format: float
                      # in the request example.
                      embedding:
                        - 0.0023
                        - -0.0091
                        - 0.0154
                  model: example_value
                  usage:
                    prompt_tokens: 10
                    total_tokens: 10
      '400':
        description: Bad request
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Createembeddings400Example:
                summary: Default createEmbeddings 400 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# POST /v1/images/generations
# Request body: ImageGenerationRequest (JSON).
# Responses: 200 returns ImageGenerationResponse; 400 returns Error.
/v1/images/generations:
  post:
    summary: 'Generate Images'
    description: 'Generate images from text prompts using diffusion models available through inference providers.'
    operationId: 'createImageGeneration'
    tags: ['Image Generation']
    requestBody:
      required: true
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ImageGenerationRequest'}
          examples:
            CreateimagegenerationRequestExample:
              summary: 'Default createImageGeneration request'
              x-microcks-default: true
              value:
                model: 'example_value'
                prompt: 'example_value'
                n: 10
                size: '256x256'
                response_format: 'url'
                negative_prompt: 'example_value'
                num_inference_steps: 10
                guidance_scale: 42.5
    responses:
      '200':
        description: 'Generated images'
        content:
          application/json:
            schema: {$ref: '#/components/schemas/ImageGenerationResponse'}
            examples:
              Createimagegeneration200Example:
                summary: 'Default createImageGeneration 200 response'
                x-microcks-default: true
                value:
                  created: 10
                  data:
                    - url: 'https://www.example.com'
                      b64_json: 'example_value'
                      revised_prompt: 'example_value'
      '400':
        description: 'Bad request'
        content:
          application/json:
            schema: {$ref: '#/components/schemas/Error'}
            examples:
              Createimagegeneration400Example:
                summary: 'Default createImageGeneration 400 response'
                x-microcks-default: true
                value:
                  error:
                    message: 'example_value'
                    type: 'example_value'
                    code: 'example_value'
    # Microcks mock settings for this operation.
    x-microcks-operation: {delay: 0, dispatcher: 'FALLBACK'}
# POST /v1/audio/transcriptions
# Request body: multipart form with a required binary `file` and `model`,
# plus optional language, prompt, response_format and temperature.
# Responses: 200 returns a JSON object with the transcribed `text`.
# NOTE(review): response_format also allows text, srt, verbose_json and vtt,
# but only an application/json body with a `text` field is declared for 200;
# confirm whether other media types should be listed.
/v1/audio/transcriptions:
  post:
    summary: Transcribe Audio
    description: >-
      Transcribe audio to text using automatic speech recognition models.
    operationId: createTranscription
    tags:
      - Audio
    requestBody:
      required: true
      content:
        multipart/form-data:
          schema:
            type: object
            required:
              - file
              - model
            properties:
              file:
                type: string
                format: binary
                description: Audio file to transcribe
              model:
                type: string
                description: Model ID to use for transcription
                example: openai/whisper-large-v3
              language:
                type: string
                description: Language of the audio in ISO 639-1 format
              prompt:
                type: string
                description: Optional text to guide transcription
              response_format:
                type: string
                enum:
                  - json
                  - text
                  - srt
                  - verbose_json
                  - vtt
                default: json
              temperature:
                type: number
                format: float
          examples:
            CreatetranscriptionRequestExample:
              summary: Default createTranscription request
              x-microcks-default: true
              value:
                file: example_value
                # Reuses the example declared on the `model` property.
                model: openai/whisper-large-v3
                # ISO 639-1 code, as the property description requires.
                language: en
                prompt: example_value
                response_format: json
                temperature: 42.5
    responses:
      '200':
        description: Transcription result
        content:
          application/json:
            schema:
              type: object
              properties:
                text:
                  type: string
                  description: Transcribed text
            examples:
              Createtranscription200Example:
                summary: Default createTranscription 200 response
                x-microcks-default: true
                value:
                  text: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# POST /v1/audio/speech
# Request body: JSON with required `model` and `input` (at most 4096
# characters), plus optional voice, response_format and speed.
# Responses: 200 returns binary audio declared as audio/mpeg.
# NOTE(review): response_format also allows opus, aac, flac and wav, but only
# audio/mpeg is declared for 200; confirm whether other media types apply.
/v1/audio/speech:
  post:
    summary: Generate Speech
    description: Generate audio speech from text input.
    operationId: createSpeech
    tags:
      - Audio
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            required:
              - model
              - input
            properties:
              model:
                type: string
                description: Model ID for speech generation
              input:
                type: string
                description: Text to generate audio for
                maxLength: 4096
              voice:
                type: string
                description: Voice to use for generation
              response_format:
                type: string
                enum:
                  - mp3
                  - opus
                  - aac
                  - flac
                  - wav
                default: mp3
              speed:
                type: number
                format: float
                minimum: 0.25
                maximum: 4.0
                default: 1.0
          examples:
            CreatespeechRequestExample:
              summary: Default createSpeech request
              x-microcks-default: true
              value:
                model: example_value
                input: example_value
                voice: example_value
                response_format: mp3
                # Must stay within the schema's 0.25-4.0 range; the previous
                # value of 42.5 failed validation.
                speed: 1.0
    responses:
      '200':
        description: Generated audio
        content:
          audio/mpeg:
            schema:
              type: string
              format: binary
            examples:
              Createspeech200Example:
                summary: Default createSpeech 200 response
                x-microcks-default: true
                value: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# GET /v1/models
# Responses: 200 returns a list envelope (`object: list`) whose `data` array
# holds model entries (`object: model`) with id, created and owned_by.
/v1/models:
  get:
    summary: List Available Models
    description: >-
      List models available through inference providers. Returns model IDs
      and basic metadata.
    operationId: listModels
    tags:
      - Models
    responses:
      '200':
        description: List of available models
        content:
          application/json:
            schema:
              type: object
              properties:
                object:
                  type: string
                  const: list
                data:
                  type: array
                  items:
                    type: object
                    properties:
                      id:
                        type: string
                        description: Model ID
                      object:
                        type: string
                        const: model
                      created:
                        type: integer
                        description: Unix timestamp of creation
                      owned_by:
                        type: string
                        description: Model owner
            examples:
              Listmodels200Example:
                summary: Default listModels 200 response
                x-microcks-default: true
                value:
                  # Both `object` fields are consts in the schema above, so
                  # the example must use exactly these values.
                  object: list
                  data:
                    - id: abc123
                      object: model
                      created: 10
                      owned_by: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# GET /v1/models/{model_id}
# Path parameter: model_id (required string).
# Responses: 200 returns one model entry (`object: model`); 404 returns Error.
/v1/models/{model_id}:
  get:
    summary: Get Model Information
    description: Get information about a specific model available through inference providers.
    operationId: getModel
    tags:
      - Models
    parameters:
      - name: model_id
        in: path
        required: true
        description: The model ID
        schema:
          type: string
        # NOTE(review): this example contains "/", which a single path
        # segment cannot carry unencoded; confirm how clients are expected to
        # encode namespaced model IDs.
        example: meta-llama/Llama-3-70b-chat-hf
    responses:
      '200':
        description: Model information
        content:
          application/json:
            schema:
              type: object
              properties:
                id:
                  type: string
                object:
                  type: string
                  const: model
                created:
                  type: integer
                owned_by:
                  type: string
            examples:
              Getmodel200Example:
                summary: Default getModel 200 response
                x-microcks-default: true
                value:
                  id: abc123
                  # `object` is a const in the schema above.
                  object: model
                  created: 10
                  owned_by: example_value
      '404':
        description: Model not found
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Error'
            examples:
              Getmodel404Example:
                summary: Default getModel 404 response
                x-microcks-default: true
                value:
                  error:
                    message: example_value
                    type: example_value
                    code: example_value
    # Microcks mock settings for this operation.
    x-microcks-operation:
      delay: 0
      dispatcher: FALLBACK
# Reusable components; this part declares the authentication scheme.
components:
  securitySchemes:
    # HTTP bearer authentication, named by the document-level `security`
    # requirement.
    bearerAuth:
      type: "http"
      scheme: "bearer"
      bearerFormat: "HF Token"
      description: >-
        Hugging Face API token with Inference Providers permission.
        Generate from https://huggingface.co/settings/tokens
schemas:
# Request body for POST /v1/chat/completions.
# Required: `model` and `messages`. Every other property is optional.
# Each property-level `example` is kept valid against that property's own
# constraints (type, enum, minimum/maximum) so generated documentation and
# mocks do not show values the API would reject.
ChatCompletionRequest:
  type: object
  required:
    - model
    - messages
  properties:
    model:
      type: string
      description: >-
        Model ID to use. Can be a Hugging Face model ID (e.g.,
        meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.
      example: meta-llama/Llama-3-70b-chat-hf
    messages:
      type: array
      description: List of messages comprising the conversation
      items:
        type: object
        required:
          - role
        properties:
          role:
            type: string
            enum:
              - system
              - user
              - assistant
              - tool
            description: The role of the message author
          content:
            # Either a plain string, or an array of typed parts
            # (text / image_url) for multimodal input.
            oneOf:
              - type: string
              - type: array
                items:
                  oneOf:
                    - type: object
                      required:
                        - type
                        - text
                      properties:
                        type:
                          type: string
                          const: text
                        text:
                          type: string
                    - type: object
                      required:
                        - type
                        - image_url
                      properties:
                        type:
                          type: string
                          const: image_url
                        image_url:
                          type: object
                          required:
                            - url
                          properties:
                            url:
                              type: string
            description: Message content (string or array for multimodal)
          name:
            type: string
            description: Optional name for the participant
          tool_calls:
            type: array
            items:
              type: object
              required:
                - id
                - type
                - function
              properties:
                id:
                  type: string
                type:
                  type: string
                function:
                  type: object
                  required:
                    - name
                  properties:
                    name:
                      type: string
                    arguments:
                      type: string
                    description:
                      type: string
          tool_call_id:
            type: string
            description: Tool call ID for tool responses
      example: []
    frequency_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
      default: 0
      description: Penalize tokens based on frequency in text so far
      # Must lie within [-2.0, 2.0].
      example: 0.5
    logprobs:
      type: boolean
      default: false
      description: Whether to return log probabilities
      example: true
    max_tokens:
      type: integer
      description: Maximum number of tokens to generate
      example: 10
    presence_penalty:
      type: number
      minimum: -2.0
      maximum: 2.0
      default: 0
      description: Penalize tokens based on presence in text so far
      # Must lie within [-2.0, 2.0].
      example: 0.5
    reasoning_effort:
      type: string
      description: >-
        Constrains effort on reasoning for models that support it. Common
        values are none, minimal, low, medium, high, xhigh.
      example: medium
    response_format:
      # Exactly one of: text, json_schema (with a named schema), json_object.
      oneOf:
        - type: object
          properties:
            type:
              type: string
              const: text
        - type: object
          required:
            - type
            - json_schema
          properties:
            type:
              type: string
              const: json_schema
            json_schema:
              type: object
              required:
                - name
              properties:
                name:
                  type: string
                description:
                  type: string
                schema:
                  type: object
                strict:
                  type: boolean
        - type: object
          properties:
            type:
              type: string
              const: json_object
      # Must be an object matching one of the variants above.
      example:
        type: json_object
    seed:
      type: integer
      description: Random seed for reproducibility
      example: 10
    stop:
      type: array
      items:
        type: string
      maxItems: 4
      description: Up to 4 sequences where generation will stop
      example: []
    stream:
      type: boolean
      default: false
      description: Whether to stream partial responses using SSE
      example: true
    stream_options:
      type: object
      properties:
        include_usage:
          type: boolean
          description: Include usage statistics in stream
      # Must be an object, not a string.
      example:
        include_usage: true
    temperature:
      type: number
      minimum: 0
      maximum: 2
      default: 1.0
      description: Sampling temperature
      # Must lie within [0, 2].
      example: 0.7
    tool_choice:
      # Either one of the listed keywords, or an object naming the function
      # to call.
      oneOf:
        - type: string
          enum:
            - auto
            - none
            - required
        - type: object
          required:
            - function
          properties:
            function:
              type: object
              required:
                - name
              properties:
                name:
                  type: string
      description: Controls tool usage
      # Must be one of the enum keywords or a function object.
      example: auto
    tool_prompt:
      type: string
      description: Prompt prepended before tools
      example: example_value
    tools:
      type: array
      items:
        type: object
        required:
          - type
          - function
        properties:
          type:
            type: string
          function:
            type: object
            required:
              - name
            properties:
              name:
                type: string
              description:
                type: string
              parameters:
                type: object
      description: List of tools the model may call
      example: []
    top_logprobs:
      type: integer
      minimum: 0
      maximum: 5
      description: Number of most likely tokens to return per position
      # Must lie within [0, 5].
      example: 5
    top_p:
      type: number
      minimum: 0
      maximum: 1
      default: 1.0
      description: Nucleus sampling parameter
      # Must lie within [0, 1].
      example: 0.9
ChatCompletionResponse:
type: object
properties:
id:
type: string
description: Unique completion identifier
example: abc123
object:
type: string
const: chat.completion
example: example_value
created:
type: integer
description: Unix timestamp
example: 10
model:
type: string
description: Model used
example: example_value
system_fingerprint:
type: string
example: example_value
choices:
type: array
items:
type: object
properties:
index:
type: integer
message:
type: object
properties:
role:
type: string
content:
type: string
tool_calls:
type: array
items:
type: object
properties:
id:
type: string
type:
type: string
function:
type: object
properties:
name:
type: string
arguments:
type: string
finish_reason:
type: string
enum:
- stop
- length
- tool_calls
- content_filter
logprobs:
type: object
properties:
content:
type: array
# --- truncated at 32 KB (41 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/hugging-face/refs/heads/main/openapi/hugging-face-inference-providers-api.yml