# Hugging Face
# Hugging Face Inference Providers API
#
# Unified proxy layer providing access to 15+ inference partners through a single OpenAI-compatible endpoint.
# Documentation | GitHub | OpenAPI
# OpenAPI Specification

# OpenAPI document version; quoted so it is always read as a string.
openapi: '3.1.0'
# API-level metadata: title, version, legal and contact details.
info:
  title: 'Hugging Face Inference Providers API'
  version: '1.0.0'
  description: >-
    Unified proxy layer providing access to 15+ inference partners
    through a single OpenAI-compatible endpoint. Route requests to
    providers like AWS, Google, Azure, Together, Fireworks, and more
    through a consistent API with automatic model routing and provider
    selection.
  termsOfService: 'https://huggingface.co/terms-of-service'
  license:
    name: 'Apache 2.0'
    url: 'https://www.apache.org/licenses/LICENSE-2.0'
  contact:
    name: 'Hugging Face Support'
    url: 'https://huggingface.co/support'
# Single base URL; every entry under `paths` is relative to it.
servers:
- description: 'Hugging Face Inference Providers router'
  url: 'https://router.huggingface.co'
# Document-wide security requirement: the bearerAuth scheme declared under
# components.securitySchemes, with no scopes.
security:
- bearerAuth: []
# Tag catalogue; each operation under `paths` references one of these names.
tags:
- description: 'OpenAI-compatible chat completion endpoints'
  name: 'Chat Completions'
- description: 'Text generation endpoints'
  name: 'Text Generation'
- description: 'Text embedding endpoints'
  name: 'Embeddings'
- description: 'Text-to-image generation'
  name: 'Image Generation'
- description: 'Speech-to-text and text-to-speech'
  name: 'Audio'
- description: 'Model listing and information'
  name: 'Models'
paths:
  # Chat completion endpoint. The request body is validated against
  # ChatCompletionRequest; the 200 response is offered both as a JSON document
  # and as a server-sent event stream.
  /v1/chat/completions:
    post:
      summary: Create Chat Completion
      description: >-
        Create a chat completion using an OpenAI-compatible API. Supports
        conversational LLMs and Vision-Language Models (VLMs). Requests are
        routed to the optimal inference provider automatically.
      operationId: createChatCompletion
      tags:
      - Chat Completions
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
            examples:
              CreatechatcompletionRequestExample:
                summary: Default createChatCompletion request
                x-microcks-default: true
                # Every value below is chosen to validate against
                # ChatCompletionRequest: penalties within [-2, 2], temperature
                # within [0, 2], top_p within [0, 1], top_logprobs within
                # [0, 5], and object-typed fields given as objects.
                value:
                  model: meta-llama/Llama-3-70b-chat-hf
                  messages:
                  - role: system
                    content: You are a helpful assistant.
                  - role: user
                    name: alice
                    content: What is the weather like in Paris today?
                  # An assistant turn carrying a tool call; each entry needs
                  # id, type and function.name per the schema.
                  - role: assistant
                    tool_calls:
                    - id: call_500123
                      type: function
                      function:
                        name: get_weather
                        arguments: '{"location": "Paris"}'
                  # The tool result refers back to the call through
                  # tool_call_id.
                  - role: tool
                    tool_call_id: call_500123
                    content: '{"temperature_c": 18}'
                  frequency_penalty: 0
                  logprobs: true
                  max_tokens: 10
                  presence_penalty: 0
                  reasoning_effort: medium
                  response_format:
                    type: text
                  seed: 10
                  stop:
                  - example_value
                  stream: true
                  stream_options:
                    include_usage: true
                  temperature: 0.7
                  tool_choice: auto
                  tool_prompt: example_value
                  tools:
                  - type: function
                    function:
                      name: get_weather
                      description: A sample description.
                      parameters:
                        type: object
                        properties:
                          location:
                            type: string
                        required:
                        - location
                  top_logprobs: 5
                  top_p: 0.9
      responses:
        '200':
          description: Chat completion response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
              examples:
                Createchatcompletion200Example:
                  summary: Default createChatCompletion 200 response
                  x-microcks-default: true
                  # `object` must equal the schema's const; tool_calls and
                  # logprobs.content are arrays in ChatCompletionResponse.
                  value:
                    id: abc123
                    object: chat.completion
                    created: 10
                    model: meta-llama/Llama-3-70b-chat-hf
                    system_fingerprint: example_value
                    choices:
                    - index: 0
                      message:
                        role: assistant
                        content: example_value
                        tool_calls: []
                      finish_reason: stop
                      logprobs:
                        content: []
                    usage:
                      prompt_tokens: 10
                      completion_tokens: 10
                      total_tokens: 10
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionStreamResponse'
              examples:
                Createchatcompletion200Example:
                  summary: Default createChatCompletion 200 response
                  x-microcks-default: true
                  # NOTE(review): ChatCompletionStreamResponse is not visible
                  # in this part of the file, so these generated placeholder
                  # values are left untouched; verify them against that schema.
                  value:
                    id: abc123
                    object: example_value
                    created: 10
                    model: example_value
                    system_fingerprint: example_value
                    choices:
                    - index: 10
                      delta:
                        role: example_value
                        content: example_value
                        tool_calls: {}
                      finish_reason: example_value
                      logprobs:
                        content: {}
                    usage:
                      prompt_tokens: 10
                      completion_tokens: 10
                      total_tokens: 10
        '400':
          description: Bad request - invalid parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createchatcompletion400Example:
                  summary: Default createChatCompletion 400 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
        '401':
          description: Unauthorized - invalid or missing API token
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createchatcompletion401Example:
                  summary: Default createChatCompletion 401 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
        '404':
          description: Model not found or not available through any provider
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createchatcompletion404Example:
                  summary: Default createChatCompletion 404 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
        '429':
          description: Rate limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createchatcompletion429Example:
                  summary: Default createChatCompletion 429 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
        '502':
          description: Provider error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createchatcompletion502Example:
                  summary: Default createChatCompletion 502 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Plain text completion endpoint; the body is validated against
  # CompletionRequest and the 200 response is offered as JSON or as an
  # event stream.
  /v1/completions:
    post:
      summary: Create Text Completion
      description: >-
        Create a text completion for a given prompt. Supports standard
        completion parameters compatible with the OpenAI API format.
      operationId: createCompletion
      tags:
      - Text Generation
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
            examples:
              CreatecompletionRequestExample:
                summary: Default createCompletion request
                x-microcks-default: true
                # NOTE(review): CompletionRequest is defined outside the
                # visible part of this file. temperature and top_p are kept
                # inside the ranges ChatCompletionRequest declares for the
                # same parameter names (0-2 and 0-1); confirm against
                # CompletionRequest.
                value:
                  model: example_value
                  prompt: example_value
                  max_tokens: 10
                  temperature: 0.7
                  top_p: 0.9
                  stop:
                  - example_value
                  stream: true
                  seed: 10
      responses:
        '200':
          description: Text completion response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
              examples:
                Createcompletion200Example:
                  summary: Default createCompletion 200 response
                  x-microcks-default: true
                  value:
                    id: abc123
                    object: example_value
                    created: 10
                    model: example_value
                    choices:
                    - text: example_value
                      index: 10
                      finish_reason: example_value
                    usage:
                      prompt_tokens: 10
                      completion_tokens: 10
                      total_tokens: 10
            text/event-stream:
              schema:
                $ref: '#/components/schemas/CompletionStreamResponse'
              examples:
                Createcompletion200Example:
                  summary: Default createCompletion 200 response
                  x-microcks-default: true
                  value:
                    id: abc123
                    object: example_value
                    created: 10
                    model: example_value
                    choices:
                    - text: example_value
                      index: 10
                      finish_reason: example_value
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createcompletion400Example:
                  summary: Default createCompletion 400 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Embedding endpoint; the body is validated against EmbeddingRequest and the
  # 200 response against EmbeddingResponse.
  /v1/embeddings:
    post:
      summary: Create Embeddings
      description: >-
        Create embedding vectors for input text. Returns dense vector
        representations useful for semantic search, clustering, and
        classification tasks.
      operationId: createEmbeddings
      tags:
      - Embeddings
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
            examples:
              CreateembeddingsRequestExample:
                summary: Default createEmbeddings request
                x-microcks-default: true
                value:
                  model: example_value
                  input: example_value
                  encoding_format: float
      responses:
        '200':
          description: Embedding response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
              examples:
                Createembeddings200Example:
                  summary: Default createEmbeddings 200 response
                  x-microcks-default: true
                  value:
                    object: example_value
                    data:
                    - object: example_value
                      index: 10
                      # The request example asks for encoding_format float, so
                      # the vector is shown as numbers rather than an empty
                      # object. NOTE(review): EmbeddingResponse is not visible
                      # here; confirm the item type against it.
                      embedding:
                      - 0.0123
                      - -0.0456
                      - 0.0789
                    model: example_value
                    usage:
                      prompt_tokens: 10
                      total_tokens: 10
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createembeddings400Example:
                  summary: Default createEmbeddings 400 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Text-to-image endpoint: JSON request validated against
  # ImageGenerationRequest, JSON response validated against
  # ImageGenerationResponse.
  /v1/images/generations:
    post:
      operationId: createImageGeneration
      tags:
      - 'Image Generation'
      summary: 'Generate Images'
      description: >-
        Generate images from text prompts using diffusion models
        available through inference providers.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ImageGenerationRequest'
            examples:
              CreateimagegenerationRequestExample:
                x-microcks-default: true
                summary: 'Default createImageGeneration request'
                value:
                  model: 'example_value'
                  prompt: 'example_value'
                  negative_prompt: 'example_value'
                  n: 10
                  size: '256x256'
                  response_format: 'url'
                  num_inference_steps: 10
                  guidance_scale: 42.5
      responses:
        # Success: list of generated images.
        '200':
          description: 'Generated images'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageGenerationResponse'
              examples:
                Createimagegeneration200Example:
                  x-microcks-default: true
                  summary: 'Default createImageGeneration 200 response'
                  value:
                    created: 10
                    data:
                    - url: 'https://www.example.com'
                      b64_json: 'example_value'
                      revised_prompt: 'example_value'
        # Failure: request rejected.
        '400':
          description: 'Bad request'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Createimagegeneration400Example:
                  x-microcks-default: true
                  summary: 'Default createImageGeneration 400 response'
                  value:
                    error:
                      message: 'example_value'
                      type: 'example_value'
                      code: 'example_value'
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        dispatcher: FALLBACK
        delay: 0
  # Speech-to-text endpoint. The request is a multipart form whose schema is
  # declared inline; `file` and `model` are the only required parts.
  /v1/audio/transcriptions:
    post:
      summary: Transcribe Audio
      description: >-
        Transcribe audio to text using automatic speech recognition models.
      operationId: createTranscription
      tags:
      - Audio
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required:
              - file
              - model
              properties:
                file:
                  type: string
                  format: binary
                  description: Audio file to transcribe
                model:
                  type: string
                  description: Model ID to use for transcription
                  example: openai/whisper-large-v3
                language:
                  type: string
                  description: Language of the audio in ISO 639-1 format
                prompt:
                  type: string
                  description: Optional text to guide transcription
                response_format:
                  type: string
                  enum:
                  - json
                  - text
                  - srt
                  - verbose_json
                  - vtt
                  default: json
                temperature:
                  type: number
                  format: float
            examples:
              CreatetranscriptionRequestExample:
                summary: Default createTranscription request
                x-microcks-default: true
                value:
                  file: example_value
                  # Reuses the example declared on the `model` property.
                  model: openai/whisper-large-v3
                  # Two-letter code, as the property description requires
                  # ISO 639-1.
                  language: en
                  prompt: example_value
                  response_format: json
                  # The schema sets no bounds on temperature; this is only an
                  # illustrative value.
                  temperature: 0.2
      responses:
        '200':
          description: Transcription result
          content:
            # NOTE(review): only a JSON body is documented, although
            # response_format also allows text, srt and vtt; confirm whether
            # those return other media types.
            application/json:
              schema:
                type: object
                properties:
                  text:
                    type: string
                    description: Transcribed text
              examples:
                Createtranscription200Example:
                  summary: Default createTranscription 200 response
                  x-microcks-default: true
                  value:
                    text: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Text-to-speech endpoint. The JSON request schema is declared inline; the
  # 200 response is a binary audio/mpeg body.
  /v1/audio/speech:
    post:
      summary: Generate Speech
      description: Generate audio speech from text input.
      operationId: createSpeech
      tags:
      - Audio
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
              - model
              - input
              properties:
                model:
                  type: string
                  description: Model ID for speech generation
                input:
                  type: string
                  description: Text to generate audio for
                  maxLength: 4096
                voice:
                  type: string
                  description: Voice to use for generation
                response_format:
                  type: string
                  enum:
                  - mp3
                  - opus
                  - aac
                  - flac
                  - wav
                  default: mp3
                speed:
                  type: number
                  format: float
                  minimum: 0.25
                  maximum: 4.0
                  default: 1.0
            examples:
              CreatespeechRequestExample:
                summary: Default createSpeech request
                x-microcks-default: true
                value:
                  model: example_value
                  input: example_value
                  voice: example_value
                  response_format: mp3
                  # Must lie within the schema's 0.25-4.0 range; the declared
                  # default is used.
                  speed: 1.0
      responses:
        '200':
          description: Generated audio
          content:
            audio/mpeg:
              schema:
                type: string
                format: binary
              examples:
                Createspeech200Example:
                  summary: Default createSpeech 200 response
                  x-microcks-default: true
                  value: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Model listing endpoint; the response schema is declared inline.
  /v1/models:
    get:
      summary: List Available Models
      description: >-
        List models available through inference providers. Returns model IDs
        and basic metadata.
      operationId: listModels
      tags:
      - Models
      responses:
        '200':
          description: List of available models
          content:
            application/json:
              schema:
                type: object
                properties:
                  object:
                    type: string
                    const: list
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                          description: Model ID
                        object:
                          type: string
                          const: model
                        created:
                          type: integer
                          description: Unix timestamp of creation
                        owned_by:
                          type: string
                          description: Model owner
              examples:
                Listmodels200Example:
                  summary: Default listModels 200 response
                  x-microcks-default: true
                  # The two `object` fields are constants in the schema above
                  # (`list` for the envelope, `model` for each entry), so the
                  # example must use exactly those values.
                  value:
                    object: list
                    data:
                    - id: abc123
                      object: model
                      created: 10
                      owned_by: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Single-model lookup by ID; the 200 response schema is declared inline and
  # 404 uses the shared Error schema.
  /v1/models/{model_id}:
    get:
      summary: Get Model Information
      description: Get information about a specific model available through inference providers.
      operationId: getModel
      tags:
      - Models
      parameters:
      - name: model_id
        in: path
        required: true
        description: The model ID
        schema:
          type: string
        # NOTE(review): this example contains a "/", which a client would
        # normally percent-encode inside a single path segment; confirm how
        # the router expects such IDs to be sent.
        example: meta-llama/Llama-3-70b-chat-hf
      responses:
        '200':
          description: Model information
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  object:
                    type: string
                    const: model
                  created:
                    type: integer
                  owned_by:
                    type: string
              examples:
                Getmodel200Example:
                  summary: Default getModel 200 response
                  x-microcks-default: true
                  # `object` is the constant `model` in the schema above, so
                  # the example must use exactly that value.
                  value:
                    id: abc123
                    object: model
                    created: 10
                    owned_by: example_value
        '404':
          description: Model not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
              examples:
                Getmodel404Example:
                  summary: Default getModel 404 response
                  x-microcks-default: true
                  value:
                    error:
                      message: example_value
                      type: example_value
                      code: example_value
      # Microcks mock settings: no artificial delay, FALLBACK dispatcher.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
components:
  # Authentication schemes referenced by the top-level `security` requirement.
  securitySchemes:
    # HTTP bearer authentication using a Hugging Face token.
    bearerAuth:
      description: >-
        Hugging Face API token with Inference Providers permission.
        Generate from https://huggingface.co/settings/tokens
      type: 'http'
      scheme: 'bearer'
      bearerFormat: 'HF Token'
  schemas:
    # Request body for POST /v1/chat/completions. Only `model` and `messages`
    # are required. Each property-level `example` below is chosen to satisfy
    # that property's own type, enum and range constraints.
    ChatCompletionRequest:
      type: object
      required:
      - model
      - messages
      properties:
        model:
          type: string
          description: >-
            Model ID to use. Can be a Hugging Face model ID (e.g.,
            meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.
          example: meta-llama/Llama-3-70b-chat-hf
        messages:
          type: array
          description: List of messages comprising the conversation
          items:
            type: object
            required:
            - role
            properties:
              role:
                type: string
                enum:
                - system
                - user
                - assistant
                - tool
                description: The role of the message author
              content:
                # Either a plain string or a list of typed parts (text or
                # image_url) for multimodal input.
                oneOf:
                - type: string
                - type: array
                  items:
                    oneOf:
                    - type: object
                      required:
                      - type
                      - text
                      properties:
                        type:
                          type: string
                          const: text
                        text:
                          type: string
                    - type: object
                      required:
                      - type
                      - image_url
                      properties:
                        type:
                          type: string
                          const: image_url
                        image_url:
                          type: object
                          required:
                          - url
                          properties:
                            url:
                              type: string
                description: Message content (string or array for multimodal)
              name:
                type: string
                description: Optional name for the participant
              tool_calls:
                type: array
                items:
                  type: object
                  required:
                  - id
                  - type
                  - function
                  properties:
                    id:
                      type: string
                    type:
                      type: string
                    function:
                      type: object
                      required:
                      - name
                      properties:
                        name:
                          type: string
                        arguments:
                          type: string
                        description:
                          type: string
              tool_call_id:
                type: string
                description: Tool call ID for tool responses
          example:
          - role: user
            content: Hello!
        frequency_penalty:
          type: number
          minimum: -2.0
          maximum: 2.0
          default: 0
          description: Penalize tokens based on frequency in text so far
          example: 0.5
        logprobs:
          type: boolean
          default: false
          description: Whether to return log probabilities
          example: true
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate
          example: 10
        presence_penalty:
          type: number
          minimum: -2.0
          maximum: 2.0
          default: 0
          description: Penalize tokens based on presence in text so far
          example: 0.5
        reasoning_effort:
          type: string
          description: >-
            Constrains effort on reasoning for models that support it. Common
            values are none, minimal, low, medium, high, xhigh.
          example: medium
        response_format:
          # Three object shapes, told apart by the constant `type` value.
          oneOf:
          - type: object
            properties:
              type:
                type: string
                const: text
          - type: object
            required:
            - type
            - json_schema
            properties:
              type:
                type: string
                const: json_schema
              json_schema:
                type: object
                required:
                - name
                properties:
                  name:
                    type: string
                  description:
                    type: string
                  schema:
                    type: object
                  strict:
                    type: boolean
          - type: object
            properties:
              type:
                type: string
                const: json_object
          example:
            type: json_object
        seed:
          type: integer
          description: Random seed for reproducibility
          example: 10
        stop:
          type: array
          items:
            type: string
          maxItems: 4
          description: Up to 4 sequences where generation will stop
          example: []
        stream:
          type: boolean
          default: false
          description: Whether to stream partial responses using SSE
          example: true
        stream_options:
          type: object
          properties:
            include_usage:
              type: boolean
              description: Include usage statistics in stream
          example:
            include_usage: true
        temperature:
          type: number
          minimum: 0
          maximum: 2
          default: 1.0
          description: Sampling temperature
          example: 0.7
        tool_choice:
          # Either one of the listed keywords or an object naming a function.
          oneOf:
          - type: string
            enum:
            - auto
            - none
            - required
          - type: object
            required:
            - function
            properties:
              function:
                type: object
                required:
                - name
                properties:
                  name:
                    type: string
          description: Controls tool usage
          example: auto
        tool_prompt:
          type: string
          description: Prompt prepended before tools
          example: example_value
        tools:
          type: array
          items:
            type: object
            required:
            - type
            - function
            properties:
              type:
                type: string
              function:
                type: object
                required:
                - name
                properties:
                  name:
                    type: string
                  description:
                    type: string
                  parameters:
                    type: object
          description: List of tools the model may call
          example: []
        top_logprobs:
          type: integer
          minimum: 0
          maximum: 5
          description: Number of most likely tokens to return per position
          example: 5
        top_p:
          type: number
          minimum: 0
          maximum: 1
          default: 1.0
          description: Nucleus sampling parameter
          example: 0.9
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
          description: Unique completion identifier
          example: abc123
        object:
          type: string
          const: chat.completion
          example: example_value
        created:
          type: integer
          description: Unix timestamp
          example: 10
        model:
          type: string
          description: Model used
          example: example_value
        system_fingerprint:
          type: string
          example: example_value
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              message:
                type: object
                properties:
                  role:
                    type: string
                  content:
                    type: string
                  tool_calls:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                        type:
                          type: string
                        function:
                          type: object
                          properties:
                            name:
                              type: string
                            arguments:
                              type: string
              finish_reason:
                type: string
                enum:
                - stop
                - length
                - tool_calls
                - content_filter
              logprobs:
                type: object
                properties:
                  content:
                    type: array
        

# --- truncated at 32 KB (41 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/hugging-face/refs/heads/main/openapi/hugging-face-inference-providers-api.yml
# Hugging Face Inference Providers API
#
# Documentation
#
# Specifications
#
# Other Resources
#
# OpenAPI Specification