Ollama OpenAI Compatibility API

Ollama provides compatibility with parts of the OpenAI API, allowing existing applications built for OpenAI to connect to locally-running models through Ollama. Supported endpoints include chat completions, completions, embeddings, models, images, and the Responses API.

OpenAPI Specification

ollama-openai-compatibility-api-openapi.yml Raw ↑
# OpenAPI 3.1 document header: spec version plus API metadata.
openapi: 3.1.0
info:
  title: Ollama OpenAI Compatibility API
  version: '0.1.0'
  description: >-
    Ollama provides compatibility with parts of the OpenAI API,
    allowing existing applications built for OpenAI to connect to
    locally-running models through Ollama. Supported endpoints include
    chat completions, completions, embeddings, models, images, and the
    Responses API.
  contact:
    url: https://ollama.com
    name: Ollama Team
  license:
    url: https://opensource.org/licenses/MIT
    name: MIT

# Link to the human-readable documentation for this API.
externalDocs:
  url: https://docs.ollama.com/api/openai-compatibility
  description: Ollama OpenAI Compatibility Documentation

# Single server entry; every path below is resolved against this base URL.
servers:
  - description: Local Ollama Server (OpenAI-compatible)
    url: http://localhost:11434/v1

# Tag catalogue used to group the operations declared under `paths`.
tags:
  - name: Chat Completions
    description: >-
      Generate chat completions using the OpenAI-compatible chat
      endpoint with multi-turn conversation support.
  - name: Completions
    description: >-
      Generate text completions using the OpenAI-compatible
      completions endpoint.
  - name: Embeddings
    description: >-
      Generate vector embeddings using the OpenAI-compatible
      embeddings endpoint.
  - name: Images
    description: >-
      Generate images from text descriptions using the
      OpenAI-compatible images endpoint. Experimental feature.
  - name: Models
    description: >-
      List and retrieve model information using the
      OpenAI-compatible models endpoints.
  - name: Responses
    description: >-
      Generate structured responses with optional reasoning using
      the OpenAI-compatible Responses API.

# Default security requirement applied to every operation. Two alternatives
# are listed: a bearer token, or no credentials at all (the empty object).
# The bearerAuth scheme description states that the key is accepted but not
# validated, so the spec should not force clients to send one.
security:
  - bearerAuth: []
  - {}

# Operations of the API; each path is relative to the server URL.
paths:
  /chat/completions:
    # Chat endpoint: the 200 response is declared both as a JSON body and
    # as a text/event-stream of chunks.
    post:
      tags: [Chat Completions]
      operationId: createChatCompletion
      summary: Ollama Create chat completion
      description: >-
        Creates a model response for the given chat conversation.
        Supports streaming, JSON mode, structured output, vision, and
        tool calling. Compatible with the OpenAI Chat Completions API
        format.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        '200':
          description: Successful chat completion response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionStreamResponse'
        '400':
          description: Bad Request
        '404':
          description: Model not found

  /completions:
    # Text completion endpoint: JSON body or text/event-stream of chunks.
    post:
      tags: [Completions]
      operationId: createCompletion
      summary: Ollama Create completion
      description: >-
        Creates a completion for the provided prompt. Compatible with
        the OpenAI Completions API format. Supports streaming, JSON
        mode, and reproducible outputs.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
      responses:
        '200':
          description: Successful completion response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/CompletionStreamResponse'
        '400':
          description: Bad Request
        '404':
          description: Model not found

  /embeddings:
    # Embedding endpoint: JSON request in, JSON response out.
    post:
      tags: [Embeddings]
      operationId: createEmbedding
      summary: Ollama Create embeddings
      description: >-
        Creates an embedding vector representing the input text.
        Compatible with the OpenAI Embeddings API format.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
      responses:
        '200':
          description: Successful embedding response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
        '400':
          description: Bad Request
        '404':
          description: Model not found

  /models:
    # Model listing; takes no parameters and no request body.
    get:
      tags: [Models]
      operationId: listModels
      summary: Ollama List models
      description: >-
        Lists the currently available models and provides basic
        information about each one. Compatible with the OpenAI List
        Models API format.
      responses:
        '200':
          description: Successful response with list of models
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelListResponse'

  /models/{model}:
    # Single-model lookup keyed by the `model` path parameter.
    get:
      tags: [Models]
      operationId: retrieveModel
      summary: Ollama Retrieve a model
      description: >-
        Retrieves a model instance, providing basic information about
        the model. Compatible with the OpenAI Retrieve Model API
        format.
      parameters:
        - name: model
          in: path
          required: true
          description: The identifier of the model to retrieve.
          schema:
            type: string
      responses:
        '200':
          description: Successful response with model information
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelObject'
        '404':
          description: Model not found

  /images/generations:
    # Image generation endpoint, described by the spec as experimental.
    post:
      tags: [Images]
      operationId: createImage
      summary: Ollama Create image
      description: >-
        Creates an image given a text prompt. This is an experimental
        endpoint that may change or be removed. Only supports b64_json
        response format.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ImageGenerationRequest'
      responses:
        '200':
          description: Successful image generation response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageGenerationResponse'
        '400':
          description: Bad Request
        '404':
          description: Model not found

  /responses:
    # Responses API endpoint: JSON body or text/event-stream of events.
    post:
      tags: [Responses]
      operationId: createResponse
      summary: Ollama Create response
      description: >-
        Creates a structured response with optional reasoning.
        Supports streaming, tool calling, and reasoning summaries.
        Compatible with the OpenAI Responses API format. Added in
        Ollama v0.13.3.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ResponseRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ResponseObject'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ResponseStreamEvent'
        '400':
          description: Bad Request
        '404':
          description: Model not found

# Shared definitions: the authentication scheme and the reusable schemas.
components:
  securitySchemes:
    # HTTP bearer scheme referenced by the top-level `security` requirement.
    bearerAuth:
      type: http
      scheme: bearer
      description: >-
        API key authentication. The key is accepted but not
        validated by Ollama. Use any value such as ollama.

  # Reusable schema definitions referenced from the operations in `paths`.
  schemas:
    # Request body of POST /chat/completions.
    ChatCompletionRequest:
      type: object
      description: >-
        Request body for creating a chat completion in
        OpenAI-compatible format.
      required: [model, messages]
      properties:
        model:
          type: string
          description: The model to use for chat completion.
        messages:
          type: array
          description: A list of messages comprising the conversation so far.
          items:
            $ref: '#/components/schemas/ChatCompletionMessage'
        temperature:
          type: number
          description: >-
            Sampling temperature between 0 and 2. Higher values
            make output more random.
          minimum: 0.0
          maximum: 2.0
        top_p:
          type: number
          description: >-
            Nucleus sampling parameter. Considers tokens with
            top_p probability mass.
          minimum: 0.0
          maximum: 1.0
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate in the response.
        frequency_penalty:
          type: number
          description: Penalty for token frequency to reduce repetition.
          minimum: -2.0
          maximum: 2.0
        presence_penalty:
          type: number
          description: Penalty for token presence to encourage topic diversity.
          minimum: -2.0
          maximum: 2.0
        seed:
          type: integer
          description: Random seed for deterministic generation.
        # Accepts either one stop string or a list of them.
        stop:
          description: >-
            Sequences where the API will stop generating further
            tokens.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        stream:
          type: boolean
          description: >-
            If true, partial message deltas are sent as
            server-sent events.
          default: false
        stream_options:
          type: object
          description: Options for streaming responses.
          properties:
            include_usage:
              type: boolean
              description: If true, includes usage information in the stream.
        response_format:
          type: object
          description: >-
            Specifies the format of the response. Use type
            json_object for JSON mode or json_schema for structured
            output.
          properties:
            type:
              type: string
              description: The response format type.
              enum: [text, json_object, json_schema]
            json_schema:
              type: object
              description: The JSON Schema for structured output.
              additionalProperties: true
        tools:
          type: array
          description: A list of tools the model may call.
          items:
            $ref: '#/components/schemas/OpenAIToolDefinition'

    # Message shape shared by chat requests (`messages`) and chat choices
    # (`message`). Only `role` is required.
    ChatCompletionMessage:
      type: object
      description: A message in the chat conversation.
      required: [role]
      properties:
        role:
          type: string
          description: The role of the message author.
          enum: [system, user, assistant, tool]
        # Either plain text or a list of ContentPart objects.
        content:
          description: >-
            The content of the message. Can be a string or an
            array of content parts for multimodal input.
          oneOf:
            - type: string
            - type: array
              items:
                $ref: '#/components/schemas/ContentPart'
        name:
          type: string
          description: An optional name for the participant.
        tool_calls:
          type: array
          description: Tool calls generated by the model.
          items:
            $ref: '#/components/schemas/OpenAIToolCall'
        tool_call_id:
          type: string
          description: Tool call that this message is responding to.

    # One element of an array-valued message `content`; `type` selects
    # whether `text` or `image_url` carries the payload.
    ContentPart:
      type: object
      description: A content part within a multimodal message.
      required: [type]
      properties:
        type:
          type: string
          description: The type of content part.
          enum: [text, image_url]
        text:
          type: string
          description: The text content when type is text.
        image_url:
          type: object
          description: The image URL or base64 data when type is image_url.
          properties:
            url:
              type: string
              description: URL of the image or a base64-encoded data URI.

    # JSON body of a 200 response from POST /chat/completions.
    ChatCompletionResponse:
      type: object
      description: Response object from a chat completion request.
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
        object:
          type: string
          description: The object type, always chat.completion.
          const: chat.completion
        created:
          type: integer
          description: Unix timestamp of when the completion was created.
        model:
          type: string
          description: The model used for the completion.
        choices:
          type: array
          description: A list of chat completion choices.
          items:
            $ref: '#/components/schemas/ChatCompletionChoice'
        usage:
          $ref: '#/components/schemas/UsageStats'

    # Element of ChatCompletionResponse.choices.
    ChatCompletionChoice:
      type: object
      description: A single chat completion choice.
      properties:
        index:
          type: integer
          description: The index of the choice in the list.
        message:
          $ref: '#/components/schemas/ChatCompletionMessage'
        finish_reason:
          type: string
          description: The reason the model stopped generating tokens.
          enum: [stop, length, tool_calls]

    # One server-sent chunk of a streamed chat completion. `usage` is declared
    # because ChatCompletionRequest.stream_options.include_usage asks for
    # usage information to be included in the stream.
    ChatCompletionStreamResponse:
      type: object
      description: A streaming chat completion chunk.
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
        object:
          type: string
          description: The object type, always chat.completion.chunk.
          const: chat.completion.chunk
        created:
          type: integer
          description: Unix timestamp of when the chunk was created.
        model:
          type: string
          description: The model used for the completion.
        choices:
          type: array
          description: A list of chat completion chunk choices.
          items:
            type: object
            properties:
              index:
                type: integer
                description: The index of the choice.
              delta:
                type: object
                description: The delta content for this chunk.
                properties:
                  role:
                    type: string
                    description: The role of the author.
                  content:
                    type: string
                    description: The content delta.
                  tool_calls:
                    type: array
                    description: Tool call deltas.
                    items:
                      $ref: '#/components/schemas/OpenAIToolCall'
              # Nullable: the schema allows null as well as a string.
              finish_reason:
                type: [string, 'null']
                description: The finish reason, if applicable.
        # Optional; not listed as required, so chunks may omit it.
        usage:
          $ref: '#/components/schemas/UsageStats'

    # Request body of POST /completions.
    CompletionRequest:
      type: object
      description: >-
        Request body for creating a text completion in
        OpenAI-compatible format.
      required: [model, prompt]
      properties:
        model:
          type: string
          description: The model to use for completion.
        # Accepts either one prompt string or a list of strings.
        prompt:
          description: The prompt to generate completions for.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        temperature:
          type: number
          description: Sampling temperature between 0 and 2.
          minimum: 0.0
          maximum: 2.0
        top_p:
          type: number
          description: Nucleus sampling parameter.
          minimum: 0.0
          maximum: 1.0
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate.
        frequency_penalty:
          type: number
          description: Penalty for token frequency.
          minimum: -2.0
          maximum: 2.0
        presence_penalty:
          type: number
          description: Penalty for token presence.
          minimum: -2.0
          maximum: 2.0
        seed:
          type: integer
          description: Random seed for deterministic generation.
        # Accepts either one stop string or a list of them.
        stop:
          description: Stop sequences for generation.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        stream:
          type: boolean
          description: Whether to stream back partial progress.
          default: false
        stream_options:
          type: object
          description: Options for streaming responses.
          properties:
            include_usage:
              type: boolean
              description: If true, includes usage info in the stream.
        suffix:
          type: string
          description: The suffix that comes after the completion.

    # JSON body of a 200 response from POST /completions.
    CompletionResponse:
      type: object
      description: Response object from a text completion request.
      properties:
        id:
          type: string
          description: A unique identifier for the completion.
        object:
          type: string
          description: The object type, always text_completion.
          const: text_completion
        created:
          type: integer
          description: Unix timestamp of when the completion was created.
        model:
          type: string
          description: The model used for the completion.
        choices:
          type: array
          description: A list of completion choices.
          items:
            type: object
            properties:
              index:
                type: integer
                description: The index of the choice.
              text:
                type: string
                description: The generated text.
              finish_reason:
                type: string
                description: The reason the model stopped generating.
                enum: [stop, length]
        usage:
          $ref: '#/components/schemas/UsageStats'

    # One server-sent chunk of a streamed text completion. `usage` is declared
    # because CompletionRequest.stream_options.include_usage asks for usage
    # info to be included in the stream.
    CompletionStreamResponse:
      type: object
      description: A streaming text completion chunk.
      properties:
        id:
          type: string
          description: A unique identifier for the completion.
        object:
          type: string
          description: The object type.
        created:
          type: integer
          description: Unix timestamp of creation.
        model:
          type: string
          description: The model used.
        choices:
          type: array
          description: A list of completion chunk choices.
          items:
            type: object
            properties:
              index:
                type: integer
                description: The index of the choice.
              text:
                type: string
                description: The text delta.
              # Nullable: the schema allows null as well as a string.
              finish_reason:
                type: [string, 'null']
                description: The finish reason, if applicable.
        # Optional; not listed as required, so chunks may omit it.
        usage:
          $ref: '#/components/schemas/UsageStats'

    # Request body of POST /embeddings.
    EmbeddingRequest:
      type: object
      description: >-
        Request body for creating embeddings in OpenAI-compatible
        format.
      required: [model, input]
      properties:
        model:
          type: string
          description: The model to use for embedding generation.
        input:
          description: Input text to embed. Can be a string or array of strings.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        encoding_format:
          type: string
          description: The format of the returned embeddings.
          enum: [float, base64]
        dimensions:
          type: integer
          description: The number of dimensions for the output embeddings.

    # JSON body of a 200 response from POST /embeddings.
    EmbeddingResponse:
      type: object
      description: Response object from an embedding request.
      properties:
        object:
          type: string
          description: The object type, always list.
          const: list
        data:
          type: array
          description: The list of embedding objects.
          items:
            type: object
            properties:
              object:
                type: string
                description: The object type, always embedding.
                const: embedding
              index:
                type: integer
                description: The index of the embedding in the list.
              # EmbeddingRequest.encoding_format permits base64, so the
              # vector must be allowed to arrive as a string as well as a
              # list of numbers.
              embedding:
                description: >-
                  The embedding vector. An array of numbers for the float
                  encoding, or a base64-encoded string when the request
                  sets encoding_format to base64.
                oneOf:
                  - type: array
                    items:
                      type: number
                  - type: string
        model:
          type: string
          description: The model used to generate the embeddings.
        usage:
          $ref: '#/components/schemas/UsageStats'

    # JSON body of a 200 response from GET /models.
    ModelListResponse:
      type: object
      description: >-
        Response containing a list of available models in OpenAI
        format.
      properties:
        object:
          type: string
          description: The object type, always list.
          const: list
        data:
          type: array
          description: The list of model objects.
          items:
            $ref: '#/components/schemas/ModelObject'

    # Returned by GET /models/{model} and as each ModelListResponse.data item.
    ModelObject:
      type: object
      description: Information about a model in OpenAI-compatible format.
      properties:
        id:
          type: string
          description: The model identifier.
        object:
          type: string
          description: The object type, always model.
          const: model
        created:
          type: integer
          description: Unix timestamp when the model was last modified.
        owned_by:
          type: string
          description: >-
            The organization that owns the model. Defaults to
            library.

    # Request body of POST /images/generations.
    ImageGenerationRequest:
      type: object
      description: >-
        Request body for generating images in OpenAI-compatible
        format. This is an experimental endpoint.
      required: [model, prompt]
      properties:
        model:
          type: string
          description: The model to use for image generation.
        prompt:
          type: string
          description: A text description of the desired image.
        size:
          type: string
          description: The size of the generated image, such as 1024x1024.
        # Single-value enum: b64_json is the only accepted format.
        response_format:
          type: string
          description: >-
            The format in which the image is returned. Only
            b64_json is supported.
          enum: [b64_json]

    # JSON body of a 200 response from POST /images/generations.
    ImageGenerationResponse:
      type: object
      description: Response object from an image generation request.
      properties:
        created:
          type: integer
          description: Unix timestamp of when the image was created.
        data:
          type: array
          description: The generated image data.
          items:
            type: object
            properties:
              b64_json:
                type: string
                description: The base64-encoded image data.

    # Request body of POST /responses.
    ResponseRequest:
      type: object
      description: >-
        Request body for the Responses API in OpenAI-compatible
        format.
      required: [model, input]
      properties:
        model:
          type: string
          description: The model to use for generating the response.
        # Either a plain string or a list of free-form objects.
        input:
          description: >-
            The input to generate a response for. Can be a string
            or an array of input items.
          oneOf:
            - type: string
            - type: array
              items:
                type: object
                additionalProperties: true
        instructions:
          type: string
          description: System-level instructions for the model.
        tools:
          type: array
          description: A list of tools the model may call.
          items:
            $ref: '#/components/schemas/OpenAIToolDefinition'
        stream:
          type: boolean
          description: Whether to stream the response.
          default: false
        temperature:
          type: number
          description: Sampling temperature between 0 and 2.
          minimum: 0.0
          maximum: 2.0
        top_p:
          type: number
          description: Nucleus sampling parameter.
          minimum: 0.0
          maximum: 1.0
        max_output_tokens:
          type: integer
          description: Maximum number of output tokens to generate.

    # JSON body of a 200 response from POST /responses.
    ResponseObject:
      type: object
      description: Response object from the Responses API.
      properties:
        id:
          type: string
          description: A unique identifier for the response.
        object:
          type: string
          description: The object type.
        created_at:
          type: integer
          description: Unix timestamp of when the response was created.
        model:
          type: string
          description: The model used.
        output:
          type: array
          description: The output items generated by the model.
          items:
            type: object
            additionalProperties: true
        # Declared inline instead of referencing UsageStats: the Responses
        # API names its counters input_tokens / output_tokens, not
        # prompt_tokens / completion_tokens.
        usage:
          type: object
          description: Token usage statistics for the response.
          properties:
            input_tokens:
              type: integer
              description: Number of tokens in the input.
            output_tokens:
              type: integer
              description: Number of tokens in the generated output.
            total_tokens:
              type: integer
              description: Total number of tokens used in the request.

    # Schema of the text/event-stream variant of the POST /responses reply.
    ResponseStreamEvent:
      type: object
      description: A streaming event from the Responses API.
      properties:
        type:
          type: string
          description: The type of streaming event.
        # Free-form object: any additional properties are allowed.
        data:
          type: object
          description: The event data payload.
          additionalProperties: true

    # Element of the `tools` array in ChatCompletionRequest and
    # ResponseRequest.
    OpenAIToolDefinition:
      type: object
      description: A tool definition in OpenAI-compatible format.
      required: [type, function]
      properties:
        type:
          type: string
          description: The type of tool. Currently only function is supported.
          enum: [function]
        function:
          type: object
          description: The function definition.
          required: [name]
          properties:
            name:
              type: string
              description: The name of the function.
            # A property literally named `description`, not a schema keyword.
            description:
              type: string
              description: A description of what the function does.
            parameters:
              type: object
              description: The function parameters as a JSON Schema object.
              additionalProperties: true

    # Element of `tool_calls` in chat messages and in streamed deltas.
    OpenAIToolCall:
      type: object
      description: A tool call generated by the model.
      properties:
        id:
          type: string
          description: A unique identifier for the tool call.
        type:
          type: string
          description: The type of tool call.
          enum: [function]
        function:
          type: object
          description: The function call details.
          properties:
            name:
              type: string
              description: The name of the function to call.
            # Typed as a string: the arguments are carried as JSON text.
            arguments:
              type: string
              description: >-
                The arguments to pass to the function as a JSON
                string.

    # Token counters referenced by the `usage` property of response schemas.
    UsageStats:
      type: object
      description: Token usage statistics for the request.
      properties:
        prompt_tokens:
          type: integer
          description: Number of tokens in the prompt.
        completion_tokens:
          type: integer
          description: Number of tokens in the generated completion.
        total_tokens:
          type: integer
          description: Total number of tokens used in the request.