# LangDB Embeddings API
#
# OpenAI-compatible embeddings endpoint that creates vector representations of input text or token arrays via any embedding model routed through the gateway.
#
# OpenAPI Specification
#
# langdb-openapi.yml Raw ↑
# OpenAPI document version and top-level API metadata.
openapi: 3.0.1
info:
  title: LangDB AI Gateway API
  # Quoted so the version stays a string instead of being read as the float 1.0.
  version: '1.0'
  termsOfService: https://langdb.ai/terms
  contact:
    name: LangDB Support
    email: support@langdb.ai
    url: https://langdb.ai
  # Folded scalar: the line breaks below collapse to single spaces.
  description: >-
    OpenAI-compatible REST API for the LangDB AI Gateway.
    A single, project-scoped endpoint routes chat completions, embeddings,
    and image generation across 250+ models from providers such as OpenAI,
    Anthropic, Google, Meta, Mistral, and DeepSeek, while adding routing,
    guardrails, tracing, cost control, and an MCP (Model Context Protocol)
    gateway.
    Requests are authenticated with a Bearer API key and scoped to a project
    either by embedding the project id in the path (`/{project_id}/v1/...`)
    or by sending an `X-Project-Id` header.
    Tracing and session headers (`X-Thread-Id`, `X-Run-Id`, `X-Label`)
    attach observability metadata to each call.
# Base URLs: a project-scoped OpenAI-compatible base and the bare root host.
servers:
  - description: Project-scoped OpenAI-compatible base (US East 1).
    url: https://api.us-east-1.langdb.ai/{project_id}/v1
    variables:
      # Fills the {project_id} placeholder in the URL template above.
      project_id:
        description: LangDB project id. May instead be supplied via the X-Project-Id header.
        default: your-langdb-project-id
  - description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    url: https://api.us-east-1.langdb.ai
# Document-wide security requirement: the `bearerAuth` scheme (declared under
# components.securitySchemes) with an empty scope list.
security:
  - bearerAuth: []
# Tag groups that the operations under `paths` refer to by name.
tags:
  - name: Chat
    description: 'OpenAI-compatible chat completions and model routing.'
  - name: Embeddings
    description: 'Vector embeddings for input text.'
  - name: Images
    description: 'Image generation.'
  - name: Models
    description: 'Catalog of models available through the gateway.'
  - name: Threads
    description: 'Conversation threads, their messages, and per-thread cost.'
  - name: Analytics
    description: 'Usage analytics and cost summaries.'
paths:
  /chat/completions:
    # Single POST operation; the 200 response is declared both as JSON and as
    # an event stream.
    post:
      summary: Create a chat completion
      operationId: createChatCompletion
      tags: [Chat]
      description: >-
        OpenAI-compatible chat completion across any model routable by the gateway
        (e.g. `openai/gpt-4o`, `anthropic/claude-sonnet-4`, `gemini/gemini-2.5-pro`).
        Supports streaming via Server-Sent Events when `stream` is true,
        tool/function calling, structured outputs, and attaching MCP servers
        through the `mcp_servers` array.
        Tracing metadata can be attached via the X-Thread-Id, X-Run-Id, and
        X-Label headers.
      parameters:
        # Optional project selector followed by the three tracing headers.
        - $ref: '#/components/parameters/ProjectIdHeader'
        - $ref: '#/components/parameters/ThreadIdHeader'
        - $ref: '#/components/parameters/RunIdHeader'
        - $ref: '#/components/parameters/LabelHeader'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
        required: true
      responses:
        '200':
          description: A chat completion, or an SSE stream when stream is true.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                description: 'Server-Sent Events stream of ChatCompletionChunk objects terminated by `data: [DONE]`.'
                type: string
        '401':
          $ref: '#/components/responses/Unauthorized'
        '429':
          $ref: '#/components/responses/RateLimited'
  /embeddings:
    # Single POST operation taking an EmbeddingRequest and returning an
    # EmbeddingResponse.
    post:
      summary: Create embeddings
      operationId: createEmbedding
      tags: [Embeddings]
      description: >-
        Creates an embedding vector representing the input text or token
        array.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
        required: true
      responses:
        '200':
          description: A list of embedding vectors.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '429':
          $ref: '#/components/responses/RateLimited'
  /images/generations:
    # Single POST operation taking an ImageGenerationRequest and returning an
    # ImageGenerationResponse.
    post:
      summary: Create an image
      operationId: createImage
      tags: [Images]
      description: >-
        Generates one or more images from a text prompt using an image-capable
        model routed through the gateway.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ImageGenerationRequest'
        required: true
      responses:
        '200':
          description: The generated image(s).
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageGenerationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '429':
          $ref: '#/components/responses/RateLimited'
  /models:
    # Read-only catalog lookup; no request body.
    get:
      summary: List models
      operationId: listModels
      tags: [Models]
      description: >-
        Lists the models available to the project through the gateway, in an
        OpenAI-compatible shape.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      responses:
        '200':
          description: The list of available models.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelList'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /threads/{thread_id}/messages:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of thread management endpoints, but without
    # this override tooling resolves the path against the first listed server
    # (the project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    get:
      operationId: getThreadMessages
      tags:
        - Threads
      summary: Get messages for a thread
      description: Retrieves the ordered messages belonging to a conversation thread.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
        # Fills the {thread_id} segment of the path template.
        - name: thread_id
          in: path
          required: true
          description: The thread identifier.
          schema:
            type: string
      responses:
        '200':
          description: The thread's messages.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ThreadMessageList'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /threads/{thread_id}/cost:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of thread management endpoints, but without
    # this override tooling resolves the path against the first listed server
    # (the project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    get:
      operationId: getThreadCost
      tags:
        - Threads
      summary: Get the cost of a thread
      description: Returns the aggregated cost and token usage for a conversation thread.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
        # Fills the {thread_id} segment of the path template.
        - name: thread_id
          in: path
          required: true
          description: The thread identifier.
          schema:
            type: string
      responses:
        '200':
          description: The thread's cost summary.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ThreadCost'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /analytics:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of analytics endpoints, but without this
    # override tooling resolves the path against the first listed server
    # (the project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    post:
      operationId: fetchAnalytics
      tags:
        - Analytics
      summary: Fetch analytics data
      description: Returns time-series analytics for the project, filterable by a preset interval such as last_day, last_week, or last_month.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnalyticsRequest'
      responses:
        '200':
          description: Analytics rows for the requested window.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AnalyticsResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /analytics/summary:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of analytics endpoints, but without this
    # override tooling resolves the path against the first listed server
    # (the project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    post:
      operationId: fetchAnalyticsSummary
      tags:
        - Analytics
      summary: Fetch analytics summary
      description: Returns an aggregated analytics summary (totals for requests, tokens, and cost) for the requested window.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnalyticsRequest'
      responses:
        '200':
          description: Aggregated analytics summary.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AnalyticsSummary'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /usage/total:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of usage endpoints, but without this override
    # tooling resolves the path against the first listed server (the
    # project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    post:
      operationId: getTotalUsage
      tags:
        - Analytics
      summary: Get total usage
      description: Returns total usage (cost, input tokens, output tokens) for the project over the requested window.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnalyticsRequest'
      responses:
        '200':
          description: Total usage figures.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UsageTotal'
        '401':
          $ref: '#/components/responses/Unauthorized'
  /usage/models:
    # Path-level server override. The top-level `servers` list describes the
    # root base URL as the home of usage endpoints, but without this override
    # tooling resolves the path against the first listed server (the
    # project-scoped `/{project_id}/v1` base).
    # The root URL carries no project id in its path, so callers should send
    # the X-Project-Id header.
    servers:
      - url: https://api.us-east-1.langdb.ai
        description: Root base URL for analytics, usage, and thread management endpoints (US East 1).
    post:
      operationId: getUsageByModel
      tags:
        - Analytics
      summary: Get usage by model
      description: Returns usage broken down per model for the project over the requested window.
      parameters:
        - $ref: '#/components/parameters/ProjectIdHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnalyticsRequest'
      responses:
        '200':
          description: Per-model usage figures.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UsageByModel'
        '401':
          $ref: '#/components/responses/Unauthorized'
components:
  securitySchemes:
    # HTTP bearer scheme referenced by the document-wide `security` requirement.
    bearerAuth:
      description: 'LangDB API key (project access token) sent as `Authorization: Bearer <token>`.'
      type: http
      scheme: bearer
      bearerFormat: JWT
  parameters:
    # Reusable request headers; all four are optional strings.
    ProjectIdHeader:
      in: header
      name: X-Project-Id
      description: >-
        LangDB project id. Optional when the project id is embedded in the
        request path.
      required: false
      schema: { type: string }
    ThreadIdHeader:
      in: header
      name: X-Thread-Id
      description: >-
        Groups related requests under the same conversation thread for tracing
        and session continuity.
      required: false
      schema: { type: string }
    RunIdHeader:
      in: header
      name: X-Run-Id
      description: >-
        Tracks a single workflow execution (model call or tool invocation) for
        observability.
      required: false
      schema: { type: string }
    LabelHeader:
      in: header
      name: X-Label
      description: >-
        Attaches a custom label/tag to the model call for categorization and
        tracing.
      required: false
      schema: { type: string }
  responses:
    # Shared error responses; both carry the common Error body as JSON.
    Unauthorized:
      description: 'Missing or invalid API key.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    RateLimited:
      description: 'Rate limit or cost limit exceeded.'
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
  schemas:
    ChatCompletionRequest:
      # Request body of POST /chat/completions; only `model` and `messages`
      # are mandatory.
      type: object
      required: [model, messages]
      properties:
        model:
          description: >-
            Provider-prefixed model identifier (e.g. openai/gpt-4o,
            anthropic/claude-sonnet-4) or a LangDB virtual model name.
          type: string
          example: openai/gpt-4o-mini
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
        temperature:
          type: number
          format: float
          default: 1
        top_p:
          type: number
          format: float
        max_tokens: { type: integer }
        stream:
          description: When true, partial deltas are streamed as Server-Sent Events.
          type: boolean
          default: false
        # Either a single string or a list of strings.
        stop:
          oneOf:
            - type: string
            - type: array
              items: { type: string }
        tools:
          description: Function/tool definitions in OpenAI tool-calling format.
          type: array
          items: { type: object }
        tool_choice:
          oneOf:
            - type: string
            - type: object
        response_format:
          description: Structured-output controls (e.g. a JSON object type).
          type: object
        mcp_servers:
          description: >-
            MCP servers to attach to the request, exposing their tools to the
            model.
          type: array
          items:
            $ref: '#/components/schemas/McpServer'
        router:
          description: >-
            LangDB routing configuration (fallbacks, load balancing, model
            targets).
          type: object
        extra:
          description: Additional gateway-specific routing or guardrail options.
          type: object
    ChatMessage:
      # One conversation turn. Shared by chat requests, chat completion
      # choices, and thread message lists.
      type: object
      required:
        - role
        - content
      properties:
        role:
          type: string
          enum: [system, user, assistant, tool]
        # Either plain text or a list of structured content parts.
        content:
          oneOf:
            - type: string
            - type: array
              items:
                type: object
        name:
          type: string
        # Assistant-side half of the tool-calling exchange. The schema already
        # had the `tool` role and `tool_call_id` but nothing that carries the
        # call being answered.
        # NOTE(review): shape follows the OpenAI tool-calling format the gateway
        # says it is compatible with - confirm against live responses.
        tool_calls:
          type: array
          description: Tool invocations requested by an assistant message, in OpenAI tool-calling format.
          items:
            type: object
            properties:
              id:
                type: string
                description: Call identifier that the answering tool message repeats in `tool_call_id`.
              type:
                type: string
                example: function
              function:
                type: object
                properties:
                  name:
                    type: string
                  arguments:
                    type: string
                    description: JSON-encoded arguments for the function call.
        tool_call_id:
          type: string
    McpServer:
      # Entry of the `mcp_servers` array on a chat completion request.
      type: object
      properties:
        slug: { type: string }
        name: { type: string }
        type:
          description: Transport type for the MCP server.
          type: string
          enum:
            - sse
            - ws
        server_url: { type: string, format: uri }
    ChatCompletionResponse:
      # JSON body of a 200 response from POST /chat/completions.
      type: object
      properties:
        id: { type: string }
        object:
          type: string
          example: chat.completion
        created: { type: integer }
        model: { type: string }
        choices:
          type: array
          items:
            type: object
            properties:
              index: { type: integer }
              message:
                $ref: '#/components/schemas/ChatMessage'
              finish_reason: { type: string }
        usage:
          $ref: '#/components/schemas/Usage'
    EmbeddingRequest:
      # Request body of POST /embeddings.
      type: object
      required:
        - model
        - input
      properties:
        model:
          type: string
          example: openai/text-embedding-3-small
        # The operation is documented as accepting "input text or token array",
        # so token forms are allowed alongside the text forms.
        # `minItems: 1` on the token variants keeps `oneOf` unambiguous: an
        # empty array still matches only the string-array branch, as it did
        # before the token variants were added.
        input:
          description: Text to embed (a string or an array of strings), or pre-tokenized input (an array of token ids or an array of such arrays).
          oneOf:
            - type: string
            - type: array
              items:
                type: string
            - type: array
              minItems: 1
              items:
                type: integer
            - type: array
              minItems: 1
              items:
                type: array
                minItems: 1
                items:
                  type: integer
        encoding_format:
          type: string
          enum: [float, base64]
    EmbeddingResponse:
      # JSON body of a 200 response from POST /embeddings.
      type: object
      properties:
        object:
          type: string
          example: list
        data:
          type: array
          items:
            type: object
            properties:
              object:
                type: string
                example: embedding
              index: { type: integer }
              embedding:
                type: array
                items: { type: number }
        model: { type: string }
        usage:
          $ref: '#/components/schemas/Usage'
    ImageGenerationRequest:
      # Request body of POST /images/generations; `model` and `prompt` are
      # mandatory.
      type: object
      required: [model, prompt]
      properties:
        model:
          type: string
          example: openai/gpt-image-1
        prompt: { type: string }
        n:
          type: integer
          default: 1
        size:
          type: string
          example: '1024x1024'
        response_format:
          type: string
          enum:
            - url
            - b64_json
    ImageGenerationResponse:
      # JSON body of a 200 response from POST /images/generations.
      type: object
      properties:
        created: { type: integer }
        data:
          type: array
          items:
            type: object
            properties:
              url: { type: string, format: uri }
              b64_json: { type: string }
    ModelList:
      # JSON body of a 200 response from GET /models: a list wrapper around
      # Model entries.
      type: object
      properties:
        object:
          type: string
          example: list
        data:
          type: array
          items:
            $ref: '#/components/schemas/Model'
    Model:
      # Single entry of ModelList.data.
      type: object
      properties:
        id:
          type: string
          example: openai/gpt-4o
        object:
          type: string
          example: model
        owned_by: { type: string }
        provider: { type: string }
    ThreadMessageList:
      # JSON body of a 200 response from GET /threads/{thread_id}/messages.
      type: object
      properties:
        thread_id: { type: string }
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
    ThreadCost:
      # JSON body of a 200 response from GET /threads/{thread_id}/cost.
      type: object
      properties:
        thread_id: { type: string }
        total_cost: { type: number }
        input_tokens: { type: integer }
        output_tokens: { type: integer }
    AnalyticsRequest:
      # Request body shared by the /analytics and /usage operations; no field
      # is marked required.
      type: object
      properties:
        interval:
          description: Preset reporting window.
          type: string
          enum:
            - last_day
            - last_week
            - last_month
        start_time: { type: string, format: date-time }
        end_time: { type: string, format: date-time }
        group_by:
          type: array
          items: { type: string }
    AnalyticsResponse:
      # JSON body of a 200 response from POST /analytics: one row per
      # timestamp.
      type: object
      properties:
        rows:
          type: array
          items:
            type: object
            properties:
              timestamp: { type: string, format: date-time }
              requests: { type: integer }
              total_tokens: { type: integer }
              cost: { type: number }
    AnalyticsSummary:
      # JSON body of a 200 response from POST /analytics/summary.
      type: object
      properties:
        total_requests: { type: integer }
        total_tokens: { type: integer }
        total_cost: { type: number }
    UsageTotal:
      # JSON body of a 200 response from POST /usage/total.
      type: object
      properties:
        total_cost: { type: number }
        input_tokens: { type: integer }
        output_tokens: { type: integer }
    UsageByModel:
      # JSON body of a 200 response from POST /usage/models: one entry per
      # model.
      type: object
      properties:
        models:
          type: array
          items:
            type: object
            properties:
              model: { type: string }
              total_cost: { type: number }
              input_tokens: { type: integer }
              output_tokens: { type: integer }
    Usage:
      # Token counts attached to chat completion and embedding responses.
      type: object
      properties:
        prompt_tokens: { type: integer }
        completion_tokens: { type: integer }
        total_tokens: { type: integer }
    Error:
      # Error envelope used by the shared Unauthorized and RateLimited
      # responses; details sit under a single `error` key.
      type: object
      properties:
        error:
          type: object
          properties:
            message: { type: string }
            type: { type: string }
            code: { type: string }