# Snowflake
# Cortex Inference API
#
# OpenAPI 3.0 specification for the Cortex REST API.
# Documentation GitHub OpenAPI
# OpenAPI Specification

# OpenAPI document header: specification version followed by API metadata.
openapi: '3.0.2'
info:
  # Title, version and description of the API this document describes.
  title: 'Cortex Inference API'
  version: '0.1.0'
  description: 'OpenAPI 3.0 specification for the Cortex REST API'
  # Publisher contact details.
  contact:
    email: 'support@snowflake.com'
    name: 'Snowflake, Inc.'
    url: 'https://snowflake.com'
paths:
  # Model discovery: lists the LLMs available for the current session.
  /api/v2/cortex/models:
    get:
      summary: Returns the LLMs Available for the Current Session
      tags:
      - cortex-inference
      description: Returns the LLMs available for the current session
      operationId: getModels
      # NOTE(review): OpenAPI 3.0 gives requestBody on GET no defined semantics
      # and says consumers SHALL ignore it. Kept unchanged for compatibility;
      # confirm whether `models` should be expressed as a query parameter.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GetModelsRequest'
            examples:
              GetmodelsRequestExample:
                summary: Default getModels request
                x-microcks-default: true
                value:
                  models:
                  - example_value
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetModelsResponse'
              examples:
                Getmodels200Example:
                  summary: Default getModels 200 response
                  x-microcks-default: true
                  value:
                    models:
                    - example_value
        # Error responses are shared definitions resolved from common.yaml.
        '400':
          $ref: 'common.yaml#/components/responses/400BadRequest'
        '401':
          $ref: 'common.yaml#/components/responses/401Unauthorized'
        '403':
          $ref: 'common.yaml#/components/responses/403Forbidden'
        '404':
          $ref: 'common.yaml#/components/responses/404NotFound'
        '405':
          $ref: 'common.yaml#/components/responses/405MethodNotAllowed'
        '500':
          $ref: 'common.yaml#/components/responses/500InternalServerError'
        '503':
          $ref: 'common.yaml#/components/responses/503ServiceUnavailable'
        '504':
          $ref: 'common.yaml#/components/responses/504GatewayTimeout'
      # Vendor extension holding mock settings (response delay and dispatcher).
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # Text completion: accepts a CompleteRequest and returns the completion.
  /api/v2/cortex/inference:complete:
    post:
      summary: Perform LLM Text Completion Inference.
      tags:
      - cortex-inference
      description: Perform LLM text completion inference, similar to snowflake.cortex.Complete.
      operationId: cortexLLMInferenceComplete
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompleteRequest'
            examples:
              CortexllminferencecompleteRequestExample:
                summary: Default cortexLLMInferenceComplete request
                x-microcks-default: true
                # Example values are kept inside the constraints declared on
                # CompleteRequest (e.g. top_p must lie in [0.0, 1.0]).
                value:
                  model: example_value
                  messages:
                  # The role is one of 'system', 'user' or 'assistant'; the
                  # last message in the list must be a 'user' message.
                  - role: user
                    content: example_value
                  temperature: 0.7
                  top_p: 0.9
                  max_tokens: 10
                  max_output_tokens: 10
                  response_format:
                    type: json
                    # `schema` is declared as an object (a JSON schema), not a string.
                    schema:
                      type: object
                      properties:
                        answer:
                          type: string
                  guardrails:
                    enabled: true
                    response_when_unsafe: example_value
                  tools:
                  - {}
                  tool_choice: {}
                  provisioned_throughput_id: '500123'
                  sf-ml-xp-inflight-prompt-action: example_value
                  sf-ml-xp-inflight-prompt-client-id: '500123'
                  sf-ml-xp-inflight-prompt-public-key: example_value
                  stream: true
      responses:
        '200':
          description: OK
          content:
            # Streaming variant: server-sent events.
            text/event-stream:
              schema:
                $ref: '#/components/schemas/StreamingCompleteResponse'
              examples:
                Cortexllminferencecomplete200Example:
                  summary: Default cortexLLMInferenceComplete 200 response
                  x-microcks-default: true
                  value: {}
            # Non-streaming variant, using the schema that components declare
            # for non-streaming requests.
            # NOTE(review): presumably returned when `stream` is false - confirm.
            application/json:
              schema:
                $ref: '#/components/schemas/NonStreamingCompleteResponse'
        # Error responses are shared definitions resolved from common.yaml.
        '400':
          $ref: 'common.yaml#/components/responses/400BadRequest'
        '401':
          $ref: 'common.yaml#/components/responses/401Unauthorized'
        '403':
          $ref: 'common.yaml#/components/responses/403Forbidden'
        '404':
          $ref: 'common.yaml#/components/responses/404NotFound'
        '405':
          $ref: 'common.yaml#/components/responses/405MethodNotAllowed'
        '500':
          $ref: 'common.yaml#/components/responses/500InternalServerError'
        '503':
          $ref: 'common.yaml#/components/responses/503ServiceUnavailable'
        '504':
          $ref: 'common.yaml#/components/responses/504GatewayTimeout'
      # Vendor extension holding mock settings (response delay and dispatcher).
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
components:
  schemas:
    # Request body referenced by the getModels operation: an object with an
    # optional list of strings under `models`.
    GetModelsRequest:
      type: object
      properties:
        models:
          type: array
          example: []
          items: {type: string}
    # 200 payload referenced by the getModels operation: an object with a
    # list of strings under `models`.
    GetModelsResponse:
      type: object
      properties:
        models:
          type: array
          example: []
          items: {type: string}
    # Request body for the text-completion operation. Only `model` and
    # `messages` are required; every other property is optional.
    CompleteRequest:
      type: object
      description: LLM text completion request.
      properties:
        model:
          description: The model name. See documentation for possible values.
          type: string
          example: example_value
        messages:
          type: array
          items:
            type: object
            properties:
              role:
                type: string
                description: |
                  Indicates the role of the message, one of 'system', 'user' or 'assistant'.

                  Rules:
                    - A 'user' message must be the last message in the list.
                    - If a 'system' message is specified, it must be the first message.
                    - If a 'assistant' message is specified, it must be immediately before a 'user' message in the list.

                  Multiple 'assistant' and 'user' messages can be specified, but they must alternate in sequence.
                default: user
              content:
                type: string
                description: The text completion prompt, e.g. 'What is a Large Language Model?'.
              content_list:
                type: array
                description: Contents of toolUse and toolResults
                items:
                  # OpenAPI 3.0 only permits `discriminator` next to
                  # oneOf/anyOf/allOf, so the alternatives named in the mapping
                  # are listed explicitly.
                  oneOf:
                  - $ref: common-cortex-tool.yaml#/components/schemas/TextContent
                  - $ref: common-cortex-tool.yaml#/components/schemas/ToolResults
                  - $ref: common-cortex-tool.yaml#/components/schemas/ToolUse
                  discriminator:
                    propertyName: type
                    mapping:
                      text: common-cortex-tool.yaml#/components/schemas/TextContent
                      tool_result: common-cortex-tool.yaml#/components/schemas/ToolResults
                      tool_use: common-cortex-tool.yaml#/components/schemas/ToolUse
            required:
            - content
          minItems: 1
          # The example carries one message so that it satisfies minItems: 1.
          example:
          - role: user
            content: 'What is a Large Language Model?'
        temperature:
          description: Temperature controls the amount of randomness used in response generation. A higher temperature corresponds to more randomness.
          type: number
          nullable: true
          minimum: 0.0
          example: 0.7
        top_p:
          description: >-
            Threshold probability for nucleus sampling. A higher top-p value increases the diversity of tokens that
            the model considers, while a lower value results in more predictable output.
          type: number
          default: 1.0
          minimum: 0.0
          maximum: 1.0
          # The example must lie within [minimum, maximum].
          example: 0.9
        max_tokens:
          # NOTE(review): the description says the default is model-dependent,
          # yet a fixed default of 4096 is declared - confirm which is intended.
          description: The maximum number of output tokens to produce. The default value is model-dependent.
          type: integer
          default: 4096
          minimum: 0
          example: 10
        max_output_tokens:
          deprecated: true
          description: Deprecated in favor of "max_tokens", which has identical behavior.
          type: integer
          nullable: true
          example: 10
        response_format:
          type: object
          nullable: true
          description: An object describing response format config for structured-output mode.
          properties:
            type:
              type: string
              enum:
              - json
              description: The response format type (e.g., "json").
            schema:
              type: object
              description: The schema defining the structure of the response. If the `type` is "json", the `schema` field should contain a valid JSON schema.
          # The example is an object matching the properties declared above.
          example:
            type: json
            schema:
              type: object
              properties:
                answer:
                  type: string
        guardrails:
          $ref: '#/components/schemas/GuardrailsConfig'
        tools:
          description: List of tools to be used during tool calling
          type: array
          items:
            $ref: common-cortex-tool.yaml#/components/schemas/Tool
          example: []
        tool_choice:
          $ref: common-cortex-tool.yaml#/components/schemas/ToolChoice
        provisioned_throughput_id:
          type: string
          description: The provisioned throughput ID to be used with the request.
          nullable: true
          example: '500123'
        sf-ml-xp-inflight-prompt-action:
          type: string
          description: Reserved
          example: example_value
        sf-ml-xp-inflight-prompt-client-id:
          type: string
          description: Reserved
          example: '500123'
        sf-ml-xp-inflight-prompt-public-key:
          type: string
          description: Reserved
          example: example_value
        stream:
          type: boolean
          default: true
          nullable: true
          description: Reserved
          example: true
      required:
      - model
      - messages
    # Nullable settings object referenced by CompleteRequest.guardrails.
    GuardrailsConfig:
      title: GuardrailsConfig
      description: Guardrails configuration
      type: object
      nullable: true
      properties:
        # On/off switch for guardrails.
        enabled:
          description: Controls whether guardrails are enabled
          type: boolean
          example: true
        # Replacement text used when a completion is marked unsafe.
        response_when_unsafe:
          description: The response when the guardrails model marks the completion as unsafe
          type: string
          example: 'Response filtered by Cortex Guard'
    # Completion payload for non-streaming requests: a list of choices plus
    # token usage counters.
    NonStreamingCompleteResponse:
      type: object
      description: Text-completion response for non-streaming request.
      properties:
        choices:
          type: array
          items:
            type: object
            properties:
              message:
                type: object
                properties:
                  content:
                    type: string
                    description: The text completion response.
                  content_list:
                    type: array
                    description: Contents of text and toolUse response.
                    items:
                      # OpenAPI 3.0 only permits `discriminator` next to
                      # oneOf/anyOf/allOf, so the alternatives named in the
                      # mapping are listed explicitly.
                      oneOf:
                      - $ref: common-cortex-tool.yaml#/components/schemas/TextContent
                      - $ref: common-cortex-tool.yaml#/components/schemas/ToolUse
                      discriminator:
                        propertyName: type
                        mapping:
                          text: common-cortex-tool.yaml#/components/schemas/TextContent
                          tool_use: common-cortex-tool.yaml#/components/schemas/ToolUse
          example: []
        usage:
          type: object
          title: Usage
          properties:
            prompt_tokens:
              type: integer
              description: Input token count.
            completion_tokens:
              type: integer
              description: Output token count.
            guard_tokens:
              type: integer
              description: Tokens used by cortex guard.
            total_tokens:
              type: integer
              description: Sum of all tokens.
          # The example is an object matching the declared integer counters.
          example:
            prompt_tokens: 10
            completion_tokens: 20
            guard_tokens: 0
            total_tokens: 30
    # Schema used for the text/event-stream 200 response; the `data` event
    # payload is declared through the x-events vendor extension.
    StreamingCompleteResponse:
      description: 'Server-sent events for streaming text-completion updates.'
      type: object
      x-events:
        data:
          $ref: "#/components/schemas/StreamingCompleteResponseDataEvent"
    # One streamed event: a list of choices, each carrying an incremental
    # `delta`.
    StreamingCompleteResponseDataEvent:
      description: 'Streaming text-completion response event.'
      type: object
      properties:
        choices:
          type: array
          example: []
          items:
            type: object
            properties:
              delta:
                $ref: "#/components/schemas/StreamingCompleteResponseDelta"
    # Polymorphic delta object; the required `type` property selects the
    # concrete schema through the discriminator mapping.
    # NOTE(review): `discriminator` appears here without oneOf/anyOf/allOf.
    # Presumably the mapped schemas in common-cortex-tool.yaml extend this one
    # via allOf - verify, otherwise list them under oneOf.
    StreamingCompleteResponseDelta:
      type: object
      discriminator:
        propertyName: type
        mapping:
          text: 'common-cortex-tool.yaml#/components/schemas/StreamingTextContent'
          tool_use: 'common-cortex-tool.yaml#/components/schemas/StreamingToolUse'
      required:
      - type
  # Authentication schemes; each definition is resolved from common.yaml.
  securitySchemes:
    KeyPair:
      $ref: 'common.yaml#/components/securitySchemes/KeyPair'
    ExternalOAuth:
      $ref: 'common.yaml#/components/securitySchemes/ExternalOAuth'
    SnowflakeOAuth:
      $ref: 'common.yaml#/components/securitySchemes/SnowflakeOAuth'
# Document-level security requirements. Each list entry is an alternative:
# a request needs to satisfy only one of the listed schemes. The empty
# arrays mean no scopes are requested.
security:
- KeyPair: []
- ExternalOAuth: []
- SnowflakeOAuth: []
# Cortex Inference API
#
# Documentation
#
# Specifications
#
# Other Resources
#
# OpenAPI Specification