# Cortex Inference API
# OpenAPI 3.0 specification for the Cortex REST API.
# OpenAPI document version. Quoted so that no parser can ever read the
# version-like scalar as anything other than a string.
openapi: '3.0.2'

# Human-readable metadata about the API: title, description, the version of
# this API description, and the publisher's contact details.
info:
  title: 'Cortex Inference API'
  description: 'OpenAPI 3.0 specification for the Cortex REST API'
  version: '0.1.0'
  contact:
    name: 'Snowflake, Inc.'
    url: 'https://snowflake.com'
    email: 'support@snowflake.com'
# API operations exposed by this document.
#
# Both operations reuse the shared error responses declared in common.yaml and
# carry Microcks mock metadata (`x-microcks-default` on examples,
# `x-microcks-operation` on the operation itself).
paths:
  /api/v2/cortex/models:
    get:
      summary: Returns the LLMs Available for the Current Session
      tags:
        - cortex-inference
      description: Returns the LLMs available for the current session
      operationId: getModels
      # NOTE(review): OpenAPI 3.0 states that consumers SHALL ignore
      # `requestBody` on methods where HTTP leaves body semantics undefined
      # (GET among them). Kept, and still optional, so the declared contract is
      # unchanged - confirm whether the server actually reads it.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GetModelsRequest'
            examples:
              GetmodelsRequestExample:
                summary: Default getModels request
                x-microcks-default: true
                value:
                  models:
                    - example_value
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetModelsResponse'
              examples:
                Getmodels200Example:
                  summary: Default getModels 200 response
                  x-microcks-default: true
                  value:
                    models:
                      - example_value
        '400':
          $ref: 'common.yaml#/components/responses/400BadRequest'
        '401':
          $ref: 'common.yaml#/components/responses/401Unauthorized'
        '403':
          $ref: 'common.yaml#/components/responses/403Forbidden'
        '404':
          $ref: 'common.yaml#/components/responses/404NotFound'
        '405':
          $ref: 'common.yaml#/components/responses/405MethodNotAllowed'
        '500':
          $ref: 'common.yaml#/components/responses/500InternalServerError'
        '503':
          $ref: 'common.yaml#/components/responses/503ServiceUnavailable'
        '504':
          $ref: 'common.yaml#/components/responses/504GatewayTimeout'
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK

  # The path itself contains a colon ("inference:complete"); the key is quoted
  # so the embedded colon can never be mistaken for a mapping separator.
  '/api/v2/cortex/inference:complete':
    post:
      summary: Perform LLM Text Completion Inference.
      tags:
        - cortex-inference
      description: Perform LLM text completion inference, similar to snowflake.cortex.Complete.
      operationId: cortexLLMInferenceComplete
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompleteRequest'
            examples:
              CortexllminferencecompleteRequestExample:
                summary: Default cortexLLMInferenceComplete request
                x-microcks-default: true
                # Every value below is kept valid against CompleteRequest:
                # `role` uses one of the documented roles, `top_p` stays within
                # its 0.0-1.0 range, and `response_format.schema` is an object
                # (a JSON schema) rather than a placeholder string.
                value:
                  model: example_value
                  messages:
                    - role: user
                      content: What is a Large Language Model?
                      content_list:
                        - {}
                  temperature: 42.5
                  top_p: 0.9
                  max_tokens: 10
                  max_output_tokens: 10
                  response_format:
                    type: json
                    schema:
                      type: object
                      properties:
                        answer:
                          type: string
                  guardrails:
                    enabled: true
                    response_when_unsafe: Response filtered by Cortex Guard
                  tools:
                    - {}
                  tool_choice: {}
                  provisioned_throughput_id: '500123'
                  sf-ml-xp-inflight-prompt-action: example_value
                  sf-ml-xp-inflight-prompt-client-id: '500123'
                  sf-ml-xp-inflight-prompt-public-key: example_value
                  stream: true
      responses:
        '200':
          description: OK
          # NOTE(review): only the streaming (server-sent events) representation
          # is declared here. The NonStreamingCompleteResponse schema under
          # components is not referenced by any operation in this file -
          # confirm whether an application/json response should be listed too.
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/StreamingCompleteResponse'
              examples:
                Cortexllminferencecomplete200Example:
                  summary: Default cortexLLMInferenceComplete 200 response
                  x-microcks-default: true
                  value: {}
        '400':
          $ref: 'common.yaml#/components/responses/400BadRequest'
        '401':
          $ref: 'common.yaml#/components/responses/401Unauthorized'
        '403':
          $ref: 'common.yaml#/components/responses/403Forbidden'
        '404':
          $ref: 'common.yaml#/components/responses/404NotFound'
        '405':
          $ref: 'common.yaml#/components/responses/405MethodNotAllowed'
        '500':
          $ref: 'common.yaml#/components/responses/500InternalServerError'
        '503':
          $ref: 'common.yaml#/components/responses/503ServiceUnavailable'
        '504':
          $ref: 'common.yaml#/components/responses/504GatewayTimeout'
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
# Reusable definitions: request/response schemas for the two operations and
# the security schemes (all three delegated to common.yaml).
#
# Every `example` is kept valid against the schema it sits on, so that mock
# servers and documentation generators do not emit payloads the API would
# reject.
components:
  schemas:
    # Optional request payload for getModels: a list of model names.
    GetModelsRequest:
      type: object
      properties:
        models:
          type: array
          items:
            type: string
          example: []

    # Response payload for getModels: a list of model names.
    GetModelsResponse:
      type: object
      properties:
        models:
          type: array
          items:
            type: string
          example: []

    CompleteRequest:
      type: object
      description: LLM text completion request.
      properties:
        model:
          description: The model name. See documentation for possible values.
          type: string
          example: example_value
        messages:
          type: array
          items:
            type: object
            properties:
              role:
                type: string
                # Literal block scalar: the bullet lines keep their leading
                # space and the value ends with a single newline.
                description: |
                  Indicates the role of the message, one of 'system', 'user' or 'assistant'.

                  Rules:
                   - A 'user' message must be the last message in the list.
                   - If a 'system' message is specified, it must be the first message.
                   - If an 'assistant' message is specified, it must be immediately before a 'user' message in the list.

                  Multiple 'assistant' and 'user' messages can be specified, but they must alternate in sequence.
                default: user
              content:
                type: string
                description: The text completion prompt, e.g. 'What is a Large Language Model?'.
              content_list:
                type: array
                description: Contents of toolUse and toolResults
                # NOTE(review): the items declare a discriminator but no
                # oneOf/anyOf listing the mapped schemas - confirm whether the
                # alternatives should be enumerated here.
                items:
                  discriminator:
                    propertyName: type
                    mapping:
                      text: 'common-cortex-tool.yaml#/components/schemas/TextContent'
                      tool_result: 'common-cortex-tool.yaml#/components/schemas/ToolResults'
                      tool_use: 'common-cortex-tool.yaml#/components/schemas/ToolUse'
            required:
              - content
          minItems: 1
          # At least one message is required (minItems: 1), so the example
          # carries a single valid 'user' message instead of an empty list.
          example:
            - role: user
              content: What is a Large Language Model?
        temperature:
          description: >-
            Temperature controls the amount of randomness used in response
            generation. A higher temperature corresponds to more randomness.
          type: number
          nullable: true
          minimum: 0.0
          example: 42.5
        top_p:
          description: >-
            Threshold probability for nucleus sampling. A higher top-p value
            increases the diversity of tokens that the model considers, while a
            lower value results in more predictable output.
          type: number
          default: 1.0
          minimum: 0.0
          maximum: 1.0
          # Must lie within [0.0, 1.0] to satisfy minimum/maximum above.
          example: 0.9
        max_tokens:
          # NOTE(review): the description calls the default model-dependent,
          # yet a fixed default of 4096 is declared - confirm which is intended.
          description: The maximum number of output tokens to produce. The default value is model-dependent.
          type: integer
          default: 4096
          minimum: 0
          example: 10
        max_output_tokens:
          deprecated: true
          description: Deprecated in favor of "max_tokens", which has identical behavior.
          type: integer
          nullable: true
          example: 10
        response_format:
          type: object
          nullable: true
          description: An object describing response format config for structured-output mode.
          properties:
            type:
              type: string
              enum:
                - json
              description: The response format type (e.g., "json").
            schema:
              type: object
              description: >-
                The schema defining the structure of the response. If the
                `type` is "json", the `schema` field should contain a valid
                JSON schema.
          # An object example, matching `type: object` on this property.
          example:
            type: json
            schema:
              type: object
              properties:
                answer:
                  type: string
        guardrails:
          $ref: '#/components/schemas/GuardrailsConfig'
        tools:
          description: List of tools to be used during tool calling
          type: array
          items:
            $ref: 'common-cortex-tool.yaml#/components/schemas/Tool'
          example: []
        tool_choice:
          $ref: 'common-cortex-tool.yaml#/components/schemas/ToolChoice'
        provisioned_throughput_id:
          type: string
          description: The provisioned throughput ID to be used with the request.
          nullable: true
          example: '500123'
        sf-ml-xp-inflight-prompt-action:
          type: string
          description: Reserved
          example: example_value
        sf-ml-xp-inflight-prompt-client-id:
          type: string
          description: Reserved
          example: '500123'
        sf-ml-xp-inflight-prompt-public-key:
          type: string
          description: Reserved
          example: example_value
        stream:
          type: boolean
          default: true
          nullable: true
          description: Reserved
          example: true
      required:
        - model
        - messages

    GuardrailsConfig:
      type: object
      title: GuardrailsConfig
      description: Guardrails configuration
      nullable: true
      properties:
        enabled:
          type: boolean
          description: Controls whether guardrails are enabled
          example: true
        response_when_unsafe:
          type: string
          description: The response when the guardrails model marks the completion as unsafe
          example: Response filtered by Cortex Guard

    NonStreamingCompleteResponse:
      type: object
      description: Text-completion response for non-streaming request.
      properties:
        choices:
          type: array
          items:
            type: object
            properties:
              message:
                type: object
                properties:
                  content:
                    type: string
                    description: The text completion response.
                  content_list:
                    type: array
                    description: Contents of text and toolUse response.
                    items:
                      discriminator:
                        propertyName: type
                        mapping:
                          text: 'common-cortex-tool.yaml#/components/schemas/TextContent'
                          tool_use: 'common-cortex-tool.yaml#/components/schemas/ToolUse'
          example: []
        usage:
          type: object
          title: Usage
          properties:
            prompt_tokens:
              type: integer
              description: Input token count.
            completion_tokens:
              type: integer
              description: Output token count.
            guard_tokens:
              type: integer
              description: Tokens used by cortex guard.
            total_tokens:
              type: integer
              description: Sum of all tokens.
          # An object example, matching `type: object`; total_tokens is the sum
          # of the other three counts, as its description states.
          example:
            prompt_tokens: 10
            completion_tokens: 20
            guard_tokens: 0
            total_tokens: 30

    # Envelope for the text/event-stream response; the `data` event payload is
    # described by StreamingCompleteResponseDataEvent via the x-events extension.
    StreamingCompleteResponse:
      type: object
      description: Server-sent events for streaming text-completion updates.
      x-events:
        data:
          $ref: '#/components/schemas/StreamingCompleteResponseDataEvent'

    StreamingCompleteResponseDataEvent:
      type: object
      description: Streaming text-completion response event.
      properties:
        choices:
          type: array
          items:
            type: object
            properties:
              delta:
                $ref: '#/components/schemas/StreamingCompleteResponseDelta'
          example: []

    # NOTE(review): `type` is required and used as the discriminator, but it is
    # not declared under `properties` in this schema - confirm whether it should
    # be declared here.
    StreamingCompleteResponseDelta:
      type: object
      required:
        - type
      discriminator:
        propertyName: type
        mapping:
          text: 'common-cortex-tool.yaml#/components/schemas/StreamingTextContent'
          tool_use: 'common-cortex-tool.yaml#/components/schemas/StreamingToolUse'

  securitySchemes:
    KeyPair:
      $ref: 'common.yaml#/components/securitySchemes/KeyPair'
    ExternalOAuth:
      $ref: 'common.yaml#/components/securitySchemes/ExternalOAuth'
    SnowflakeOAuth:
      $ref: 'common.yaml#/components/securitySchemes/SnowflakeOAuth'
# Document-wide security requirement. The entries are alternatives: a request
# is authorized when it satisfies any one of the listed schemes. The empty
# lists mean no scopes are requested.
security:
  - KeyPair: []
  - ExternalOAuth: []
  - SnowflakeOAuth: []