Triton Inference Server · JSON Structure
Triton Model Structure
JSON structure documentation for Triton model configuration and inference objects
Type:
Properties: 0
AI, Deep Learning, Inference, Machine Learning, Model Serving, NVIDIA, Open Source
Triton Model Structure is a JSON Structure definition published by Triton Inference Server.
Meta-schema:
JSON Structure
{
"title": "Triton Inference Server Model Structure",
"description": "JSON structure documentation for Triton model configuration and inference objects",
"version": "2.0.0",
"structures": [
{
"name": "Model",
"description": "A machine learning model managed by NVIDIA Triton Inference Server",
"fields": [
{ "name": "name", "type": "string", "required": true, "description": "Unique model name" },
{ "name": "platform", "type": "string", "required": false, "description": "Framework platform (tensorrt_plan, onnxruntime_onnx, pytorch_libtorch, etc.)" },
{ "name": "backend", "type": "string", "required": false, "description": "Name of the backend that executes the model (e.g. tensorrt, onnxruntime, pytorch, python)" },
{ "name": "version_policy", "type": "object", "required": false, "description": "Version selection policy (latest, all, specific)" },
{ "name": "max_batch_size", "type": "integer", "required": false, "description": "Maximum batch size (0 = batching disabled)" },
{ "name": "input", "type": "array[TensorConfig]", "required": false, "description": "Input tensor definitions" },
{ "name": "output", "type": "array[TensorConfig]", "required": false, "description": "Output tensor definitions" },
{ "name": "instance_group", "type": "array[InstanceGroup]", "required": false, "description": "Device deployment groups" },
{ "name": "dynamic_batching", "type": "object", "required": false, "description": "Dynamic batching config" },
{ "name": "sequence_batching", "type": "object", "required": false, "description": "Sequence batching config for stateful models" },
{ "name": "ensemble_scheduling", "type": "object", "required": false, "description": "Ensemble pipeline config" }
]
},
{
"name": "InferenceRequest",
"description": "Request payload for model inference",
"fields": [
{ "name": "id", "type": "string", "required": false, "description": "Request identifier" },
{ "name": "inputs", "type": "array[InferenceTensor]", "required": true, "description": "Input tensors" },
{ "name": "outputs", "type": "array[RequestedOutputTensor]", "required": false, "description": "Requested output tensors" },
{ "name": "parameters", "type": "object", "required": false, "description": "Request parameters (sequence_id, priority, timeout)" }
]
},
{
"name": "InferenceTensor",
"description": "Input tensor for an inference request",
"fields": [
{ "name": "name", "type": "string", "required": true, "description": "Tensor name" },
{ "name": "shape", "type": "array[integer]", "required": true, "description": "Tensor shape" },
{ "name": "datatype", "type": "string", "required": true, "description": "Data type (FP32, INT64, BYTES, etc.)" },
{ "name": "data", "type": "array", "required": true, "description": "Flattened tensor data" },
{ "name": "parameters", "type": "object", "required": false, "description": "Tensor-level parameters" }
]
},
{
"name": "TensorConfig",
"description": "Model tensor configuration from config.pbtxt",
"fields": [
{ "name": "name", "type": "string", "required": true, "description": "Tensor name" },
{ "name": "data_type", "type": "string", "required": true, "description": "Data type (TYPE_FP32, TYPE_INT64, etc.)" },
{ "name": "dims", "type": "array[integer]", "required": true, "description": "Tensor dimensions (-1 marks a variable-size dimension)" }
]
},
{
"name": "RepositoryIndexEntry",
"description": "Model entry in the repository index",
"fields": [
{ "name": "name", "type": "string", "required": true, "description": "Model name" },
{ "name": "version", "type": "string", "required": false, "description": "Model version" },
{ "name": "state", "type": "string", "required": false, "description": "State: READY, UNAVAILABLE, LOADING, UNLOADING" },
{ "name": "reason", "type": "string", "required": false, "description": "Reason for non-READY state" }
]
}
]
}