Letta · JSON Structure
Letta Llm Config Structure
Flat structure summary of the Letta LLMConfig entity, derived from the Letta OpenAPI 1.0.0 spec.
Type:
Properties: 0
AI, Agents, Stateful Agents, Memory, MemGPT, Continual Learning, MCP, Multi-Agent, RAG, Open Source
letta-llm-config is a JSON Structure definition published by Letta.
Meta-schema:
JSON Structure
{
"jsonStructure": "0.1",
"name": "letta-llm-config",
"title": "Letta LLMConfig",
"description": "Flat structure summary of the Letta LLMConfig entity, derived from the Letta OpenAPI 1.0.0 spec.",
"source": "https://raw.githubusercontent.com/letta-ai/letta/main/fern/openapi.json",
"entity": "LLMConfig",
"fields": [
{
"name": "model",
"type": "string",
"required": true,
"description": "LLM model name.",
"enum": null,
"format": null
},
{
"name": "display_name",
"type": "object",
"required": false,
"description": "A human-friendly display name for the model.",
"enum": null,
"format": null
},
{
"name": "model_endpoint_type",
"type": "string",
"required": true,
"description": "The endpoint type for the model.",
"enum": [
"openai",
"anthropic",
"google_ai",
"google_vertex",
"azure",
"groq",
"ollama",
"webui",
"webui-legacy",
"lmstudio",
"lmstudio-legacy",
"lmstudio-chatcompletions",
"llamacpp",
"koboldcpp",
"vllm",
"hugging-face",
"minimax",
"mistral",
"together",
"bedrock",
"deepseek",
"xai",
"zai",
"zai_coding",
"baseten",
"fireworks",
"openrouter",
"chatgpt_oauth"
],
"format": null
},
{
"name": "model_endpoint",
"type": "object",
"required": false,
"description": "The endpoint for the model.",
"enum": null,
"format": null
},
{
"name": "provider_name",
"type": "object",
"required": false,
"description": "The provider name for the model.",
"enum": null,
"format": null
},
{
"name": "provider_category",
"type": "object",
"required": false,
"description": "The provider category for the model.",
"enum": null,
"format": null
},
{
"name": "model_wrapper",
"type": "object",
"required": false,
"description": "The wrapper for the model.",
"enum": null,
"format": null
},
{
"name": "context_window",
"type": "integer",
"required": true,
"description": "The context window size for the model.",
"enum": null,
"format": null
},
{
"name": "put_inner_thoughts_in_kwargs",
"type": "object",
"required": false,
"description": "Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.",
"enum": null,
"format": null
},
{
"name": "handle",
"type": "object",
"required": false,
"description": "The handle for this config, in the format provider/model-name.",
"enum": null,
"format": null
},
{
"name": "temperature",
"type": "number",
"required": false,
"description": "The temperature to use when generating text with the model. A higher temperature will result in more random text.",
"enum": null,
"format": null
},
{
"name": "max_tokens",
"type": "object",
"required": false,
"description": "The maximum number of tokens to generate. If not set, the model will use its default value.",
"enum": null,
"format": null
},
{
"name": "enable_reasoner",
"type": "boolean",
"required": false,
"description": "Whether the model should use extended thinking if it is a 'reasoning'-style model.",
"enum": null,
"format": null
},
{
"name": "reasoning_effort",
"type": "object",
"required": false,
"description": "The reasoning effort to use when generating text with reasoning models.",
"enum": null,
"format": null
},
{
"name": "max_reasoning_tokens",
"type": "integer",
"required": false,
"description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
"enum": null,
"format": null
},
{
"name": "effort",
"type": "object",
"required": false,
"description": "The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.",
"enum": null,
"format": null
},
{
"name": "frequency_penalty",
"type": "object",
"required": false,
"description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
"enum": null,
"format": null
},
{
"name": "compatibility_type",
"type": "object",
"required": false,
"description": "The framework compatibility type for the model.",
"enum": null,
"format": null
},
{
"name": "verbosity",
"type": "object",
"required": false,
"description": "Soft control for how verbose model output should be, used for GPT-5 models.",
"enum": null,
"format": null
},
{
"name": "tier",
"type": "object",
"required": false,
"description": "The cost tier for the model (cloud only).",
"enum": null,
"format": null
},
{
"name": "parallel_tool_calls",
"type": "object",
"required": false,
"description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
"enum": null,
"format": null
},
{
"name": "response_format",
"type": "object",
"required": false,
"description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
"enum": null,
"format": null
},
{
"name": "strict",
"type": "boolean",
"required": false,
"description": "Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
"enum": null,
"format": null
},
{
"name": "return_logprobs",
"type": "boolean",
"required": false,
"description": "Whether to return log probabilities of the output tokens. Useful for RL training.",
"enum": null,
"format": null
},
{
"name": "top_logprobs",
"type": "object",
"required": false,
"description": "Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.",
"enum": null,
"format": null
},
{
"name": "return_token_ids",
"type": "boolean",
"required": false,
"description": "Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.",
"enum": null,
"format": null
},
{
"name": "tool_call_parser",
"type": "object",
"required": false,
"description": "SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output.",
"enum": null,
"format": null
}
],
"fieldCount": 27,
"requiredCount": 3
}