Letta · JSON Structure

Letta LLM Config Structure

Flat structure summary of the Letta LLMConfig entity, derived from the Letta OpenAPI 1.0.0 spec.

Type: Properties: 0
AI · Agents · Stateful Agents · Memory · MemGPT · Continual Learning · MCP · Multi-Agent · RAG · Open Source

letta-llm-config is a JSON Structure definition published by Letta.

Meta-schema:

JSON Structure

Raw ↑
{
  "jsonStructure": "0.1",
  "name": "letta-llm-config",
  "title": "Letta LLMConfig",
  "description": "Flat structure summary of the Letta LLMConfig entity, derived from the Letta OpenAPI 1.0.0 spec.",
  "source": "https://raw.githubusercontent.com/letta-ai/letta/main/fern/openapi.json",
  "entity": "LLMConfig",
  "fields": [
    {
      "name": "model",
      "type": "string",
      "required": true,
      "description": "LLM model name.",
      "enum": null,
      "format": null
    },
    {
      "name": "display_name",
      "type": "object",
      "required": false,
      "description": "A human-friendly display name for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "model_endpoint_type",
      "type": "string",
      "required": true,
      "description": "The endpoint type for the model.",
      "enum": [
        "openai",
        "anthropic",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "lmstudio-chatcompletions",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "minimax",
        "mistral",
        "together",
        "bedrock",
        "deepseek",
        "xai",
        "zai",
        "zai_coding",
        "baseten",
        "fireworks",
        "openrouter",
        "chatgpt_oauth"
      ],
      "format": null
    },
    {
      "name": "model_endpoint",
      "type": "object",
      "required": false,
      "description": "The endpoint for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "provider_name",
      "type": "object",
      "required": false,
      "description": "The provider name for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "provider_category",
      "type": "object",
      "required": false,
      "description": "The provider category for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "model_wrapper",
      "type": "object",
      "required": false,
      "description": "The wrapper for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "context_window",
      "type": "integer",
      "required": true,
      "description": "The context window size for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "put_inner_thoughts_in_kwargs",
      "type": "object",
      "required": false,
      "description": "Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.",
      "enum": null,
      "format": null
    },
    {
      "name": "handle",
      "type": "object",
      "required": false,
      "description": "The handle for this config, in the format provider/model-name.",
      "enum": null,
      "format": null
    },
    {
      "name": "temperature",
      "type": "number",
      "required": false,
      "description": "The temperature to use when generating text with the model. A higher temperature will result in more random text.",
      "enum": null,
      "format": null
    },
    {
      "name": "max_tokens",
      "type": "object",
      "required": false,
      "description": "The maximum number of tokens to generate. If not set, the model will use its default value.",
      "enum": null,
      "format": null
    },
    {
      "name": "enable_reasoner",
      "type": "boolean",
      "required": false,
      "description": "Whether or not the model should use extended thinking if it is a 'reasoning'-style model.",
      "enum": null,
      "format": null
    },
    {
      "name": "reasoning_effort",
      "type": "object",
      "required": false,
      "description": "The reasoning effort to use when generating text with reasoning models.",
      "enum": null,
      "format": null
    },
    {
      "name": "max_reasoning_tokens",
      "type": "integer",
      "required": false,
      "description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
      "enum": null,
      "format": null
    },
    {
      "name": "effort",
      "type": "object",
      "required": false,
      "description": "The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.",
      "enum": null,
      "format": null
    },
    {
      "name": "frequency_penalty",
      "type": "object",
      "required": false,
      "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
      "enum": null,
      "format": null
    },
    {
      "name": "compatibility_type",
      "type": "object",
      "required": false,
      "description": "The framework compatibility type for the model.",
      "enum": null,
      "format": null
    },
    {
      "name": "verbosity",
      "type": "object",
      "required": false,
      "description": "Soft control for how verbose model output should be, used for GPT-5 models.",
      "enum": null,
      "format": null
    },
    {
      "name": "tier",
      "type": "object",
      "required": false,
      "description": "The cost tier for the model (cloud only).",
      "enum": null,
      "format": null
    },
    {
      "name": "parallel_tool_calls",
      "type": "object",
      "required": false,
      "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
      "enum": null,
      "format": null
    },
    {
      "name": "response_format",
      "type": "object",
      "required": false,
      "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
      "enum": null,
      "format": null
    },
    {
      "name": "strict",
      "type": "boolean",
      "required": false,
      "description": "Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
      "enum": null,
      "format": null
    },
    {
      "name": "return_logprobs",
      "type": "boolean",
      "required": false,
      "description": "Whether to return log probabilities of the output tokens. Useful for RL training.",
      "enum": null,
      "format": null
    },
    {
      "name": "top_logprobs",
      "type": "object",
      "required": false,
      "description": "Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.",
      "enum": null,
      "format": null
    },
    {
      "name": "return_token_ids",
      "type": "boolean",
      "required": false,
      "description": "Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.",
      "enum": null,
      "format": null
    },
    {
      "name": "tool_call_parser",
      "type": "object",
      "required": false,
      "description": "SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output.",
      "enum": null,
      "format": null
    }
  ],
  "fieldCount": 27,
  "requiredCount": 3
}