Agenta Testsets API

Create, query, version, and import or export the testsets (evaluation datasets) used to drive evaluations, including CSV/JSON file upload and download.

OpenAPI Specification

agenta-openapi.yml
# OpenAPI document version followed by the API-level metadata: title,
# description, terms of service, contact, license, and the spec version.
openapi: 3.0.1
info:
  title: Agenta API
  description: >-
    Agenta is an open-source LLMOps platform for prompt management, LLM
    evaluation, and LLM observability. This specification documents the public
    cloud REST API surface used to manage applications and variants, fetch and
    deploy versioned prompt configurations, run evaluations and configure
    evaluators, manage testsets, and ingest and query observability traces. All
    endpoints are authenticated with an Agenta API key passed in the
    Authorization header. Agenta is MIT licensed and may also be self-hosted.
  termsOfService: https://agenta.ai/terms
  contact:
    name: Agenta Support
    url: https://agenta.ai/
    email: team@agenta.ai
  license:
    name: MIT
    url: https://github.com/Agenta-AI/agenta/blob/main/LICENSE
  # Quoted so the value stays a string instead of being parsed as a float.
  version: '1.0'
# Base URLs of the hosted API (US and EU regions); the entries under `paths`
# are resolved relative to whichever server is selected.
servers:
  - url: https://cloud.agenta.ai/api
    description: Agenta Cloud (US)
  - url: https://eu.cloud.agenta.ai/api
    description: Agenta Cloud (EU)
# Document-wide security requirement: operations use the ApiKeyAuth scheme
# declared under components.securitySchemes. The empty list means no scopes.
security:
  - ApiKeyAuth: []
# Tag catalogue. Each operation under `paths` lists one of these names in its
# own `tags` array to group it in generated documentation.
tags:
  - name: Applications
    description: Create and manage LLM applications and their variants.
  - name: Configs
    description: Fetch and deploy versioned prompt configurations.
  - name: Evaluations
    description: Run evaluations of variants against testsets.
  - name: Evaluators
    description: Configure evaluators used to score variants.
  - name: Testsets
    description: Manage evaluation datasets (testsets).
  - name: Traces
    description: Query observability traces and spans.
  - name: OpenTelemetry
    description: Ingest LLM telemetry over OTLP/HTTP.
paths:
  # Create an application from a required JSON body. Note that the documented
  # path ends with a trailing slash.
  /simple/applications/:
    post:
      operationId: createSimpleApplication
      tags:
        - Applications
      summary: Create a new application.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SimpleApplicationCreateRequest'
      responses:
        '200':
          description: The created application.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SimpleApplicationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # List/filter applications. Filtering is sent as a POST body, and the body is
  # optional (`required: false`).
  /simple/applications/query:
    post:
      operationId: querySimpleApplications
      tags:
        - Applications
      summary: List and filter applications.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ApplicationQueryRequest'
      responses:
        '200':
          description: A list of applications.
          content:
            application/json:
              schema:
                # NOTE(review): every array item is a SimpleApplicationResponse,
                # which itself carries a `count` field - confirm the server
                # does not return a single envelope instead of a bare array.
                type: array
                items:
                  $ref: '#/components/schemas/SimpleApplicationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Fetch or edit a single application addressed by its UUID. Both operations
  # take the shared ApplicationId path parameter, so both document the NotFound
  # response for an id that does not resolve to an application.
  /simple/applications/{application_id}:
    get:
      operationId: fetchSimpleApplication
      tags:
        - Applications
      summary: Fetch a single application by id.
      parameters:
        - $ref: '#/components/parameters/ApplicationId'
      responses:
        '200':
          description: The requested application.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SimpleApplicationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
    put:
      operationId: editSimpleApplication
      tags:
        - Applications
      summary: Edit an application by id.
      parameters:
        - $ref: '#/components/parameters/ApplicationId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SimpleApplicationEditRequest'
      responses:
        '200':
          description: The updated application.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SimpleApplicationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Mirrors the GET operation on this path: the id may not exist.
        '404':
          $ref: '#/components/responses/NotFound'
  # List/filter application variants via an optional JSON filter body; returns
  # a bare array of ApplicationVariant objects.
  /applications/variants/query:
    post:
      operationId: queryApplicationVariants
      tags:
        - Applications
      summary: List and filter application variants.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/VariantQueryRequest'
      responses:
        '200':
          description: A list of application variants.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/ApplicationVariant'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Fetch one application variant by UUID. The path parameter is declared
  # inline here rather than as a reusable entry under components.parameters.
  /applications/variants/{application_variant_id}:
    get:
      operationId: fetchApplicationVariant
      tags:
        - Applications
      summary: Fetch a single application variant by id.
      parameters:
        - name: application_variant_id
          in: path
          required: true
          schema:
            type: string
            format: uuid
      responses:
        '200':
          description: The requested variant.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ApplicationVariant'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Commit a new revision of a variant; the request wraps the prompt
  # configuration and the response is the resulting Revision.
  /applications/revisions/commit:
    post:
      operationId: commitApplicationRevision
      tags:
        - Applications
      summary: Commit a new revision of an application variant.
      description: >-
        Commits the supplied prompt and parameter configuration as a new,
        immutable revision of a variant. This is how new prompt versions are
        published in Agenta's prompt management workflow.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CommitRevisionRequest'
      responses:
        '200':
          description: The committed revision.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Revision'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Deploy a revision to an environment. Grouped under the Configs tag even
  # though the path sits under /applications.
  /applications/revisions/deploy:
    post:
      operationId: deployApplicationRevision
      tags:
        - Configs
      summary: Deploy an application revision to an environment.
      description: >-
        Deploys a committed revision to an environment (for example
        development, staging, or production) so that fetch-config calls scoped
        to that environment return the deployed configuration.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeployRevisionRequest'
      responses:
        '200':
          description: The deployment result.
          content:
            application/json:
              schema:
                # The deployment result reuses the Revision schema.
                $ref: '#/components/schemas/Revision'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Resolve a prompt configuration from references supplied in the POST body.
  # Documents a 404 in addition to 200 and 401.
  /variants/configs/fetch:
    post:
      operationId: fetchConfig
      tags:
        - Configs
      summary: Fetch a prompt configuration.
      description: >-
        Fetches the configuration (prompt template, model, and parameters) for
        a variant or environment reference. Production code uses this endpoint
        (or the configuration management SDK that wraps it) to pull the latest
        committed prompt without redeploying. Identify the configuration by
        application plus environment slug, or by variant reference.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ConfigFetchRequest'
      responses:
        '200':
          description: The resolved configuration.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ConfigResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Create a testset from a required JSON body; returns the created Testset.
  # The documented path ends with a trailing slash.
  /testsets/:
    post:
      operationId: createTestset
      tags:
        - Testsets
      summary: Create a testset.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TestsetCreateRequest'
      responses:
        '200':
          description: The created testset.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Testset'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # List/filter testsets via an optional JSON filter body; returns a bare array
  # of Testset objects.
  /testsets/query:
    post:
      operationId: queryTestsets
      tags:
        - Testsets
      summary: List and filter testsets.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TestsetQueryRequest'
      responses:
        '200':
          description: A list of testsets.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Testset'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Fetch one testset by UUID. The path parameter is declared inline.
  /testsets/{testset_id}:
    get:
      operationId: fetchTestset
      tags:
        - Testsets
      summary: Fetch a single testset by id.
      parameters:
        - name: testset_id
          in: path
          required: true
          schema:
            type: string
            format: uuid
      responses:
        '200':
          description: The requested testset.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Testset'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Create a testset from an uploaded CSV or JSON file sent as
  # multipart/form-data. Unlike the other testset operations, this path sits
  # under the /simple prefix.
  /simple/testsets/upload:
    post:
      operationId: createTestsetFromFile
      tags:
        - Testsets
      summary: Create a testset from an uploaded CSV or JSON file.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # The operation cannot do anything without the uploaded file, so
              # the `file` part is mandatory; `name` remains optional.
              required:
                - file
              properties:
                file:
                  type: string
                  format: binary
                name:
                  type: string
      responses:
        '200':
          description: The created testset.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Testset'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # List/filter evaluators via an optional JSON filter body; returns a bare
  # array of Evaluator objects.
  /evaluators/query:
    post:
      operationId: queryEvaluators
      tags:
        - Evaluators
      summary: List and filter evaluators.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluatorQueryRequest'
      responses:
        '200':
          description: A list of evaluators.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Evaluator'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Create an evaluator from a required JSON body; returns the created
  # Evaluator. The documented path ends with a trailing slash.
  /evaluators/:
    post:
      operationId: createEvaluator
      tags:
        - Evaluators
      summary: Create an evaluator.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluatorCreateRequest'
      responses:
        '200':
          description: The created evaluator.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Evaluator'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # List the built-in evaluator templates. Takes no parameters or body and
  # returns a bare array of EvaluatorTemplate objects.
  /evaluators/catalog/templates/:
    get:
      operationId: listEvaluatorCatalogTemplates
      tags:
        - Evaluators
      summary: List the built-in evaluator catalog templates.
      responses:
        '200':
          description: A list of evaluator templates.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluatorTemplate'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # List/filter evaluation runs via an optional JSON filter body; returns a
  # bare array of EvaluationRun objects.
  /evaluations/runs/query:
    post:
      operationId: queryEvaluationRuns
      tags:
        - Evaluations
      summary: List and filter evaluation runs.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationRunQueryRequest'
      responses:
        '200':
          description: A list of evaluation runs.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluationRun'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Start an evaluation run from a required JSON body; returns the created
  # EvaluationRun. The documented path ends with a trailing slash.
  /evaluations/runs/:
    post:
      operationId: createEvaluationRun
      tags:
        - Evaluations
      summary: Create an evaluation run.
      description: >-
        Starts an evaluation run that executes one or more variants against a
        testset and scores the outputs with the configured evaluators.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationRunCreateRequest'
      responses:
        '200':
          description: The created evaluation run.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvaluationRun'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Query evaluation results via an optional JSON filter body; returns a bare
  # array of EvaluationResult objects.
  /evaluations/results/query:
    post:
      operationId: queryEvaluationResults
      tags:
        - Evaluations
      summary: Query evaluation results.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationResultQueryRequest'
      responses:
        '200':
          description: A list of evaluation results.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluationResult'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Query aggregated evaluation metrics via an optional JSON filter body;
  # returns a bare array of EvaluationMetrics objects.
  /evaluations/metrics/query:
    post:
      operationId: queryEvaluationMetrics
      tags:
        - Evaluations
      summary: Query aggregated evaluation metrics.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationMetricsQueryRequest'
      responses:
        '200':
          description: Aggregated metrics for the matching runs.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluationMetrics'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Fetch traces with a GET request. Both query parameters are optional:
  # `focus` accepts `trace` or `span`, and `limit` defaults to 100.
  /traces/:
    get:
      operationId: fetchTraces
      tags:
        - Traces
      summary: Fetch traces.
      parameters:
        - name: focus
          in: query
          required: false
          schema:
            type: string
            enum: [trace, span]
        - name: limit
          in: query
          required: false
          schema:
            # No minimum or maximum bound is declared for this value.
            type: integer
            default: 100
      responses:
        '200':
          description: A list of traces.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Trace'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Query spans via an optional JSON filter body; returns a bare array of Span
  # objects.
  /spans/query:
    post:
      operationId: querySpans
      tags:
        - Traces
      summary: Query spans with filters.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SpanQueryRequest'
      responses:
        '200':
          description: A list of spans.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Span'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Query span analytics via an optional JSON body. Unlike the other query
  # operations, the response is a single AnalyticsResponse object, not an array.
  /spans/analytics/query:
    post:
      operationId: querySpanAnalytics
      tags:
        - Traces
      summary: Query span analytics (cost, latency, token usage over time).
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnalyticsQueryRequest'
      responses:
        '200':
          description: Aggregated analytics buckets.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AnalyticsResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # OTLP/HTTP trace ingestion (POST) plus a status check (GET) on the same path.
  /otlp/v1/traces:
    post:
      operationId: ingestOtlpTraces
      tags:
        - OpenTelemetry
      summary: Ingest traces via OpenTelemetry OTLP/HTTP.
      description: >-
        OpenTelemetry OTLP/HTTP trace ingestion endpoint. Send OTLP-formatted
        spans (typically protobuf or JSON over HTTP) here to record LLM
        telemetry. This is the recommended way to push observability data from
        instrumented applications and OpenTelemetry exporters.
      requestBody:
        required: true
        content:
          # Two request encodings are accepted: a JSON export request, or an
          # opaque binary protobuf payload.
          application/json:
            schema:
              $ref: '#/components/schemas/OtlpExportTraceServiceRequest'
          application/x-protobuf:
            schema:
              type: string
              format: binary
      responses:
        '200':
          description: Export accepted.
          content:
            # Only a JSON response body is documented, for either encoding.
            application/json:
              schema:
                $ref: '#/components/schemas/OtlpExportTraceServiceResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
    get:
      operationId: otlpStatusCheck
      tags:
        - OpenTelemetry
      summary: Status check for the OTLP traces endpoint.
      responses:
        # No response body is defined for the successful status check.
        '200':
          description: The endpoint is available.
        '401':
          $ref: '#/components/responses/Unauthorized'
components:
  securitySchemes:
    # Modeled as an `apiKey` header scheme rather than `http`/`bearer`: the
    # credential uses the custom `ApiKey` prefix, and an HTTP bearer scheme
    # would make tooling send `Authorization: Bearer <key>` instead.
    ApiKeyAuth:
      type: apiKey
      in: header
      name: Authorization
      description: >-
        Agenta API key sent in the Authorization header. Generate keys from the
        Agenta web app under Settings > API keys. The header value must include
        the `ApiKey` prefix, i.e. `Authorization: ApiKey <key>`.
  # Reusable parameters. ApplicationId is the required UUID path parameter
  # referenced by the GET and PUT operations on
  # /simple/applications/{application_id}.
  parameters:
    ApplicationId:
      name: application_id
      in: path
      required: true
      schema:
        type: string
        format: uuid
  # Reusable error responses referenced by the operations' 401 and 404 entries.
  # Both return the shared Error schema as JSON.
  responses:
    Unauthorized:
      description: Missing or invalid API key.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    NotFound:
      description: The requested resource was not found.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
  schemas:
    # Error payload used by the Unauthorized and NotFound responses. `detail`
    # may be either a plain string or a structured object.
    Error:
      type: object
      properties:
        detail:
          oneOf:
            - type: string
            - type: object
    # Shared audit block (creation/update timestamps and actor ids) embedded as
    # the `lifecycle` property of Application, ApplicationVariant, Revision,
    # Testset, Evaluator, and EvaluationRun.
    Lifecycle:
      type: object
      description: Audit metadata attached to most Agenta resources.
      properties:
        created_at:
          type: string
          format: date-time
        updated_at:
          type: string
          format: date-time
        created_by_id:
          type: string
          format: uuid
        updated_by_id:
          type: string
          format: uuid
    # Body of POST /simple/applications/. The payload is wrapped in a required
    # `app` object, in which only `slug` is mandatory.
    SimpleApplicationCreateRequest:
      type: object
      required:
        - app
      properties:
        app:
          type: object
          required:
            - slug
          properties:
            slug:
              type: string
              description: URL-friendly unique slug for the application.
            name:
              type: string
            description:
              type: string
            # Free-form map; no flag names are defined by this schema.
            flags:
              type: object
              additionalProperties: true
    # Body of PUT /simple/applications/{application_id}. Only `name` and
    # `description` are exposed for editing, and nothing is marked required.
    SimpleApplicationEditRequest:
      type: object
      properties:
        app:
          type: object
          properties:
            name:
              type: string
            description:
              type: string
    # Optional filter body of POST /simple/applications/query: a flags filter,
    # an archived toggle (default false), and Windowing pagination controls.
    ApplicationQueryRequest:
      type: object
      properties:
        application:
          type: object
          properties:
            flags:
              type: object
              additionalProperties: true
        include_archived:
          type: boolean
          default: false
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Envelope returned by the application create/fetch/edit operations and
    # used as the item type of the application query result: an integer `count`
    # plus a single Application.
    SimpleApplicationResponse:
      type: object
      properties:
        count:
          type: integer
        application:
          $ref: '#/components/schemas/Application'
    # The application resource carried inside SimpleApplicationResponse.
    Application:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        name:
          type: string
        description:
          type: string
        # Free-form map; no flag names are defined by this schema.
        flags:
          type: object
          additionalProperties: true
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Optional filter body of POST /applications/variants/query: filter by
    # owning application id, an archived toggle (default false), and Windowing
    # pagination controls.
    VariantQueryRequest:
      type: object
      properties:
        variant:
          type: object
          properties:
            application_id:
              type: string
              format: uuid
        include_archived:
          type: boolean
          default: false
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Variant resource returned by the variant query and fetch operations;
    # `application_id` links it to its application.
    ApplicationVariant:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        name:
          type: string
        application_id:
          type: string
          format: uuid
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Body of POST /applications/revisions/commit. The wrapper `revision` is
    # required, but none of its inner fields are marked required.
    CommitRevisionRequest:
      type: object
      required:
        - revision
      properties:
        revision:
          type: object
          properties:
            slug:
              type: string
            variant_id:
              type: string
              format: uuid
            message:
              type: string
              description: Commit message describing the change.
            # The prompt/model configuration being committed.
            data:
              $ref: '#/components/schemas/PromptConfig'
    # Body of POST /applications/revisions/deploy: names the variant and the
    # target environment (by slug, e.g. `production`).
    DeployRevisionRequest:
      type: object
      required:
        - revision
      properties:
        revision:
          type: object
          properties:
            variant_id:
              type: string
              format: uuid
            # Declared inline with only a `slug`, not via the Reference schema.
            environment_ref:
              type: object
              properties:
                slug:
                  type: string
                  example: production
    # Revision resource returned by both the commit and deploy operations.
    Revision:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        # Typed as a string, not an integer.
        version:
          type: string
        variant_id:
          type: string
          format: uuid
        message:
          type: string
        data:
          $ref: '#/components/schemas/PromptConfig'
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Body of POST /variants/configs/fetch. The either/or rule in the
    # description is prose only: the schema marks no property as required.
    ConfigFetchRequest:
      type: object
      description: >-
        Reference identifying which configuration to resolve. Provide either an
        application + environment reference or a direct variant reference.
      properties:
        application_ref:
          $ref: '#/components/schemas/Reference'
        variant_ref:
          $ref: '#/components/schemas/Reference'
        environment_ref:
          $ref: '#/components/schemas/Reference'
    # Generic pointer to a resource, used by ConfigFetchRequest and
    # ConfigResponse. All three fields are optional.
    Reference:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        version:
          type: string
    # Response of POST /variants/configs/fetch: the resolved PromptConfig under
    # `params`, a `url`, and application/variant/environment references.
    ConfigResponse:
      type: object
      properties:
        params:
          $ref: '#/components/schemas/PromptConfig'
        url:
          type: string
        application_ref:
          $ref: '#/components/schemas/Reference'
        variant_ref:
          $ref: '#/components/schemas/Reference'
        environment_ref:
          $ref: '#/components/schemas/Reference'
    # Prompt configuration shared by CommitRevisionRequest.revision.data,
    # Revision.data, and ConfigResponse.params.
    # NOTE(review): llm_config, template_format, and input_keys are nested
    # under `prompt` rather than at the top level - confirm this matches the
    # payload the server actually produces.
    PromptConfig:
      type: object
      description: A prompt template plus model and inference parameters.
      properties:
        prompt:
          type: object
          properties:
            # Chat messages; `role` is limited to system, user, or assistant.
            messages:
              type: array
              items:
                type: object
                properties:
                  role:
                    type: string
                    enum: [system, user, assistant]
                  content:
                    type: string
            # Model name and sampling settings; no value ranges are declared.
            llm_config:
              type: object
              properties:
                model:
                  type: string
                  example: gpt-4o
                temperature:
                  type: number
                  format: float
                max_tokens:
                  type: integer
                top_p:
                  type: number
                  format: float
            # Template syntax of the messages; defaults to `fstring`.
            template_format:
              type: string
              enum: [fstring, jinja2, curly]
              default: fstring
            input_keys:
              type: array
              items:
                type: string
    # Body of POST /testsets/. The payload is wrapped in a required `testset`
    # object, in which only `slug` is mandatory.
    TestsetCreateRequest:
      type: object
      required:
        - testset
      properties:
        testset:
          type: object
          required:
            - slug
          properties:
            slug:
              type: string
            name:
              type: string
            data:
              type: object
              properties:
                # Each testcase is a free-form object; no columns are fixed.
                testcases:
                  type: array
                  items:
                    type: object
                    additionalProperties: true
    # Optional filter body of POST /testsets/query: an archived toggle
    # (default false) and Windowing pagination controls.
    TestsetQueryRequest:
      type: object
      properties:
        include_archived:
          type: boolean
          default: false
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Testset resource returned by the create, query, fetch, and upload
    # operations.
    Testset:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        name:
          type: string
        data:
          type: object
          properties:
            # Each testcase is a free-form object; no columns are fixed.
            testcases:
              type: array
              items:
                type: object
                additionalProperties: true
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Optional filter body of POST /evaluators/query: an archived toggle
    # (default false) and Windowing pagination controls.
    EvaluatorQueryRequest:
      type: object
      properties:
        include_archived:
          type: boolean
          default: false
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Body of POST /evaluators/. The payload is wrapped in a required
    # `evaluator` object, in which only `slug` is mandatory.
    EvaluatorCreateRequest:
      type: object
      required:
        - evaluator
      properties:
        evaluator:
          type: object
          required:
            - slug
          properties:
            slug:
              type: string
            name:
              type: string
            # Free-form evaluator settings; no keys are defined by this schema.
            data:
              type: object
              additionalProperties: true
    # Evaluator resource returned by the evaluator create and query operations.
    Evaluator:
      type: object
      properties:
        id:
          type: string
          format: uuid
        slug:
          type: string
        name:
          type: string
        # Free-form evaluator settings; no keys are defined by this schema.
        data:
          type: object
          additionalProperties: true
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Item type of GET /evaluators/catalog/templates/. Identified by a string
    # `key` (example: auto_exact_match) rather than a UUID.
    EvaluatorTemplate:
      type: object
      properties:
        key:
          type: string
          example: auto_exact_match
        name:
          type: string
        description:
          type: string
        # Free-form object; its structure is not defined by this schema.
        settings_schema:
          type: object
          additionalProperties: true
    # Optional filter body of POST /evaluations/runs/query: filter by
    # application id, an archived toggle (default false), and Windowing
    # pagination controls.
    EvaluationRunQueryRequest:
      type: object
      properties:
        run:
          type: object
          properties:
            application_id:
              type: string
              format: uuid
        include_archived:
          type: boolean
          default: false
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Body of POST /evaluations/runs/. The wrapper `run` is required, but none
    # of its inner fields (name, testset, variants, evaluators) are.
    EvaluationRunCreateRequest:
      type: object
      required:
        - run
      properties:
        run:
          type: object
          properties:
            name:
              type: string
            testset_id:
              type: string
              format: uuid
            variant_ids:
              type: array
              items:
                type: string
                format: uuid
            evaluator_ids:
              type: array
              items:
                type: string
                format: uuid
    # Evaluation run resource returned by the run create and query operations.
    # The variant and evaluator ids from the create request are not echoed here.
    EvaluationRun:
      type: object
      properties:
        id:
          type: string
          format: uuid
        name:
          type: string
        status:
          type: string
          enum: [pending, running, finished, failed, cancelled]
        testset_id:
          type: string
          format: uuid
        lifecycle:
          $ref: '#/components/schemas/Lifecycle'
    # Optional filter body of POST /evaluations/results/query: filter by run id
    # plus Windowing pagination controls.
    EvaluationResultQueryRequest:
      type: object
      properties:
        result:
          type: object
          properties:
            run_id:
              type: string
              format: uuid
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Item type of the evaluation results query. Unlike EvaluationRun.status,
    # `status` here is an unconstrained string with no enum.
    EvaluationResult:
      type: object
      properties:
        id:
          type: string
          format: uuid
        run_id:
          type: string
          format: uuid
        scenario_id:
          type: string
          format: uuid
        status:
          type: string
        # Free-form outputs; no keys are defined by this schema.
        outputs:
          type: object
          additionalProperties: true
    # Optional filter body of POST /evaluations/metrics/query: filter by run id.
    # Unlike the other query requests, no `windowing` property is declared.
    EvaluationMetricsQueryRequest:
      type: object
      properties:
        metrics:
          type: object
          properties:
            run_id:
              type: string
              format: uuid
    # Item type of the evaluation metrics query: a numeric `value` for one
    # evaluator (by slug) in one run, plus a free-form `data` object.
    EvaluationMetrics:
      type: object
      properties:
        id:
          type: string
          format: uuid
        run_id:
          type: string
          format: uuid
        evaluator_slug:
          type: string
        value:
          type: number
        data:
          type: object
          additionalProperties: true
    # Optional filter body of POST /spans/query: a free-form `filtering` object
    # plus Windowing pagination controls.
    SpanQueryRequest:
      type: object
      properties:
        filtering:
          type: object
          additionalProperties: true
        windowing:
          $ref: '#/components/schemas/Windowing'
    # Optional body of POST /spans/analytics/query: a free-form filter, a time
    # range bounded by `oldest`/`newest`, and a bucket interval.
    AnalyticsQueryRequest:
      type: object
      properties:
        filtering:
          type: object
          additionalProperties: true
        time_range:
          type: object
          properties:
            oldest:
              type: string
              format: date-time
            newest:
              type: string
              format: date-time
        # Plain string (example `1h`); the accepted syntax is not specified here.
        interval:
          type: string
          example: 1h
    # Response of POST /spans/analytics/query: a list of timestamped buckets,
    # each with count, token, cost, and duration totals.
    AnalyticsResponse:
      type: object
      properties:
        buckets:
          type: array
          items:
            type: object
            properties:
              timestamp:
                type: string
                format: date-time
              total_count:
                type: integer
              total_tokens:
                type: integer
              # No currency or unit is declared for the cost value.
              total_cost:
                type: number
              duration_ms:
                type: number
    # Item type of GET /traces/: a trace id together with its list of spans.
    Trace:
      type: object
      properties:
        trace_id:
          type: string
        spans:
          type: array
          items:
            $ref: '#/components/schemas/Span'
    # Span record returned by POST /spans/query and nested inside Trace.spans.
    # Ids, kind, and status code are plain strings with no enum or format.
    Span:
      type: object
      properties:
        trace_id:
          type: string
        span_id:
          type: string
        span_name:
          type: string
        span_kind:
          type: string
        start_time:
          type: string
          format: date-time
        end_time:
          type: string
          format: date-time
        status_code:
          type: string
        # Free-form attributes; no keys are defined by this schema.
        attributes:
          type: object
          additionalProperties: true
    Windowing:
      type: object
      description: Cursor / time-window pagination controls.
      properties:
        next:
          type: str

# --- truncated at 32 KB (32 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/agenta/refs/heads/main/openapi/agenta-openapi.yml