OpenAI
OpenAI Evals API

Manage and run evals in the OpenAI platform.
Documentation GitHub OpenAPI
OpenAPI Specification

openapi: 3.0.0
info:
  title: OpenAI Assistants Evals API
  description: The Assistants API allows you to build AI assistants within your own applications. An Assistant has instructions and can leverage models, tools, and knowledge to respond to user queries. The Assistants API currently supports three types of tools - Code Interpreter, Retrieval, and Function calling. In the future, we plan to release more OpenAI-built tools, and allow you to provide your own tools on our platform.
  version: 2.0.0
  termsOfService: https://openai.com/policies/terms-of-use
  contact:
    name: OpenAI Support
    url: https://help.openai.com/
  license:
    name: MIT
    url: https://github.com/openai/openai-openapi/blob/master/LICENSE
servers:
- url: https://api.openai.com/v1
security:
- ApiKeyAuth: []
tags:
- name: Evals
  description: Manage and run evals in the OpenAI platform.
paths:
  /evals:
    get:
      operationId: listEvals
      tags:
      - Evals
      summary: 'List evaluations for a project.

        '
      parameters:
      - name: after
        in: query
        description: Identifier for the last eval from the previous pagination request.
        required: false
        schema:
          type: string
      - name: limit
        in: query
        description: Number of evals to retrieve.
        required: false
        schema:
          type: integer
          default: 20
      - name: order
        in: query
        description: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for descending order.
        required: false
        schema:
          type: string
          enum:
          - asc
          - desc
          default: asc
      - name: order_by
        in: query
        description: 'Evals can be ordered by creation time or last updated time. Use

          `created_at` for creation time or `updated_at` for last updated time.

          '
        required: false
        schema:
          type: string
          enum:
          - created_at
          - updated_at
          default: created_at
      responses:
        '200':
          description: A list of evals
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvalList'
      x-oaiMeta:
        name: List evals
        group: evals
        path: list
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals?limit=1 \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\"\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\npage = client.evals.list()\npage = page.data[0]\nprint(page.id)"
            javascript: 'import OpenAI from "openai";


              const openai = new OpenAI();


              const evals = await openai.evals.list({ limit: 1 });

              console.log(evals);

              '
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\n// Automatically fetches more pages as needed.\nfor await (const evalListResponse of client.evals.list()) {\n  console.log(evalListResponse.id);\n}"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.models.evals.EvalListPage;\nimport com.openai.models.evals.EvalListParams;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        EvalListPage page = client.evals().list();\n    }\n}"
            ruby: 'require "openai"


              openai = OpenAI::Client.new(api_key: "My API Key")


              page = openai.evals.list


              puts(page)'
          response: "{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"id\": \"eval_67abd54d9b0081909a86353f6fb9317a\",\n      \"object\": \"eval\",\n      \"data_source_config\": {\n        \"type\": \"stored_completions\",\n        \"metadata\": {\n          \"usecase\": \"push_notifications_summarizer\"\n        },\n        \"schema\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"item\": {\n              \"type\": \"object\"\n            },\n            \"sample\": {\n              \"type\": \"object\"\n            }\n          },\n          \"required\": [\n            \"item\",\n            \"sample\"\n          ]\n        }\n      },\n      \"testing_criteria\": [\n        {\n          \"name\": \"Push Notification Summary Grader\",\n          \"id\": \"Push Notification Summary Grader-9b876f24-4762-4be9-aff4-db7a9b31c673\",\n          \"type\": \"label_model\",\n          \"model\": \"o3-mini\",\n          \"input\": [\n            {\n              \"type\": \"message\",\n              \"role\": \"developer\",\n              \"content\": {\n                \"type\": \"input_text\",\n                \"text\": \"\\nLabel the following push notification summary as either correct or incorrect.\\nThe push notification and the summary will be provided below.\\nA good push notificiation summary is concise and snappy.\\nIf it is good, then label it as correct, if not, then incorrect.\\n\"\n              }\n            },\n            {\n              \"type\": \"message\",\n              \"role\": \"user\",\n              \"content\": {\n                \"type\": \"input_text\",\n                \"text\": \"\\nPush notifications: {{item.input}}\\nSummary: {{sample.output_text}}\\n\"\n              }\n            }\n          ],\n          \"passing_labels\": [\n            \"correct\"\n          ],\n          \"labels\": [\n            \"correct\",\n            \"incorrect\"\n          ],\n          \"sampling_params\": null\n        }\n      ],\n      \"name\": \"Push Notification Summary Grader\",\n      \"created_at\": 1739314509,\n      \"metadata\": {\n        \"description\": \"A stored completions eval for push notification summaries\"\n      }\n    }\n  ],\n  \"first_id\": \"eval_67abd54d9b0081909a86353f6fb9317a\",\n  \"last_id\": \"eval_67aa884cf6688190b58f657d4441c8b7\",\n  \"has_more\": true\n}\n"
    post:
      operationId: createEval
      tags:
      - Evals
      summary: 'Create the structure of an evaluation that can be used to test a model''s performance.

        An evaluation is a set of testing criteria and the config for a data source, which dictates the schema of the data used in the evaluation. After creating an evaluation, you can run it on different models and model parameters. We support several types of graders and datasources.

        For more information, see the [Evals guide](/docs/guides/evals).

        '
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateEvalRequest'
      responses:
        '201':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Eval'
      x-oaiMeta:
        name: Create eval
        group: evals
        path: post
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n        \"name\": \"Sentiment\",\n        \"data_source_config\": {\n          \"type\": \"stored_completions\",\n          \"metadata\": {\n              \"usecase\": \"chatbot\"\n          }\n        },\n        \"testing_criteria\": [\n          {\n            \"type\": \"label_model\",\n            \"model\": \"o3-mini\",\n            \"input\": [\n              {\n                \"role\": \"developer\",\n                \"content\": \"Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'\"\n              },\n              {\n                \"role\": \"user\",\n                \"content\": \"Statement: {{item.input}}\"\n              }\n            ],\n            \"passing_labels\": [\n              \"positive\"\n            ],\n            \"labels\": [\n              \"positive\",\n              \"neutral\",\n              \"negative\"\n            ],\n            \"name\": \"Example label grader\"\n          }\n        ]\n      }'\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\neval = client.evals.create(\n    data_source_config={\n        \"item_schema\": {\n            \"foo\": \"bar\"\n        },\n        \"type\": \"custom\",\n    },\n    testing_criteria=[{\n        \"input\": [{\n            \"content\": \"content\",\n            \"role\": \"role\",\n        }],\n        \"labels\": [\"string\"],\n        \"model\": \"model\",\n        \"name\": \"name\",\n        \"passing_labels\": [\"string\"],\n        \"type\": \"label_model\",\n    }],\n)\nprint(eval.id)"
            javascript: "import OpenAI from \"openai\";\n\nconst openai = new OpenAI();\n\nconst evalObj = await openai.evals.create({\n  name: \"Sentiment\",\n  data_source_config: {\n    type: \"stored_completions\",\n    metadata: { usecase: \"chatbot\" }\n  },\n  testing_criteria: [\n    {\n      type: \"label_model\",\n      model: \"o3-mini\",\n      input: [\n        { role: \"developer\", content: \"Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'\" },\n        { role: \"user\", content: \"Statement: {{item.input}}\" }\n      ],\n      passing_labels: [\"positive\"],\n      labels: [\"positive\", \"neutral\", \"negative\"],\n      name: \"Example label grader\"\n    }\n  ]\n});\nconsole.log(evalObj);\n"
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\nconst _eval = await client.evals.create({\n  data_source_config: {\n    item_schema: { foo: 'bar' },\n    type: 'custom',\n  },\n  testing_criteria: [\n    {\n      input: [{ content: 'content', role: 'role' }],\n      labels: ['string'],\n      model: 'model',\n      name: 'name',\n      passing_labels: ['string'],\n      type: 'label_model',\n    },\n  ],\n});\n\nconsole.log(_eval.id);"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.core.JsonValue;\nimport com.openai.models.evals.EvalCreateParams;\nimport com.openai.models.evals.EvalCreateResponse;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        EvalCreateParams params = EvalCreateParams.builder()\n            .customDataSourceConfig(EvalCreateParams.DataSourceConfig.Custom.ItemSchema.builder()\n                .putAdditionalProperty(\"foo\", JsonValue.from(\"bar\"))\n                .build())\n            .addTestingCriterion(EvalCreateParams.TestingCriterion.LabelModel.builder()\n                .addInput(EvalCreateParams.TestingCriterion.LabelModel.Input.SimpleInputMessage.builder()\n                    .content(\"content\")\n                    .role(\"role\")\n                    .build())\n                .addLabel(\"string\")\n                .model(\"model\")\n                .name(\"name\")\n                .addPassingLabel(\"string\")\n                .build())\n            .build();\n        EvalCreateResponse eval = client.evals().create(params);\n    }\n}"
            ruby: "require \"openai\"\n\nopenai = OpenAI::Client.new(api_key: \"My API Key\")\n\neval_ = openai.evals.create(\n  data_source_config: {item_schema: {foo: \"bar\"}, type: :custom},\n  testing_criteria: [\n    {\n      input: [{content: \"content\", role: \"role\"}],\n      labels: [\"string\"],\n      model: \"model\",\n      name: \"name\",\n      passing_labels: [\"string\"],\n      type: :label_model\n    }\n  ]\n)\n\nputs(eval_)"
          response: "{\n  \"object\": \"eval\",\n  \"id\": \"eval_67b7fa9a81a88190ab4aa417e397ea21\",\n  \"data_source_config\": {\n    \"type\": \"stored_completions\",\n    \"metadata\": {\n      \"usecase\": \"chatbot\"\n    },\n    \"schema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"item\": {\n          \"type\": \"object\"\n        },\n        \"sample\": {\n          \"type\": \"object\"\n        }\n      },\n      \"required\": [\n        \"item\",\n        \"sample\"\n      ]\n  },\n  \"testing_criteria\": [\n    {\n      \"name\": \"Example label grader\",\n      \"type\": \"label_model\",\n      \"model\": \"o3-mini\",\n      \"input\": [\n        {\n          \"type\": \"message\",\n          \"role\": \"developer\",\n          \"content\": {\n            \"type\": \"input_text\",\n            \"text\": \"Classify the sentiment of the following statement as one of positive, neutral, or negative\"\n          }\n        },\n        {\n          \"type\": \"message\",\n          \"role\": \"user\",\n          \"content\": {\n            \"type\": \"input_text\",\n            \"text\": \"Statement: {{item.input}}\"\n          }\n        }\n      ],\n      \"passing_labels\": [\n        \"positive\"\n      ],\n      \"labels\": [\n        \"positive\",\n        \"neutral\",\n        \"negative\"\n      ]\n    }\n  ],\n  \"name\": \"Sentiment\",\n  \"created_at\": 1740110490,\n  \"metadata\": {\n    \"description\": \"An eval for sentiment analysis\"\n  }\n}\n"
  /evals/{eval_id}:
    get:
      operationId: getEval
      tags:
      - Evals
      summary: 'Get an evaluation by ID.

        '
      parameters:
      - name: eval_id
        in: path
        required: true
        schema:
          type: string
        description: The ID of the evaluation to retrieve.
      responses:
        '200':
          description: The evaluation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Eval'
      x-oaiMeta:
        name: Get an eval
        group: evals
        path: get
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\"\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\neval = client.evals.retrieve(\n    \"eval_id\",\n)\nprint(eval.id)"
            javascript: 'import OpenAI from "openai";


              const openai = new OpenAI();


              const evalObj = await openai.evals.retrieve("eval_67abd54d9b0081909a86353f6fb9317a");

              console.log(evalObj);

              '
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\nconst _eval = await client.evals.retrieve('eval_id');\n\nconsole.log(_eval.id);"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.models.evals.EvalRetrieveParams;\nimport com.openai.models.evals.EvalRetrieveResponse;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        EvalRetrieveResponse eval = client.evals().retrieve(\"eval_id\");\n    }\n}"
            ruby: 'require "openai"


              openai = OpenAI::Client.new(api_key: "My API Key")


              eval_ = openai.evals.retrieve("eval_id")


              puts(eval_)'
          response: "{\n  \"object\": \"eval\",\n  \"id\": \"eval_67abd54d9b0081909a86353f6fb9317a\",\n  \"data_source_config\": {\n    \"type\": \"custom\",\n    \"schema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"item\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"input\": {\n              \"type\": \"string\"\n            },\n            \"ground_truth\": {\n              \"type\": \"string\"\n            }\n          },\n          \"required\": [\n            \"input\",\n            \"ground_truth\"\n          ]\n        }\n      },\n      \"required\": [\n        \"item\"\n      ]\n    }\n  },\n  \"testing_criteria\": [\n    {\n      \"name\": \"String check\",\n      \"id\": \"String check-2eaf2d8d-d649-4335-8148-9535a7ca73c2\",\n      \"type\": \"string_check\",\n      \"input\": \"{{item.input}}\",\n      \"reference\": \"{{item.ground_truth}}\",\n      \"operation\": \"eq\"\n    }\n  ],\n  \"name\": \"External Data Eval\",\n  \"created_at\": 1739314509,\n  \"metadata\": {},\n}\n"
    post:
      operationId: updateEval
      tags:
      - Evals
      summary: 'Update certain properties of an evaluation.

        '
      parameters:
      - name: eval_id
        in: path
        required: true
        schema:
          type: string
        description: The ID of the evaluation to update.
      requestBody:
        description: Request to update an evaluation
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                name:
                  type: string
                  description: Rename the evaluation.
                metadata:
                  $ref: '#/components/schemas/Metadata'
      responses:
        '200':
          description: The updated evaluation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Eval'
      x-oaiMeta:
        name: Update an eval
        group: evals
        path: update
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"name\": \"Updated Eval\", \"metadata\": {\"description\": \"Updated description\"}}'\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\neval = client.evals.update(\n    eval_id=\"eval_id\",\n)\nprint(eval.id)"
            javascript: "import OpenAI from \"openai\";\n\nconst openai = new OpenAI();\n\nconst updatedEval = await openai.evals.update(\n  \"eval_67abd54d9b0081909a86353f6fb9317a\",\n  {\n    name: \"Updated Eval\",\n    metadata: { description: \"Updated description\" }\n  }\n);\nconsole.log(updatedEval);\n"
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\nconst _eval = await client.evals.update('eval_id');\n\nconsole.log(_eval.id);"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.models.evals.EvalUpdateParams;\nimport com.openai.models.evals.EvalUpdateResponse;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        EvalUpdateResponse eval = client.evals().update(\"eval_id\");\n    }\n}"
            ruby: 'require "openai"


              openai = OpenAI::Client.new(api_key: "My API Key")


              eval_ = openai.evals.update("eval_id")


              puts(eval_)'
          response: "{\n  \"object\": \"eval\",\n  \"id\": \"eval_67abd54d9b0081909a86353f6fb9317a\",\n  \"data_source_config\": {\n    \"type\": \"custom\",\n    \"schema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"item\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"input\": {\n              \"type\": \"string\"\n            },\n            \"ground_truth\": {\n              \"type\": \"string\"\n            }\n          },\n          \"required\": [\n            \"input\",\n            \"ground_truth\"\n          ]\n        }\n      },\n      \"required\": [\n        \"item\"\n      ]\n    }\n  },\n  \"testing_criteria\": [\n    {\n      \"name\": \"String check\",\n      \"id\": \"String check-2eaf2d8d-d649-4335-8148-9535a7ca73c2\",\n      \"type\": \"string_check\",\n      \"input\": \"{{item.input}}\",\n      \"reference\": \"{{item.ground_truth}}\",\n      \"operation\": \"eq\"\n    }\n  ],\n  \"name\": \"Updated Eval\",\n  \"created_at\": 1739314509,\n  \"metadata\": {\"description\": \"Updated description\"},\n}\n"
    delete:
      operationId: deleteEval
      tags:
      - Evals
      summary: 'Delete an evaluation.

        '
      parameters:
      - name: eval_id
        in: path
        required: true
        schema:
          type: string
        description: The ID of the evaluation to delete.
      responses:
        '200':
          description: Successfully deleted the evaluation.
          content:
            application/json:
              schema:
                type: object
                properties:
                  object:
                    type: string
                    example: eval.deleted
                  deleted:
                    type: boolean
                    example: true
                  eval_id:
                    type: string
                    example: eval_abc123
                required:
                - object
                - deleted
                - eval_id
        '404':
          description: Evaluation not found.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      x-oaiMeta:
        name: Delete an eval
        group: evals
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals/eval_abc123 \\\n  -X DELETE \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\"\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\neval = client.evals.delete(\n    \"eval_id\",\n)\nprint(eval.eval_id)"
            javascript: 'import OpenAI from "openai";


              const openai = new OpenAI();


              const deleted = await openai.evals.delete("eval_abc123");

              console.log(deleted);

              '
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\nconst _eval = await client.evals.delete('eval_id');\n\nconsole.log(_eval.eval_id);"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.models.evals.EvalDeleteParams;\nimport com.openai.models.evals.EvalDeleteResponse;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        EvalDeleteResponse eval = client.evals().delete(\"eval_id\");\n    }\n}"
            ruby: 'require "openai"


              openai = OpenAI::Client.new(api_key: "My API Key")


              eval_ = openai.evals.delete("eval_id")


              puts(eval_)'
          response: "{\n  \"object\": \"eval.deleted\",\n  \"deleted\": true,\n  \"eval_id\": \"eval_abc123\"\n}\n"
  /evals/{eval_id}/runs:
    get:
      operationId: getEvalRuns
      tags:
      - Evals
      summary: 'Get a list of runs for an evaluation.

        '
      parameters:
      - name: eval_id
        in: path
        required: true
        schema:
          type: string
        description: The ID of the evaluation to retrieve runs for.
      - name: after
        in: query
        description: Identifier for the last run from the previous pagination request.
        required: false
        schema:
          type: string
      - name: limit
        in: query
        description: Number of runs to retrieve.
        required: false
        schema:
          type: integer
          default: 20
      - name: order
        in: query
        description: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`.
        required: false
        schema:
          type: string
          enum:
          - asc
          - desc
          default: asc
      - name: status
        in: query
        description: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` | `canceled`.
        required: false
        schema:
          type: string
          enum:
          - queued
          - in_progress
          - completed
          - canceled
          - failed
      responses:
        '200':
          description: A list of runs for the evaluation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvalRunList'
      x-oaiMeta:
        name: Get eval runs
        group: evals
        path: get-runs
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals/egroup_67abd54d9b0081909a86353f6fb9317a/runs \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\"\n"
            python: "import os\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=os.environ.get(\"OPENAI_API_KEY\"),  # This is the default and can be omitted\n)\npage = client.evals.runs.list(\n    eval_id=\"eval_id\",\n)\npage = page.data[0]\nprint(page.id)"
            javascript: 'import OpenAI from "openai";


              const openai = new OpenAI();


              const runs = await openai.evals.runs.list("egroup_67abd54d9b0081909a86353f6fb9317a");

              console.log(runs);

              '
            node.js: "import OpenAI from 'openai';\n\nconst client = new OpenAI({\n  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted\n});\n\n// Automatically fetches more pages as needed.\nfor await (const runListResponse of client.evals.runs.list('eval_id')) {\n  console.log(runListResponse.id);\n}"
            java: "package com.openai.example;\n\nimport com.openai.client.OpenAIClient;\nimport com.openai.client.okhttp.OpenAIOkHttpClient;\nimport com.openai.models.evals.runs.RunListPage;\nimport com.openai.models.evals.runs.RunListParams;\n\npublic final class Main {\n    private Main() {}\n\n    public static void main(String[] args) {\n        OpenAIClient client = OpenAIOkHttpClient.fromEnv();\n\n        RunListPage page = client.evals().runs().list(\"eval_id\");\n    }\n}"
            ruby: 'require "openai"


              openai = OpenAI::Client.new(api_key: "My API Key")


              page = openai.evals.runs.list("eval_id")


              puts(page)'
          response: "{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"eval.run\",\n      \"id\": \"evalrun_67e0c7d31560819090d60c0780591042\",\n      \"eval_id\": \"eval_67e0c726d560819083f19a957c4c640b\",\n      \"report_url\": \"https://platform.openai.com/evaluations/eval_67e0c726d560819083f19a957c4c640b\",\n      \"status\": \"completed\",\n      \"model\": \"o3-mini\",\n      \"name\": \"bulk_with_negative_examples_o3-mini\",\n      \"created_at\": 1742784467,\n      \"result_counts\": {\n        \"total\": 1,\n        \"errored\": 0,\n        \"failed\": 0,\n        \"passed\": 1\n      },\n      \"per_model_usage\": [\n        {\n          \"model_name\": \"o3-mini\",\n          \"invocation_count\": 1,\n          \"prompt_tokens\": 563,\n          \"completion_tokens\": 874,\n          \"total_tokens\": 1437,\n          \"cached_tokens\": 0\n        }\n      ],\n      \"per_testing_criteria_results\": [\n        {\n          \"testing_criteria\": \"Push Notification Summary Grader-1808cd0b-eeec-4e0b-a519-337e79f4f5d1\",\n          \"passed\": 1,\n          \"failed\": 0\n        }\n      ],\n      \"data_source\": {\n        \"type\": \"completions\",\n        \"source\": {\n          \"type\": \"file_content\",\n          \"content\": [\n            {\n              \"item\": {\n                \"notifications\": \"\\n- New message from Sarah: \\\"Can you call me later?\\\"\\n- Your package has been delivered!\\n- Flash sale: 20% off electronics for the next 2 hours!\\n\"\n              }\n            }\n          ]\n        },\n        \"input_messages\": {\n          \"type\": \"template\",\n          \"template\": [\n            {\n              \"type\": \"message\",\n              \"role\": \"developer\",\n              \"content\": {\n                \"type\": \"input_text\",\n                \"text\": \"\\n\\n\\n\\nYou are a helpful assistant that takes in an array of push notifications and returns a collapsed summary of them.\\nThe push notification will be provided as follows:\\n<push_notifications>\\n...notificationlist...\\n</push_notifications>\\n\\nYou should return just the summary and nothing else.\\n\\n\\nYou should return a summary that is concise and snappy.\\n\\n\\nHere is an example of a good summary:\\n<push_notifications>\\n- Traffic alert: Accident reported on Main Street.- Package out for delivery: Expected by 5 PM.- New friend suggestion: Connect with Emma.\\n</push_notifications>\\n<summary>\\nTraffic alert, package expected by 5pm, suggestion for new friend (Emily).\\n</summary>\\n\\n\\nHere is an example of a bad summary:\\n<push_notifications>\\n- Traffic alert: Accident reported on Main Street.- Package out for delivery: Expected by 5 PM.- New friend suggestion: Connect with Emma.\\n</push_notifications>\\n<summary>\\nTraffic alert reported on main street. You have a package that will arrive by 5pm, Emily is a new friend suggested for you.\\n</summary>\\n\"\n              }\n            },\n            {\n              \"type\": \"message\",\n              \"role\": \"user\",\n              \"content\": {\n                \"type\": \"input_text\",\n                \"text\": \"<push_notifications>{{item.notifications}}</push_notifications>\"\n              }\n            }\n          ]\n        },\n        \"model\": \"o3-mini\",\n        \"sampling_params\": null\n      },\n      \"error\": null,\n      \"metadata\": {}\n    }\n  ],\n  \"first_id\": \"evalrun_67e0c7d31560819090d60c0780591042\",\n  \"last_id\": \"evalrun_67e0c7d31560819090d60c0780591042\",\n  \"has_more\": true\n}\n"
    post:
      operationId: createEvalRun
      tags:
      - Evals
      summary: 'Kicks off a new run for a given evaluation, specifying the data source, and what model configuration to use to test. The datasource will be validated against the schema specified in the config of the evaluation.

        '
      parameters:
      - in: path
        name: eval_id
        required: true
        schema:
          type: string
        description: The ID of the evaluation to create a run for.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateEvalRunRequest'
      responses:
        '201':
          description: Successfully created a run for the evaluation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvalRun'
        '400':
          description: Bad request (for example, missing eval object)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      x-oaiMeta:
        name: Create eval run
        group: evals
        examples:
          request:
            curl: "curl https://api.openai.com/v1/evals/eval_67e579652b548190aaa83ada4b125f47/runs \\\n  -X POST \\\n  -H \"Authorization: Bearer $OPENAI_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"name\":\"gpt-4o-mini\",\"data_source\":{\"type\":\"completions\",\"input_messages\":{\"type\":\"template\",\"template\":[{\"role\":\"developer\",\"content\":\"Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\\n\\n# Steps\\n\\n1. Analyze the content of the news headline to understand its primary focus.\\n2. Extract the subject matter, identifying any key indi

# --- truncated at 32 KB (209 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/openai/refs/heads/main/openapi/openai-evals-api-openapi.yml
OpenAI Evals API

Documentation

Specifications

Schemas & Data

Other Resources

OpenAPI Specification