Bright Data Web Archive API

Search and deliver petabyte-scale historical web snapshots across 250+ domains. Submit a query via `POST /webarchive/search`, monitor with `GET /webarchive/search/{search_id}`, list all searches via `GET /webarchive/searches`, and deliver matching corpora to S3/Azure/GCS via `POST /webarchive/deliver-to-cloud`.

OpenAPI Specification

bright-data-web-archive-api-openapi.yml Raw ↑
# OpenAPI 3.1 document describing the Bright Data Web Archive API.
openapi: 3.1.0
info:
  title: Bright Data Web Archive API
  # Quoted so the version stays a string rather than being read as the float 1.0.
  version: "1.0"
  contact:
    name: Bright Data
    url: https://docs.brightdata.com
  # Literal block scalar: the line breaks below are preserved in the value.
  description: |
    The Web Archive API exposes Bright Data's petabyte-scale historical web index across 250+ domains.
    Submit a search via `POST /webarchive/search`, monitor with `GET /webarchive/search/{search_id}`,
    list all searches via `GET /webarchive/searches`, and deliver matching corpora to S3/Azure/GCS
    via `POST /webarchive/deliver-to-cloud`.
# Base URL that every path in this document is resolved against.
servers:
  - description: Production
    url: https://api.brightdata.com
# Document-wide security requirement; `BearerAuth` is defined under
# components.securitySchemes. The empty list means no scopes are requested.
security:
  - BearerAuth: []
# Single tag used to group the operations declared under `paths`.
tags:
  - name: Archive
paths:
  # Submit a new archive search. The response returns a `search_id`, which is
  # the identifier the status and delivery operations take as input.
  /webarchive/search:
    post:
      operationId: submitArchiveSearch
      summary: Submit a Web Archive Search
      tags:
        - Archive
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # Only `domain` is mandatory; all other properties are optional.
              required:
                - domain
              properties:
                domain:
                  type: string
                query:
                  type: string
                # Both bounds use the `date` string format (no time component).
                from_date:
                  type: string
                  format: date
                to_date:
                  type: string
                  format: date
                limit:
                  type: integer
      responses:
        '200':
          description: Search submitted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  search_id:
                    type: string
  # Fetch the state of a single search identified by `search_id`.
  /webarchive/search/{search_id}:
    # Declared at path level, so it applies to every operation on this path.
    parameters:
      - name: search_id
        in: path
        required: true
        schema:
          type: string
    get:
      operationId: getArchiveSearch
      summary: Get Web Archive Search
      tags:
        - Archive
      responses:
        '200':
          description: Search status and results pointer.
          content:
            application/json:
              schema:
                type: object
                properties:
                  search_id:
                    type: string
                  # Closed set of status values a search can report.
                  status:
                    type: string
                    enum:
                      - pending
                      - running
                      - ready
                      - failed
                  records:
                    type: integer
                  download_url:
                    type: string
                    format: uri
  # List searches. The response is a JSON array of summary objects, one per
  # search.
  /webarchive/searches:
    get:
      summary: List Web Archive Searches
      operationId: listArchiveSearches
      tags: [Archive]
      responses:
        "200":
          description: List of searches.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    search_id: { type: string }
                    domain: { type: string }
                    # Constrained to the same values as `status` on
                    # GET /webarchive/search/{search_id}, so both endpoints
                    # describe a search's state with one vocabulary.
                    status:
                      type: string
                      enum: [pending, running, ready, failed]
                    created: { type: string, format: date-time }
  # Request delivery of a search's results to a cloud storage destination.
  /webarchive/deliver-to-cloud:
    post:
      operationId: deliverArchiveToCloud
      summary: Deliver Archive Search to Cloud Storage
      tags:
        - Archive
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # `format` is the only optional top-level property.
              required:
                - search_id
                - destination
              properties:
                search_id:
                  type: string
                # NOTE(review): `destination` declares no `required` list, so an
                # empty object passes validation. Confirm whether `type` and
                # `bucket` should be mandatory.
                destination:
                  type: object
                  properties:
                    type:
                      type: string
                      enum:
                        - s3
                        - azure
                        - gcs
                    bucket:
                      type: string
                    # Free-form object: no keys are declared and any
                    # additional properties are accepted.
                    credentials:
                      type: object
                      additionalProperties: true
                format:
                  type: string
                  enum:
                    - json
                    - ndjson
                    - parquet
      responses:
        '200':
          description: Delivery scheduled.
          content:
            application/json:
              # The response body is typed only as a JSON object.
              schema:
                type: object
# Reusable definitions referenced by name from other parts of the document.
components:
  securitySchemes:
    # HTTP authentication using the `bearer` scheme. The top-level `security`
    # list refers to this scheme by the name `BearerAuth`.
    BearerAuth:
      type: http
      scheme: bearer