ScrapingBee HTML API

The core HTML API extracts rendered HTML, screenshots, or structured JSON from any website using headless Chrome with automatic proxy rotation. It supports JavaScript rendering (5 credits per request), js_scenario for automated click/scroll/form-fill interactions, wait/wait_for/wait_browser timing controls, the residential premium_proxy pool (10-25 credits), the stealth_proxy pool (75 credits), country_code geotargeting, custom cookies and headers, AI-powered extraction via the ai_query parameter, and rule-based extraction via CSS/XPath extract_rules.

OpenAPI Specification

scrapingbee-openapi.yml Raw ↑
# Document header: OpenAPI spec version followed by descriptive metadata.
openapi: 3.1.0
info:
  title: ScrapingBee API
  # Quoted so YAML keeps the version as the string "1.0" rather than a float.
  version: "1.0"
  # Literal block scalar: the line breaks below are part of the description.
  description: |
    ScrapingBee is a France-based web scraping API that handles headless browsers,
    proxy rotation, anti-bot defenses, and CAPTCHA solving so developers can
    extract data from any website with a single API call. This OpenAPI describes
    the core HTML scraping endpoint and the dedicated Google Search SERP API.
    Source: https://www.scrapingbee.com/documentation/
  license:
    name: ScrapingBee Terms of Service
    url: https://www.scrapingbee.com/terms-and-conditions/
  contact:
    name: ScrapingBee Support
    url: https://help.scrapingbee.com
# Base URL that the path keys under `paths` are appended to.
servers:
  - description: ScrapingBee API
    url: https://app.scrapingbee.com/api/v1
# Document-wide security requirement; the scheme is defined under
# components.securitySchemes.
security:
  - ApiKeyAuth: []
# Tag definitions used by the operations' `tags` lists.
tags:
  - name: HTML API
    description: >-
      Headless-browser scraping with proxy rotation, screenshots, and AI
      extraction.
  - name: Google Search API
    description: >-
      Structured Google SERP results (web, news, maps, images).
paths:
  # Core scraping endpoint, GET form. All inputs are query parameters.
  /:
    get:
      operationId: scrape
      summary: Scrape a URL and Return HTML, Screenshot, or JSON
      description: |
        Fetch a target URL through ScrapingBee's headless browser and proxy network.
        Returns rendered HTML by default, or a screenshot/JSON wrapper/markdown/text
        depending on the parameters supplied.
      tags:
        - HTML API
      parameters:
        # --- Authentication and target -------------------------------------
        - $ref: "#/components/parameters/ApiKey"
        - name: url
          description: URL to scrape (must be URL-encoded, include scheme).
          in: query
          required: true
          schema:
            type: string
            format: uri

        # --- JavaScript rendering and timing -------------------------------
        - name: render_js
          description: Execute JavaScript via headless browser. Defaults to true (5 credits).
          in: query
          schema:
            type: boolean
            default: true
        - name: js_scenario
          description: JSON-encoded scenario describing browser interactions (clicks, fills, scrolls, waits).
          in: query
          schema:
            type: string
        - name: wait
          description: Additional rendering delay in milliseconds.
          in: query
          schema:
            type: integer
        - name: wait_for
          description: CSS or XPath selector to wait for before returning.
          in: query
          schema:
            type: string
        - name: wait_browser
          description: Browser state condition to wait for.
          in: query
          schema:
            type: string
            enum:
              - domcontentloaded
              - load
              - networkidle0
              - networkidle2

        # --- Proxy selection -----------------------------------------------
        - name: premium_proxy
          description: Use residential premium proxy pool (10 credits without JS, 25 with JS).
          in: query
          schema:
            type: boolean
        - name: stealth_proxy
          description: Use the stealth proxy pool for high-protection sites (75 credits).
          in: query
          schema:
            type: boolean
        - name: country_code
          description: ISO 3166-1 country code for proxy geolocation.
          in: query
          schema:
            type: string

        # --- Request shaping -----------------------------------------------
        - name: block_ads
          description: Block advertisements during rendering.
          in: query
          schema:
            type: boolean
        - name: block_resources
          description: Block heavy resources (images, fonts, css) to speed up rendering.
          in: query
          schema:
            type: boolean
        - name: cookies
          description: Semicolon-separated cookie list to send with the request.
          in: query
          schema:
            type: string
        - name: forward_headers
          description: Forward Spb-* prefixed headers to the target site.
          in: query
          schema:
            type: boolean

        # --- Output format -------------------------------------------------
        - name: screenshot
          description: Return a screenshot of the page instead of HTML.
          in: query
          schema:
            type: boolean
        - name: screenshot_full_page
          description: Capture a full-page screenshot, not just the viewport.
          in: query
          schema:
            type: boolean
        - name: screenshot_selector
          description: CSS selector to limit the screenshot region.
          in: query
          schema:
            type: string
        - name: json_response
          description: Wrap response in JSON format with metadata.
          in: query
          schema:
            type: boolean
        - name: return_page_text
          description: Return plain text content instead of HTML.
          in: query
          schema:
            type: boolean
        - name: return_page_markdown
          description: Return content as Markdown.
          in: query
          schema:
            type: boolean
        - name: return_page_source
          description: Return the pre-JavaScript HTML source.
          in: query
          schema:
            type: boolean

        # --- Data extraction -----------------------------------------------
        - name: extract_rules
          description: JSON-encoded extraction rules (CSS or XPath selectors).
          in: query
          schema:
            type: string
        - name: ai_query
          description: Natural-language instruction for AI-powered data extraction.
          in: query
          schema:
            type: string
        - name: ai_extract_rules
          description: JSON-encoded AI extraction rules mapping field names to natural-language descriptions.
          in: query
          schema:
            type: string
      responses:
        # One 200 entry covers every output mode; the media type depends on
        # which output parameters were supplied.
        "200":
          description: HTML, screenshot bytes, JSON wrapper, text, or markdown depending on params.
          content:
            text/html:
              schema:
                type: string
            text/plain:
              schema:
                type: string
            # NOTE(review): "format: binary" is the OpenAPI 3.0 idiom for raw
            # bytes; confirm how the 3.1 tooling in use interprets it.
            image/png:
              schema:
                type: string
                format: binary
            application/json:
              schema:
                $ref: "#/components/schemas/JsonResponse"
        "400":
          description: Invalid parameters or unsupported URL.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "404":
          description: Target page not found.
        "429":
          description: Concurrency limit exceeded.
        "500":
          description: Render error or upstream failure.
    # POST form of the scraping endpoint: the request body is forwarded to the
    # target URL. Parameter descriptions, the render_js default, and the error
    # responses mirror the GET operation on this path so both stay consistent.
    post:
      summary: Scrape a URL with a POST Body
      description: |
        Same as the GET variant but accepts a request body that is forwarded to
        the target URL as either form-encoded data or JSON.
      operationId: scrapePost
      tags:
        - HTML API
      parameters:
        - $ref: "#/components/parameters/ApiKey"
        - name: url
          in: query
          required: true
          description: URL to scrape (must be URL-encoded, include scheme).
          schema:
            type: string
            format: uri
        - name: render_js
          in: query
          description: Execute JavaScript via headless browser. Defaults to true (5 credits).
          schema:
            type: boolean
            default: true
        - name: premium_proxy
          in: query
          description: Use residential premium proxy pool (10 credits without JS, 25 with JS).
          schema:
            type: boolean
        - name: country_code
          in: query
          description: ISO 3166-1 country code for proxy geolocation.
          schema:
            type: string
      requestBody:
        description: Payload forwarded to the target URL, as form-encoded data or JSON.
        content:
          # Both media types accept an arbitrary object; its keys are not
          # constrained by this document.
          application/x-www-form-urlencoded:
            schema:
              type: object
              additionalProperties: true
          application/json:
            schema:
              type: object
              additionalProperties: true
      responses:
        "200":
          description: Response forwarded from the target site.
        "400":
          description: Invalid parameters or unsupported URL.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "404":
          description: Target page not found.
        "429":
          description: Concurrency limit exceeded.
        "500":
          description: Render error or upstream failure.
  # Google Search endpoint returning structured SERP JSON.
  # NOTE(review): confirm this path against ScrapingBee's Google Search API
  # documentation; the published endpoint may sit under a different prefix
  # (e.g. /store/google) relative to the server URL.
  /google:
    get:
      operationId: googleSearch
      summary: Run a Google Search and Return Structured SERP JSON
      description: |
        Execute a Google query via ScrapingBee's dedicated Search API and receive
        structured JSON results: organic listings, knowledge graph, featured
        snippets, news, maps, image results, related searches, and ads.
      tags:
        - Google Search API
      parameters:
        # --- Authentication and query --------------------------------------
        - $ref: "#/components/parameters/ApiKey"
        - name: search
          description: Query string as typed into the Google search bar.
          in: query
          required: true
          schema:
            type: string
        - name: search_type
          description: Which Google vertical to query.
          in: query
          schema:
            type: string
            default: classic
            enum:
              - classic
              - news
              - maps
              - images

        # --- Localization --------------------------------------------------
        - name: country_code
          description: ISO 3166-1 alpha-2 country code for result localization.
          in: query
          schema:
            type: string
        - name: language
          description: Result language (hl parameter equivalent).
          in: query
          schema:
            type: string

        # --- Paging --------------------------------------------------------
        - name: nb_results
          description: Number of results to return.
          in: query
          schema:
            type: integer
            default: 10
        - name: page
          description: Result page number.
          in: query
          schema:
            type: integer
            default: 1

        # --- Rendering and request mode ------------------------------------
        - name: device
          description: Render Google as desktop or mobile.
          in: query
          schema:
            type: string
            default: desktop
            enum:
              - desktop
              - mobile
        - name: extra_params
          description: Additional Google query parameters (gl, hl, num, start, etc.).
          in: query
          schema:
            type: string
        - name: light_request
          description: Use the cheaper, faster light request mode.
          in: query
          schema:
            type: boolean
        - name: full_html
          description: Return the full SERP HTML alongside the structured payload.
          in: query
          schema:
            type: boolean
      responses:
        "200":
          description: Structured Google SERP response.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GoogleSerpResponse"
        "400":
          description: Invalid search parameters.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "429":
          description: Concurrency limit exceeded.
components:
  securitySchemes:
    # API-key scheme named by the document-level `security` requirement.
    ApiKeyAuth:
      description: ScrapingBee account key, passed as a query parameter.
      type: apiKey
      name: api_key
      in: query
  parameters:
    # Reusable api_key query parameter that operations include via $ref.
    # It uses the same query key (api_key) as the ApiKeyAuth scheme above.
    ApiKey:
      description: ScrapingBee account key.
      name: api_key
      in: query
      required: true
      schema:
        type: string
  schemas:
    # Wrapper object for the application/json variant of the scrape response.
    JsonResponse:
      description: Wrapper response when json_response=true is used.
      type: object
      properties:
        body:
          description: Rendered page body (HTML, text, or markdown depending on params).
          type: string
        # String-to-string map; header names are not enumerated here.
        headers:
          description: Response headers from the target site.
          type: object
          additionalProperties:
            type: string
        cookies:
          type: array
          items:
            type: string
        # A property literally named "type", not a schema keyword.
        type:
          description: Content type returned (e.g. text/html, image/png).
          type: string
        # Open-ended object: its keys are not constrained by this schema.
        metadata:
          description: ScrapingBee metadata (resolved URL, credits used, etc.).
          type: object
          additionalProperties: true
    # Top-level body of the Google Search 200 response. Only organic_results
    # has a typed item schema; every other section is left open-ended through
    # additionalProperties: true.
    GoogleSerpResponse:
      description: Structured Google SERP payload.
      type: object
      properties:
        meta_data:
          description: Search metadata (query, location, total results, request URL).
          type: object
          additionalProperties: true

        # Typed section: items follow GoogleOrganicResult.
        organic_results:
          type: array
          items:
            $ref: "#/components/schemas/GoogleOrganicResult"

        # Single free-form objects.
        knowledge_graph:
          type: object
          additionalProperties: true
        featured_snippet:
          type: object
          additionalProperties: true

        # Plain list of strings.
        related_queries:
          type: array
          items:
            type: string

        # Arrays of free-form objects.
        people_also_ask:
          type: array
          items:
            type: object
            additionalProperties: true
        news_results:
          type: array
          items:
            type: object
            additionalProperties: true
        local_results:
          type: array
          items:
            type: object
            additionalProperties: true
        image_results:
          type: array
          items:
            type: object
            additionalProperties: true
        ads:
          type: array
          items:
            type: object
            additionalProperties: true
    # Item schema for GoogleSerpResponse.organic_results (referenced there via
    # $ref). No property is listed as required.
    GoogleOrganicResult:
      type: object
      properties:
        position:
          type: integer
        title:
          type: string
        # Declared with the "uri" string format.
        url:
          type: string
          format: uri
        displayed_url:
          type: string
        description:
          type: string
        # Array of free-form objects: item keys are not constrained by this
        # schema (additionalProperties: true).
        sitelinks:
          type: array
          items:
            type: object
            additionalProperties: true