# OpenAPI 3.1 document describing the ScrapingBee HTTP API.
openapi: 3.1.0
# API-level metadata: title, document version, overview text, contact, terms.
info:
  title: ScrapingBee API
  # Quoted so the version stays a string instead of being read as the float 1.0.
  version: "1.0"
  description: |
    ScrapingBee is a France-based web scraping API that handles headless browsers,
    proxy rotation, anti-bot defenses, and CAPTCHA solving so developers can
    extract data from any website with a single API call. This OpenAPI describes
    the core HTML scraping endpoint and the dedicated Google Search SERP API.
    Source: https://www.scrapingbee.com/documentation/
  contact:
    name: ScrapingBee Support
    url: https://help.scrapingbee.com
  # No SPDX identifier is given; the license entry points at the terms of service.
  license:
    name: ScrapingBee Terms of Service
    url: https://www.scrapingbee.com/terms-and-conditions/
# Base URL that every entry under `paths` is resolved against.
servers:
  - url: https://app.scrapingbee.com/api/v1
    description: ScrapingBee API
# Document-wide security requirement; no operation in this file overrides it.
security:
  - ApiKeyAuth: []
# Tag groups referenced by the operations' `tags` lists.
tags:
  - name: HTML API
    description: Headless-browser scraping with proxy rotation, screenshots, and AI extraction.
  - name: Google Search API
    description: Structured Google SERP results (web, news, maps, images).
# Operations exposed by the API, relative to the server base URL.
#
# Every operation lists the shared `ApiKey` parameter explicitly (required,
# in the query string). The same `api_key` query parameter is also declared by
# the `ApiKeyAuth` security scheme under `components.securitySchemes`.
paths:
  # Core scraping endpoint. GET fetches the target page; POST additionally
  # forwards a request body to the target URL.
  /:
    get:
      summary: Scrape a URL and Return HTML, Screenshot, or JSON
      description: |
        Fetch a target URL through ScrapingBee's headless browser and proxy network.
        Returns rendered HTML by default, or a screenshot/JSON wrapper/markdown/text
        depending on the parameters supplied.
      operationId: scrape
      tags: [HTML API]
      parameters:
        - $ref: "#/components/parameters/ApiKey"
        - name: url
          in: query
          required: true
          description: URL to scrape (must be URL-encoded, include scheme).
          schema:
            type: string
            format: uri
        # --- Rendering and browser behaviour ---
        - name: render_js
          in: query
          description: Execute JavaScript via headless browser. Defaults to true (5 credits).
          schema:
            type: boolean
            default: true
        - name: js_scenario
          in: query
          description: JSON-encoded scenario describing browser interactions (clicks, fills, scrolls, waits).
          schema:
            type: string
        - name: wait
          in: query
          description: Additional rendering delay in milliseconds.
          schema:
            type: integer
            # A delay cannot be negative.
            minimum: 0
        - name: wait_for
          in: query
          description: CSS or XPath selector to wait for before returning.
          schema:
            type: string
        - name: wait_browser
          in: query
          description: Browser state condition to wait for.
          schema:
            type: string
            enum: [domcontentloaded, load, networkidle0, networkidle2]
        # --- Proxy selection ---
        - name: premium_proxy
          in: query
          description: Use residential premium proxy pool (10 credits without JS, 25 with JS).
          schema:
            type: boolean
        - name: stealth_proxy
          in: query
          description: Use the stealth proxy pool for high-protection sites (75 credits).
          schema:
            type: boolean
        - name: country_code
          in: query
          description: ISO 3166-1 country code for proxy geolocation.
          schema:
            type: string
        # --- Resource blocking ---
        - name: block_ads
          in: query
          description: Block advertisements during rendering.
          schema:
            type: boolean
        - name: block_resources
          in: query
          description: Block heavy resources (images, fonts, css) to speed up rendering.
          schema:
            type: boolean
        # --- Request customisation ---
        - name: cookies
          in: query
          description: Semicolon-separated cookie list to send with the request.
          schema:
            type: string
        - name: forward_headers
          in: query
          description: Forward Spb-* prefixed headers to the target site.
          schema:
            type: boolean
        # --- Output format ---
        - name: screenshot
          in: query
          description: Return a screenshot of the page instead of HTML.
          schema:
            type: boolean
        - name: screenshot_full_page
          in: query
          description: Capture a full-page screenshot, not just the viewport.
          schema:
            type: boolean
        - name: screenshot_selector
          in: query
          description: CSS selector to limit the screenshot region.
          schema:
            type: string
        - name: json_response
          in: query
          description: Wrap response in JSON format with metadata.
          schema:
            type: boolean
        - name: return_page_text
          in: query
          description: Return plain text content instead of HTML.
          schema:
            type: boolean
        - name: return_page_markdown
          in: query
          description: Return content as Markdown.
          schema:
            type: boolean
        - name: return_page_source
          in: query
          description: Return the pre-JavaScript HTML source.
          schema:
            type: boolean
        # --- Data extraction ---
        - name: extract_rules
          in: query
          description: JSON-encoded extraction rules (CSS or XPath selectors).
          schema:
            type: string
        - name: ai_query
          in: query
          description: Natural-language instruction for AI-powered data extraction.
          schema:
            type: string
        - name: ai_extract_rules
          in: query
          description: JSON-encoded AI extraction rules mapping field names to natural-language descriptions.
          schema:
            type: string
      responses:
        # The media type of a successful response depends on the output-format
        # parameters above, so several alternatives are listed.
        "200":
          description: HTML, screenshot bytes, JSON wrapper, text, or markdown depending on params.
          content:
            text/html:
              schema:
                type: string
            text/plain:
              schema:
                type: string
            image/png:
              schema:
                type: string
                format: binary
            application/json:
              schema:
                $ref: "#/components/schemas/JsonResponse"
        "400":
          description: Invalid parameters or unsupported URL.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "404":
          description: Target page not found.
        "429":
          description: Concurrency limit exceeded.
        "500":
          description: Render error or upstream failure.
    post:
      summary: Scrape a URL with a POST Body
      description: |
        Same as the GET variant but accepts a request body that is forwarded to
        the target URL as either form-encoded data or JSON.
      operationId: scrapePost
      tags: [HTML API]
      # Only a subset of the GET parameters is declared for POST. Descriptions
      # mirror the GET operation so both variants render consistently.
      parameters:
        - $ref: "#/components/parameters/ApiKey"
        - name: url
          in: query
          required: true
          description: URL to scrape (must be URL-encoded, include scheme).
          schema:
            type: string
            format: uri
        - name: render_js
          in: query
          description: Execute JavaScript via headless browser.
          schema:
            type: boolean
        - name: premium_proxy
          in: query
          description: Use residential premium proxy pool (10 credits without JS, 25 with JS).
          schema:
            type: boolean
        - name: country_code
          in: query
          description: ISO 3166-1 country code for proxy geolocation.
          schema:
            type: string
      requestBody:
        description: Payload forwarded to the target URL, as form-encoded data or JSON.
        content:
          # Both bodies are free-form objects; no field names are prescribed.
          application/x-www-form-urlencoded:
            schema:
              type: object
              additionalProperties: true
          application/json:
            schema:
              type: object
              additionalProperties: true
      responses:
        "200":
          description: Response forwarded from the target site.
        # Error responses mirror the GET operation on the same path.
        "400":
          description: Invalid parameters or unsupported URL.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "404":
          description: Target page not found.
        "429":
          description: Concurrency limit exceeded.
        "500":
          description: Render error or upstream failure.
  # Dedicated Google Search endpoint returning structured JSON.
  # NOTE(review): ScrapingBee's public docs may publish this endpoint under a
  # different path (possibly /store/google) - confirm against the current
  # documentation before relying on this path.
  /google:
    get:
      summary: Run a Google Search and Return Structured SERP JSON
      description: |
        Execute a Google query via ScrapingBee's dedicated Search API and receive
        structured JSON results: organic listings, knowledge graph, featured
        snippets, news, maps, image results, related searches, and ads.
      operationId: googleSearch
      tags: [Google Search API]
      parameters:
        - $ref: "#/components/parameters/ApiKey"
        - name: search
          in: query
          required: true
          description: Query string as typed into the Google search bar.
          schema:
            type: string
        - name: search_type
          in: query
          description: Which Google vertical to query.
          schema:
            type: string
            enum: [classic, news, maps, images]
            default: classic
        - name: country_code
          in: query
          description: ISO 3166-1 alpha-2 country code for result localization.
          schema:
            type: string
        - name: language
          in: query
          description: Result language (hl parameter equivalent).
          schema:
            type: string
        - name: nb_results
          in: query
          description: Number of results to return.
          schema:
            type: integer
            # At least one result must be requested.
            minimum: 1
            default: 10
        - name: page
          in: query
          description: Result page number.
          schema:
            type: integer
            # Pages are numbered from 1 (see the default).
            minimum: 1
            default: 1
        - name: device
          in: query
          description: Render Google as desktop or mobile.
          schema:
            type: string
            enum: [desktop, mobile]
            default: desktop
        - name: extra_params
          in: query
          description: Additional Google query parameters (gl, hl, num, start, etc.).
          schema:
            type: string
        - name: light_request
          in: query
          description: Use the cheaper, faster light request mode.
          schema:
            type: boolean
        - name: full_html
          in: query
          description: Return the full SERP HTML alongside the structured payload.
          schema:
            type: boolean
      responses:
        "200":
          description: Structured Google SERP response.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GoogleSerpResponse"
        "400":
          description: Invalid search parameters.
        "401":
          description: Missing or invalid API key.
        "402":
          description: Out of API credits.
        "429":
          description: Concurrency limit exceeded.
# Reusable definitions referenced via `$ref` from the operations under `paths`.
components:
  # Authentication: the account key travels in the `api_key` query parameter.
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      name: api_key
      in: query
      description: ScrapingBee account key, passed as a query parameter.
  # Shared parameter object; every operation references it as its first parameter.
  parameters:
    ApiKey:
      name: api_key
      in: query
      required: true
      description: ScrapingBee account key.
      schema:
        type: string
  schemas:
    # JSON envelope for the scrape endpoint's application/json response.
    JsonResponse:
      type: object
      description: Wrapper response when json_response=true is used.
      properties:
        body:
          type: string
          description: Rendered page body (HTML, text, or markdown depending on params).
        headers:
          type: object
          description: Response headers from the target site.
          additionalProperties:
            type: string
        # NOTE(review): items are typed as plain strings here; verify against a
        # live response whether cookies are actually returned as objects.
        cookies:
          type: array
          items:
            type: string
        type:
          type: string
          description: Content type returned (e.g. text/html, image/png).
        metadata:
          type: object
          description: ScrapingBee metadata (resolved URL, credits used, etc.).
          additionalProperties: true
    # Top-level payload of the Google Search endpoint. Only organic results have
    # a dedicated item schema; the other sections are left as open objects.
    GoogleSerpResponse:
      type: object
      description: Structured Google SERP payload.
      properties:
        meta_data:
          type: object
          description: Search metadata (query, location, total results, request URL).
          additionalProperties: true
        organic_results:
          type: array
          items:
            $ref: "#/components/schemas/GoogleOrganicResult"
        knowledge_graph:
          type: object
          additionalProperties: true
        featured_snippet:
          type: object
          additionalProperties: true
        related_queries:
          type: array
          items:
            type: string
        people_also_ask:
          type: array
          items:
            type: object
            additionalProperties: true
        news_results:
          type: array
          items:
            type: object
            additionalProperties: true
        local_results:
          type: array
          items:
            type: object
            additionalProperties: true
        image_results:
          type: array
          items:
            type: object
            additionalProperties: true
        ads:
          type: array
          items:
            type: object
            additionalProperties: true
    # One entry of `GoogleSerpResponse.organic_results`.
    GoogleOrganicResult:
      type: object
      properties:
        position:
          type: integer
        title:
          type: string
        url:
          type: string
          format: uri
        displayed_url:
          type: string
        # A property literally named "description", not schema documentation.
        description:
          type: string
        sitelinks:
          type: array
          items:
            type: object
            additionalProperties: true