Amazon Glue · JSON Structure

Glue Create Crawler Request Structure

CreateCrawlerRequest schema from Amazon Glue API

Type: object · Properties: 15 · Required: 3
Analytics, Data Catalog, Data Integration, Data Pipeline, ETL, Serverless

CreateCrawlerRequest is a JSON Structure definition published by Amazon Glue, describing 15 properties, of which 3 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.

Properties

Name, Role, DatabaseName, Description, Targets, Schedule, Classifiers, TablePrefix, SchemaChangePolicy, RecrawlPolicy, LineageConfiguration, LakeFormationConfiguration, Configuration, CrawlerSecurityConfiguration, Tags

Meta-schema: https://json-structure.org/meta/core/v0/#

JSON Structure

Raw ↑
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-glue/refs/heads/main/json-structure/glue-create-crawler-request-structure.json",
  "name": "CreateCrawlerRequest",
  "description": "CreateCrawlerRequest schema from Amazon Glue API",
  "type": "object",
  "properties": {
    "Name": {
      "allOf": [
        {
          "$ref": "#/components/schemas/NameString"
        },
        {
          "description": "Name of the new crawler."
        }
      ]
    },
    "Role": {
      "allOf": [
        {
          "$ref": "#/components/schemas/Role"
        },
        {
          "description": "The IAM role or Amazon Resource Name (ARN) of an IAM role used by the new crawler to access customer resources."
        }
      ]
    },
    "DatabaseName": {
      "allOf": [
        {
          "$ref": "#/components/schemas/DatabaseName"
        },
        {
          "description": "The Glue database where results are written, such as: <code>arn:aws:daylight:us-east-1::database/sometable/*</code>."
        }
      ]
    },
    "Description": {
      "allOf": [
        {
          "$ref": "#/components/schemas/DescriptionString"
        },
        {
          "description": "A description of the new crawler."
        }
      ]
    },
    "Targets": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerTargets"
        },
        {
          "description": "A list of collection of targets to crawl."
        }
      ]
    },
    "Schedule": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CronExpression"
        },
        {
          "description": "A <code>cron</code> expression used to specify the schedule (see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html\">Time-Based Schedules for Jobs and Crawlers</a>. For example, to run something every day at 12:15 UTC, you would specify: <code>cron(15 12 * * ? *)</code>."
        }
      ]
    },
    "Classifiers": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ClassifierNameList"
        },
        {
          "description": "A list of custom classifiers that the user has registered. By default, all built-in classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification."
        }
      ]
    },
    "TablePrefix": {
      "allOf": [
        {
          "$ref": "#/components/schemas/TablePrefix"
        },
        {
          "description": "The table prefix used for catalog tables that are created."
        }
      ]
    },
    "SchemaChangePolicy": {
      "allOf": [
        {
          "$ref": "#/components/schemas/SchemaChangePolicy"
        },
        {
          "description": "The policy for the crawler's update and deletion behavior."
        }
      ]
    },
    "RecrawlPolicy": {
      "allOf": [
        {
          "$ref": "#/components/schemas/RecrawlPolicy"
        },
        {
          "description": "A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run."
        }
      ]
    },
    "LineageConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/LineageConfiguration"
        },
        {
          "description": "Specifies data lineage configuration settings for the crawler."
        }
      ]
    },
    "LakeFormationConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/LakeFormationConfiguration"
        },
        {
          "description": "Specifies Lake Formation configuration settings for the crawler."
        }
      ]
    },
    "Configuration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerConfiguration"
        },
        {
          "description": "Crawler configuration information. This versioned JSON string allows users to specify aspects of a crawler's behavior. For more information, see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/crawler-configuration.html\">Setting crawler configuration options</a>."
        }
      ]
    },
    "CrawlerSecurityConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerSecurityConfiguration"
        },
        {
          "description": "The name of the <code>SecurityConfiguration</code> structure to be used by this crawler."
        }
      ]
    },
    "Tags": {
      "allOf": [
        {
          "$ref": "#/components/schemas/TagsMap"
        },
        {
          "description": "The tags to use with this crawler request. You may use tags to limit access to the crawler. For more information about tags in Glue, see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/monitor-tags.html\">Amazon Web Services Tags in Glue</a> in the developer guide."
        }
      ]
    }
  },
  "required": [
    "Name",
    "Role",
    "Targets"
  ]
}