Amazon Neptune · JSON Structure

ML Create Data Processing Job Request Structure

CreateDataProcessingJobRequest schema from Neptune

Type: object · Properties: 16 · Required: 2
Tags: Database, Graph Database, Gremlin, Neptune, Property Graph, RDF, SPARQL

CreateDataProcessingJobRequest is a JSON Structure definition published by Amazon Neptune, describing 16 properties, of which 2 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.

Properties

id, inputDataS3Location, processedDataS3Location, previousDataProcessingJobId, sagemakerIamRoleArn, neptuneIamRoleArn, processingInstanceType, processingInstanceVolumeSizeInGB, processingTimeOutInSeconds, modelType, configFileName, subnets, securityGroupIds, volumeEncryptionKMSKey, s3OutputEncryptionKMSKey, enableInterContainerTrafficEncryption

Meta-schema: https://json-structure.org/meta/core/v0/#

JSON Structure

Raw ↑
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-neptune/refs/heads/main/json-structure/ml-create-data-processing-job-request-structure.json",
  "name": "CreateDataProcessingJobRequest",
  "description": "CreateDataProcessingJobRequest schema from Neptune",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the job (auto-generated UUID if omitted)."
    },
    "inputDataS3Location": {
      "type": "string",
      "description": "S3 URI for the input data."
    },
    "processedDataS3Location": {
      "type": "string",
      "description": "S3 URI where processed output is written."
    },
    "previousDataProcessingJobId": {
      "type": "string",
      "description": "Job ID of a previous job for incremental processing."
    },
    "sagemakerIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for SageMaker execution."
    },
    "neptuneIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for Neptune access."
    },
    "processingInstanceType": {
      "type": "string",
      "description": "ML instance type (default auto-selected ml.r5 type)."
    },
    "processingInstanceVolumeSizeInGB": {
      "type": "int32",
      "description": "Disk volume size in GB (default 0 means auto-selected).",
      "default": 0
    },
    "processingTimeOutInSeconds": {
      "type": "int32",
      "description": "Timeout in seconds (default 86400, i.e., 1 day).",
      "default": 86400
    },
    "modelType": {
      "type": "string",
      "description": "The type of model to prepare data for.",
      "enum": [
        "heterogeneous",
        "kge"
      ]
    },
    "configFileName": {
      "type": "string",
      "description": "The data specification configuration file name.",
      "default": "training-data-configuration.json"
    },
    "subnets": {
      "type": "array",
      "description": "VPC subnet IDs for SageMaker processing.",
      "items": {
        "type": "string"
      }
    },
    "securityGroupIds": {
      "type": "array",
      "description": "VPC security group IDs.",
      "items": {
        "type": "string"
      }
    },
    "volumeEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key for storage volume encryption."
    },
    "s3OutputEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key for S3 output encryption."
    },
    "enableInterContainerTrafficEncryption": {
      "type": "boolean",
      "description": "Whether to enable inter-container traffic encryption.",
      "default": true
    }
  },
  "required": [
    "inputDataS3Location",
    "processedDataS3Location"
  ]
}