Amazon Neptune · JSON Structure

Data Start ML Data Processing Job Input Structure

StartMLDataProcessingJobInput schema from Neptune

Type: object · Properties: 16 · Required: 2
Tags: Database · Graph Database · Gremlin · Neptune · Property Graph · RDF · SPARQL

StartMLDataProcessingJobInput is a JSON Structure definition published by Amazon Neptune, describing 16 properties, of which 2 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.

Properties

id, inputDataS3Location, processedDataS3Location, previousDataProcessingJobId, sagemakerIamRoleArn, neptuneIamRoleArn, processingInstanceType, processingInstanceVolumeSizeInGB, processingTimeOutInSeconds, modelType, configFileName, subnets, securityGroupIds, volumeEncryptionKMSKey, s3OutputEncryptionKMSKey, enableInterContainerTrafficEncryption

Meta-schema: https://json-structure.org/meta/core/v0/#

JSON Structure

Raw ↑
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-neptune/refs/heads/main/json-structure/data-start-ml-data-processing-job-input-structure.json",
  "name": "StartMLDataProcessingJobInput",
  "description": "StartMLDataProcessingJobInput schema from Neptune",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the job (auto-generated if omitted)."
    },
    "inputDataS3Location": {
      "type": "string",
      "description": "S3 URI for input data."
    },
    "processedDataS3Location": {
      "type": "string",
      "description": "S3 URI for output results."
    },
    "previousDataProcessingJobId": {
      "type": "string",
      "description": "Job ID of a previous job for incremental processing."
    },
    "sagemakerIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for SageMaker execution."
    },
    "neptuneIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for Neptune access."
    },
    "processingInstanceType": {
      "type": "string",
      "description": "ML instance type (default auto-selected ml.r5 type)."
    },
    "processingInstanceVolumeSizeInGB": {
      "type": "int32",
      "description": "Disk volume size in GB (default 0 = auto-selected)."
    },
    "processingTimeOutInSeconds": {
      "type": "int32",
      "description": "Timeout in seconds (default 86400)."
    },
    "modelType": {
      "type": "string",
      "description": "Model type selection.",
      "enum": [
        "heterogeneous",
        "kge"
      ]
    },
    "configFileName": {
      "type": "string",
      "description": "Data specification file name.",
      "default": "training-data-configuration.json"
    },
    "subnets": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Subnet IDs in Neptune VPC."
    },
    "securityGroupIds": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "VPC security group IDs."
    },
    "volumeEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt the storage volume of the processing instances."
    },
    "s3OutputEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt the processing job output written to S3."
    },
    "enableInterContainerTrafficEncryption": {
      "type": "boolean",
      "description": "Whether to encrypt traffic between containers.",
      "default": true
    }
  },
  "required": [
    "inputDataS3Location",
    "processedDataS3Location"
  ]
}