Amazon Neptune · JSON Structure
Amazon Neptune Loader Job Structure
Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting CSV for property graphs and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.
Type: object
Properties: 12
Required: 4
Tags: Database, Graph Database, Gremlin, Neptune, Property Graph, RDF, SPARQL
Amazon Neptune Loader Job is a JSON Structure definition published by Amazon Neptune, describing 12 properties, of which 4 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.
Properties
source
format
iamRoleArn
region
mode
failOnError
parallelism
parserConfiguration
updateSingleCardinalityProperties
queueRequest
dependencies
userProvidedEdgeIds
Meta-schema: https://json-structure.org/meta/core/v0/#
JSON Structure
{
"$schema": "https://json-structure.org/meta/core/v0/#",
"$id": "https://github.com/api-search/amazon-neptune/json-structure/amazon-neptune-loader-job-structure.json",
"name": "Amazon Neptune Loader Job",
"description": "Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting CSV for property graphs and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.",
"type": "object",
"required": [
"source",
"format",
"iamRoleArn",
"region"
],
"properties": {
"source": {
"type": "string",
"description": "Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.",
"examples": [
"s3://my-bucket/graph-data/",
"s3://my-bucket/data/vertices.csv"
]
},
"format": {
"type": "string",
"description": "The data format of the source files to be loaded.",
"enum": [
"csv",
"opencypher",
"ntriples",
"nquads",
"rdfxml",
"turtle"
]
},
"iamRoleArn": {
"type": "string",
"description": "The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.",
"pattern": "^arn:aws[a-z-]*:iam::[0-9]+:role/"
},
"region": {
"type": "string",
"description": "The AWS Region of the S3 bucket containing the data to load.",
"examples": [
"us-east-1",
"eu-west-1"
]
},
"mode": {
"type": "string",
"description": "The load mode. NEW fails if data was previously loaded. RESUME continues a failed load from where it left off. AUTO resumes if possible, otherwise starts new.",
"enum": [
"NEW",
"RESUME",
"AUTO"
],
"default": "AUTO"
},
"failOnError": {
"type": "string",
"description": "Whether to stop the entire load job when an error is encountered.",
"enum": [
"TRUE",
"FALSE"
],
"default": "TRUE"
},
"parallelism": {
"type": "string",
"description": "The degree of parallelism for loading. LOW uses a single thread, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, OVERSUBSCRIBE uses all available resources.",
"enum": [
"LOW",
"MEDIUM",
"HIGH",
"OVERSUBSCRIBE"
],
"default": "HIGH"
},
"parserConfiguration": {
"type": "object",
"description": "Optional parser configuration settings for RDF data.",
"properties": {
"baseUri": {
"type": "string",
"description": "The base URI for resolving relative URIs in the data."
},
"namedGraphUri": {
"type": "string",
"description": "The default named graph URI for loaded triples."
},
"allowEmptyStrings": {
"type": "boolean",
"description": "Whether to allow empty string values for properties."
}
}
},
"updateSingleCardinalityProperties": {
"type": "string",
"description": "Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.",
"enum": [
"TRUE",
"FALSE"
],
"default": "FALSE"
},
"queueRequest": {
"type": "string",
"description": "Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.",
"enum": [
"TRUE",
"FALSE"
],
"default": "FALSE"
},
"dependencies": {
"type": "array",
"description": "An array of load job IDs that must complete successfully before this job runs.",
"items": {
"type": "string"
}
},
"userProvidedEdgeIds": {
"type": "string",
"description": "For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.",
"enum": [
"TRUE",
"FALSE"
]
}
},
"definitions": {
"LoaderJobStatus": {
"type": "object",
"name": "LoaderJobStatus",
"description": "The status of a Neptune bulk loader job.",
"properties": {
"loadId": {
"type": "string",
"description": "The unique identifier for the load job."
},
"overallStatus": {
"type": "object",
"properties": {
"fullUri": {
"type": "string",
"description": "The S3 URI of the data source."
},
"runNumber": {
"type": "int32",
"description": "The run number for this load."
},
"retryNumber": {
"type": "int32",
"description": "The number of retries."
},
"status": {
"type": "string",
"description": "The current status of the load job.",
"enum": [
"LOAD_NOT_STARTED",
"LOAD_IN_PROGRESS",
"LOAD_COMPLETED",
"LOAD_CANCELLED_BY_USER",
"LOAD_CANCELLED_DUE_TO_ERRORS",
"LOAD_FAILED",
"LOAD_UNEXPECTED_ERROR",
"LOAD_DATA_DEADLOCK",
"LOAD_DATA_FAILED_DUE_TO_FEED_MODIFIED_OR_DELETED",
"LOAD_S3_READ_ERROR",
"LOAD_S3_ACCESS_DENIED_ERROR",
"LOAD_COMMITTED_W_WRITE_CONFLICTS"
]
},
"totalTimeSpent": {
"type": "int32",
"description": "Total time spent on the load in seconds."
},
"startTime": {
"type": "int32",
"description": "The start time as a Unix timestamp."
},
"totalRecords": {
"type": "int32",
"description": "Total number of records processed."
},
"totalDuplicates": {
"type": "int32",
"description": "Total number of duplicate records encountered."
},
"parsingErrors": {
"type": "int32",
"description": "Total number of parsing errors."
},
"datatypeMismatchErrors": {
"type": "int32",
"description": "Total number of datatype mismatch errors."
},
"insertErrors": {
"type": "int32",
"description": "Total number of insert errors."
}
}
}
}
}
}
}