Amazon Neptune · JSON Structure

Amazon Neptune Loader Job Structure

Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting Gremlin CSV and openCypher CSV for property graphs, and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.

Type: object · Properties: 12 · Required: 4
Database · Graph Database · Gremlin · Neptune · Property Graph · RDF · SPARQL

Amazon Neptune Loader Job is a JSON Structure definition published by Amazon Neptune, describing 12 properties, of which 4 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.

Properties

source, format, iamRoleArn, region, mode, failOnError, parallelism, parserConfiguration, updateSingleCardinalityProperties, queueRequest, dependencies, userProvidedEdgeIds

Meta-schema: https://json-structure.org/meta/core/v0/#

JSON Structure

Raw ↑
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://github.com/api-search/amazon-neptune/json-structure/amazon-neptune-loader-job-structure.json",
  "name": "Amazon Neptune Loader Job",
  "description": "Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting CSV for property graphs and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.",
  "type": "object",
  "required": [
    "source",
    "format",
    "iamRoleArn",
    "region"
  ],
  "properties": {
    "source": {
      "type": "string",
      "description": "Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.",
      "examples": [
        "s3://my-bucket/graph-data/",
        "s3://my-bucket/data/vertices.csv"
      ]
    },
    "format": {
      "type": "string",
      "description": "The data format of the source files to be loaded.",
      "enum": [
        "csv",
        "opencypher",
        "ntriples",
        "nquads",
        "rdfxml",
        "turtle"
      ]
    },
    "iamRoleArn": {
      "type": "string",
      "description": "The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.",
      "pattern": "^arn:aws[a-z-]*:iam::[0-9]+:role/"
    },
    "region": {
      "type": "string",
      "description": "The AWS Region of the S3 bucket containing the data to load.",
      "examples": [
        "us-east-1",
        "eu-west-1"
      ]
    },
    "mode": {
      "type": "string",
      "description": "The load mode. NEW fails if data was previously loaded. RESUME continues a failed load from where it left off. AUTO resumes if possible, otherwise starts new.",
      "enum": [
        "NEW",
        "RESUME",
        "AUTO"
      ],
      "default": "AUTO"
    },
    "failOnError": {
      "type": "string",
      "description": "Whether to stop the entire load job when an error is encountered.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "TRUE"
    },
    "parallelism": {
      "type": "string",
      "description": "The degree of parallelism for loading. LOW uses a single thread, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, OVERSUBSCRIBE uses all available resources.",
      "enum": [
        "LOW",
        "MEDIUM",
        "HIGH",
        "OVERSUBSCRIBE"
      ],
      "default": "HIGH"
    },
    "parserConfiguration": {
      "type": "object",
      "description": "Optional parser configuration settings for RDF data.",
      "properties": {
        "baseUri": {
          "type": "string",
          "description": "The base URI for resolving relative URIs in the data."
        },
        "namedGraphUri": {
          "type": "string",
          "description": "The default named graph URI for loaded triples."
        },
        "allowEmptyStrings": {
          "type": "boolean",
          "description": "Whether to allow empty string values for properties."
        }
      }
    },
    "updateSingleCardinalityProperties": {
      "type": "string",
      "description": "Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "queueRequest": {
      "type": "string",
      "description": "Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "dependencies": {
      "type": "array",
      "description": "An array of load job IDs that must complete successfully before this job runs.",
      "items": {
        "type": "string"
      }
    },
    "userProvidedEdgeIds": {
      "type": "string",
      "description": "For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.",
      "enum": [
        "TRUE",
        "FALSE"
      ]
    }
  },
  "definitions": {
    "LoaderJobStatus": {
      "type": "object",
      "name": "LoaderJobStatus",
      "description": "The status of a Neptune bulk loader job.",
      "properties": {
        "loadId": {
          "type": "string",
          "description": "The unique identifier for the load job."
        },
        "overallStatus": {
          "type": "object",
          "properties": {
            "fullUri": {
              "type": "string",
              "description": "The S3 URI of the data source."
            },
            "runNumber": {
              "type": "int32",
              "description": "The run number for this load."
            },
            "retryNumber": {
              "type": "int32",
              "description": "The number of retries."
            },
            "status": {
              "type": "string",
              "description": "The current status of the load job.",
              "enum": [
                "LOAD_NOT_STARTED",
                "LOAD_IN_PROGRESS",
                "LOAD_COMPLETED",
                "LOAD_CANCELLED_BY_USER",
                "LOAD_CANCELLED_DUE_TO_ERRORS",
                "LOAD_FAILED",
                "LOAD_UNEXPECTED_ERROR",
                "LOAD_DATA_DEADLOCK",
                "LOAD_DATA_FAILED_DUE_TO_FEED_MODIFIED_OR_DELETED",
                "LOAD_S3_READ_ERROR",
                "LOAD_S3_ACCESS_DENIED_ERROR",
                "LOAD_COMMITTED_W_WRITE_CONFLICTS"
              ]
            },
            "totalTimeSpent": {
              "type": "int32",
              "description": "Total time spent on the load in seconds."
            },
            "startTime": {
              "type": "int32",
              "description": "The start time as a Unix timestamp."
            },
            "totalRecords": {
              "type": "int32",
              "description": "Total number of records processed."
            },
            "totalDuplicates": {
              "type": "int32",
              "description": "Total number of duplicate records encountered."
            },
            "parsingErrors": {
              "type": "int32",
              "description": "Total number of parsing errors."
            },
            "datatypeMismatchErrors": {
              "type": "int32",
              "description": "Total number of datatype mismatch errors."
            },
            "insertErrors": {
              "type": "int32",
              "description": "Total number of insert errors."
            }
          }
        }
      }
    }
  }
}