Apache Nutch · JSON Structure

Apache Nutch Job Info Structure

Information about a crawl job.

Type: object · Properties: 8 · Required: 2
Tags: Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source

JobInfo is a JSON Structure definition published by Apache Nutch. It describes 8 properties, of which 2 (type and state) are required, and it conforms to the meta-schema at https://json-structure.org/meta/core/v0/#.

Properties

id, type, confId, args, result, state, msg, crawlId

Meta-schema: https://json-structure.org/meta/core/v0/#

JSON Structure

Raw ↑
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-structure/apache-nutch-job-info-structure.json",
  "name": "JobInfo",
  "description": "Information about a crawl job.",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique job identifier."
    },
    "type": {
      "type": "string",
      "description": "The type of Nutch crawl job.",
      "enum": [
        "INJECT",
        "GENERATE",
        "FETCH",
        "PARSE",
        "UPDATEDB",
        "INDEX",
        "READDB",
        "CLASS",
        "INVERTLINKS",
        "DEDUP"
      ]
    },
    "confId": {
      "type": "string",
      "description": "The configuration ID used for this job."
    },
    "args": {
      "type": "object",
      "additionalProperties": true,
      "description": "Arguments passed to the job."
    },
    "result": {
      "type": "object",
      "additionalProperties": true,
      "description": "Result data returned after job completion."
    },
    "state": {
      "type": "string",
      "description": "The current state of a job.",
      "enum": [
        "IDLE",
        "RUNNING",
        "FINISHED",
        "FAILED",
        "KILLED",
        "STOPPING",
        "KILLING",
        "ANY"
      ]
    },
    "msg": {
      "type": "string",
      "description": "A human-readable status or error message."
    },
    "crawlId": {
      "type": "string",
      "description": "The crawl identifier associated with this job."
    }
  },
  "required": [
    "type",
    "state"
  ]
}