Home
Apache Nutch
Apache Nutch Job Info Structure
Apache Nutch Job Info Structure
Information about a crawl job.
Type: object
Properties: 8
Required: 2
Web Crawler, Indexing, Search, Apache, Java, Hadoop, Open Source
JobInfo is a JSON Structure definition published by Apache Nutch. It describes 8 properties, of which 2 (type and state) are required, and it conforms to the https://json-structure.org/meta/core/v0/# meta-schema.
Properties
id
type
confId
args
result
state
msg
crawlId
Meta-schema: https://json-structure.org/meta/core/v0/#
JSON Structure
{
  "$schema": "https://json-structure.org/meta/core/v0/#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-structure/apache-nutch-job-info-structure.json",
  "name": "JobInfo",
  "description": "Information about a crawl job.",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique job identifier."
    },
    "type": {
      "type": "string",
      "description": "The type of Nutch crawl job.",
      "enum": [
        "INJECT",
        "GENERATE",
        "FETCH",
        "PARSE",
        "UPDATEDB",
        "INDEX",
        "READDB",
        "CLASS",
        "INVERTLINKS",
        "DEDUP"
      ]
    },
    "confId": {
      "type": "string",
      "description": "The configuration ID used for this job."
    },
    "args": {
      "type": "object",
      "additionalProperties": true,
      "description": "Arguments passed to the job."
    },
    "result": {
      "type": "object",
      "additionalProperties": true,
      "description": "Result data returned after job completion."
    },
    "state": {
      "type": "string",
      "description": "The current state of a job.",
      "enum": [
        "IDLE",
        "RUNNING",
        "FINISHED",
        "FAILED",
        "KILLED",
        "STOPPING",
        "KILLING",
        "ANY"
      ]
    },
    "msg": {
      "type": "string",
      "description": "A human-readable status or error message."
    },
    "crawlId": {
      "type": "string",
      "description": "The crawl identifier associated with this job."
    }
  },
  "required": [
    "type",
    "state"
  ]
}