# LlamaCloud Pipelines and Indexes API
#
# Managed ingestion and indexing pipelines that chunk, embed, and store
# documents into a scalable vector index with no infrastructure to manage.
# Create and configure pipelines, attach files and data sources, and monitor
# ingestion status.
#
# OpenAPI Specification
#
# Source file: llamacloud-openapi.yml
# Version of the OpenAPI Specification this document is written against.
openapi: 3.0.1
# Top-level metadata: title, API version, legal terms, and support contact.
info:
  title: LlamaCloud API
  version: '1.0'
  termsOfService: https://www.llamaindex.ai/terms-of-service
  contact:
    email: support@llamaindex.ai
    name: LlamaIndex Support
  description: >-
    REST API for the LlamaCloud managed document platform from
    LlamaIndex, covering LlamaParse document parsing, managed ingestion
    pipelines and indexes, document and file management, retrieval, and
    LlamaExtract structured extraction. All endpoints are authenticated
    with a Bearer API key.
# Base URL that the relative paths in this document are resolved against.
servers:
  - description: LlamaCloud v1 API
    url: https://api.cloud.llamaindex.ai/api/v1
# Document-wide default: operations require the bearerAuth scheme
# (declared under components.securitySchemes); no scopes are listed.
security:
  - bearerAuth: []
# Tag catalogue used to group operations; each operation references one
# of these names in its own `tags` list.
tags:
  - name: Parsing
    description: 'LlamaParse document parsing jobs and results.'
  - name: Pipelines
    description: 'Managed ingestion and indexing pipelines.'
  - name: Documents
    description: 'Files and pipeline documents.'
  - name: Retrieval
    description: 'Query a managed index for relevant chunks.'
  - name: Extraction
    description: 'LlamaExtract schema-driven structured extraction.'
paths:
  # Submits a document as multipart form data and returns a ParsingJob.
  /parsing/upload:
    post:
      summary: Upload a file to start a parse job
      description: >-
        Uploads a document and creates an asynchronous LlamaParse job.
        Returns a job id used to poll status and retrieve results in
        Markdown or JSON.
      operationId: uploadParsingJob
      tags:
        - Parsing
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # NOTE(review): no form field is listed as required, not even
              # `file` — confirm whether that is intentional.
              properties:
                # Sent as a binary form part.
                file:
                  type: string
                  format: binary
                  description: 'The document to parse (PDF, DOCX, PPTX, image, etc.).'
                parsing_instruction:
                  type: string
                  description: 'Natural-language instructions to guide parsing.'
                # Output format selector; defaults to markdown.
                result_type:
                  type: string
                  default: markdown
                  enum:
                    - markdown
                    - text
                    - json
                language:
                  type: string
                  description: 'Primary document language hint (e.g. en).'
      responses:
        '200':
          description: Parse job created.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ParsingJob'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '422':
          $ref: '#/components/responses/ValidationError'
  # Polling endpoint: looks up one parse job by the job_id path parameter.
  /parsing/job/{job_id}:
    get:
      summary: Get parse job status
      description: >-
        Returns the status of a parse job. Poll until status is SUCCESS.
      operationId: getParsingJob
      tags:
        - Parsing
      parameters:
        - $ref: '#/components/parameters/JobId'
      responses:
        # Same ParsingJob shape that /parsing/upload returns.
        '200':
          description: Parse job status.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ParsingJob'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Fetches the Markdown form of a parse job's output.
  /parsing/job/{job_id}/result/markdown:
    get:
      summary: Get parse result as Markdown
      operationId: getParsingResultMarkdown
      tags:
        - Parsing
      parameters:
        - $ref: '#/components/parameters/JobId'
      responses:
        # The Markdown text is wrapped in a JSON object (MarkdownResult),
        # not returned as a raw text body.
        '200':
          description: Markdown parse result.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MarkdownResult'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Fetches the structured (per-page) JSON form of a parse job's output.
  /parsing/job/{job_id}/result/json:
    get:
      summary: Get parse result as structured JSON
      operationId: getParsingResultJson
      tags:
        - Parsing
      parameters:
        - $ref: '#/components/parameters/JobId'
      responses:
        '200':
          description: 'JSON parse result with pages, items, and metadata.'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JsonResult'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # File collection: POST uploads one file, GET lists files.
  /files:
    post:
      operationId: uploadFile
      tags:
        - Documents
      summary: Upload a file
      description: Uploads a file to LlamaCloud for use in pipelines or extraction.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # The form has a single field, so an upload without it carries
              # nothing; declaring it required lets validators and generated
              # clients reject an empty form before sending it.
              required:
                - upload_file
              properties:
                upload_file:
                  type: string
                  format: binary
                  description: Binary content of the file to upload.
      responses:
        '200':
          description: File uploaded.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/File'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Missing or malformed form data; the same response the other
        # body-accepting operations in this document declare.
        '422':
          $ref: '#/components/responses/ValidationError'
    get:
      operationId: listFiles
      tags:
        - Documents
      summary: List files
      parameters:
        # Optional query parameter (not marked required).
        - name: project_id
          in: query
          schema:
            type: string
      responses:
        '200':
          description: A list of files.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/File'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Pipeline collection: POST creates a pipeline, GET lists pipelines.
  /pipelines:
    post:
      summary: Create or upsert a pipeline
      description: 'Creates a managed ingestion and indexing pipeline.'
      operationId: createPipeline
      tags:
        - Pipelines
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PipelineCreate'
      responses:
        '200':
          description: Pipeline created.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Pipeline'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '422':
          $ref: '#/components/responses/ValidationError'
    get:
      summary: List pipelines
      operationId: listPipelines
      tags:
        - Pipelines
      parameters:
        # Optional query parameter (not marked required).
        - in: query
          name: project_id
          schema:
            type: string
      responses:
        '200':
          description: A list of pipelines.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Pipeline'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Single-pipeline lookup by the pipeline_id path parameter.
  /pipelines/{pipeline_id}:
    get:
      summary: Get a pipeline
      operationId: getPipeline
      tags:
        - Pipelines
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      responses:
        '200':
          description: Pipeline detail.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Pipeline'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Reports the ingestion status of one pipeline.
  /pipelines/{pipeline_id}/status:
    get:
      operationId: getPipelineStatus
      tags:
        - Pipelines
      summary: Get pipeline ingestion status
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      responses:
        '200':
          description: Ingestion status.
          content:
            application/json:
              schema:
                type: object
                properties:
                  # One of the five enumerated status values.
                  status:
                    type: string
                    enum: [NOT_STARTED, IN_PROGRESS, SUCCESS, ERROR, PARTIAL_SUCCESS]
        '401':
          $ref: '#/components/responses/Unauthorized'
        # This path takes a pipeline_id, so an unknown id needs a documented
        # outcome, as GET /pipelines/{pipeline_id} already has.
        '404':
          $ref: '#/components/responses/NotFound'
  # Attaches already-uploaded files (referenced by id) to a pipeline.
  /pipelines/{pipeline_id}/files:
    post:
      operationId: addFilesToPipeline
      tags:
        - Pipelines
      summary: Add files to a pipeline
      description: Attaches previously uploaded files to a pipeline for ingestion.
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # The body is a JSON array; each element names one file.
              type: array
              items:
                type: object
                # file_id is the only field, so an element without it
                # identifies nothing; mark it mandatory.
                required:
                  - file_id
                properties:
                  file_id:
                    type: string
                    description: Id of a previously uploaded file.
      responses:
        '200':
          description: Files added to the pipeline.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/PipelineFile'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Unknown pipeline_id, as on GET /pipelines/{pipeline_id}.
        '404':
          $ref: '#/components/responses/NotFound'
        # Malformed body, as on the other JSON-accepting operations.
        '422':
          $ref: '#/components/responses/ValidationError'
  # Documents of one pipeline: POST adds raw documents, GET lists them.
  /pipelines/{pipeline_id}/documents:
    post:
      operationId: createPipelineDocuments
      tags:
        - Documents
      summary: Add documents to a pipeline
      description: Adds raw documents directly to a pipeline's managed index.
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # The body is a JSON array of Document objects.
              type: array
              items:
                $ref: '#/components/schemas/Document'
      responses:
        '200':
          description: Documents added.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Document'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Unknown pipeline_id, as on GET /pipelines/{pipeline_id}.
        '404':
          $ref: '#/components/responses/NotFound'
        # Malformed body, as on the other JSON-accepting operations.
        '422':
          $ref: '#/components/responses/ValidationError'
    get:
      operationId: listPipelineDocuments
      tags:
        - Documents
      summary: List pipeline documents
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      responses:
        '200':
          description: A list of documents in the pipeline.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Document'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Unknown pipeline_id, as on GET /pipelines/{pipeline_id}.
        '404':
          $ref: '#/components/responses/NotFound'
  # Retrieval query against the index of one pipeline.
  /pipelines/{pipeline_id}/retrieve:
    post:
      operationId: retrieveFromPipeline
      tags:
        - Retrieval
      summary: Retrieve relevant chunks from a pipeline index
      description: >-
        Runs a retrieval query against a pipeline's managed index and returns
        the most relevant nodes, with optional reranking and metadata filters.
      parameters:
        - $ref: '#/components/parameters/PipelineId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RetrieveRequest'
      responses:
        '200':
          description: Retrieval results.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RetrieveResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        # Unknown pipeline_id, as on GET /pipelines/{pipeline_id}.
        '404':
          $ref: '#/components/responses/NotFound'
        '422':
          $ref: '#/components/responses/ValidationError'
  # Extraction agent collection: POST defines an agent, GET lists agents.
  /extraction/extraction-agents:
    post:
      summary: Create an extraction agent
      description: >-
        Defines a LlamaExtract agent bound to a JSON data schema
        describing the fields to extract from documents.
      operationId: createExtractionAgent
      tags:
        - Extraction
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExtractionAgentCreate'
      responses:
        '200':
          description: Extraction agent created.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionAgent'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '422':
          $ref: '#/components/responses/ValidationError'
    get:
      summary: List extraction agents
      operationId: listExtractionAgents
      tags:
        - Extraction
      # This operation declares no parameters.
      responses:
        '200':
          description: A list of extraction agents.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/ExtractionAgent'
        '401':
          $ref: '#/components/responses/Unauthorized'
  # Starts an asynchronous extraction run for an agent/file pair.
  /extraction/jobs:
    post:
      summary: Run an extraction job
      description: >-
        Runs an extraction agent against an uploaded file and returns an
        async job id. Poll the job to retrieve the structured result.
      operationId: runExtractionJob
      tags:
        - Extraction
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExtractionJobCreate'
      responses:
        '200':
          description: Extraction job created.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJob'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '422':
          $ref: '#/components/responses/ValidationError'
  # Status lookup for one extraction job by the job_id path parameter.
  /extraction/jobs/{job_id}:
    get:
      summary: Get extraction job status
      operationId: getExtractionJob
      tags:
        - Extraction
      parameters:
        # Same shared JobId parameter the parsing job paths use.
        - $ref: '#/components/parameters/JobId'
      responses:
        '200':
          description: Extraction job status.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJob'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
  # Fetches the structured output produced by one extraction job.
  /extraction/jobs/{job_id}/result:
    get:
      summary: Get extraction job result
      operationId: getExtractionJobResult
      tags:
        - Extraction
      parameters:
        - $ref: '#/components/parameters/JobId'
      responses:
        '200':
          description: Structured extraction result.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionResult'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
components:
  # Authentication schemes; bearerAuth is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      description: 'LlamaCloud API key sent as a Bearer token.'
      type: http
      scheme: bearer
  # Reusable path parameters, referenced from operations via $ref.
  parameters:
    # Shared by the /parsing/job/... and /extraction/jobs/... paths.
    JobId:
      name: job_id
      in: path
      required: true
      description: Identifier of the parsing or extraction job.
      schema:
        type: string
    # Shared by every /pipelines/{pipeline_id}... path.
    PipelineId:
      name: pipeline_id
      in: path
      required: true
      description: Identifier of the pipeline.
      schema:
        type: string
  # Reusable error responses, referenced from operations via $ref.
  responses:
    Unauthorized:
      description: Missing or invalid API key.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    NotFound:
      description: Resource not found.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    # Validation failures report a list of per-field problems rather than a
    # single message string, so the body is described inline instead of
    # reusing the single-string Error schema.
    # NOTE(review): shape follows FastAPI's HTTPValidationError convention —
    # confirm against a live 422 response.
    ValidationError:
      description: Request validation failed.
      content:
        application/json:
          schema:
            type: object
            properties:
              detail:
                type: array
                items:
                  type: object
                  properties:
                    # Path to the offending input; segments are field names
                    # (strings) or positions (integers).
                    loc:
                      type: array
                      items:
                        anyOf:
                          - type: string
                          - type: integer
                    msg:
                      type: string
                      description: Human-readable explanation of the problem.
                    type:
                      type: string
                      description: Machine-readable error category.
  schemas:
    # Status record for a parse job (returned by the /parsing endpoints).
    ParsingJob:
      type: object
      properties:
        id: {type: string}
        status:
          type: string
          enum:
            - PENDING
            - RUNNING
            - SUCCESS
            - ERROR
            - CANCELLED
        # Explicitly nullable: the value may be null.
        error_message:
          type: string
          nullable: true
    # Markdown parse output plus an open-ended metadata object.
    MarkdownResult:
      type: object
      properties:
        markdown: {type: string}
        # Free-form object: arbitrary keys are allowed.
        job_metadata:
          type: object
          additionalProperties: true
    # Structured parse output: a list of page objects plus an open-ended
    # metadata object.
    JsonResult:
      type: object
      properties:
        pages:
          type: array
          items:
            type: object
            properties:
              page: {type: integer}
              text: {type: string}
              md: {type: string}
              # Here "items" is a property NAME on each page object (an
              # array of free-form objects); the nested `items` below it is
              # the usual array-element schema keyword.
              items:
                type: array
                items:
                  type: object
                  additionalProperties: true
        job_metadata:
          type: object
          additionalProperties: true
    # Metadata record for an uploaded file (returned by the /files endpoints).
    File:
      type: object
      properties:
        id: {type: string}
        name: {type: string}
        file_size: {type: integer}
        file_type: {type: string}
        project_id: {type: string}
        # String in date-time format.
        created_at:
          type: string
          format: date-time
    # Request body for creating a pipeline; only `name` is mandatory.
    PipelineCreate:
      type: object
      properties:
        name: {type: string}
        # Both config objects are free-form: arbitrary keys are allowed.
        embedding_config:
          type: object
          additionalProperties: true
        transform_config:
          type: object
          additionalProperties: true
      required:
        - name
    # Pipeline record returned by the /pipelines endpoints.
    Pipeline:
      type: object
      properties:
        id: {type: string}
        name: {type: string}
        project_id: {type: string}
        pipeline_type:
          type: string
          enum:
            - MANAGED
            - PLAYGROUND
        # String in date-time format.
        created_at:
          type: string
          format: date-time
    # Record carrying its own id together with a file id and a pipeline id
    # (returned when files are added to a pipeline).
    PipelineFile:
      type: object
      properties:
        id: {type: string}
        file_id: {type: string}
        pipeline_id: {type: string}
    # Text document with free-form metadata. Used for pipeline documents
    # and as the `node` of each retrieval result.
    Document:
      type: object
      properties:
        id: {type: string}
        text: {type: string}
        # Free-form object: arbitrary keys are allowed.
        metadata:
          type: object
          additionalProperties: true
    # Request body for a retrieval query; only `query` is mandatory.
    RetrieveRequest:
      type: object
      properties:
        query: {type: string}
        dense_similarity_top_k:
          type: integer
          default: 6
        sparse_similarity_top_k: {type: integer}
        # Reranking defaults to enabled (default: true).
        enable_reranking:
          type: boolean
          default: true
        rerank_top_n: {type: integer}
        retrieval_mode:
          type: string
          enum:
            - chunks
            - files_via_metadata
            - files_via_content
            - auto_routed
        # Free-form object: arbitrary keys are allowed.
        filters:
          type: object
          additionalProperties: true
      required:
        - query
    # Retrieval output: a list of (node, score) pairs.
    RetrieveResponse:
      type: object
      properties:
        retrieval_nodes:
          type: array
          items:
            type: object
            properties:
              # The matched content, in the shared Document shape.
              node: {$ref: '#/components/schemas/Document'}
              score:
                type: number
                format: float
    # Request body for defining an extraction agent; `name` and
    # `data_schema` are mandatory, `config` is optional.
    ExtractionAgentCreate:
      type: object
      properties:
        name: {type: string}
        # Both objects are free-form: arbitrary keys are allowed.
        data_schema:
          type: object
          additionalProperties: true
        config:
          type: object
          additionalProperties: true
      required:
        - name
        - data_schema
    # Extraction agent record returned by the extraction-agent endpoints.
    ExtractionAgent:
      type: object
      properties:
        id: {type: string}
        name: {type: string}
        # Free-form object: arbitrary keys are allowed.
        data_schema:
          type: object
          additionalProperties: true
        project_id: {type: string}
        # String in date-time format.
        created_at:
          type: string
          format: date-time
    # Request body for starting an extraction job; both ids are mandatory.
    ExtractionJobCreate:
      type: object
      properties:
        extraction_agent_id: {type: string}
        file_id: {type: string}
      required:
        - extraction_agent_id
        - file_id
    # Status record for an extraction job; it lists the same five status
    # values as ParsingJob.
    ExtractionJob:
      type: object
      properties:
        id: {type: string}
        extraction_agent_id: {type: string}
        status:
          type: string
          enum:
            - PENDING
            - RUNNING
            - SUCCESS
            - ERROR
            - CANCELLED
        # Explicitly nullable: the value may be null.
        error_message:
          type: string
          nullable: true
    # Output of an extraction job. Both members are free-form objects, so
    # arbitrary keys are allowed in each.
    ExtractionResult:
      properties:
        data:
          additionalProperties: true
          type: object
        extraction_metadata:
          additionalProperties: true
          type: object
      type: object
    # Error payload referenced by the shared error responses under
    # components.responses: a single message string.
    Error:
      type: object
      properties:
        detail: {type: string}