# fal
#
# fal Model APIs
#
# Unified queue-based REST API for invoking 1,000+ generative image, video,
# audio, and multimodal models hosted on fal's inference infrastructure.
# Submit a request to `https://queue.fal.run/{model-id}`, poll
# `/requests/{request_id}/status` or `/requests/{request_id}` for progress and
# results, or subscribe to webhook callbacks. Supports synchronous responses,
# asynchronous queueing, server-sent streaming progress, and request
# cancellation. Powers flagship models including FLUX, Veo 3, Kling 2.5,
# Wan 2.5, Seedream, Nano Banana, Qwen, SDXL, and Stable Diffusion variants.
#
# OpenAPI Specification
#
# fal-model-apis-openapi.yml
# Specification version followed by the API-level metadata block.
openapi: 3.1.0
info:
  title: fal Model APIs
  # Folded scalar: the line breaks below collapse to single spaces on parse.
  # The URL placeholders use the same names as the path templates and the
  # ModelOwner / ModelName path parameters (`model_owner`, `model_name`).
  description: >
    The fal Model APIs are a unified queue-based REST surface for invoking
    1,000+ production generative image, video, audio, and multimodal models
    hosted on fal's GPU inference infrastructure. Clients submit a job to
    `https://queue.fal.run/{model_owner}/{model_name}`, then either poll for
    status and result, subscribe via webhook, or stream incremental progress.
  # Quoted so the value is unambiguously a string.
  version: 'v1'
  contact:
    name: fal Support
    url: https://fal.ai/support
  license:
    name: fal Terms of Service
    url: https://fal.ai/legal/terms-of-service

# Base URL against which every path template in `paths` is resolved.
servers:
  - description: Production queue endpoint
    url: https://queue.fal.run

# Top-level security requirement; `FalKeyAuth` is the scheme declared under
# components.securitySchemes.
security:
  - FalKeyAuth: []

# Tag groups referenced from the `tags` list of each operation.
tags:
  - description: Submit, inspect, and cancel model inference jobs.
    name: Queue
  - description: Server-sent streaming of incremental model output.
    name: Streaming

paths:
  /{model_owner}/{model_name}:
    # Queue submission: POST a model-specific JSON body, receive a request_id.
    post:
      operationId: submitRequest
      tags:
        - Queue
      summary: Submit Model Inference Request
      description: >
        Submit a new inference request to the named model. The response
        includes a `request_id` that can be used to poll status, fetch the
        final result, or cancel the job. Provide `fal_webhook` as a query
        parameter to receive a POST callback when the job completes.
      parameters:
        - $ref: '#/components/parameters/ModelOwner'
        - $ref: '#/components/parameters/ModelName'
        # Declared inline rather than under components.parameters.
        - name: fal_webhook
          in: query
          required: false
          description: >-
            Optional webhook URL that fal will POST to when the job finishes.
          schema:
            format: uri
            type: string
      requestBody:
        required: true
        description: >-
          Model-specific JSON input. See the per-model schema in the fal model
          gallery.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InferenceRequest'
            examples:
              FluxSchnell:
                $ref: '#/components/examples/FluxSchnellRequest'
      responses:
        # Success: the job was enqueued.
        '200':
          description: Job accepted and queued.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueAcceptedResponse'
              examples:
                Queued:
                  $ref: '#/components/examples/QueueAcceptedExample'
        # Error cases reuse the shared definitions in components.responses.
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/PaymentRequired'
        '422':
          $ref: '#/components/responses/ValidationError'
        '429':
          $ref: '#/components/responses/RateLimited'
        '500':
          $ref: '#/components/responses/ServerError'

  /{model_owner}/{model_name}/requests/{request_id}/status:
    # Polling endpoint: reports the state of a previously submitted job.
    get:
      operationId: getRequestStatus
      tags:
        - Queue
      summary: Get Inference Request Status
      description: >-
        Retrieve current status (IN_QUEUE, IN_PROGRESS, COMPLETED, FAILED,
        CANCELED) for a queued request.
      parameters:
        - $ref: '#/components/parameters/ModelOwner'
        - $ref: '#/components/parameters/ModelName'
        - $ref: '#/components/parameters/RequestId'
        # Integer flag (0 or 1) rather than a boolean; defaults to 0.
        - name: logs
          in: query
          required: false
          description: >-
            When set to `1`, include log lines emitted by the running model.
          schema:
            type: integer
            default: 0
            enum:
              - 0
              - 1
      responses:
        '200':
          description: Current status payload.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueStatusResponse'
              examples:
                InProgress:
                  $ref: '#/components/examples/QueueInProgressExample'
        # Error cases reuse the shared definitions in components.responses.
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /{model_owner}/{model_name}/requests/{request_id}:
    # Result retrieval: 200 carries the model output, 202 means "not yet".
    get:
      operationId: getRequestResult
      tags:
        - Queue
      summary: Get Inference Request Result
      description: >-
        Retrieve the final result for a completed request. Returns 202 while
        the job is still running.
      parameters:
        - $ref: '#/components/parameters/ModelOwner'
        - $ref: '#/components/parameters/ModelName'
        - $ref: '#/components/parameters/RequestId'
      responses:
        '200':
          description: Final inference output.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InferenceResult'
              examples:
                FluxResult:
                  $ref: '#/components/examples/FluxResultExample'
        # No response body is declared for the still-running case.
        '202':
          description: Still running. Poll again shortly.
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /{model_owner}/{model_name}/requests/{request_id}/cancel:
    # Cancellation endpoint for a job that has not finished yet.
    put:
      summary: Cancel Inference Request
      description: >-
        Cancel an in-queue or in-progress request. Released GPU capacity is
        returned to the pool.
      operationId: cancelRequest
      tags:
        - Queue
      parameters:
        - $ref: '#/components/parameters/ModelOwner'
        - $ref: '#/components/parameters/ModelName'
        - $ref: '#/components/parameters/RequestId'
      responses:
        '200':
          description: Cancellation accepted.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CancelResponse'
        # The global FalKeyAuth requirement applies to this operation too, so
        # the authentication failure is declared like on the other
        # request-scoped operations.
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        # NOTE(review): no body schema is declared for the conflict case;
        # confirm whether the server returns an ErrorResponse here.
        '409':
          description: Request already finished and cannot be canceled.

  /{model_owner}/{model_name}/stream:
    # Streaming invocation: same request schema as submitRequest, but the
    # response is a text/event-stream body instead of a queue receipt.
    # NOTE(review): submitRequest also declares 402/429/500; confirm whether
    # those apply to this operation as well.
    post:
      operationId: streamRequest
      tags:
        - Streaming
      summary: Stream Model Inference Output
      description: >
        Synchronous streaming endpoint that emits Server-Sent Events as
        the model produces progressive output (tokens, intermediate frames,
        diffusion steps). Useful for chatty multimodal models and live
        previews.
      parameters:
        - $ref: '#/components/parameters/ModelOwner'
        - $ref: '#/components/parameters/ModelName'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InferenceRequest'
      responses:
        '200':
          # Quoted because the text contains `: `, which would otherwise be
          # read as a mapping separator.
          description: 'SSE stream of `event: progress` and `event: output` messages.'
          content:
            text/event-stream:
              schema:
                description: Server-Sent Events stream.
                type: string
        '401':
          $ref: '#/components/responses/Unauthorized'
        '422':
          $ref: '#/components/responses/ValidationError'

components:
  securitySchemes:
    # apiKey-in-header scheme on `Authorization`; the description gives the
    # expected `Key <token>` value format.
    FalKeyAuth:
      description: >
        Pass the fal API key as `Authorization: Key $FAL_KEY`. Keys are
        issued from the fal dashboard at https://fal.ai/dashboard/keys.
      type: apiKey
      name: Authorization
      in: header

  parameters:
    # First path segment of every operation.
    ModelOwner:
      name: model_owner
      in: path
      required: true
      description: Owning organization of the model (e.g. `fal-ai`, `black-forest-labs`).
      schema:
        type: string
    # Second path segment of every operation.
    # NOTE(review): OpenAPI path parameter values cannot contain an unescaped
    # `/`, yet some model identifiers are multi-segment. Confirm how the
    # server treats a percent-encoded slash, or model the variant as its own
    # path segment.
    ModelName:
      name: model_name
      in: path
      required: true
      description: >-
        Model identifier within the owner's namespace (e.g. `veo-3`).
        Multi-segment identifiers such as `flux/schnell` or
        `kling-2.5/text-to-video` contain a `/`, which a single OpenAPI path
        parameter cannot represent unescaped; generated clients may
        percent-encode it.
      schema:
        type: string
    # Identifier handed back by the submit operation; typed as a UUID string.
    RequestId:
      name: request_id
      in: path
      required: true
      description: Unique identifier for a queued inference job, returned by `submitRequest`.
      schema:
        type: string
        format: uuid

  schemas:
    InferenceRequest:
      # Open-ended object: three shared fields are declared, and any other
      # key is accepted through additionalProperties.
      type: object
      additionalProperties: true
      description: >
        Model-specific JSON payload. Each model defines its own schema in
        the model gallery (https://fal.ai/models). Common fields include
        `prompt`, `image_url`, `seed`, `num_inference_steps`, and
        `guidance_scale`.
      properties:
        prompt:
          description: Natural-language prompt describing the desired output.
          type: string
        image_url:
          description: >-
            Optional reference image URL (commonly an
            `https://v3.fal.media/...` asset).
          format: uri
          type: string
        seed:
          description: Optional integer seed for deterministic outputs.
          type: integer

    QueueAcceptedResponse:
      # Receipt for an enqueued job; only the id and the state are mandatory.
      type: object
      required:
        - request_id
        - status
      properties:
        request_id:
          format: uuid
          type: string
        status:
          type: string
          # Single-value enum: the only state declared for this response.
          enum:
            - IN_QUEUE
        response_url:
          format: uri
          type: string
          description: >-
            Convenience URL that resolves to the final result once ready.
        status_url:
          format: uri
          type: string
        cancel_url:
          format: uri
          type: string
        gateway_request_id:
          type: string

    QueueStatusResponse:
      # Status payload; `status` is the only required member.
      type: object
      required:
        - status
      properties:
        status:
          type: string
          enum:
            - IN_QUEUE
            - IN_PROGRESS
            - COMPLETED
            - FAILED
            - CANCELED
        queue_position:
          description: Position in the queue when `status` is `IN_QUEUE`.
          type: integer
        logs:
          description: >-
            Log lines emitted by the model. Present when `?logs=1` is
            supplied.
          type: array
          items:
            # One structured log record per array element.
            type: object
            properties:
              timestamp:
                format: date-time
                type: string
              message:
                type: string
              level:
                type: string
                enum:
                  - DEBUG
                  - INFO
                  - WARN
                  - ERROR

    InferenceResult:
      # No properties are declared; the shape is left open via
      # additionalProperties and described in prose only.
      type: object
      additionalProperties: true
      description: >
        Model-specific output. Image models typically return an `images[]`
        array with signed CDN URLs; video models return `video.url`;
        audio models return `audio.url`. Always includes a `seed` field
        for reproducibility.

    CancelResponse:
      # Cancellation acknowledgement; neither property is marked required.
      type: object
      properties:
        status:
          type: string
          enum:
            - CANCELED
        request_id:
          format: uuid
          type: string

    ErrorResponse:
      # Shared error body used by every entry in components.responses.
      type: object
      properties:
        # `detail` is either a plain message or a list of objects.
        detail:
          oneOf:
            - type: string
            - items:
                type: object
              type: array

  # Reusable error responses; each one carries the shared ErrorResponse body.
  responses:
    # Referenced under status code 401.
    Unauthorized:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      # Quoted because the text contains `: `.
      description: 'Missing or invalid `Authorization: Key …` header.'
    # Referenced under status code 402.
    PaymentRequired:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Insufficient credits / unpaid account.
    # Referenced under status code 422.
    ValidationError:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: The model input failed validation against the per-model schema.
    # Referenced under status code 429.
    RateLimited:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Concurrency or per-key rate limit exceeded.
    # Referenced under status code 404.
    NotFound:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Request ID or model not found.
    # Referenced under status code 500.
    ServerError:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Internal server error.

  # Sample payloads referenced from the operations' `examples` maps.
  examples:
    # Request body sample used by submitRequest.
    FluxSchnellRequest:
      summary: FLUX Schnell text-to-image input
      value:
        prompt: 'a futuristic cityscape at sunset, cinematic lighting, ultra-detailed'
        image_size: landscape_16_9
        num_inference_steps: 4
        enable_safety_checker: true
    # Response sample for a freshly queued job.
    QueueAcceptedExample:
      summary: Queue acceptance
      value:
        # Quoted so the identifier is always read as a string.
        request_id: '9a2f1c8e-9b1c-4d6f-9e21-7f3d4e5b1234'
        status: IN_QUEUE
        status_url: 'https://queue.fal.run/fal-ai/flux/schnell/requests/9a2f1c8e-9b1c-4d6f-9e21-7f3d4e5b1234/status'
        response_url: 'https://queue.fal.run/fal-ai/flux/schnell/requests/9a2f1c8e-9b1c-4d6f-9e21-7f3d4e5b1234'
        cancel_url: 'https://queue.fal.run/fal-ai/flux/schnell/requests/9a2f1c8e-9b1c-4d6f-9e21-7f3d4e5b1234/cancel'
    # Status sample that includes log records.
    QueueInProgressExample:
      summary: In-progress status
      value:
        status: IN_PROGRESS
        logs:
          # Timestamps stay quoted so they are not parsed as date values.
          - level: INFO
            message: 'Loading checkpoint flux-schnell.safetensors'
            timestamp: '2026-05-25T16:01:02Z'
          - level: INFO
            message: 'Step 2/4'
            timestamp: '2026-05-25T16:01:04Z'
    # Result sample for an image model.
    FluxResultExample:
      summary: FLUX Schnell output
      value:
        images:
          - content_type: image/png
            url: 'https://v3.fal.media/files/lion/abc123.png'
            width: 1344
            height: 768
        seed: 271828
        timings:
          inference: 0.83
        has_nsfw_concepts:
          - false
        prompt: 'a futuristic cityscape at sunset, cinematic lighting, ultra-detailed'