Video Generation Service Overview
The video generation service provides several capabilities:
  1. Text-to-Video: Generate a video directly from a text description.
  2. Image-to-Video: Generate a video based on an initial image combined with a text description.
  3. First-and-Last-Frame Video: Generate a video by providing both the starting and ending frames.
  4. Subject-Reference Video: Generate a video using a subject’s face photo and a text description, ensuring consistency of facial features throughout the video.

Workflow

Video generation is an asynchronous process consisting of three steps:
  1. Create a generation task: Submit a video generation request and receive a task ID (task_id).
  2. Check task status: Poll the task status using the task_id. Once successful, you will receive a file ID (file_id).
  3. Retrieve video file: Use the file_id to obtain a download link and save the video file.

Features and Code Examples

For simplicity, we encapsulate polling and downloading logic into reusable functions. The following examples demonstrate how to create tasks in two different modes.
import os
import time
import requests

# Read the API key from the environment; raises KeyError if MINIMAX_API_KEY is unset.
api_key = os.environ["MINIMAX_API_KEY"]
# Shared Bearer-auth header sent with every request to the MiniMax API.
headers = {"Authorization": f"Bearer {api_key}"}


# --- Step 1: Create a video generation task ---
# The API supports two modes: text-to-video and image-to-video.
# Each function below starts an asynchronous task and returns a unique task_id.

def invoke_text_to_video() -> str:
    """(Mode 1) Create a video generation task from a text description.

    Submits an asynchronous text-to-video request and returns its task_id.
    """
    endpoint = "https://api.minimax.io/v1/video_generation"
    # 'prompt' defines the video's content; bracketed tags such as
    # [pan up] / [static] are camera-motion instructions.
    request_body = {
        "prompt": "A man picks up a book [pan up], then starts reading [static].",
        "model": "MiniMax-Hailuo-02",
        "duration": 6,
        "resolution": "1080P",
    }
    resp = requests.post(endpoint, headers=headers, json=request_body)
    resp.raise_for_status()
    return resp.json()["task_id"]


def invoke_image_to_video() -> str:
    """(Mode 2) Create a video generation task using a first-frame image and text description.

    Submits an asynchronous image-to-video request and returns its task_id.
    """
    endpoint = "https://api.minimax.io/v1/video_generation"
    request_body = {
        # In image-to-video mode, 'prompt' describes how the scene evolves
        # starting from the supplied first frame.
        "prompt": "A mouse runs toward the camera, smiling and blinking.",
        # 'first_frame_image' is the image used as the video's opening frame.
        "first_frame_image": "https://cdn.hailuoai.com/prod/2024-09-18-16/user/multi_chat_file/9c0b5c14-ee88-4a5b-b503-4f626f018639.jpeg",
        "model": "MiniMax-Hailuo-02",
        "duration": 6,
        "resolution": "1080P",
    }
    resp = requests.post(endpoint, headers=headers, json=request_body)
    resp.raise_for_status()
    return resp.json()["task_id"]

def invoke_subject_reference() -> str:
    """(Mode 3) Create a video generation task using a subject's face photo and a text description.

    Submits an asynchronous subject-reference request (model S2V-01) and
    returns its task_id. The subject's facial features are kept consistent
    throughout the generated video.

    Returns:
        The task_id of the newly created generation task.

    Raises:
        requests.HTTPError: If the creation request returns an error status.
    """
    url = "https://api.minimax.io/v1/video_generation"
    # NOTE: payload indentation normalized to match the sibling invoke_* functions.
    payload = {
        # 'prompt' describes the subject's actions in the generated video.
        "prompt": "A girl runs toward the camera and winks with a smile.",
        # 'subject_reference' supplies the face photo(s) used to keep the
        # subject's features consistent.
        "subject_reference": [
            {
                "type": "character",
                "image": [
                    "https://cdn.hailuoai.com/prod/2025-08-12-17/video_cover/1754990600020238321-411603868533342214-cover.jpg"
                ],
            }
        ],
        # S2V-01 is the subject-reference model (the other modes use MiniMax-Hailuo-02).
        "model": "S2V-01",
        "duration": 6,
        "resolution": "1080P",
    }
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    task_id = response.json()["task_id"]
    return task_id

# --- Step 2: Poll task status ---
# Since video generation is time-consuming, the API works asynchronously.
# After submitting a task, poll its status using the task_id until it succeeds or fails.
def query_task_status(task_id: str) -> str:
    """Poll task status by task_id until it succeeds or fails.

    Args:
        task_id: Identifier returned when the generation task was created.

    Returns:
        The file_id of the generated video once the task reports "Success".

    Raises:
        RuntimeError: If the task reports "Fail".
        requests.HTTPError: If a polling request returns an error status.
    """
    url = "https://api.minimax.io/v1/query/video_generation"
    params = {"task_id": task_id}
    while True:
        # A recommended polling interval is 10 seconds to avoid unnecessary server load.
        time.sleep(10)
        # Explicit timeout so the poller cannot hang forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        response_json = response.json()
        status = response_json["status"]
        print(f"Current task status: {status}")
        if status == "Success":
            # On success, the API returns a 'file_id' to fetch the video file.
            return response_json["file_id"]
        if status == "Fail":
            # RuntimeError (a subclass of Exception) keeps existing
            # `except Exception` callers working while being more specific.
            raise RuntimeError(f"Video generation failed: {response_json.get('error_message', 'Unknown error')}")


# --- Step 3: Retrieve and save the video file ---
# When the task succeeds, the response includes a file_id instead of a direct download link.
# This function fetches the download URL and saves the video locally.
def fetch_video(file_id: str, output_path: str = "output.mp4") -> None:
    """Retrieve the download URL from file_id and save the video locally.

    Args:
        file_id: Identifier of the generated video file (from query_task_status).
        output_path: Local file path to write the video to. Defaults to
            "output.mp4", preserving the original behavior.

    Raises:
        requests.HTTPError: If the retrieve or download request fails.
    """
    url = "https://api.minimax.io/v1/files/retrieve"
    params = {"file_id": file_id}
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    download_url = response.json()["file"]["download_url"]

    # Stream the download in chunks so large videos are not buffered
    # entirely in memory (the original used .content, a full in-memory read).
    with requests.get(download_url, stream=True, timeout=60) as video_response:
        video_response.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    print(f"Video successfully saved as {output_path}")


# --- Main process: end-to-end example ---
# Demonstrates the full workflow from task creation to video retrieval.
if __name__ == "__main__":
    # Step 1: create a task — uncomment exactly one of the three modes.
    new_task = invoke_text_to_video()  # Mode 1: Text-to-Video
    # new_task = invoke_image_to_video()  # Mode 2: Image-to-Video
    # new_task = invoke_subject_reference()  # Mode 3: Subject Reference
    print(f"Video generation task submitted, Task ID: {new_task}")
    # Step 2: block until the task completes, yielding the video's file id.
    video_file_id = query_task_status(new_task)
    print(f"Task succeeded, File ID: {video_file_id}")
    # Step 3: download and persist the finished video.
    fetch_video(video_file_id)

Video Generation Results

Text-to-Video

Provide a text description through the prompt parameter to generate a video. For finer control, some models support adding camera motion instructions (e.g., [pan], [zoom], [static]) directly after key descriptions in the prompt. Example output:

Image-to-Video

This mode uses the image specified in the first_frame_image parameter as the video’s opening frame. The prompt then describes how the scene evolves from this static image into motion. This feature is ideal for animating static images. Example output:

Subject Reference

This mode uses the subject_reference parameter, taking the provided face photo as input and combining it with the prompt description to generate a video, while ensuring consistency of the subject’s facial features throughout. Example output: