Skip to main content

Workflow

The quick cloning feature follows these steps:
  1. Upload the source audio: Use the File Upload API to upload the audio you want to clone and obtain a file_id.
  • Requirements for uploaded files:
    • Supported formats: mp3, m4a, wav
    • Duration: minimum 10 seconds, maximum 5 minutes
    • File size: up to 20 MB
  2. Upload example audio (optional): To enhance cloning quality, you can upload an example audio file via the File Upload API and obtain a file_id. Include this file_id in clone_prompt under prompt_audio.
  • Requirements for example files:
    • Supported formats: mp3, m4a, wav
    • Duration: less than 8 seconds
    • File size: up to 20 MB
  3. Call the cloning API: Use the obtained file_id and a custom voice_id as input parameters to call the Voice Clone API to clone the voice.
  4. Use the cloned voice: With the generated voice_id, you can call the speech synthesis API as needed.

Process Examples

1. Upload Source Audio

"""
Example for obtaining the file_id of the source audio.
Note: Make sure your API key is set in the environment variable `MINIMAX_API_KEY`.
"""
import requests
import os

api_key = os.getenv("MINIMAX_API_KEY")
url = "https://api.minimax.io/v1/files/upload"

# Form field telling the upload endpoint this file is the voice-clone source.
payload = {"purpose": "voice_clone"}
headers = {"Authorization": f"Bearer {api_key}"}

# Open the audio in a context manager so the handle is closed even if the
# upload raises; the request must be sent while the file is still open.
with open("/path/to/clone_input.mp3", "rb") as audio:
    files = [("file", ("clone_input.mp3", audio))]
    response = requests.post(url, headers=headers, data=payload, files=files)

# Fail loudly on HTTP errors before trying to read the JSON body.
response.raise_for_status()
file_id = response.json().get("file", {}).get("file_id")
print(file_id)

2. Upload Example Audio

"""
Example for obtaining the file_id of the example audio.
Note: Ensure `MINIMAX_API_KEY` is set.
"""
import requests
import os

api_key = os.getenv("MINIMAX_API_KEY")
url = "https://api.minimax.io/v1/files/upload"

# Form field telling the upload endpoint this file is the example (prompt) audio.
payload = {"purpose": "prompt_audio"}
headers = {"Authorization": f"Bearer {api_key}"}

# Open the audio in a context manager so the handle is closed even if the
# upload raises; the request must be sent while the file is still open.
with open("/path/to/clone_prompt.mp3", "rb") as audio:
    files = [("file", ("clone_prompt.mp3", audio))]
    response = requests.post(url, headers=headers, data=payload, files=files)

# Fail loudly on HTTP errors before trying to read the JSON body.
response.raise_for_status()
prompt_file_id = response.json().get("file", {}).get("file_id")
print(prompt_file_id)

3. Clone the Voice

"""
Example for voice cloning.
Note: Set `MINIMAX_API_KEY` in environment variables,
and replace "<your_custom_voice_id>", <file_id_of_cloned_voice>, <file_id_of_prompt_audio> with actual values.
"""
import requests
import os

api_key = os.getenv("MINIMAX_API_KEY")
url = "https://api.minimax.io/v1/voice_clone"

# NOTE: the bare <...> entries below are template placeholders, not Python.
# The snippet only runs once each of them has been replaced with a real value.
payload = {
    # Placeholder: file_id obtained from uploading the source audio.
    "file_id": <file_id_of_cloned_voice>,
    # Placeholder: the custom voice_id you choose for the cloned voice.
    "voice_id": "<your_custom_voice_id>",
    "clone_prompt": {
        # Placeholder: file_id obtained from uploading the example audio.
        "prompt_audio": <file_id_of_prompt_audio>,
        "prompt_text": "This voice sounds natural and pleasant."
    },
    "text": "A gentle breeze passes over the soft grass, accompanied by the fresh scent and birdsong.",
    "model": "speech-2.6-hd"
}

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# The payload is sent as a JSON body; HTTP error statuses raise before printing.
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
print(response.text)

Full Example

"""
Example demonstrating full voice cloning workflow and obtaining preview audio.
Note: Set `MINIMAX_API_KEY` in environment variables,
and replace "<your_custom_voice_id>" with your defined voice ID.
"""
import requests
import os

api_key = os.getenv("MINIMAX_API_KEY")
upload_url = "https://api.minimax.io/v1/files/upload"
clone_url = "https://api.minimax.io/v1/voice_clone"
headers = {"Authorization": f"Bearer {api_key}"}

# 1. Upload source audio
with open("/path/to/clone_input.mp3", "rb") as f:
    files = {"file": ("clone_input.mp3", f)}
    data = {"purpose": "voice_clone"}
    response = requests.post(upload_url, headers=headers, data=data, files=files)
    # Stop on HTTP errors here instead of failing later with a KeyError
    # when the error body has no "file" entry.
    response.raise_for_status()
    file_id = response.json()["file"]["file_id"]
    print(f"File ID of the cloned audio: {file_id}")

# 2. Upload example audio
with open("/path/to/clone_prompt.mp3", "rb") as f:
    files = {"file": ("clone_prompt.mp3", f)}
    data = {"purpose": "prompt_audio"}
    response = requests.post(upload_url, headers=headers, data=data, files=files)
    response.raise_for_status()
    prompt_file_id = response.json()["file"]["file_id"]
    print(f"File ID of the prompt audio: {prompt_file_id}")

# 3. Clone the voice
clone_payload = {
    "file_id": file_id,
    "voice_id": "<your_custom_voice_id>",
    "clone_prompt": {
        "prompt_audio": prompt_file_id,
        "prompt_text": "Humans are creatures of habits, it can be tough"
    },
    "text": "These three nighttime habits will transform your life. First, 15 minutes of prep to set up your first morning tasks. Write them down, pull them up on your screen, whatever it takes to make it easy to hit the ground running the next morning. Second, turn your phone on grayscale mode for the entire evening. It makes your phone 90% less addicting. You will not be inclined to look at it. You won't look at the notifications, and it'll pull you away from that addictive piece of technology sitting in your pocket. And No.3, use my 1-1-1 journaling method.",
    "model": "speech-2.6-hd"
}
# The clone request sends JSON, so it needs its own headers with a Content-Type.
clone_headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
response = requests.post(clone_url, headers=clone_headers, json=clone_payload)
response.raise_for_status()
print(response.text)

Example Results

  • Cloned Audio
  • Example Audio
  • Resulting Audio