> ## Documentation Index
> Fetch the complete documentation index at: https://docs.fish.audio/llms.txt
> Use this file to discover all available pages before exploring further.

# Creating Voice Models

> Learn how to create custom voice models with Fish Audio

export const AudioTranscript = ({voices, page}) => {
  const resolvedVoices = voices?.length ? voices : (() => {
    if (!page) return [];
    const baseUrl = 'https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio';
    const pageVoices = [{
      id: '8ef4a238714b45718ce04243307c57a7',
      name: 'E-girl'
    }, {
      id: '802e3bc2b27e49c2995d23ef70e6ac89',
      name: 'Energetic Male'
    }, {
      id: '933563129e564b19a115bedd57b7406a',
      name: 'Sarah'
    }, {
      id: 'bf322df2096a46f18c579d0baa36f41d',
      name: 'Adrian'
    }, {
      id: 'b347db033a6549378b48d00acb0d06cd',
      name: 'Selene'
    }, {
      id: '536d3a5e000945adb7038665781a4aca',
      name: 'Ethan'
    }];
    return pageVoices.map(voice => ({
      ...voice,
      url: `${baseUrl}/${page}/${voice.id}.mp3`
    }));
  })();
  const [selectedVoice, setSelectedVoice] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [currentTime, setCurrentTime] = useState(0);
  const [duration, setDuration] = useState(0);
  const [isDropdownOpen, setIsDropdownOpen] = useState(false);
  const audioRef = useRef(null);
  const dropdownRef = useRef(null);
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    const updateTime = () => setCurrentTime(audio.currentTime);
    const updateDuration = () => setDuration(audio.duration);
    const handleEnded = () => setIsPlaying(false);
    audio.addEventListener('timeupdate', updateTime);
    audio.addEventListener('loadedmetadata', updateDuration);
    audio.addEventListener('ended', handleEnded);
    return () => {
      audio.removeEventListener('timeupdate', updateTime);
      audio.removeEventListener('loadedmetadata', updateDuration);
      audio.removeEventListener('ended', handleEnded);
    };
  }, []);
  useEffect(() => {
    const handleClickOutside = event => {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target)) {
        setIsDropdownOpen(false);
      }
    };
    if (isDropdownOpen) {
      document.addEventListener('mousedown', handleClickOutside);
    }
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, [isDropdownOpen]);
  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.load();
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, [selectedVoice]);
  const togglePlay = () => {
    if (isPlaying) {
      audioRef.current.pause();
    } else {
      audioRef.current.play();
    }
    setIsPlaying(!isPlaying);
  };
  const handleProgressChange = e => {
    const newTime = parseFloat(e.target.value);
    audioRef.current.currentTime = newTime;
    setCurrentTime(newTime);
  };
  const formatTime = time => {
    if (isNaN(time)) return '0:00';
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };
  const currentVoice = resolvedVoices[selectedVoice];
  return <div className="border rounded-lg bg-card border-gray-200 dark:border-gray-800">
      {}
      <div className="grid grid-cols-3 items-center px-3 py-1.5 bg-muted border-b border-gray-200 dark:border-gray-800">
        <span className="text-xs font-medium">Listen to Page</span>

        <span className="text-xs font-semibold text-muted-foreground text-center">Powered by Fish Audio S2 Pro</span>

        {resolvedVoices.length > 1 ? <div className="relative justify-self-end" ref={dropdownRef}>
            <button onClick={() => setIsDropdownOpen(!isDropdownOpen)} className="flex items-center gap-1.5 px-3 py-1 rounded-full bg-muted hover:bg-gray-200 dark:hover:bg-gray-700 transition-all duration-200 cursor-pointer text-xs">
              <span className="text-muted-foreground">Voice:</span>
              <span className="font-medium">{resolvedVoices[selectedVoice]?.name}</span>
              <svg className={`w-3 h-3 transition-transform duration-200 ${isDropdownOpen ? 'rotate-180' : ''}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
              </svg>
            </button>

            {isDropdownOpen && <div className="absolute right-0 mt-1 w-auto bg-white dark:bg-black border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden z-50">
                {resolvedVoices.map((voice, index) => <button key={index} onClick={() => {
    setSelectedVoice(index);
    setIsDropdownOpen(false);
  }} className={`w-full px-3 py-1.5 text-left text-xs hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors flex items-center gap-2 ${index === selectedVoice ? 'bg-gray-100 dark:bg-gray-800 font-medium' : ''}`}>
                    {voice.id && <img src={`https://public-platform.r2.fish.audio/coverimage/${voice.id}`} alt={voice.name} className="w-5 h-5 rounded-full m-0 flex-shrink-0 object-cover" />}
                    <span className="flex-1 whitespace-nowrap">{voice.name}</span>
                  </button>)}
              </div>}
          </div> : <div className="justify-self-end" />}
      </div>

      {}
      <div className="px-3 py-1.5 bg-card">
        <audio ref={audioRef} src={currentVoice?.url} preload="metadata" />

        <div className="flex items-center gap-2">
          {}
          <button onClick={togglePlay} className="flex-shrink-0 w-6 h-6 flex items-center justify-center bg-gray-300 dark:bg-gray-600 text-gray-800 dark:text-gray-200 rounded-full hover:opacity-80 transition-opacity relative overflow-hidden" aria-label={isPlaying ? 'Pause' : 'Play'}>
            <div className="transition-transform duration-300 ease-in-out" style={{
    transform: isPlaying ? 'rotate(180deg)' : 'rotate(0deg)'
  }}>
              {isPlaying ? <svg className="w-3 h-3" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M6 4h4v16H6V4zm8 0h4v16h-4V4z" />
                </svg> : <svg className="w-3 h-3 ml-0.5" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M8 5v14l11-7z" />
                </svg>}
            </div>
          </button>

          {}
          <div className="flex-1 flex items-center gap-2">
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(currentTime)}
            </span>

            <div className="flex-1 relative h-1 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
              <div className="absolute top-0 left-0 h-full bg-gray-400 dark:bg-gray-500 transition-all duration-100" style={{
    width: `${duration ? currentTime / duration * 100 : 0}%`
  }} />
              <input type="range" min="0" max={duration || 0} value={currentTime} onChange={handleProgressChange} className="absolute top-0 left-0 w-full h-full opacity-0 cursor-pointer" />
            </div>
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(duration)}
            </span>
          </div>
        </div>
      </div>
    </div>;
};

## Overview

Create custom voice models to generate consistent, high-quality speech. You can create models through our web interface or programmatically via API.

## Web Interface

The easiest way to create a voice model:

<Steps>
  <Step title="Go to Fish Audio">
    Visit [fish.audio](https://fish.audio) and log in
  </Step>

  <Step title="Navigate to Models">
    Click on "Models" in your dashboard
  </Step>

  <Step title="Click Create Model">
    Select "Create New Model"
  </Step>

  <Step title="Upload Your Audio">
    Add 1 or more voice samples (at least 10 seconds each)
  </Step>

  <Step title="Configure Settings">
    Choose privacy settings and training options
  </Step>

  <Step title="Start Training">
    Click "Create" and wait for processing
  </Step>
</Steps>

## Using the API

### Using the SDK

Create models with the Python or JavaScript SDK:

<Tabs>
  <Tab title="Python">
    First, install the SDK:

    ```bash theme={null}
    pip install fish-audio-sdk
    ```

    Then create a model:

    ```python theme={null}
    from fish_audio_sdk import Session

    # Initialize session with your API key
    session = Session("your_api_key")

    # Create the model
    model = session.create_model(
        title="My Voice Model",
        description="Custom voice for storytelling",
        voices=[
            voice_file1.read(),
            voice_file2.read()
        ],
        cover_image=image_file.read()  # Optional
    )

    print(f"Model created: {model.id}")
    ```
  </Tab>

  <Tab title="JavaScript">
    First, install the SDK:

    ```bash theme={null}
    npm install fish-audio
    ```

    Then create a model:

    ```javascript theme={null}
    import { FishAudioClient } from "fish-audio";
    import { createReadStream } from "fs";

    const fishAudio = new FishAudioClient({ apiKey: process.env.FISH_API_KEY });

    const title = "My Voice Model";
    const audioFile1 = createReadStream("sample1.mp3");
    // Optionally add more samples:
    // const audioFile2 = createReadStream("sample2.wav");
    const coverImageFile = createReadStream("cover.png"); // optional

    try {
      const response = await fishAudio.voices.ivc.create({
        title,
        voices: [audioFile1],
        cover_image: coverImageFile,
        description: "Custom voice for storytelling",
        visibility: "private",
      });

      console.log("Voice created:", {
        id: response._id,
        title: response.title,
        state: response.state,
      });
    } catch (err) {
      console.error("Create voice request failed:", err);
    }
    ```
  </Tab>
</Tabs>

### Direct API

Create models directly using the REST API:

<Tabs>
  <Tab title="Python">
    ```python theme={null}
    import requests

    response = requests.post(
        "https://api.fish.audio/model",
        files=[
            ("voices", open("sample1.mp3", "rb")),
            ("voices", open("sample2.wav", "rb"))
        ],
        data=[
            ("title", "My Voice Model"),
            ("description", "Custom voice model"),
            ("visibility", "private"),
            ("type", "tts"),
            ("train_mode", "fast"),
            ("enhance_audio_quality", "true")
        ],
        headers={
            "Authorization": "Bearer YOUR_API_KEY"
        }
    )

    result = response.json()
    print(f"Model ID: {result['id']}")
    ```
  </Tab>

  <Tab title="JavaScript">
    ```javascript theme={null}
    import { readFile } from "fs/promises";

    const form = new FormData();
    form.append("title", "My Voice Model");
    form.append("description", "Custom voice model");
    form.append("visibility", "private");
    form.append("type", "tts");
    form.append("train_mode", "fast");
    form.append("enhance_audio_quality", "true");

    const v1 = await readFile("sample1.mp3");
    const v2 = await readFile("sample2.wav");
    form.append("voices", new File([v1], "sample1.mp3"));
    form.append("voices", new File([v2], "sample2.wav"));

    const res = await fetch("https://api.fish.audio/model", {
      method: "POST",
      headers: { Authorization: "Bearer <YOUR_API_KEY>" },
      body: form,
    });

    const result = await res.json();
    console.log("Model ID:", result.id);
    ```
  </Tab>
</Tabs>

## Model Settings

### Required Parameters

| Parameter         | Description                                                           | Type           | Options                 |
| ----------------- | --------------------------------------------------------------------- | -------------- | ----------------------- |
| **title**         | Name of your model                                                    | `string`       | Any text                |
| **voices**        | Audio samples                                                         | `Array<File>`  | .mp3, .wav, .m4a, .opus |
| **type**\*        | Model type                                                            | `enum<string>` | `tts`                   |
| **train\_mode**\* | Model train mode, fast means model instantly available after creation | `enum<string>` | `fast`                  |

\*Automatically set by Python and JavaScript SDKs

### Optional Parameters

| Parameter                   | Description                                        | Type            | Options                                              |
| --------------------------- | -------------------------------------------------- | --------------- | ---------------------------------------------------- |
| **visibility**              | Who can use your model                             | `enum<string>`  | `private`, `public`, `unlist`<br />`default: public` |
| **description**             | Model description                                  | `string`        | Any text                                             |
| **cover\_image**            | Model cover image, required if the model is public | `File`          | .jpg, .png                                           |
| **texts**                   | Transcripts of audio samples                       | `Array<string>` | Must match number of audio files                     |
| **tags**                    | Tags for your model                                | `string[]`      | Any text                                             |
| **enhance\_audio\_quality** | Remove background noise                            | `boolean`       | `true`, `false`<br />`default: false`                |

For detailed explanations view our [API reference](/api-reference/endpoint/model/create-model).

## Audio Requirements

### Quality Guidelines

**Minimum Requirements:**

* At least 1 audio sample
* 10+ seconds per sample

**Best Practices:**

* Use multiple diverse samples
* Use one consistent speaker throughout all samples
* Include different emotions and tones
* Record in a quiet environment
* Maintain steady volume

## Adding Transcripts

Including text transcripts improves model quality:

<Tabs>
  <Tab title="Python">
    ```python theme={null}
    response = requests.post(
        "https://api.fish.audio/model",
        files=[
            ("voices", open("hello.mp3", "rb")),
            ("voices", open("world.wav", "rb"))
        ],
        data=[
            ("title", "Enhanced Model"),
            ("texts", "Hello, this is my first recording."),
            ("texts", "Welcome to the world of AI voices."),
            # ... other parameters
        ],
        headers={"Authorization": "Bearer YOUR_API_KEY"}
    )
    ```
  </Tab>

  <Tab title="JavaScript">
    ```javascript theme={null}
    import { FishAudioClient } from "fish-audio";
    import { createReadStream } from "fs";

    const fishAudio = new FishAudioClient({ apiKey: process.env.FISH_API_KEY });

    const response = await fishAudio.voices.ivc.create({
      title: "Enhanced Model",
      voices: [
        createReadStream("hello.mp3"),
        createReadStream("world.wav"),
      ],
      texts: [
        "Hello, this is my first recording.",
        "Welcome to the world of AI voices.",
      ],
      // other optional fields:
      // visibility: "private",
      // enhance_audio_quality: true,
    });

    console.log("Model ID:", response._id);
    ```
  </Tab>
</Tabs>

<Note>
  Text transcripts must match the exact number of audio files. If you provide 3 audio files, you must provide exactly 3 text transcripts.
</Note>

## Using Your Model

Once training is complete:

<Tabs>
  <Tab title="Python">
    ```python theme={null}
    # Generate speech with your model
    response = requests.post(
        "https://api.fish.audio/v1/tts",
        json={
            "text": "Hello from my custom voice!",
            "model_id": model_id,
            "format": "mp3"
        },
        headers={"Authorization": "Bearer YOUR_API_KEY"}
    )

    # Save the audio
    with open("output.mp3", "wb") as f:
        f.write(response.content)
    ```
  </Tab>

  <Tab title="JavaScript">
    ```javascript theme={null}
    import { FishAudioClient } from "fish-audio";
    import { writeFile } from "fs/promises";

    const fishAudio = new FishAudioClient({ apiKey: process.env.FISH_API_KEY });

    const audio = await fishAudio.textToSpeech.convert({
      text: "Hello from my custom voice!",
      model_id: "your_model_id_here",
      format: "mp3",
    });

    const buffer = Buffer.from(await new Response(audio).arrayBuffer());
    await writeFile("output.mp3", buffer);
    console.log("✓ Audio saved to output.mp3");
    ```
  </Tab>
</Tabs>

## Troubleshooting

### Common Issues

**Model training fails:**

* Check audio quality and format
* Ensure single speaker in all samples
* Verify files are not corrupted

**Poor voice quality:**

* Add more diverse audio samples
* Enable audio enhancement
* Use higher quality recording

## Best Practices

1. **Start Simple:** Begin with 2-3 samples in fast mode to test
2. **Iterate:** Refine with more samples and quality mode
3. **Document:** Keep track of which samples work best
4. **Test Thoroughly:** Try different texts and emotions
5. **Privacy First:** Keep personal models private

## Support

Need help creating models?

* **API Documentation:** [Full API Reference](/api-reference/introduction)
* **Discord Community:** [Join our Discord](https://discord.gg/fish-audio)
* **Email Support:** [support@fish.audio](mailto:support@fish.audio)
