> ## Documentation Index
> Fetch the complete documentation index at: https://docs.fish.audio/llms.txt
> Use this file to discover all available pages before exploring further.

# WebSocket Streaming

> Stream text-to-speech in real-time with WebSocket connections

export const AudioTranscript = ({voices, page}) => {
  const resolvedVoices = voices?.length ? voices : (() => {
    if (!page) return [];
    const baseUrl = 'https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio';
    const pageVoices = [{
      id: '8ef4a238714b45718ce04243307c57a7',
      name: 'E-girl'
    }, {
      id: '802e3bc2b27e49c2995d23ef70e6ac89',
      name: 'Energetic Male'
    }, {
      id: '933563129e564b19a115bedd57b7406a',
      name: 'Sarah'
    }, {
      id: 'bf322df2096a46f18c579d0baa36f41d',
      name: 'Adrian'
    }, {
      id: 'b347db033a6549378b48d00acb0d06cd',
      name: 'Selene'
    }, {
      id: '536d3a5e000945adb7038665781a4aca',
      name: 'Ethan'
    }];
    return pageVoices.map(voice => ({
      ...voice,
      url: `${baseUrl}/${page}/${voice.id}.mp3`
    }));
  })();
  const [selectedVoice, setSelectedVoice] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [currentTime, setCurrentTime] = useState(0);
  const [duration, setDuration] = useState(0);
  const [isDropdownOpen, setIsDropdownOpen] = useState(false);
  const audioRef = useRef(null);
  const dropdownRef = useRef(null);
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    const updateTime = () => setCurrentTime(audio.currentTime);
    const updateDuration = () => setDuration(audio.duration);
    const handleEnded = () => setIsPlaying(false);
    audio.addEventListener('timeupdate', updateTime);
    audio.addEventListener('loadedmetadata', updateDuration);
    audio.addEventListener('ended', handleEnded);
    return () => {
      audio.removeEventListener('timeupdate', updateTime);
      audio.removeEventListener('loadedmetadata', updateDuration);
      audio.removeEventListener('ended', handleEnded);
    };
  }, []);
  useEffect(() => {
    const handleClickOutside = event => {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target)) {
        setIsDropdownOpen(false);
      }
    };
    if (isDropdownOpen) {
      document.addEventListener('mousedown', handleClickOutside);
    }
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, [isDropdownOpen]);
  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.load();
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, [selectedVoice]);
  const togglePlay = () => {
    if (isPlaying) {
      audioRef.current.pause();
    } else {
      audioRef.current.play();
    }
    setIsPlaying(!isPlaying);
  };
  const handleProgressChange = e => {
    const newTime = parseFloat(e.target.value);
    audioRef.current.currentTime = newTime;
    setCurrentTime(newTime);
  };
  const formatTime = time => {
    if (isNaN(time)) return '0:00';
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };
  const currentVoice = resolvedVoices[selectedVoice];
  return <div className="border rounded-lg bg-card border-gray-200 dark:border-gray-800">
      {}
      <div className="grid grid-cols-3 items-center px-3 py-1.5 bg-muted border-b border-gray-200 dark:border-gray-800">
        <span className="text-xs font-medium">Listen to Page</span>

        <span className="text-xs font-semibold text-muted-foreground text-center">Powered by Fish Audio S2 Pro</span>

        {resolvedVoices.length > 1 ? <div className="relative justify-self-end" ref={dropdownRef}>
            <button onClick={() => setIsDropdownOpen(!isDropdownOpen)} className="flex items-center gap-1.5 px-3 py-1 rounded-full bg-muted hover:bg-gray-200 dark:hover:bg-gray-700 transition-all duration-200 cursor-pointer text-xs">
              <span className="text-muted-foreground">Voice:</span>
              <span className="font-medium">{resolvedVoices[selectedVoice]?.name}</span>
              <svg className={`w-3 h-3 transition-transform duration-200 ${isDropdownOpen ? 'rotate-180' : ''}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
              </svg>
            </button>

            {isDropdownOpen && <div className="absolute right-0 mt-1 w-auto bg-white dark:bg-black border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden z-50">
                {resolvedVoices.map((voice, index) => <button key={index} onClick={() => {
    setSelectedVoice(index);
    setIsDropdownOpen(false);
  }} className={`w-full px-3 py-1.5 text-left text-xs hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors flex items-center gap-2 ${index === selectedVoice ? 'bg-gray-100 dark:bg-gray-800 font-medium' : ''}`}>
                    {voice.id && <img src={`https://public-platform.r2.fish.audio/coverimage/${voice.id}`} alt={voice.name} className="w-5 h-5 rounded-full m-0 flex-shrink-0 object-cover" />}
                    <span className="flex-1 whitespace-nowrap">{voice.name}</span>
                  </button>)}
              </div>}
          </div> : <div className="justify-self-end" />}
      </div>

      {}
      <div className="px-3 py-1.5 bg-card">
        <audio ref={audioRef} src={currentVoice?.url} preload="metadata" />

        <div className="flex items-center gap-2">
          {}
          <button onClick={togglePlay} className="flex-shrink-0 w-6 h-6 flex items-center justify-center bg-gray-300 dark:bg-gray-600 text-gray-800 dark:text-gray-200 rounded-full hover:opacity-80 transition-opacity relative overflow-hidden" aria-label={isPlaying ? 'Pause' : 'Play'}>
            <div className="transition-transform duration-300 ease-in-out" style={{
    transform: isPlaying ? 'rotate(180deg)' : 'rotate(0deg)'
  }}>
              {isPlaying ? <svg className="w-3 h-3" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M6 4h4v16H6V4zm8 0h4v16h-4V4z" />
                </svg> : <svg className="w-3 h-3 ml-0.5" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M8 5v14l11-7z" />
                </svg>}
            </div>
          </button>

          {}
          <div className="flex-1 flex items-center gap-2">
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(currentTime)}
            </span>

            <div className="flex-1 relative h-1 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
              <div className="absolute top-0 left-0 h-full bg-gray-400 dark:bg-gray-500 transition-all duration-100" style={{
    width: `${duration ? currentTime / duration * 100 : 0}%`
  }} />
              <input type="range" min="0" max={duration || 0} value={currentTime} onChange={handleProgressChange} className="absolute top-0 left-0 w-full h-full opacity-0 cursor-pointer" />
            </div>
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(duration)}
            </span>
          </div>
        </div>
      </div>
    </div>;
};

## Prerequisites

<AccordionGroup>
  <Accordion icon="user-plus" title="Create a Fish Audio account">
    Sign up for a free Fish Audio account to get started with our API.

    1. Go to [fish.audio/auth/signup](https://fish.audio/auth/signup)
    2. Fill in your details to create an account, then complete the steps to verify it.
    3. Log in to your account and navigate to the [API section](https://fish.audio/app/api-keys)
  </Accordion>

  <Accordion icon="key" title="Get your API key">
    Once you have an account, you'll need an API key to authenticate your requests.

    1. Log in to your [Fish Audio Dashboard](https://fish.audio/app/api-keys/)
    2. Navigate to the API Keys section
    3. Click "Create New Key" and give it a descriptive name; set an expiration date if desired
    4. Copy your key and store it securely

    <Warning>Keep your API key secret! Never commit it to version control or share it publicly.</Warning>
  </Accordion>
</AccordionGroup>

## Overview

Use [`stream_websocket()`](/api-reference/sdk/python/resources#stream_websocket) for real-time text streaming with LLMs and live captions. The connection automatically buffers incoming text and generates audio as it becomes available.

## Basic Usage

Stream text chunks and receive audio in real-time:

<CodeGroup>
  ```python Synchronous focus={5-17} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Define text generator
  def text_chunks():
      yield "Hello, "
      yield "this is "
      yield "real-time "
      yield "streaming!"

  # Stream audio via WebSocket
  audio_stream = client.tts.stream_websocket(
      text_chunks(),
      latency="balanced"  # Use "balanced" for real-time, "normal" for quality
  )

  # Play streamed audio
  play(audio_stream)
  ```

  ```python Asynchronous focus={8-20} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Define async text generator
      async def text_chunks():
          yield "Hello, "
          yield "this is "
          yield "real-time "
          yield "streaming!"

      # Stream audio via WebSocket
      audio_stream = await client.tts.stream_websocket(
          text_chunks(),
          latency="balanced"  # Use "balanced" for real-time, "normal" for quality
      )

      # Play streamed audio
      play(audio_stream)

  asyncio.run(main())
  ```
</CodeGroup>

<Note>
  For details on audio formats, voice selection, and advanced configuration options like `TTSConfig`, see the [Text-to-Speech guide](/developer-guide/sdk-guide/python/text-to-speech).
</Note>

## Using FlushEvent

Force immediate audio generation to create pauses using [`FlushEvent`](/api-reference/sdk/python/types#flushevent-objects):

<CodeGroup>
  ```python Synchronous focus={6-12} theme={null}
  from fishaudio import FishAudio
  from fishaudio.types import FlushEvent

  client = FishAudio()

  def text_with_flush():
      yield "First sentence. "
      yield "Second sentence. "
      yield FlushEvent()  # Forces generation NOW
      yield "Third sentence."

  audio_stream = client.tts.stream_websocket(text_with_flush())
  ```

  ```python Asynchronous focus={8-14} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.types import FlushEvent

  async def main():
      client = AsyncFishAudio()

      async def text_with_flush():
          yield "First sentence. "
          yield "Second sentence. "
          yield FlushEvent()  # Forces generation NOW
          yield "Third sentence."

      audio_stream = await client.tts.stream_websocket(text_with_flush())

  asyncio.run(main())
  ```
</CodeGroup>

<Note>
  See [Text-to-Speech guide](/developer-guide/sdk-guide/python/text-to-speech#understanding-flushevent) for detailed FlushEvent usage and advanced examples.
</Note>

## LLM Integration

WebSocket streaming is designed for integrating with LLM streaming responses. The TTS engine automatically buffers incoming text chunks and generates audio when it has enough context for natural speech:

<CodeGroup>
  ```python Synchronous focus={5-21} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Simulate streaming LLM response
  def llm_stream():
      """Simulates text chunks from an LLM."""
      tokens = [
          "The ", "weather ", "today ", "is ", "sunny ",
          "with ", "clear ", "skies. ", "Perfect ",
          "for ", "outdoor ", "activities!"
      ]
      for token in tokens:
          yield token

  # Stream to speech in real-time
  audio_stream = client.tts.stream_websocket(
      llm_stream(),
      latency="balanced"
  )
  play(audio_stream)
  ```

  ```python Asynchronous focus={7-23} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Simulate streaming LLM response
      async def llm_stream():
          """Simulates text chunks from an LLM."""
          tokens = [
              "The ", "weather ", "today ", "is ", "sunny ",
              "with ", "clear ", "skies. ", "Perfect ",
              "for ", "outdoor ", "activities!"
          ]
          for token in tokens:
              yield token

      # Stream to speech in real-time
      audio_stream = await client.tts.stream_websocket(
          llm_stream(),
          latency="balanced"
      )
      play(audio_stream)

  asyncio.run(main())
  ```
</CodeGroup>

<Note>
  The WebSocket connection automatically buffers incoming text and generates audio when it has accumulated enough context for natural-sounding speech. You don't need to manually batch tokens unless you want to force generation at specific points using `FlushEvent`.
</Note>

## Next Steps

<CardGroup cols={2}>
  <Card title="Text-to-Speech" icon="microphone" href="/developer-guide/sdk-guide/python/text-to-speech">
    Learn about non-streaming TTS options, audio formats, TextEvent vs plain strings, and advanced configuration
  </Card>

  <Card title="Voice Cloning" icon="clone" href="/developer-guide/sdk-guide/python/voice-cloning">
    Use custom voices in streams and learn about voice selection
  </Card>

  <Card title="TTS API Reference" icon="book" href="/api-reference/sdk/python/resources#tts">
    Complete streaming API documentation
  </Card>

  <Card title="Best Practices" icon="lightbulb" href="/developer-guide/best-practices/">
    Production streaming optimization
  </Card>
</CardGroup>

## Related Resources

* [WebSocket Types](/api-reference/sdk/python/types#tts) - TextEvent, FlushEvent, and more
* [Utils Reference](/api-reference/sdk/python/utils) - Audio playback utilities
* [Error Handling](/api-reference/sdk/python/exceptions) - WebSocket exception handling
* [Fine-grained Control](/developer-guide/core-features/fine-grained-control) - Advanced speech control
