Dependencies
Add these packages to your pubspec.yaml, then run flutter pub get:
dependencies:
  web_socket_channel: ^2.4.0 # WebSocket client
  record: ^5.0.0 # Microphone capture (PCM)
  flutter_sound: ^9.0.0 # Audio playback (PCM stream)
  http: ^1.0.0 # HTTP client for token endpoint
1. Fetch Session Token
Call the token endpoint with your API key. The response contains the session token and the WebSocket URL to connect to.
import 'dart:convert';

import 'package:http/http.dart' as http;

class VoiceSessionApi {
  final String apiUrl;
  final String apiKey;

  VoiceSessionApi({required this.apiUrl, required this.apiKey});

  /// Exchanges the API key for a session token and the WebSocket URL.
  Future<({String token, String wsUrl})> getSessionToken() async {
    final response = await http.post(
      Uri.parse('$apiUrl/api/v1/sdk/token'),
      headers: {
        'Authorization': 'Bearer $apiKey',
        'Content-Type': 'application/json',
      },
    );

    if (response.statusCode != 200) {
      throw Exception('Token request failed: ${response.statusCode}');
    }

    final data = jsonDecode(response.body);
    return (
      token: data['token'] as String,
      wsUrl: data['ws_url'] as String,
    );
  }
}
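A quick usage sketch; the base URL and key below are placeholders, not real values:

Future<void> main() async {
  // Placeholder values for illustration only.
  final api = VoiceSessionApi(
    apiUrl: 'https://api.example.com',
    apiKey: 'YOUR_API_KEY',
  );

  final (:token, :wsUrl) = await api.getSessionToken();
  print('Got token (${token.length} chars); connect to $wsUrl');
}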
2. Connect WebSocket
Open the WebSocket and send the token as the first message. Handle text (JSON control) and binary (PCM audio) messages separately.
import 'dart:convert';
import 'dart:typed_data';

import 'package:web_socket_channel/web_socket_channel.dart';

class VoiceSession {
  VoiceSession({
    required this.onAgentReady,
    required this.onSessionEnded,
    required this.onError,
    required this.onAudioFrame,
  });

  final void Function() onAgentReady;
  final void Function() onSessionEnded;
  final void Function(String) onError;
  final void Function(Uint8List) onAudioFrame;

  WebSocketChannel? _channel;
  bool _isConnected = false;

  Future<void> connect(String wsUrl, String token) async {
    _channel = WebSocketChannel.connect(Uri.parse(wsUrl));
    await _channel!.ready;

    // First message must be the auth token.
    _channel!.sink.add(jsonEncode({'token': token}));

    _channel!.stream.listen(
      (message) {
        if (message is String) {
          // Text frames are JSON control messages.
          _handleControlMessage(jsonDecode(message) as Map<String, dynamic>);
        } else if (message is List<int>) {
          // Binary frames are raw PCM audio from the agent.
          onAudioFrame(Uint8List.fromList(message));
        }
      },
      onDone: () {
        _isConnected = false;
        onSessionEnded();
      },
      onError: (error) {
        _isConnected = false;
        onError(error.toString());
      },
    );
  }

  void _handleControlMessage(Map<String, dynamic> msg) {
    switch (msg['type']) {
      case 'connected':
        // Server accepted the token; audio may now be sent.
        _isConnected = true;
        break;
      case 'agent_ready':
        onAgentReady();
        break;
      case 'session_ended':
        _cleanup();
        break;
      case 'error':
        onError('${msg['code']}: ${msg['message']}');
        break;
    }
  }

  /// Sends one PCM frame; frames are dropped until the server confirms
  /// the connection.
  void sendAudio(Uint8List pcmFrame) {
    if (_isConnected) {
      _channel?.sink.add(pcmFrame);
    }
  }

  void disconnect() {
    _channel?.sink.close();
    _cleanup();
  }

  void _cleanup() {
    _isConnected = false;
    _channel = null;
  }
}
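For reference, the control messages the handler above expects look like the following. The type values come straight from _handleControlMessage; the error code and message are invented placeholders:

// Illustrative control payloads as Dart literals.
const exampleControlMessages = [
  {'type': 'connected'},
  {'type': 'agent_ready'},
  {'type': 'session_ended'},
  {'type': 'error', 'code': 'AUTH_FAILED', 'message': 'Invalid token'},
];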
3. Microphone Capture
Use the record package to capture PCM audio and buffer it into exact 640-byte frames.
import 'dart:async';
import 'dart:typed_data';

import 'package:record/record.dart';

class MicCapture {
  final AudioRecorder _recorder = AudioRecorder();
  StreamSubscription<Uint8List>? _subscription;

  Future<void> start({required void Function(Uint8List) onFrame}) async {
    if (!await _recorder.hasPermission()) {
      throw Exception('Microphone permission denied');
    }

    final stream = await _recorder.startStream(
      const RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000,
        numChannels: 1,
        autoGain: true,
        echoCancel: true,
        noiseSuppress: true,
      ),
    );

    // The recorder delivers chunks of arbitrary size; re-buffer them into
    // exact 640-byte frames, carrying any remainder into the next chunk.
    final buffer = BytesBuilder(copy: false);
    _subscription = stream.listen((data) {
      buffer.add(data);
      while (buffer.length >= 640) {
        final bytes = buffer.takeBytes();
        onFrame(bytes.sublist(0, 640));
        if (bytes.length > 640) {
          buffer.add(bytes.sublist(640));
        }
      }
    });
  }

  Future<void> stop() async {
    await _subscription?.cancel();
    await _recorder.stop();
  }
}
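For context, the 640-byte frame size follows directly from the capture settings above: at 16 kHz, 16-bit, mono, one frame is exactly 20 ms of audio.

// Where 640 comes from, given the RecordConfig above:
const sampleRate = 16000; // samples per second
const bytesPerSample = 2; // 16-bit PCM
const channels = 1; // mono
const frameMillis = 20; // frame duration

// 16000 * 2 * 1 * 20 ~/ 1000 == 640
const frameBytes = sampleRate * bytesPerSample * channels * frameMillis ~/ 1000;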
4. Audio Playback
Use the flutter_sound package to play raw PCM audio received from the WebSocket.
import 'dart:async';
import 'dart:typed_data';

import 'package:flutter_sound/flutter_sound.dart';

class AudioPlayer {
  final FlutterSoundPlayer _player = FlutterSoundPlayer();

  Future<void> start() async {
    await _player.openPlayer();
    await _player.startPlayerFromStream(
      codec: Codec.pcm16,
      sampleRate: 16000,
      numChannels: 1,
    );
  }

  /// Queues one PCM frame for playback.
  void feedFrame(Uint8List pcmData) {
    _player.foodSink?.add(FoodData(pcmData));
  }

  Future<void> stop() async {
    final sink = _player.foodSink;
    if (sink != null) {
      // FoodEvent's callback runs after everything queued before it has
      // been consumed, so queued audio finishes before teardown.
      final drained = Completer<void>();
      sink.add(FoodEvent(() async {
        await _player.stopPlayer();
        drained.complete();
      }));
      await drained.future;
    }
    await _player.closePlayer();
  }
}
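In the full flow, frames arrive from the WebSocket handler; a quick standalone check of the playback path might look like this. Call it from inside a running Flutter app, since flutter_sound needs platform channels; the silence frame is just for illustration:

import 'dart:typed_data';

Future<void> smokeTestPlayback() async {
  final player = AudioPlayer();
  await player.start();

  // One 20 ms frame of silence: 640 zero-valued bytes.
  player.feedFrame(Uint8List(640));

  await player.stop();
}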
5. Putting It All Together
class KioskVoiceAgent {
  final VoiceSessionApi _api;
  VoiceSession? _session;
  MicCapture? _mic;
  AudioPlayer? _player;

  KioskVoiceAgent({required String apiUrl, required String apiKey})
      : _api = VoiceSessionApi(apiUrl: apiUrl, apiKey: apiKey);

  Future<void> startSession() async {
    // 1. Exchange the API key for a session token and WebSocket URL.
    final (:token, :wsUrl) = await _api.getSessionToken();

    // 2. Start the playback pipeline before any audio arrives.
    _player = AudioPlayer();
    await _player!.start();

    // 3. Connect the WebSocket and route events to the other components.
    _session = VoiceSession(
      onAgentReady: () => print('Agent is ready'),
      onSessionEnded: () => stopSession(),
      onError: (e) => print('Error: $e'),
      onAudioFrame: (data) => _player!.feedFrame(data),
    );
    await _session!.connect(wsUrl, token);

    // 4. Stream microphone frames to the agent.
    _mic = MicCapture();
    await _mic!.start(onFrame: (frame) {
      _session!.sendAudio(frame);
    });
  }

  Future<void> stopSession() async {
    await _mic?.stop();
    await _player?.stop();
    _session?.disconnect();
  }
}
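A minimal sketch of driving the agent from a screen, assuming a single start/stop button; the widget layout and placeholder credentials are illustrative, not part of the SDK:

import 'package:flutter/material.dart';

class KioskScreen extends StatefulWidget {
  const KioskScreen({super.key});

  @override
  State<KioskScreen> createState() => _KioskScreenState();
}

class _KioskScreenState extends State<KioskScreen> {
  // Placeholder endpoint and key; substitute your own values.
  final _agent = KioskVoiceAgent(
    apiUrl: 'https://api.example.com',
    apiKey: 'YOUR_API_KEY',
  );
  bool _active = false;

  Future<void> _toggle() async {
    if (_active) {
      await _agent.stopSession();
    } else {
      await _agent.startSession();
    }
    setState(() => _active = !_active);
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      body: Center(
        child: ElevatedButton(
          onPressed: _toggle,
          child: Text(_active ? 'End Conversation' : 'Start Conversation'),
        ),
      ),
    );
  }
}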
6. Platform Setup
Android
<!-- android/app/src/main/AndroidManifest.xml -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.INTERNET" />
Request the microphone permission at runtime, for example with the permission_handler package (add it to pubspec.yaml first):
import 'package:permission_handler/permission_handler.dart';

final status = await Permission.microphone.request();
if (!status.isGranted) {
  throw Exception('Microphone permission required');
}
iOS
<!-- ios/Runner/Info.plist -->
<key>NSMicrophoneUsageDescription</key>
<string>Required for voice conversation with the AI assistant</string>
The flutter_sound package handles AVAudioSession configuration automatically.