Skip to main content

Dependencies

Add these packages to your pubspec.yaml:
dependencies:
  web_socket_channel: ^2.4.0   # WebSocket client
  record: ^5.0.0               # Microphone capture (PCM)
  flutter_sound: ^9.0.0        # Audio playback (PCM stream)
  http: ^1.0.0                 # HTTP client for token endpoint
  permission_handler: ^11.0.0  # Runtime mic permission (see Platform Setup)

1. Fetch Session Token

Call the token endpoint with your API key.
import 'dart:convert';
import 'package:http/http.dart' as http;

/// Client for the HTTP endpoint that issues voice session tokens.
class VoiceSessionApi {
  /// Base URL of the API.
  ///
  /// `/api/v1/sdk/token` is appended verbatim, so this should not end with a
  /// trailing slash.
  final String apiUrl;

  /// API key sent as a bearer token in the `Authorization` header.
  final String apiKey;

  /// Creates a client for the API at [apiUrl] authenticated with [apiKey].
  VoiceSessionApi({required this.apiUrl, required this.apiKey});

  /// Requests a session token and the WebSocket URL to use it with.
  ///
  /// Returns the `token` and `ws_url` fields of the JSON response.
  ///
  /// Throws an [Exception] when the server answers with a status other than
  /// 200, and a [FormatException] when the body is not valid JSON or does not
  /// contain both fields as strings.
  Future<({String token, String wsUrl})> getSessionToken() async {
    final response = await http.post(
      Uri.parse('$apiUrl/api/v1/sdk/token'),
      headers: {
        'Authorization': 'Bearer $apiKey',
        'Content-Type': 'application/json',
      },
    );

    if (response.statusCode != 200) {
      throw Exception('Token request failed: ${response.statusCode}');
    }

    // Validate the payload shape explicitly: an unchecked `as String` cast
    // would surface a missing or mistyped field as an opaque TypeError.
    final data = jsonDecode(response.body);
    if (data case {
      'token': final String token,
      'ws_url': final String wsUrl,
    }) {
      return (token: token, wsUrl: wsUrl);
    }
    throw const FormatException(
      'Token response is missing a string "token" or "ws_url" field',
    );
  }
}

2. Connect WebSocket

Open the WebSocket and send the token as the first message. Handle text (JSON control) and binary (PCM audio) messages separately.
import 'dart:convert';
import 'dart:typed_data';
import 'package:web_socket_channel/web_socket_channel.dart';

/// A voice session carried over a single WebSocket connection.
///
/// Text frames are treated as JSON control messages and binary frames as PCM
/// audio. Events are reported through the optional callbacks passed to the
/// constructor.
class VoiceSession {
  /// Creates a session that reports its events through the given callbacks.
  ///
  /// Every callback is optional; events without a callback are ignored.
  VoiceSession({
    this.onAgentReady,
    this.onSessionEnded,
    this.onError,
    this.onAudioFrame,
    this.onConnected,
  });

  /// Called when an `agent_ready` control message is received.
  final void Function()? onAgentReady;

  /// Called when a `session_ended` control message is received or the socket
  /// closes on its own.
  ///
  /// Not called for a close requested locally through [disconnect].
  final void Function()? onSessionEnded;

  /// Called with a description of a socket error, an `error` control message,
  /// or a control message that could not be parsed.
  final void Function(String message)? onError;

  /// Called with each binary frame received from the socket.
  final void Function(Uint8List pcmFrame)? onAudioFrame;

  /// Called when a `connected` control message is received; from then on
  /// [sendAudio] forwards frames instead of dropping them.
  final void Function()? onConnected;

  WebSocketChannel? _channel;
  bool _isConnected = false;

  /// Opens the WebSocket at [wsUrl] and authenticates with [token].
  ///
  /// Any connection previously opened by this object is closed first.
  Future<void> connect(String wsUrl, String token) async {
    // Drop a previous connection so its socket is not leaked.
    _cleanup();

    final channel = WebSocketChannel.connect(Uri.parse(wsUrl));
    _channel = channel;

    // First message must be the auth token
    channel.sink.add(jsonEncode({'token': token}));

    // Each callback checks that `channel` is still the active one, so events
    // from a connection that was already torn down are ignored.
    channel.stream.listen(
      (message) {
        if (identical(_channel, channel)) {
          _handleMessage(message);
        }
      },
      onDone: () {
        if (identical(_channel, channel)) {
          _onSessionEnded();
        }
      },
      onError: (Object error) {
        if (identical(_channel, channel)) {
          _isConnected = false;
          _onError('$error');
        }
      },
    );
  }

  /// Routes a raw socket message to the control or audio handler.
  void _handleMessage(Object? message) {
    if (message is String) {
      Object? decoded;
      try {
        decoded = jsonDecode(message);
      } on FormatException catch (e) {
        // Report bad JSON instead of letting it escape as an uncaught
        // asynchronous error inside the stream listener.
        _onError('Malformed control message: ${e.message}');
        return;
      }
      if (decoded is Map<String, dynamic>) {
        _handleControlMessage(decoded);
      } else {
        _onError('Unexpected control message: $message');
      }
    } else if (message is List<int>) {
      // Avoid a copy when the frame already is a Uint8List.
      onAudioFrame?.call(
        message is Uint8List ? message : Uint8List.fromList(message),
      );
    }
  }

  /// Dispatches a decoded control message on its `type` field.
  ///
  /// Messages with an unrecognized `type` are ignored.
  void _handleControlMessage(Map<String, dynamic> msg) {
    switch (msg['type']) {
      case 'connected':
        _isConnected = true;
        onConnected?.call();
      case 'agent_ready':
        onAgentReady?.call();
      case 'session_ended':
        _onSessionEnded();
      case 'error':
        _onError('${msg['code']}: ${msg['message']}');
    }
  }

  /// Sends one PCM frame to the server.
  ///
  /// Frames are silently dropped until a `connected` control message has been
  /// received and after the connection has ended.
  void sendAudio(Uint8List pcmFrame) {
    if (_isConnected) {
      _channel?.sink.add(pcmFrame);
    }
  }

  /// Closes the connection without invoking [onSessionEnded].
  ///
  /// Safe to call when no connection is open.
  void disconnect() {
    _cleanup();
  }

  /// Tears the connection down, then notifies [onSessionEnded].
  ///
  /// Teardown happens first so that a [disconnect] issued from inside the
  /// callback finds nothing left to close.
  void _onSessionEnded() {
    _cleanup();
    onSessionEnded?.call();
  }

  void _onError(String message) => onError?.call(message);

  /// Marks the session as disconnected and closes the socket, if any.
  void _cleanup() {
    _isConnected = false;
    final channel = _channel;
    _channel = null;
    channel?.sink.close();
  }
}

3. Microphone Capture

Use the record package to capture PCM audio and buffer it into exact 640-byte frames. At 16 kHz, 16-bit mono, 640 bytes is 320 samples, i.e. 20 ms of audio per frame.
import 'dart:async';
import 'dart:typed_data';
import 'package:record/record.dart';

/// Captures microphone audio and delivers it as fixed-size PCM frames.
class MicCapture {
  /// Size in bytes of every frame passed to `onFrame`.
  ///
  /// With the 16 kHz, mono, 16-bit configuration used by [start] this is
  /// 320 samples, i.e. 20 ms of audio.
  static const int frameBytes = 640;

  final AudioRecorder _recorder = AudioRecorder();
  StreamSubscription<Uint8List>? _subscription;

  /// Starts recording and calls [onFrame] with each complete frame.
  ///
  /// Every frame is a freshly allocated list of exactly [frameBytes] bytes.
  /// Bytes that do not yet fill a frame are held back until more audio
  /// arrives.
  ///
  /// Throws an [Exception] when microphone permission is not granted.
  Future<void> start({required void Function(Uint8List) onFrame}) async {
    if (!await _recorder.hasPermission()) {
      throw Exception('Microphone permission denied');
    }

    final stream = await _recorder.startStream(
      const RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000,
        numChannels: 1,
        autoGain: true,
        echoCancel: true,
        noiseSuppress: true,
      ),
    );

    // Bytes left over from the previous chunk; always shorter than one frame.
    var pending = Uint8List(0);

    _subscription = stream.listen((chunk) {
      // Prepend the leftover once, then slice frames by offset. This copies
      // each byte a bounded number of times instead of re-buffering the
      // whole remainder after every emitted frame.
      final Uint8List data;
      if (pending.isEmpty) {
        data = chunk;
      } else {
        data = Uint8List(pending.length + chunk.length)
          ..setAll(0, pending)
          ..setAll(pending.length, chunk);
      }

      var offset = 0;
      while (data.length - offset >= frameBytes) {
        // sublist copies, so the callback owns the frame it receives.
        onFrame(data.sublist(offset, offset + frameBytes));
        offset += frameBytes;
      }
      pending = data.sublist(offset);
    });
  }

  /// Stops delivering frames and stops the recorder.
  ///
  /// A partial frame that is still buffered is discarded.
  Future<void> stop() async {
    await _subscription?.cancel();
    _subscription = null;
    await _recorder.stop();
  }

  /// Stops capture and releases the underlying recorder.
  ///
  /// NOTE(review): the instance is presumably unusable afterwards; confirm
  /// against the record package documentation.
  Future<void> dispose() async {
    await stop();
    await _recorder.dispose();
  }
}

4. Audio Playback

Use flutter_sound to play raw PCM audio received from the WebSocket.
import 'dart:typed_data';
import 'package:flutter_sound/flutter_sound.dart';

/// Plays raw 16 kHz, mono, 16-bit PCM audio fed to it frame by frame.
class AudioPlayer {
  final FlutterSoundPlayer _player = FlutterSoundPlayer();

  /// Whether the player has been opened and not yet closed by [stop].
  bool _isOpen = false;

  /// Opens the player and starts streaming playback.
  Future<void> start() async {
    await _player.openPlayer();
    // Set as soon as the player is open so that stop() still closes it when
    // starting the stream fails.
    _isOpen = true;
    await _player.startPlayerFromStream(
      codec: Codec.pcm16,
      sampleRate: 16000,
      numChannels: 1,
    );
  }

  /// Queues [pcmData] for playback.
  ///
  /// Frames are dropped when the player is not open.
  void feedFrame(Uint8List pcmData) {
    if (!_isOpen) return;
    _player.foodSink?.add(FoodData(pcmData));
  }

  /// Stops playback and closes the player.
  ///
  /// Does nothing when the player is not open, so it is safe to call more
  /// than once.
  Future<void> stop() async {
    if (!_isOpen) return;
    _isOpen = false;
    // Stop and await directly rather than queueing a stop event on the food
    // sink: the queued event was never awaited, so the player was closed
    // while the stop was still pending.
    try {
      await _player.stopPlayer();
    } finally {
      await _player.closePlayer();
    }
  }
}

5. Putting It All Together

/// Wires the token API, WebSocket session, microphone and audio player
/// together into one voice conversation.
class KioskVoiceAgent {
  final VoiceSessionApi _api;
  VoiceSession? _session;
  MicCapture? _mic;
  AudioPlayer? _player;

  /// Creates an agent that requests session tokens from [apiUrl] using
  /// [apiKey].
  KioskVoiceAgent({required String apiUrl, required String apiKey})
      : _api = VoiceSessionApi(apiUrl: apiUrl, apiKey: apiKey);

  /// Starts a new voice session.
  ///
  /// A session that is still running is stopped first. If any step fails,
  /// everything started so far is torn down and the error is rethrown.
  Future<void> startSession() async {
    // Do not leak the mic, player and socket of a previous session.
    await stopSession();

    final (:token, :wsUrl) = await _api.getSessionToken();

    try {
      final player = AudioPlayer();
      _player = player;
      await player.start();

      // The callbacks capture the local `player`/`session` rather than the
      // nullable fields, so an event arriving after teardown cannot hit a
      // failed null assertion.
      final session = VoiceSession(
        onAgentReady: () => print('Agent is ready'),
        onSessionEnded: () {
          stopSession();
        },
        onError: (e) => print('Error: $e'),
        onAudioFrame: player.feedFrame,
      );
      _session = session;
      await session.connect(wsUrl, token);

      final mic = MicCapture();
      _mic = mic;
      await mic.start(onFrame: session.sendAudio);
    } catch (_) {
      await stopSession();
      rethrow;
    }
  }

  /// Stops microphone capture, then playback, then closes the session.
  ///
  /// Safe to call repeatedly and while no session is running: the fields are
  /// cleared before anything is awaited, so a re-entrant call (for example
  /// from the session-ended callback) finds nothing left to stop.
  Future<void> stopSession() async {
    final mic = _mic;
    final player = _player;
    final session = _session;
    _mic = null;
    _player = null;
    _session = null;

    // Nested finally blocks keep one failing step from skipping the others.
    try {
      await mic?.stop();
    } finally {
      try {
        await player?.stop();
      } finally {
        session?.disconnect();
      }
    }
  }
}

6. Platform Setup

Android

<!-- android/app/src/main/AndroidManifest.xml -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.INTERNET" />
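Request the runtime permission before starting mic capture. This snippet uses the permission_handler package, which must also be listed under dependencies in pubspec.yaml: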
import 'package:permission_handler/permission_handler.dart';

// Ask for microphone access and wait for the resulting permission status.
// This uses `await`, so it must run inside an async function.
final status = await Permission.microphone.request();
// Anything other than a granted status aborts with an exception, so the
// code that follows only runs once microphone access has been granted.
if (!status.isGranted) {
  throw Exception('Microphone permission required');
}

iOS

<!-- ios/Runner/Info.plist -->
<key>NSMicrophoneUsageDescription</key>
<string>Required for voice conversation with the AI assistant</string>
The flutter_sound package handles AVAudioSession configuration automatically.