Dependencies
Add these packages to your pubspec.yaml, then run flutter pub get:
dependencies:
  web_socket_channel: ^2.4.0 # WebSocket client
  record: ^5.0.0 # Microphone capture (PCM)
  flutter_sound: ^9.0.0 # Audio playback (PCM stream)
  http: ^1.0.0 # HTTP client for token endpoint
1. Fetch Session Token
Call the token endpoint with your API key. The response contains the session token and the WebSocket URL to connect to.
import 'dart:convert';

import 'package:http/http.dart' as http;

class VoiceSessionApi {
  final String apiUrl;
  final String apiKey;

  VoiceSessionApi({required this.apiUrl, required this.apiKey});

  /// Exchanges the API key for a session token and the WebSocket URL.
  Future<({String token, String wsUrl})> getSessionToken() async {
    final response = await http.post(
      Uri.parse('$apiUrl/api/v1/sdk/token'),
      headers: {
        'Authorization': 'Bearer $apiKey',
        'Content-Type': 'application/json',
      },
    );

    if (response.statusCode != 200) {
      throw Exception('Token request failed: ${response.statusCode}');
    }

    final data = jsonDecode(response.body);
    return (
      token: data['token'] as String,
      wsUrl: data['ws_url'] as String,
    );
  }
}
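A quick usage sketch; the base URL and key below are placeholders, not real values:

Future<void> main() async {
  // Placeholder values for illustration only.
  final api = VoiceSessionApi(
    apiUrl: 'https://api.example.com',
    apiKey: 'YOUR_API_KEY',
  );

  final (:token, :wsUrl) = await api.getSessionToken();
  print('Got token (${token.length} chars); connect to $wsUrl');
}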
2. Connect WebSocket
Open the WebSocket and send the token as the first message. Handle text (JSON control) and binary (PCM audio) messages separately.
import 'dart:convert';
import 'dart:typed_data';

import 'package:web_socket_channel/web_socket_channel.dart';

class VoiceSession {
  VoiceSession({
    required this.onAgentReady,
    required this.onSessionEnded,
    required this.onError,
    required this.onAudioFrame,
  });

  final void Function() onAgentReady;
  final void Function() onSessionEnded;
  final void Function(String) onError;
  final void Function(Uint8List) onAudioFrame;

  WebSocketChannel? _channel;
  bool _isConnected = false;

  Future<void> connect(String wsUrl, String token) async {
    _channel = WebSocketChannel.connect(Uri.parse(wsUrl));
    await _channel!.ready;

    // First message must be the auth token.
    _channel!.sink.add(jsonEncode({'token': token}));

    _channel!.stream.listen(
      (message) {
        if (message is String) {
          // Text frames are JSON control messages.
          _handleControlMessage(jsonDecode(message) as Map<String, dynamic>);
        } else if (message is List<int>) {
          // Binary frames are raw PCM audio from the agent.
          onAudioFrame(Uint8List.fromList(message));
        }
      },
      onDone: () {
        _isConnected = false;
        onSessionEnded();
      },
      onError: (error) {
        _isConnected = false;
        onError(error.toString());
      },
    );
  }

  void _handleControlMessage(Map<String, dynamic> msg) {
    switch (msg['type']) {
      case 'connected':
        // Server accepted the token; audio may now be sent.
        _isConnected = true;
        break;
      case 'agent_ready':
        onAgentReady();
        break;
      case 'session_ended':
        _cleanup();
        break;
      case 'error':
        onError('${msg['code']}: ${msg['message']}');
        break;
    }
  }

  /// Sends one PCM frame; frames are dropped until the server confirms
  /// the connection.
  void sendAudio(Uint8List pcmFrame) {
    if (_isConnected) {
      _channel?.sink.add(pcmFrame);
    }
  }

  void disconnect() {
    _channel?.sink.close();
    _cleanup();
  }

  void _cleanup() {
    _isConnected = false;
    _channel = null;
  }
}
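For reference, the control messages the handler above expects look like the following. The type values come straight from _handleControlMessage; the error code and message are invented placeholders:

// Illustrative control payloads as Dart literals.
const exampleControlMessages = [
  {'type': 'connected'},
  {'type': 'agent_ready'},
  {'type': 'session_ended'},
  {'type': 'error', 'code': 'AUTH_FAILED', 'message': 'Invalid token'},
];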
3. Microphone Capture
Use the record package to capture PCM audio and buffer it into exact 640-byte frames.
import 'dart:async';
import 'dart:typed_data';

import 'package:record/record.dart';

class MicCapture {
  final AudioRecorder _recorder = AudioRecorder();
  StreamSubscription<Uint8List>? _subscription;

  Future<void> start({required void Function(Uint8List) onFrame}) async {
    if (!await _recorder.hasPermission()) {
      throw Exception('Microphone permission denied');
    }

    final stream = await _recorder.startStream(
      const RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000,
        numChannels: 1,
        autoGain: true,
        echoCancel: true,
        noiseSuppress: true,
      ),
    );

    // The recorder delivers chunks of arbitrary size; re-buffer them into
    // exact 640-byte frames, carrying any remainder into the next chunk.
    final buffer = BytesBuilder(copy: false);
    _subscription = stream.listen((data) {
      buffer.add(data);
      while (buffer.length >= 640) {
        final bytes = buffer.takeBytes();
        onFrame(bytes.sublist(0, 640));
        if (bytes.length > 640) {
          buffer.add(bytes.sublist(640));
        }
      }
    });
  }

  Future<void> stop() async {
    await _subscription?.cancel();
    await _recorder.stop();
  }
}
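For context, the 640-byte frame size follows directly from the capture settings above: at 16 kHz, 16-bit, mono, one frame is exactly 20 ms of audio.

// Where 640 comes from, given the RecordConfig above:
const sampleRate = 16000; // samples per second
const bytesPerSample = 2; // 16-bit PCM
const channels = 1; // mono
const frameMillis = 20; // frame duration

// 16000 * 2 * 1 * 20 ~/ 1000 == 640
const frameBytes = sampleRate * bytesPerSample * channels * frameMillis ~/ 1000;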
4. Audio Playback
Use the flutter_sound package to play raw PCM audio received from the WebSocket.
import 'dart:async';
import 'dart:typed_data';

import 'package:flutter_sound/flutter_sound.dart';

class AudioPlayer {
  final FlutterSoundPlayer _player = FlutterSoundPlayer();

  Future<void> start() async {
    await _player.openPlayer();
    await _player.startPlayerFromStream(
      codec: Codec.pcm16,
      sampleRate: 16000,
      numChannels: 1,
    );
  }

  /// Queues one PCM frame for playback.
  void feedFrame(Uint8List pcmData) {
    _player.foodSink?.add(FoodData(pcmData));
  }

  Future<void> stop() async {
    final sink = _player.foodSink;
    if (sink != null) {
      // FoodEvent's callback runs after everything queued before it has
      // been consumed, so queued audio finishes before teardown.
      final drained = Completer<void>();
      sink.add(FoodEvent(() async {
        await _player.stopPlayer();
        drained.complete();
      }));
      await drained.future;
    }
    await _player.closePlayer();
  }
}
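In the full flow, frames arrive from the WebSocket handler; a quick standalone check of the playback path might look like this. Call it from inside a running Flutter app, since flutter_sound needs platform channels; the silence frame is just for illustration:

import 'dart:typed_data';

Future<void> smokeTestPlayback() async {
  final player = AudioPlayer();
  await player.start();

  // One 20 ms frame of silence: 640 zero-valued bytes.
  player.feedFrame(Uint8List(640));

  await player.stop();
}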
5. Putting It All Together
class KioskVoiceAgent {
  final VoiceSessionApi _api;
  VoiceSession? _session;
  MicCapture? _mic;
  AudioPlayer? _player;

  KioskVoiceAgent({required String apiUrl, required String apiKey})
      : _api = VoiceSessionApi(apiUrl: apiUrl, apiKey: apiKey);

  Future<void> startSession() async {
    // 1. Exchange the API key for a session token and WebSocket URL.
    final (:token, :wsUrl) = await _api.getSessionToken();

    // 2. Start the playback pipeline before any audio arrives.
    _player = AudioPlayer();
    await _player!.start();

    // 3. Connect the WebSocket and route events to the other components.
    _session = VoiceSession(
      onAgentReady: () => print('Agent is ready'),
      onSessionEnded: () => stopSession(),
      onError: (e) => print('Error: $e'),
      onAudioFrame: (data) => _player!.feedFrame(data),
    );
    await _session!.connect(wsUrl, token);

    // 4. Stream microphone frames to the agent.
    _mic = MicCapture();
    await _mic!.start(onFrame: (frame) {
      _session!.sendAudio(frame);
    });
  }

  Future<void> stopSession() async {
    await _mic?.stop();
    await _player?.stop();
    _session?.disconnect();
  }
}
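A minimal sketch of driving the agent from a screen, assuming a single start/stop button; the widget layout and placeholder credentials are illustrative, not part of the SDK:

import 'package:flutter/material.dart';

class KioskScreen extends StatefulWidget {
  const KioskScreen({super.key});

  @override
  State<KioskScreen> createState() => _KioskScreenState();
}

class _KioskScreenState extends State<KioskScreen> {
  // Placeholder endpoint and key; substitute your own values.
  final _agent = KioskVoiceAgent(
    apiUrl: 'https://api.example.com',
    apiKey: 'YOUR_API_KEY',
  );
  bool _active = false;

  Future<void> _toggle() async {
    if (_active) {
      await _agent.stopSession();
    } else {
      await _agent.startSession();
    }
    setState(() => _active = !_active);
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      body: Center(
        child: ElevatedButton(
          onPressed: _toggle,
          child: Text(_active ? 'End Conversation' : 'Start Conversation'),
        ),
      ),
    );
  }
}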
6. Platform Setup
Android
<!-- android/app/src/main/AndroidManifest.xml -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.INTERNET" />
Request the microphone permission at runtime, for example with the permission_handler package (add it to pubspec.yaml first):
import 'package:permission_handler/permission_handler.dart';

final status = await Permission.microphone.request();
if (!status.isGranted) {
  throw Exception('Microphone permission required');
}
iOS
<!-- ios/Runner/Info.plist -->
<key>NSMicrophoneUsageDescription</key>
<string>Required for voice conversation with the AI assistant</string>
The flutter_sound package handles AVAudioSession configuration automatically.