Files
app/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
2026-02-09 21:54:32 +08:00

260 lines
6.9 KiB
Dart
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import 'dart:async';
import 'package:flutter/foundation.dart';
import '../../core/core.dart';
import '../../core/services/vad_stt_service.dart';
import 'package:permission_handler/permission_handler.dart';
/// High-level phase of the voice conversation loop.
///
/// The controller cycles listening → processing → speaking → listening;
/// [error] is entered on an LLM/TTS failure and recovers back to listening.
enum VoiceState {
listening, // Waiting for user input (VAD/STT active)
processing, // User utterance sent to the AI; awaiting the reply
speaking, // AI reply is being played back via TTS
error, // LLM/TTS failure; a retry re-enters listening
}
/// Drives a hands-free voice conversation with [character]:
/// VAD-based STT → LLM streaming reply → queued TTS playback, looping
/// back to listening when the speak queue drains.
///
/// UI layers observe [state], [recognizedText] and [aiTypingText] through
/// [ChangeNotifier]; committed messages are pushed out via [onUserMessage]
/// and [onAiMessage].
class VoiceSessionController extends ChangeNotifier {
  /// The AI character this session converses with.
  final CharacterModel character;

  /// Invoked with the user's final recognized utterance.
  final Function(String) onUserMessage;

  /// Invoked with the AI's completed reply message.
  final Function(ChatMessage) onAiMessage;

  // Speaking queue: text chunks waiting for TTS playback, played in order.
  final List<String> _speakQueue = [];
  bool _isSpeaking = false;
  Timer? _silenceTimer;

  // Set by dispose() so late async callbacks (TTS handlers, delayed
  // restarts) never call notifyListeners() on a disposed notifier.
  bool _disposed = false;

  VoiceState _state = VoiceState.listening;
  String _recognizedText = '';
  String _aiTypingText = '';
  bool _isMicMuted = false;

  // Services — VAD-based STT pipeline instead of the system recognizer.
  final VadSttService _vad = VadSttService();
  final TTSService _tts = TTSService();

  // State getters.
  VoiceState get state => _state;
  String get recognizedText => _recognizedText;
  String get aiTypingText => _aiTypingText;
  bool get isMicMuted => _isMicMuted;

  VoiceSessionController({
    required this.character,
    required this.onUserMessage,
    required this.onAiMessage,
  }) {
    _init();
  }

  /// Requests audio permissions, initializes the VAD and TTS services,
  /// wires TTS lifecycle callbacks, then starts listening (unless muted).
  Future<void> _init() async {
    // Request permissions.
    await [Permission.microphone, Permission.speech].request();
    // Init services.
    await _vad.init();
    await _tts.init();
    // Pre-connect the TTS WebSocket (reduces first-utterance latency).
    _tts.preconnect();
    // Setup TTS callbacks.
    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
    });
    _tts.setCompletionHandler(() {
      debugPrint('✅ TTS Completed');
      _isSpeaking = false;
      _processSpeakQueue(); // Play next chunk, or resume listening.
    });
    _tts.setErrorHandler((msg) {
      debugPrint('❌ TTS Error: $msg');
      // Treat an error like completion so the queue keeps draining.
      _isSpeaking = false;
      _processSpeakQueue();
    });
    // Start listening immediately.
    if (!_isMicMuted) {
      startListening();
    }
  }

  /// Toggles the microphone mute flag, stopping or resuming STT to match.
  void toggleMic() {
    _isMicMuted = !_isMicMuted;
    if (_isMicMuted) {
      stopListening();
    } else {
      // Only restart STT if we are actually in the listening phase;
      // otherwise the speak-queue drain will restart it for us.
      if (_state == VoiceState.listening) {
        startListening();
      }
    }
    notifyListeners();
  }

  /// Enters the listening state and starts VAD/STT capture.
  ///
  /// If TTS is currently playing (barge-in), playback is stopped and the
  /// pending speak queue is discarded first.
  Future<void> startListening() async {
    if (_disposed || _isMicMuted) return;
    _state = VoiceState.listening;
    _recognizedText = '';
    notifyListeners();
    // Stop TTS if it's playing (interruption / barge-in).
    if (_isSpeaking || _speakQueue.isNotEmpty) {
      _speakQueue.clear();
      await _tts.stop();
      _isSpeaking = false;
    }
    // Listen via VAD.
    await _vad.startListening(
      onSpeechStart: () {
        // User started speaking.
        _recognizedText = 'Listening...';
        notifyListeners();
      },
      onSpeechEnd: () {
        // User finished; waiting for STT to transcribe.
        _recognizedText = 'Processing...';
        notifyListeners();
      },
      onResult: (text) {
        // Partial transcription update.
        _recognizedText = text;
        notifyListeners();
      },
      onFinalResult: (text) {
        _recognizedText = text;
        notifyListeners();
        // Hand the final utterance to the LLM.
        _processUserMessage(text);
      },
    );
  }

  /// Stops VAD/STT capture. Safe to call when not listening.
  Future<void> stopListening() async {
    await _vad.stopListening();
  }

  /// Sends a finalized user utterance to the LLM, streams the reply into
  /// [aiTypingText], then queues the (emoji-filtered) reply for TTS.
  Future<void> _processUserMessage(String text) async {
    if (text.trim().isEmpty) {
      // Nothing recognized — just listen again.
      startListening();
      return;
    }
    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show the user message.
    notifyListeners();
    // Construct history from storage. ChatService.sendMessage appends the
    // user message itself, so if the DB write already landed (the last
    // stored message equals `text`), drop it to avoid duplication.
    final session = await ChatStorageService.getSession(character.id);
    var messages = session.messages;
    if (messages.isNotEmpty) {
      final lastMsg = messages.last;
      if (lastMsg.isUser && lastMsg.content == text) {
        messages = List.from(messages)..removeLast();
      }
    }
    // Clear the previous turn's streamed text before the new stream starts
    // (was missing a notify, leaving stale text visible until first chunk).
    _aiTypingText = '';
    notifyListeners();
    try {
      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
        userMessage: text,
        onStream: (content) {
          _aiTypingText = content;
          notifyListeners();
        },
      );
      // Interaction finished — hand the saved AI message to the UI.
      final aiMsg = ChatMessage.assistant(fullResponse);
      onAiMessage(aiMsg);
      // Filter emojis and speak the full text.
      final textToSpeak = _filterEmojis(fullResponse);
      if (textToSpeak.isNotEmpty) {
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
        await _speak(textToSpeak);
      }
    } catch (e) {
      debugPrint('❌ Voice Process Error: $e');
      _state = VoiceState.error;
      notifyListeners();
      // Retry listening after error (guarded against dispose inside
      // startListening).
      Future.delayed(const Duration(seconds: 2), startListening);
    }
  }

  /// Strips emoji and pictographic symbols so TTS does not read them aloud.
  String _filterEmojis(String text) {
    // Dart strings are UTF-16: emoji above U+FFFF appear as a high
    // surrogate (\ud83c–\ud83e) followed by a LOW surrogate, whose valid
    // range is \udc00–\udfff (the original \ud000 lower bound was wrong —
    // it also matched high surrogates / malformed sequences).
    final RegExp emojiRegex = RegExp(
      r'(\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\udc00-\udfff]|\ud83d[\udc00-\udfff]|\ud83e[\udc00-\udfff])',
    );
    return text.replaceAll(emojiRegex, '').trim();
  }

  /// Enqueues [text] for TTS playback unless the user has already
  /// interrupted back into listening.
  Future<void> _speak(String text) async {
    // If we are listening (interrupted), drop the chunk.
    if (_state == VoiceState.listening) return;
    _speakQueue.add(text);
    _processSpeakQueue();
  }

  /// Plays the next queued chunk, or — when the queue is empty — schedules
  /// a delayed return to listening (gives the audio session time to switch
  /// from playback back to recording).
  void _processSpeakQueue() async {
    if (_disposed || _isSpeaking) return;
    if (_speakQueue.isEmpty) {
      // All done speaking (or no TTS was produced).
      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
        debugPrint('🎤 Queue empty, resuming listening...');
        _state = VoiceState.listening;
        notifyListeners();
        // Delay STT start so the audio session can flip from playback
        // to recording.
        _silenceTimer?.cancel();
        _silenceTimer = Timer(const Duration(milliseconds: 800), () {
          debugPrint('🎤 延迟后启动 STT...');
          startListening();
        });
      }
      return;
    }
    // Pop the first chunk.
    String text = _speakQueue.removeAt(0);
    _isSpeaking = true;
    // Ensure STT is paused while speaking.
    await stopListening();
    await _tts.speak(text, voiceConfig: character.aiVoiceConfig);
  }

  @override
  void dispose() {
    // Cancel the pending restart timer — previously it could fire after
    // dispose and call notifyListeners() on a disposed ChangeNotifier.
    _silenceTimer?.cancel();
    _disposed = true;
    _vad.stopListening();
    _tts.stop();
    super.dispose();
  }
}