import 'dart:async';

import 'package:flutter/foundation.dart';
import 'package:permission_handler/permission_handler.dart';

import '../../core/core.dart';
import '../../core/services/vad_stt_service.dart';

/// High-level state of a voice conversation session.
enum VoiceState {
  listening, // Waiting for user input
  processing, // Sending to AI
  speaking, // AI is talking
  error,
}

/// Drives a hands-free voice conversation with an AI [character]:
/// VAD-based speech capture → LLM request → queued TTS playback, then
/// automatically loops back to listening once playback finishes.
class VoiceSessionController extends ChangeNotifier {
  final CharacterModel character;

  /// Invoked with the final recognized user utterance so the UI can render it.
  final void Function(String) onUserMessage;

  /// Invoked with the completed AI reply so the UI can render/persist it.
  final void Function(ChatMessage) onAiMessage;

  // Speaking queue: pending TTS texts, played strictly one at a time.
  final List<String> _speakQueue = [];
  bool _isSpeaking = false;

  // Delays restarting STT after playback so the audio session has time to
  // switch from playback back to recording. Must be cancelled on dispose.
  Timer? _silenceTimer;

  VoiceState _state = VoiceState.listening;
  String _recognizedText = '';
  String _aiTypingText = '';
  bool _isMicMuted = false;

  // Services — VAD-based recognizer replaces the system STT.
  final VadSttService _vad = VadSttService();
  final TTSService _tts = TTSService();

  // State getters
  VoiceState get state => _state;
  String get recognizedText => _recognizedText;
  String get aiTypingText => _aiTypingText;
  bool get isMicMuted => _isMicMuted;

  VoiceSessionController({
    required this.character,
    required this.onUserMessage,
    required this.onAiMessage,
  }) {
    _init();
  }

  /// Requests permissions, initializes STT/TTS, wires TTS lifecycle
  /// callbacks, and starts listening unless the mic is muted.
  Future<void> _init() async {
    // Request permissions
    await [Permission.microphone, Permission.speech].request();

    // Init services
    await _vad.init();
    await _tts.init();

    // Pre-connect the TTS WebSocket (reduces first-utterance latency).
    _tts.preconnect();

    // Setup TTS callbacks
    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
    });
    _tts.setCompletionHandler(() {
      debugPrint('✅ TTS Completed');
      _isSpeaking = false;
      _processSpeakQueue(); // Play next
    });
    _tts.setErrorHandler((msg) {
      debugPrint('❌ TTS Error: $msg');
      // Treat a failed utterance like a finished one so the queue keeps moving.
      _isSpeaking = false;
      _processSpeakQueue();
    });

    // Start listening immediately
    if (!_isMicMuted) {
      startListening();
    }
  }

  /// Toggles the microphone mute state, pausing or resuming listening.
  void toggleMic() {
    _isMicMuted = !_isMicMuted;
    if (_isMicMuted) {
      stopListening();
    } else {
      if (_state == VoiceState.listening) {
        startListening();
      }
    }
    notifyListeners();
  }

  /// Enters the listening state and starts VAD capture.
  ///
  /// If TTS is currently playing (or queued), this acts as an interruption:
  /// the queue is cleared and playback stopped before capture starts.
  Future<void> startListening() async {
    if (_isMicMuted) return;

    _state = VoiceState.listening;
    _recognizedText = '';
    notifyListeners();

    // Stop TTS if it's playing (Interruption)
    if (_isSpeaking || _speakQueue.isNotEmpty) {
      _speakQueue.clear();
      await _tts.stop();
      _isSpeaking = false;
    }

    // Listen via VAD
    await _vad.startListening(
      onSpeechStart: () {
        // User started speaking.
        _recognizedText = 'Listening...';
        notifyListeners();
      },
      onSpeechEnd: () {
        // User finished speaking; waiting for STT to process.
        _recognizedText = 'Processing...';
        notifyListeners();
      },
      onResult: (text) {
        _recognizedText = text;
        notifyListeners();
      },
      onFinalResult: (text) {
        _recognizedText = text;
        notifyListeners();
        // Hand off to the LLM.
        _processUserMessage(text);
      },
    );
  }

  /// Stops VAD capture without changing the session state.
  Future<void> stopListening() async {
    await _vad.stopListening();
  }

  /// Sends the recognized [text] to the LLM, streams the reply into
  /// [aiTypingText], then speaks the emoji-filtered full response.
  ///
  /// Empty input simply restarts listening. On error the state becomes
  /// [VoiceState.error] and listening is retried after 2 seconds.
  Future<void> _processUserMessage(String text) async {
    if (text.trim().isEmpty) {
      // If empty, just listen again
      startListening();
      return;
    }

    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show user message
    notifyListeners();

    // Construct history for context (simplified for now, ideally pass full history)
    // We will rely on ChatService to handle the full history if we pass the latest message
    // But ChatService needs the list. For the voice mode, let's assume interacting adds to DB
    // and we might need to fetch fresh context or pass it in.
    // Ideally, the InteractionScreen manages the source of truth for messages.
    // Here we'll just send the text to prompt the AI.

    // Construct history
    final session = await ChatStorageService.getSession(character.id);
    var messages = session.messages;

    // ChatService.sendMessage appends the userMessage automatically.
    // We need to ensure 'messages' doesn't already contain it (if DB write was fast).
    if (messages.isNotEmpty) {
      final lastMsg = messages.last;
      if (lastMsg.isUser && lastMsg.content == text) {
        messages = List.from(messages)..removeLast();
      }
    }

    _aiTypingText = '';

    try {
      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
        userMessage: text,
        onStream: (content) {
          _aiTypingText = content;
          notifyListeners();
        },
      );

      // Interaction finished, save AI message
      final aiMsg = ChatMessage.assistant(fullResponse);
      onAiMessage(aiMsg);

      // Filter emojis and speak full text
      final textToSpeak = _filterEmojis(fullResponse);
      if (textToSpeak.isNotEmpty) {
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
        await _speak(textToSpeak);
      } else {
        // FIX: the reply filtered down to nothing (e.g. emoji-only). Without
        // this branch no TTS callback ever fires, the state stays stuck in
        // `processing`, and listening never resumes. Drive the empty-queue
        // path directly so the session loops back to listening.
        _processSpeakQueue();
      }
    } catch (e) {
      debugPrint('❌ Voice Process Error: $e');
      _state = VoiceState.error;
      notifyListeners();
      // Retry listening after error
      Future.delayed(const Duration(seconds: 2), startListening);
    }
  }

  /// Strips common emoji code points so they are not read aloud by TTS.
  String _filterEmojis(String text) {
    // Regex matches common emoji ranges
    final RegExp emojiRegex = RegExp(
      r'(\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])'
    );
    return text.replaceAll(emojiRegex, '').trim();
  }

  /// Queues [text] for TTS playback unless the user has already interrupted
  /// (state is back to listening).
  Future<void> _speak(String text) async {
    // If we are listening (interrupted), ignore (or should check state)
    if (_state == VoiceState.listening) return;
    _speakQueue.add(text);
    _processSpeakQueue();
  }

  /// Plays the next queued utterance, or — when the queue drains — schedules
  /// a delayed return to listening. Intentionally fire-and-forget (async void
  /// style) since it is driven from TTS callbacks.
  void _processSpeakQueue() async {
    if (_isSpeaking) return;

    if (_speakQueue.isEmpty) {
      // All done speaking (or no TTS was produced)
      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
        debugPrint('🎤 Queue empty, resuming listening...');
        _state = VoiceState.listening;
        notifyListeners();
        // Delay restarting STT so the audio session can switch from
        // playback back to recording.
        _silenceTimer?.cancel();
        _silenceTimer = Timer(const Duration(milliseconds: 800), () {
          debugPrint('🎤 延迟后启动 STT...');
          startListening();
        });
      }
      return;
    }

    // Pop first
    String text = _speakQueue.removeAt(0);
    _isSpeaking = true;

    // Ensure STT is paused while speaking
    await stopListening();
    await _tts.speak(text, voiceConfig: character.aiVoiceConfig);
  }

  @override
  void dispose() {
    // FIX: cancel the pending resume timer; otherwise it can fire after
    // disposal and call startListening()/notifyListeners() on a disposed
    // ChangeNotifier.
    _silenceTimer?.cancel();
    _vad.stopListening();
    _tts.stop();
    super.dispose();
  }
}