import 'dart:async';

import 'package:flutter/foundation.dart';
import 'package:permission_handler/permission_handler.dart';

import '../../core/core.dart';

/// Lifecycle states of a voice conversation session.
enum VoiceState {
  listening, // Waiting for user input
  processing, // Sending to AI
  speaking, // AI is talking
  error,
}

/// Orchestrates a full-duplex voice chat with an AI [character]:
/// speech-to-text capture -> streamed chat request -> sentence-chunked
/// text-to-speech playback, with barge-in (user speech interrupts playback).
///
/// Notifies listeners whenever [state], [recognizedText], [aiTypingText]
/// or [isMicMuted] change.
class VoiceSessionController extends ChangeNotifier {
  /// The AI persona this session converses with.
  final CharacterModel character;

  /// Invoked with the finalized user utterance so the UI can display/persist it.
  final void Function(String) onUserMessage;

  /// Invoked with the AI's complete reply once streaming has finished.
  final void Function(ChatMessage) onAiMessage;

  // Sentences queued for TTS playback, in arrival order.
  final List<String> _speakQueue = [];
  bool _isSpeaking = false;

  // Debounces the STT restart after the speak queue drains; must be
  // cancelled on dispose so it cannot fire on a disposed controller.
  Timer? _silenceTimer;

  VoiceState _state = VoiceState.listening;
  String _recognizedText = '';
  String _aiTypingText = '';
  bool _isMicMuted = false;

  // Services
  final STTService _stt = STTService();
  final TTSService _tts = TTSService();

  // State getters
  VoiceState get state => _state;
  String get recognizedText => _recognizedText;
  String get aiTypingText => _aiTypingText;
  bool get isMicMuted => _isMicMuted;

  // Buffer holding streamed AI text that has not yet formed a full sentence.
  String _sentenceBuffer = '';

  // Length of the streamed response already folded into _sentenceBuffer;
  // lets us compute the delta of each cumulative stream callback.
  int _lastProcessedLength = 0;

  // Characters treated as sentence terminators when chunking for TTS.
  final List<String> _punctuation = ['。', '?', '!', '.', '?', '!', '\n'];

  VoiceSessionController({
    required this.character,
    required this.onUserMessage,
    required this.onAiMessage,
  }) {
    _init();
  }

  /// Requests permissions, initializes STT/TTS, wires TTS callbacks,
  /// and starts listening (unless the mic starts muted).
  Future<void> _init() async {
    // Request permissions
    await [Permission.microphone, Permission.speech].request();

    // Init services
    await _stt.init();
    await _tts.init();

    // Setup TTS callbacks
    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
      // Already paused STT in _processSpeakQueue
    });
    _tts.setCompletionHandler(() {
      debugPrint('✅ TTS Completed');
      _isSpeaking = false;
      _processSpeakQueue(); // Play next
    });
    _tts.setErrorHandler((msg) {
      debugPrint('❌ TTS Error: $msg');
      _isSpeaking = false;
      _processSpeakQueue();
    });

    // Start listening immediately
    if (!_isMicMuted) {
      startListening();
    }
  }

  /// Toggles the mute flag; stops STT when muting, and resumes it when
  /// unmuting only if the session is currently in the listening state.
  void toggleMic() {
    _isMicMuted = !_isMicMuted;
    if (_isMicMuted) {
      stopListening();
    } else {
      if (_state == VoiceState.listening) {
        startListening();
      }
    }
    notifyListeners();
  }

  /// Enters the listening state and starts STT capture.
  ///
  /// If TTS playback is in progress (or queued), this is a barge-in:
  /// the queue is cleared and playback is stopped first.
  Future<void> startListening() async {
    if (_isMicMuted) return;
    _state = VoiceState.listening;
    _recognizedText = '';
    _lastProcessedLength = 0;
    notifyListeners();

    // Stop TTS if it's playing (Interruption)
    if (_isSpeaking || _speakQueue.isNotEmpty) {
      _speakQueue.clear();
      await _tts.stop();
      _isSpeaking = false;
    }

    await _stt.listen(
      onResult: (text) {
        _recognizedText = text;
        notifyListeners();
      },
      onFinalResult: (text) {
        _recognizedText = text;
        notifyListeners();
        _processUserMessage(text);
      },
      localeId: 'zh-CN', // Make dynamic later if needed
    );
  }

  /// Stops STT capture.
  Future<void> stopListening() async {
    await _stt.stop();
  }

  /// Sends the finalized [text] to the AI, streaming the reply through
  /// sentence-chunked TTS. On error, retries listening after 2 seconds.
  Future<void> _processUserMessage(String text) async {
    if (text.trim().isEmpty) {
      // If empty, just listen again
      startListening();
      return;
    }

    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show user message
    notifyListeners();

    // Construct history for context. The InteractionScreen owns the source
    // of truth for messages; here we fetch the stored session as context.
    final session = await ChatStorageService.getSession(character.id);
    var messages = session.messages;

    // ChatService.sendMessage appends the userMessage automatically.
    // We need to ensure 'messages' doesn't already contain it (if DB write was fast).
    if (messages.isNotEmpty) {
      final lastMsg = messages.last;
      if (lastMsg.isUser && lastMsg.content == text) {
        messages = List.from(messages)..removeLast();
      }
    }

    _aiTypingText = '';
    _sentenceBuffer = '';
    _lastProcessedLength = 0;

    try {
      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
        userMessage: text,
        onStream: (content) {
          _aiTypingText = content;
          _processStreamChunk(content);
          notifyListeners();
        },
      );

      // Process any remaining text in buffer
      if (_sentenceBuffer.isNotEmpty) {
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
        await _speak(_sentenceBuffer);
      }

      // Interaction finished, save AI message
      final aiMsg = ChatMessage.assistant(fullResponse);
      onAiMessage(aiMsg);

      // Note: We do NOT immediately startListening here.
      // We rely on the TTS Completion Handler to trigger startListening
      // when the entire queue is drained.
    } catch (e) {
      debugPrint('❌ Voice Process Error: $e');
      _state = VoiceState.error;
      notifyListeners();
      // Retry listening after error
      Future.delayed(const Duration(seconds: 2), startListening);
    }
  }

  /// Folds a cumulative stream snapshot [content] into the sentence buffer
  /// and triggers sentence extraction when a terminator is present.
  void _processStreamChunk(String content) {
    if (_state != VoiceState.speaking) {
      _state = VoiceState.speaking;
      notifyListeners();
    }

    // Calculate delta (new content only); the stream is cumulative.
    if (content.length <= _lastProcessedLength) return;
    final delta = content.substring(_lastProcessedLength);
    _lastProcessedLength = content.length;

    _sentenceBuffer += delta;

    // Check for punctuation to split sentences
    if (_punctuation.any(_sentenceBuffer.contains)) {
      _processBufferForSentences();
    }
  }

  /// Splits the buffer at terminator characters, enqueues each complete
  /// sentence (terminator attached) for TTS, and keeps any unfinished tail.
  ///
  /// Naive tokenizer: "Hello! How are you?" -> ["Hello!", "How are you?"].
  void _processBufferForSentences() {
    final tempBuffer = _sentenceBuffer;
    String keepBuffer = '';

    int lastSplitIndex = 0;
    for (int i = 0; i < tempBuffer.length; i++) {
      final char = tempBuffer[i];
      if (_punctuation.contains(char)) {
        // Found end of a sentence
        final sentence = tempBuffer.substring(lastSplitIndex, i + 1);
        if (sentence.trim().isNotEmpty) {
          _speak(sentence);
        }
        lastSplitIndex = i + 1;
      }
    }

    // Keep the remaining part that didn't end with punctuation
    if (lastSplitIndex < tempBuffer.length) {
      keepBuffer = tempBuffer.substring(lastSplitIndex);
    }
    _sentenceBuffer = keepBuffer;
  }

  /// Enqueues [text] for TTS playback and kicks the queue processor.
  /// Ignored if the user has already barged in (listening state).
  Future<void> _speak(String text) async {
    if (_state == VoiceState.listening) return;
    _speakQueue.add(text);
    _processSpeakQueue();
  }

  /// Drains the speak queue one sentence at a time. When empty, transitions
  /// back to listening after a short debounce (avoids rapid STT stop/start).
  /// Callers invoke this fire-and-forget; re-entrancy is guarded by
  /// [_isSpeaking].
  Future<void> _processSpeakQueue() async {
    if (_isSpeaking) return;

    if (_speakQueue.isEmpty) {
      // All done speaking (or no TTS was produced)
      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
        debugPrint('🎤 Queue empty, resuming listening...');
        _state = VoiceState.listening;
        notifyListeners();
        // Debounce STT restart to avoid rapid stop/start deadlocks
        _silenceTimer?.cancel();
        _silenceTimer = Timer(const Duration(milliseconds: 250), () {
          startListening();
        });
      }
      return;
    }

    // Pop first
    final text = _speakQueue.removeAt(0);
    _isSpeaking = true;

    // Ensure STT is paused while speaking
    await stopListening();
    await _tts.speak(text);
  }

  @override
  void dispose() {
    // Cancel the pending restart timer so it cannot call startListening()
    // on a disposed controller.
    _silenceTimer?.cancel();
    _stt.stop();
    _tts.stop();
    super.dispose();
  }
}