feat: mvp viode

2026-02-03 21:41:25 +08:00
parent dec5748cca
commit 8f19377517
13 changed files with 701 additions and 31 deletions
--- a/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
+++ b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
@@ -0,0 +1,303 @@
+import 'dart:async';
+import 'package:flutter/foundation.dart';
+import '../../core/core.dart';
+import 'package:permission_handler/permission_handler.dart';
+
+/// Phases of a voice conversation as tracked by the voice session controller.
+enum VoiceState {
+  /// Waiting for user input; this is also the controller's initial state.
+  listening, // Waiting for user input
+
+  /// The recognised user text has been handed off and is being sent to the AI.
+  processing, // Sending to AI
+
+  /// The AI reply is streaming in and/or being read aloud.
+  speaking, // AI is talking
+
+  /// Processing the last user message failed.
+  error,
+}
+
+/// Coordinates one hands-free voice conversation with [character].
+///
+/// The controller cycles through the [VoiceState]s: it listens through the
+/// speech recogniser, sends the final transcript to [ChatService], splits the
+/// streamed reply into sentences and plays them one at a time through
+/// text-to-speech, then resumes listening once playback has drained.
+///
+/// Listeners are notified whenever [state], [recognizedText], [aiTypingText]
+/// or [isMicMuted] changes. Nothing is notified after [dispose].
+class VoiceSessionController extends ChangeNotifier {
+  /// The character the user is talking to.
+  final CharacterModel character;
+
+  /// Called with the final recognised text of each non-empty user utterance.
+  final Function(String) onUserMessage;
+
+  /// Called with the complete assistant reply once it has finished streaming.
+  final Function(ChatMessage) onAiMessage;
+
+  // Sentences waiting to be spoken, in playback order.
+  final List<String> _speakQueue = [];
+
+  // Whether an utterance has been handed to TTS and not yet completed.
+  bool _isSpeaking = false;
+
+  // Debounces the STT restart after playback has drained.
+  Timer? _silenceTimer;
+
+  // Schedules the listening retry after a failed turn.
+  Timer? _retryTimer;
+
+  VoiceState _state = VoiceState.listening;
+  String _recognizedText = '';
+  String _aiTypingText = '';
+  bool _isMicMuted = false;
+
+  // Whether a reply is still streaming, i.e. more sentences may yet arrive.
+  bool _isStreaming = false;
+
+  // Incremented every time a new listening cycle starts. Work that captured
+  // an older value belongs to an interrupted turn and must be dropped.
+  int _turnId = 0;
+
+  bool _isDisposed = false;
+
+  // Services
+  final STTService _stt = STTService();
+  final TTSService _tts = TTSService();
+
+  /// The current phase of the conversation.
+  VoiceState get state => _state;
+
+  /// The latest (possibly partial) transcript of the user's speech.
+  String get recognizedText => _recognizedText;
+
+  /// The assistant reply streamed so far for the current turn.
+  String get aiTypingText => _aiTypingText;
+
+  /// Whether the user has muted the microphone.
+  bool get isMicMuted => _isMicMuted;
+
+  // Reply text received but not yet terminated by a sentence delimiter.
+  String _sentenceBuffer = '';
+
+  // Length of the streamed reply that has already been consumed.
+  int _lastProcessedLength = 0;
+
+  // Characters that end a sentence. Naive: '.' also splits "3.14".
+  static const Set<String> _punctuation = {
+    '。',
+    '？',
+    '！',
+    '.',
+    '?',
+    '!',
+    '\n',
+  };
+
+  VoiceSessionController({
+    required this.character,
+    required this.onUserMessage,
+    required this.onAiMessage,
+  }) {
+    // Fire-and-forget: _init reports its own failures via VoiceState.error.
+    unawaited(_init());
+  }
+
+  /// Requests permissions, initialises STT/TTS and starts the first
+  /// listening cycle.
+  Future<void> _init() async {
+    try {
+      // Request permissions
+      await [Permission.microphone, Permission.speech].request();
+
+      // Init services
+      await _stt.init();
+      await _tts.init();
+    } catch (e) {
+      debugPrint('❌ Voice Init Error: $e');
+      if (_isDisposed) return;
+      _state = VoiceState.error;
+      notifyListeners();
+      return;
+    }
+
+    // The owner may have been torn down while the services were starting.
+    if (_isDisposed) return;
+
+    _tts.setStartHandler(() {
+      debugPrint('🔊 TTS Started');
+      // STT has already been paused in _processSpeakQueue.
+    });
+
+    _tts.setCompletionHandler(() {
+      debugPrint('✅ TTS Completed');
+      _isSpeaking = false;
+      unawaited(_processSpeakQueue()); // Play next
+    });
+
+    _tts.setErrorHandler((msg) {
+      debugPrint('❌ TTS Error: $msg');
+      _isSpeaking = false;
+      unawaited(_processSpeakQueue());
+    });
+
+    // Start listening immediately
+    if (!_isMicMuted) {
+      unawaited(startListening());
+    }
+  }
+
+  /// Toggles the microphone mute flag.
+  ///
+  /// Muting stops the recogniser. Unmuting restarts it when the controller
+  /// is idle (listening) or parked in the error state; while a reply is
+  /// being processed or spoken, listening resumes on its own afterwards.
+  void toggleMic() {
+    _isMicMuted = !_isMicMuted;
+    if (_isMicMuted) {
+      unawaited(stopListening());
+    } else if (_state == VoiceState.listening || _state == VoiceState.error) {
+      unawaited(startListening());
+    }
+    notifyListeners();
+  }
+
+  /// Starts a new listening cycle, interrupting any reply in progress.
+  ///
+  /// Does nothing while the microphone is muted or after [dispose].
+  Future<void> startListening() async {
+    if (_isDisposed || _isMicMuted) return;
+
+    // A manual start supersedes any pending automatic restart.
+    _silenceTimer?.cancel();
+    _retryTimer?.cancel();
+
+    // Invalidate everything that belongs to the previous turn so a reply
+    // that is still streaming cannot be re-queued after the interruption.
+    final turn = ++_turnId;
+    _isStreaming = false;
+    _sentenceBuffer = '';
+    _lastProcessedLength = 0;
+
+    _state = VoiceState.listening;
+    _recognizedText = '';
+    notifyListeners();
+
+    // Stop TTS if it's playing (interruption)
+    if (_isSpeaking || _speakQueue.isNotEmpty) {
+      _speakQueue.clear();
+      await _tts.stop();
+      _isSpeaking = false;
+      // Muted, disposed or superseded by a newer call while TTS stopped.
+      if (_isDisposed || _isMicMuted || turn != _turnId) return;
+    }
+
+    await _stt.listen(
+      onResult: (text) {
+        _recognizedText = text;
+        notifyListeners();
+      },
+      onFinalResult: (text) {
+        _recognizedText = text;
+        notifyListeners();
+        unawaited(_processUserMessage(text));
+      },
+      localeId: 'zh-CN', // Make dynamic later if needed
+    );
+  }
+
+  /// Stops the speech recogniser.
+  Future<void> stopListening() async {
+    await _stt.stop();
+  }
+
+  /// Sends [text] to the AI and speaks the streamed reply sentence by
+  /// sentence.
+  ///
+  /// Blank input just restarts listening. On failure the controller enters
+  /// [VoiceState.error] and retries listening after two seconds.
+  Future<void> _processUserMessage(String text) async {
+    if (_isDisposed) return;
+
+    if (text.trim().isEmpty) {
+      // Nothing was said; just listen again.
+      unawaited(startListening());
+      return;
+    }
+
+    final turn = _turnId;
+
+    _state = VoiceState.processing;
+    onUserMessage(text); // Notify UI to show user message
+    notifyListeners();
+
+    _aiTypingText = '';
+    _sentenceBuffer = '';
+    _lastProcessedLength = 0;
+    _isStreaming = true;
+
+    try {
+      // Use the stored session as conversation context.
+      final session = await ChatStorageService.getSession(character.id);
+      var messages = session.messages;
+
+      // ChatService.sendMessage appends the user message itself, so drop it
+      // from the history if the DB write already landed.
+      if (messages.isNotEmpty) {
+        final lastMsg = messages.last;
+        if (lastMsg.isUser && lastMsg.content == text) {
+          messages = List.from(messages)..removeLast();
+        }
+      }
+
+      final fullResponse = await ChatService.sendMessage(
+        character: character,
+        messages: messages,
+        userMessage: text,
+        onStream: (content) {
+          // Ignore chunks of a reply the user has already interrupted.
+          if (_isDisposed || turn != _turnId) return;
+          _aiTypingText = content;
+          _processStreamChunk(content);
+          notifyListeners();
+        },
+      );
+
+      // Do not call back into an owner that has been torn down.
+      if (_isDisposed) return;
+
+      // Interaction finished: hand the AI message over even if playback was
+      // interrupted, so it still reaches the conversation history.
+      onAiMessage(ChatMessage.assistant(fullResponse));
+
+      if (turn != _turnId) return;
+      _isStreaming = false;
+
+      // Speak whatever is left that did not end with punctuation.
+      final tail = _sentenceBuffer;
+      _sentenceBuffer = '';
+      if (_hasSpeakableContent(tail)) {
+        if (_state != VoiceState.speaking) {
+          _state = VoiceState.speaking;
+          notifyListeners();
+        }
+        _speak(tail);
+      }
+
+      // If nothing is playing or queued (empty reply, or speech already
+      // drained while streaming) this resumes listening; otherwise the TTS
+      // completion handler does so once the queue is empty.
+      unawaited(_processSpeakQueue());
+    } catch (e) {
+      debugPrint('❌ Voice Process Error: $e');
+      if (_isDisposed || turn != _turnId) return;
+      _isStreaming = false;
+      _state = VoiceState.error;
+      notifyListeners();
+      // Retry listening after error
+      _retryTimer?.cancel();
+      _retryTimer = Timer(const Duration(seconds: 2), startListening);
+    }
+  }
+
+  /// Consumes the newly arrived part of the cumulative reply [content] and
+  /// queues every sentence it completes.
+  void _processStreamChunk(String content) {
+    if (_state != VoiceState.speaking) {
+      _state = VoiceState.speaking;
+      notifyListeners();
+    }
+
+    // Only the delta beyond what was already consumed is new.
+    if (content.length <= _lastProcessedLength) return;
+
+    _sentenceBuffer += content.substring(_lastProcessedLength);
+    _lastProcessedLength = content.length;
+
+    _processBufferForSentences();
+  }
+
+  /// Queues every complete sentence in the buffer and keeps the unfinished
+  /// remainder for the next chunk.
+  ///
+  /// Punctuation stays attached to its sentence:
+  /// "Hello! How are you?" -> ["Hello!", " How are you?"].
+  void _processBufferForSentences() {
+    final buffer = _sentenceBuffer;
+    var sentenceStart = 0;
+
+    for (var i = 0; i < buffer.length; i++) {
+      if (!_punctuation.contains(buffer[i])) continue;
+
+      final sentence = buffer.substring(sentenceStart, i + 1);
+      if (_hasSpeakableContent(sentence)) {
+        _speak(sentence);
+      }
+      sentenceStart = i + 1;
+    }
+
+    _sentenceBuffer = buffer.substring(sentenceStart);
+  }
+
+  /// Whether [text] contains anything besides whitespace and sentence
+  /// delimiters, so that fragments such as "..." are not sent to TTS.
+  bool _hasSpeakableContent(String text) {
+    for (var i = 0; i < text.length; i++) {
+      final ch = text[i];
+      if (ch.trim().isNotEmpty && !_punctuation.contains(ch)) return true;
+    }
+    return false;
+  }
+
+  /// Queues [text] for playback and starts playback if idle.
+  void _speak(String text) {
+    // Listening again means the reply was interrupted; drop the text.
+    if (_isDisposed || _state == VoiceState.listening) return;
+
+    _speakQueue.add(text);
+    unawaited(_processSpeakQueue());
+  }
+
+  /// Plays the next queued sentence, or resumes listening once the queue is
+  /// empty and the reply has finished streaming.
+  Future<void> _processSpeakQueue() async {
+    if (_isDisposed || _isSpeaking) return;
+
+    if (_speakQueue.isEmpty) {
+      // TTS caught up with the stream; more sentences may still arrive, so
+      // do not hand the turn back to the user yet.
+      if (_isStreaming) return;
+
+      // All done speaking (or no TTS was produced)
+      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
+        debugPrint('🎤 Queue empty, resuming listening...');
+        _state = VoiceState.listening;
+        notifyListeners();
+        // Debounce STT restart to avoid rapid stop/start deadlocks
+        _silenceTimer?.cancel();
+        _silenceTimer = Timer(
+          const Duration(milliseconds: 250),
+          startListening,
+        );
+      }
+      return;
+    }
+
+    final text = _speakQueue.removeAt(0);
+    final turn = _turnId;
+    _isSpeaking = true;
+
+    try {
+      // Ensure STT is paused while speaking
+      await stopListening();
+
+      // Interrupted or torn down while the recogniser was stopping.
+      if (_isDisposed || turn != _turnId) {
+        _isSpeaking = false;
+        return;
+      }
+
+      await _tts.speak(text);
+    } catch (e) {
+      // Without this a failing call would leave _isSpeaking set forever and
+      // stall the queue.
+      debugPrint('❌ Speak Queue Error: $e');
+      _isSpeaking = false;
+      unawaited(_processSpeakQueue());
+    }
+  }
+
+  /// Suppresses notifications once the controller has been disposed, so that
+  /// late service callbacks cannot touch a disposed notifier.
+  @override
+  void notifyListeners() {
+    if (_isDisposed) return;
+    super.notifyListeners();
+  }
+
+  @override
+  void dispose() {
+    _isDisposed = true;
+    // Pending restarts must not fire on a disposed controller.
+    _silenceTimer?.cancel();
+    _retryTimer?.cancel();
+    _speakQueue.clear();
+    _stt.stop();
+    _tts.stop();
+    super.dispose();
+  }
+}