feat:v1.0.0

2026-02-09 21:54:32 +08:00
parent 8f19377517
commit 68d25581e8
49 changed files with 1522 additions and 528 deletions
--- a/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
+++ b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
@@ -1,6 +1,7 @@
 import 'dart:async';
 import 'package:flutter/foundation.dart';
 import '../../core/core.dart';
+import '../../core/services/vad_stt_service.dart';
 import 'package:permission_handler/permission_handler.dart';

 enum VoiceState {
@@ -25,8 +26,8 @@ class VoiceSessionController extends ChangeNotifier {
  String _aiTypingText = '';
  bool _isMicMuted = false;
  
-  // Services
-  final STTService _stt = STTService();
+  // Services - 使用 VAD 替代系统 STT
+  final VadSttService _vad = VadSttService();
  final TTSService _tts = TTSService();
  
  // State getters
@@ -35,9 +36,7 @@ class VoiceSessionController extends ChangeNotifier {
  String get aiTypingText => _aiTypingText;
  bool get isMicMuted => _isMicMuted;

-  // Buffer for sentence completion
-  String _sentenceBuffer = '';
-  final List<String> _punctuation = ['。', '？', '！', '.', '?', '!', '\n'];
+

  VoiceSessionController({
    required this.character,
@@ -52,13 +51,15 @@ class VoiceSessionController extends ChangeNotifier {
    await [Permission.microphone, Permission.speech].request();
    
    // Init services
-    await _stt.init();
+    await _vad.init();
    await _tts.init();
+    
+    // 预连接 TTS WebSocket（减少首次 TTS 延迟）
+    _tts.preconnect();

    // Setup TTS callbacks
    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
-      // Already paused STT in _processSpeakQueue
    });

    _tts.setCompletionHandler(() {
@@ -96,7 +97,7 @@ class VoiceSessionController extends ChangeNotifier {
    
    _state = VoiceState.listening;
    _recognizedText = '';
-    _lastProcessedLength = 0;
+
    notifyListeners();

    // Stop TTS if it's playing (Interruption)
@@ -106,7 +107,18 @@ class VoiceSessionController extends ChangeNotifier {
        _isSpeaking = false;
    }

-    await _stt.listen(
+    // 使用 VAD 监听
+    await _vad.startListening(
+      onSpeechStart: () {
+        // 用户开始说话
+        _recognizedText = 'Listening...';
+        notifyListeners();
+      },
+      onSpeechEnd: () {
+        // 用户说完了，等待 STT 处理
+        _recognizedText = 'Processing...';
+        notifyListeners();
+      },
      onResult: (text) {
        _recognizedText = text;
        notifyListeners();
@@ -114,14 +126,14 @@ class VoiceSessionController extends ChangeNotifier {
      onFinalResult: (text) {
        _recognizedText = text;
        notifyListeners();
+        // 发送给 LLM 处理
        _processUserMessage(text);
      },
-      localeId: 'zh-CN', // Make dynamic later if needed
    );
  }

  Future<void> stopListening() async {
-    await _stt.stop();
+    await _vad.stopListening(); // Delegates to the VAD service (replaces the removed STTService stop).
  }

  Future<void> _processUserMessage(String text) async {
@@ -131,6 +143,8 @@ class VoiceSessionController extends ChangeNotifier {
      return;
    }

+
+
    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show user message
    notifyListeners();
@@ -156,37 +170,31 @@ class VoiceSessionController extends ChangeNotifier {
    }
    
    _aiTypingText = '';
-    _sentenceBuffer = '';
-    _lastProcessedLength = 0;

    try {
      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
-        userMessage: text, // ChatService handles appending this if we use the right method
+        userMessage: text,
        onStream: (content) {
          _aiTypingText = content;
-          _processStreamChunk(content);
          notifyListeners();
        },
      );

-      // Process any remaining text in buffer
-      if (_sentenceBuffer.isNotEmpty) {
+      // Interaction finished, save AI message
+      final aiMsg = ChatMessage.assistant(fullResponse);
+      onAiMessage(aiMsg);
+
+      // Filter emojis and speak full text
+      final textToSpeak = _filterEmojis(fullResponse);
+      if (textToSpeak.isNotEmpty) {
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
-        await _speak(_sentenceBuffer);
+        await _speak(textToSpeak);
      }
-
-      // Interaction finished, save AI message
-      final aiMsg = ChatMessage.assistant(fullResponse);
-      onAiMessage(aiMsg);
-      
-      // Note: We do NOT immediately startListening here.
-      // We rely on the TTS Completion Handler to trigger startListening
-      // when the entire queue is drained.
      
    } catch (e) {
      debugPrint('❌ Voice Process Error: $e');
@@ -197,65 +205,12 @@ class VoiceSessionController extends ChangeNotifier {
    }
  }

-  // Better implementation needs to handle state to avoid infinite loops
-  int _lastProcessedLength = 0;
-
-  void _processStreamChunk(String content) {
-    if (_state != VoiceState.speaking) {
-      _state = VoiceState.speaking;
-      notifyListeners();
-    }
-    
-    // Calculate delta (new content only)
-    if (content.length <= _lastProcessedLength) return;
-    
-    String delta = content.substring(_lastProcessedLength);
-    _lastProcessedLength = content.length;
-    _sentenceBuffer += delta;
-
-    // Check for punctuation to split sentences
-    bool foundPunctuation = false;
-    for (var p in _punctuation) {
-      if (_sentenceBuffer.contains(p)) {
-        foundPunctuation = true;
-        break;
-      }
-    }
-
-    if (foundPunctuation) {
-      _processBufferForSentences();
-    }
-  }
-  
-  void _processBufferForSentences() {
-    String tempBuffer = _sentenceBuffer;
-    String keepBuffer = '';
-
-    // Simple tokenizer: split by punctuation but keep the punctuation attached to the sentence
-    // This is a naive implementation. 
-    // "Hello! How are you?" -> ["Hello!", "How are you?"]
-    
-    // We iterate through chars to find split points
-    int lastSplitIndex = 0;
-    
-    for (int i = 0; i < tempBuffer.length; i++) {
-        String char = tempBuffer[i];
-        if (_punctuation.contains(char)) {
-            // Found end of a sentence
-            String sentence = tempBuffer.substring(lastSplitIndex, i + 1);
-            if (sentence.trim().isNotEmpty) {
-                _speak(sentence);
-            }
-            lastSplitIndex = i + 1;
-        }
-    }
-    
-    // Keep the remaining part that didn't end with punctuation
-    if (lastSplitIndex < tempBuffer.length) {
-        keepBuffer = tempBuffer.substring(lastSplitIndex);
-    }
-    
-    _sentenceBuffer = keepBuffer;
+  /// Returns [text] with emoji removed and surrounding whitespace trimmed,
+  /// so the TTS engine is not handed symbols it cannot pronounce.
+  ///
+  /// Only symbol and pictograph ranges are matched. Ordinary punctuation
+  /// (for example `…`, `—`, curly quotes, `。`, `、`) and Japanese kana are
+  /// kept, because removing them drops sentence breaks or whole words.
+  String _filterEmojis(String text) {
+    // Dart's RegExp (non-unicode mode) works on UTF-16 code units, so
+    // supplementary-plane emoji are matched as a high surrogate
+    // (U+D83C..U+D83E) followed by a low surrogate (U+DC00..U+DFFF).
+    // The BMP class covers (c)/(R), ZWJ, the keycap combiner, arrows,
+    // technical symbols, geometric shapes, misc symbols, dingbats, a few
+    // CJK emoji symbols, and the emoji variation selector U+FE0F.
+    final emojiRegex = RegExp(
+      r'[\u00a9\u00ae\u200d\u20e3\u2190-\u21ff\u2300-\u23ff\u25a0-\u27bf'
+      r'\u2900-\u297f\u2b00-\u2bff\u3030\u303d\u3297\u3299\ufe0f]'
+      r'|[\ud83c-\ud83e][\udc00-\udfff]',
+    );
+    return text.replaceAll(emojiRegex, '').trim();
  }
  
  Future<void> _speak(String text) async {
@@ -275,9 +230,10 @@ class VoiceSessionController extends ChangeNotifier {
             debugPrint('🎤 Queue empty, resuming listening...');
             _state = VoiceState.listening;
             notifyListeners();
-             // Debounce STT restart to avoid rapid stop/start deadlocks
+             // 延迟启动 STT，让音频会话有时间从播放切换到录音
             _silenceTimer?.cancel();
-             _silenceTimer = Timer(const Duration(milliseconds: 250), () {
+             _silenceTimer = Timer(const Duration(milliseconds: 800), () {
+               debugPrint('🎤 延迟后启动 STT...');
               startListening();
             });
          }
@@ -291,12 +247,12 @@ class VoiceSessionController extends ChangeNotifier {
      // Ensure STT is paused while speaking
      await stopListening();
      
-      await _tts.speak(text);
+      await _tts.speak(text, voiceConfig: character.aiVoiceConfig);
  }

  @override
  void dispose() {
-    _stt.stop();
+    // Cancel the delayed listening restart first: if that timer fired after
+    // disposal it would call startListening(), which notifies listeners on
+    // a disposed notifier and restarts VAD listening.
+    _silenceTimer?.cancel();
+    _vad.stopListening();
    _tts.stop();
    super.dispose();
  }