feat:v1.0.0

2026-02-09 21:54:32 +08:00
parent 8f19377517
commit 68d25581e8
49 changed files with 1522 additions and 528 deletions
--- a/wei_ai_app/lib/core/services/stt_service.dart
+++ b/wei_ai_app/lib/core/services/stt_service.dart
@@ -12,16 +12,31 @@ class STTService {

  bool get isListening => _isListening;

+  // 回调
+  Function(String text)? _onResult;
+  Function(String text)? _onFinalResult;
+
  Future<bool> init() async {
    if (_isInitialized) return true;

    try {
      _isInitialized = await _speech.initialize(
-        onError: (error) => debugPrint('❌ STT Error: $error'),
+        onError: (error) {
+          debugPrint('⚠️ STT Error: ${error.errorMsg}');
+          
+          // error_no_match 是常见的"没听到有效语音"错误
+          // 不应该中断整个流程
+          if (error.errorMsg == 'error_no_match') {
+            debugPrint('   (没有匹配到语音，忽略)');
+          }
+        },
        onStatus: (status) {
          debugPrint('🎤 STT Status: $status');
-          if (status == 'listening') _isListening = true;
-          if (status == 'notListening') _isListening = false;
+          if (status == 'listening') {
+            _isListening = true;
+          } else if (status == 'notListening' || status == 'done') {
+            _isListening = false;
+          }
        },
      );
      debugPrint('✅ STT Initialized: $_isInitialized');
@@ -44,20 +59,28 @@ class STTService {

    if (_isListening) await stop();

+    _onResult = onResult;
+    _onFinalResult = onFinalResult;
+
    await _speech.listen(
      onResult: (result) {
-        if (result.finalResult) {
-          onFinalResult(result.recognizedWords);
-        } else {
-          onResult(result.recognizedWords);
+        if (result.recognizedWords.isNotEmpty) {
+          if (result.finalResult) {
+            debugPrint('🎤 Final: "${result.recognizedWords}"');
+            _onFinalResult?.call(result.recognizedWords);
+          } else {
+            _onResult?.call(result.recognizedWords);
+          }
        }
      },
      localeId: localeId,
-      listenFor: const Duration(seconds: 30),
-      pauseFor: const Duration(seconds: 3), // Wait 3s of silence to consider "done"
-      partialResults: true,
-      cancelOnError: true,
-      listenMode: ListenMode.dictation,
+      listenFor: const Duration(seconds: 60),  // 最大监听时长
+      pauseFor: const Duration(milliseconds: 1500),  // 1.5秒静音后视为说完
+      listenOptions: SpeechListenOptions(
+        partialResults: true,
+        cancelOnError: false,  // 不要因错误取消
+        listenMode: ListenMode.dictation,
+      ),
    );
  }

--- a/wei_ai_app/lib/core/services/tts/minimax_tts_engine.dart
+++ b/wei_ai_app/lib/core/services/tts/minimax_tts_engine.dart
@@ -0,0 +1,502 @@
+import 'dart:async';
+import 'dart:convert';
+
+import 'package:flutter/foundation.dart';
+import 'package:flutter_pcm_sound/flutter_pcm_sound.dart';
+import 'package:web_socket_channel/web_socket_channel.dart';
+
+import '../../config/minimax_config.dart';
+import '../../models/character_model.dart';
+import 'tts_engine.dart';
+import 'ws_client.dart';
+
+/// MiniMax TTS 引擎 (PCM 流式版本 - 带预连接优化)
+/// 
+/// 优化特性：
+/// - 预连接：提前建立 WebSocket 连接
+/// - 连接复用：一个连接处理多次 TTS 请求
+/// - 自动重连：连接断开后自动重连
+class MiniMaxTtsEngine implements TtsEngine {
+  WebSocketChannel? _channel;
+  StreamSubscription? _wsSub;
+  Timer? _timeoutTimer;
+  Timer? _keepAliveTimer;
+
+  bool _isInitialized = false;
+  bool _isDisposed = false;
+  bool _isCancelled = false;
+  bool _isPlaying = false;
+
+  // 连接状态
+  bool _isConnected = false;
+  bool _isTaskReady = false;  // task_started 状态
+  Completer<void>? _connectCompleter;
+  Completer<void>? _speakCompleter;
+
+  // 当前任务的文本
+  String? _pendingText;
+
+  VoidCallback? _onStart;
+  VoidCallback? _onComplete;
+  Function(dynamic)? _onError;
+
+  // 统计信息
+  int _chunkCount = 0;
+  int _totalBytes = 0;
+  final Stopwatch _stopwatch = Stopwatch();
+
+  // 跟踪播放完成
+  bool _isFinalReceived = false;
+
+  /// Prepares the PCM player used for streamed playback.
+  ///
+  /// Does nothing when the engine has already been initialized.
+  @override
+  Future<void> init() async {
+    if (!_isInitialized) {
+      // Configure the PCM player with the MiniMax sample rate and channels.
+      await FlutterPcmSound.setup(
+        sampleRate: MinimaxConfig.sampleRate,
+        channelCount: MinimaxConfig.channels,
+      );
+
+      FlutterPcmSound.setLogLevel(LogLevel.none);
+      FlutterPcmSound.setFeedCallback(_onFeedCallback);
+
+      _isInitialized = true;
+      debugPrint('✅ MiniMaxTtsEngine initialized (PCM streaming mode)');
+    }
+  }
+
+  /// Opens the WebSocket ahead of time so a later [speak] can start sooner.
+  ///
+  /// Initializes the engine first when needed, and returns without touching
+  /// the socket when it is already connected with a started task.
+  @override
+  Future<void> preconnect() async {
+    if (!_isInitialized) {
+      await init();
+    }
+
+    final alreadyReady = _isConnected && _isTaskReady;
+    if (alreadyReady) {
+      debugPrint('⚡ TTS 连接已就绪，无需重连');
+    } else {
+      debugPrint('🔌 TTS 预连接中...');
+      await _ensureConnection();
+      debugPrint('⚡ TTS 预连接完成，已就绪');
+    }
+  }
+
+  /// Ensures the WebSocket is connected and a TTS task has been started.
+  ///
+  /// Returns immediately when the connection is already ready. When another
+  /// caller is already connecting, this call waits for that attempt instead of
+  /// tearing it down and starting a second one; replacing the pending
+  /// completer would leave the earlier caller awaiting a future that nothing
+  /// completes.
+  ///
+  /// Throws when the handshake does not finish within 10 seconds or the
+  /// socket reports an error or closes while connecting.
+  Future<void> _ensureConnection() async {
+    if (_isConnected && _isTaskReady) return;
+
+    // Join an in-flight attempt rather than replacing its completer.
+    final inFlight = _connectCompleter;
+    if (inFlight != null && !inFlight.isCompleted) {
+      return inFlight.future;
+    }
+
+    // Publish the completer before the first await so that a caller arriving
+    // during cleanup joins this attempt.
+    final completer = Completer<void>();
+    _connectCompleter = completer;
+
+    try {
+      // Drop any previous socket before opening a new one.
+      await _cleanupConnection();
+
+      final channel = connectTtsSocket(
+        Uri.parse(MinimaxConfig.wsUrl),
+        {
+          'Authorization': 'Bearer ${MinimaxConfig.apiKey}',
+        },
+      );
+      _channel = channel;
+
+      // Route every frame, error and close event through the shared handlers.
+      _wsSub = channel.stream.listen(
+        _handleConnectionMessage,
+        onError: (error) {
+          debugPrint('❌ TTS WebSocket 错误: $error');
+          _handleDisconnect();
+        },
+        onDone: () {
+          debugPrint('⚠️ TTS WebSocket 连接关闭');
+          _handleDisconnect();
+        },
+        cancelOnError: false,
+      );
+
+      // The attempt succeeds once `task_started` completes the completer
+      // (see _handleConnectionMessage); give up after 10 seconds.
+      _timeoutTimer?.cancel();
+      _timeoutTimer = Timer(const Duration(seconds: 10), () {
+        if (!completer.isCompleted) {
+          completer.completeError(const TtsEngineException('连接超时'));
+          _handleDisconnect();
+        }
+      });
+
+      await completer.future;
+      _timeoutTimer?.cancel();
+
+      // Start the periodic reconnect check.
+      _startKeepAlive();
+    } catch (e) {
+      debugPrint('❌ TTS 连接失败: $e');
+      _handleDisconnect();
+      rethrow;
+    }
+  }
+
+  /// Handles one frame received from the TTS WebSocket.
+  ///
+  /// Control events update the connection state. Any other frame that carries
+  /// hex-encoded audio is decoded and fed to the player, and a frame flagged
+  /// `is_final` finishes the current task. A frame that fails to parse is
+  /// logged and dropped.
+  void _handleConnectionMessage(dynamic message) {
+    if (_isDisposed) return;
+
+    try {
+      final Map<String, dynamic> frame = jsonDecode(message as String);
+      final eventName = frame['event'] as String?;
+
+      switch (eventName) {
+        case 'connected_success':
+          debugPrint('📥 TTS connected_success');
+          _isConnected = true;
+          // Ask the server to start a task so the engine becomes ready.
+          _sendTaskStart();
+          return;
+
+        case 'task_started':
+          debugPrint('📥 TTS task_started (就绪)');
+          _isTaskReady = true;
+
+          // Release whoever is waiting in _ensureConnection.
+          final connecting = _connectCompleter;
+          if (connecting != null && !connecting.isCompleted) {
+            connecting.complete();
+          }
+
+          // Flush text that was queued while the task was starting.
+          final queued = _pendingText;
+          if (queued != null) {
+            _sendText(queued);
+            _pendingText = null;
+          }
+          return;
+
+        case 'task_failed':
+          final errorInfo = frame['base_resp'] ?? frame;
+          debugPrint('❌ TTS task_failed: ${jsonEncode(errorInfo)}');
+          _handleTaskError('task_failed: ${jsonEncode(errorInfo)}');
+          return;
+      }
+
+      // Every other event (including `task_continued`) may carry audio.
+      final payload = frame['data'];
+      if (payload is Map<String, dynamic>) {
+        final hexAudio = payload['audio'];
+        if (hexAudio is String && hexAudio.isNotEmpty) {
+          final pcm = _hexToBytes(hexAudio);
+          _chunkCount++;
+          _totalBytes += pcm.length;
+
+          _feedAudioData(pcm);
+
+          // Log progress every 20 chunks.
+          if (_chunkCount % 20 == 0) {
+            debugPrint('📥 已接收 $_chunkCount 块 ($_totalBytes bytes)');
+          }
+        }
+      }
+
+      // The server marks the last frame of a task with `is_final`.
+      if (frame['is_final'] == true) {
+        _handleTaskComplete();
+      }
+    } catch (e) {
+      debugPrint('❌ TTS 消息解析错误: $e');
+    }
+  }
+
+  /// Sends the `task_start` event with the configured model, the default
+  /// voice and the PCM audio settings. Does nothing when no channel is open.
+  void _sendTaskStart() {
+    final channel = _channel;
+    if (channel == null) return;
+
+    final voiceSetting = {
+      'voice_id': MinimaxConfig.defaultVoiceId,
+      'speed': 1,
+      'vol': 1,
+      'pitch': 0,
+      'english_normalization': false,
+    };
+    final audioSetting = {
+      'sample_rate': MinimaxConfig.sampleRate,
+      'format': MinimaxConfig.format,
+      'channel': MinimaxConfig.channels,
+    };
+
+    channel.sink.add(jsonEncode({
+      'event': 'task_start',
+      'model': MinimaxConfig.model,
+      'voice_setting': voiceSetting,
+      'audio_setting': audioSetting,
+    }));
+  }
+
+  /// Sends [text] as a `task_continue` event and marks playback as active.
+  ///
+  /// Ignored when there is no channel or no task has been started.
+  void _sendText(String text) {
+    final channel = _channel;
+    if (channel != null && _isTaskReady) {
+      debugPrint('📤 发送文本: "$text"');
+      final request = {
+        'event': 'task_continue',
+        'text': text,
+      };
+      channel.sink.add(jsonEncode(request));
+      _isPlaying = true;
+    }
+  }
+
+  /// Finishes the current task after the final frame has arrived.
+  ///
+  /// Logs the transfer statistics, sends `task_finish`, completes the pending
+  /// [speak] call and, 100 ms later, requests a new task so the connection
+  /// can be reused.
+  void _handleTaskComplete() {
+    _stopwatch.stop();
+    _isFinalReceived = true;
+    _isTaskReady = false;
+
+    const divider = '═══════════════════════════════════════';
+    debugPrint('');
+    debugPrint(divider);
+    debugPrint('📊 TTS 完成: $_chunkCount 块, $_totalBytes bytes, ${_stopwatch.elapsedMilliseconds}ms');
+    debugPrint(divider);
+
+    // Tell the server this task is over.
+    _channel?.sink.add(jsonEncode({'event': 'task_finish'}));
+
+    // Release the speak() call waiting on this task.
+    final speaking = _speakCompleter;
+    if (speaking != null && !speaking.isCompleted) {
+      speaking.complete();
+    }
+
+    // Get ready for the next request by starting a fresh task.
+    Future.delayed(const Duration(milliseconds: 100), () {
+      final canRestart = _isConnected && !_isDisposed && !_isCancelled;
+      if (canRestart) {
+        _sendTaskStart();
+      }
+    });
+  }
+
+  /// Fails the current task with [error] and schedules a new `task_start`.
+  ///
+  /// When a [speak] call is still pending, the error is delivered through its
+  /// completer only: [speak] reports whatever it catches to the error handler
+  /// itself, so calling the handler here as well would notify listeners twice
+  /// for a single failure. The handler is invoked directly only when no
+  /// [speak] call is left to surface the error.
+  void _handleTaskError(String error) {
+    _isTaskReady = false;
+
+    final speaking = _speakCompleter;
+    if (speaking != null && !speaking.isCompleted) {
+      speaking.completeError(TtsEngineException(error));
+    } else {
+      _onError?.call(error);
+    }
+
+    // Try to get back to the ready state on the same connection.
+    Future.delayed(const Duration(milliseconds: 500), () {
+      if (_isConnected && !_isDisposed) {
+        _sendTaskStart();
+      }
+    });
+  }
+
+  /// Resets the connection state after the socket failed or closed.
+  ///
+  /// Fails any pending connect and speak completers. When the socket had been
+  /// connected and the engine is neither disposed nor cancelled, a reconnect
+  /// is attempted 500 ms later; its failure is only logged.
+  void _handleDisconnect() {
+    final hadConnection = _isConnected;
+    _isConnected = false;
+    _isTaskReady = false;
+    _keepAliveTimer?.cancel();
+
+    final connecting = _connectCompleter;
+    if (connecting != null && !connecting.isCompleted) {
+      connecting.completeError('连接断开');
+    }
+
+    final speaking = _speakCompleter;
+    if (speaking != null && !speaking.isCompleted) {
+      speaking.completeError(const TtsEngineException('连接断开'));
+    }
+
+    // Only a connection that was actually up is re-established here.
+    if (!hadConnection || _isDisposed || _isCancelled) return;
+
+    Future.delayed(const Duration(milliseconds: 500), () async {
+      if (_isDisposed || _isCancelled) return;
+
+      debugPrint('🔄 TTS 自动重连...');
+      try {
+        await _ensureConnection();
+        debugPrint('⚡ TTS 重连成功');
+      } catch (e) {
+        debugPrint('⚠️ TTS 重连失败: $e (下次 speak 时会重试)');
+      }
+    });
+  }
+
+  /// Starts a 30-second periodic check that reconnects when the socket is
+  /// found disconnected. Any previously running check is replaced.
+  void _startKeepAlive() {
+    _keepAliveTimer?.cancel();
+    _keepAliveTimer = Timer.periodic(const Duration(seconds: 30), (_) async {
+      final needsReconnect = !_isConnected && !_isDisposed;
+      if (!needsReconnect) return;
+
+      debugPrint('🔄 TTS 重连中...');
+      try {
+        await _ensureConnection();
+      } catch (e) {
+        debugPrint('❌ TTS 重连失败: $e');
+      }
+    });
+  }
+
+  /// Feed callback registered with the PCM player.
+  ///
+  /// Fires the completion handler once: when the final frame has been
+  /// received, [remainingFrames] is zero and playback is still marked active.
+  void _onFeedCallback(int remainingFrames) {
+    final drained = _isFinalReceived && remainingFrames == 0 && _isPlaying;
+    if (!drained) return;
+
+    _isPlaying = false;
+    debugPrint('🔊 PCM 播放完成');
+    _onComplete?.call();
+  }
+
+  /// Synthesizes [text] and streams the resulting audio to the PCM player.
+  ///
+  /// Returns without doing anything when the engine is disposed or [text] is
+  /// blank. The returned future completes when the server marks the task as
+  /// final, which may be before playback has drained. [voiceConfig] is
+  /// accepted for interface compatibility and is not read by this method.
+  ///
+  /// Throws a [TtsEngineException] when MiniMax is not enabled. Any error
+  /// raised while connecting or synthesizing is passed to the error handler
+  /// and then rethrown.
+  @override
+  Future<void> speak(
+    String text, {
+    AiVoiceConfig? voiceConfig,
+  }) async {
+    if (!_isInitialized) await init();
+    if (_isDisposed) return;
+
+    final trimmed = text.trim();
+    if (trimmed.isEmpty) return;
+
+    if (!MinimaxConfig.isEnabled) {
+      throw const TtsEngineException('MiniMax API key is missing');
+    }
+
+    // Reset per-request state and statistics.
+    _isCancelled = false;
+    _isFinalReceived = false;
+    _chunkCount = 0;
+    _totalBytes = 0;
+    _stopwatch.reset();
+    _stopwatch.start();
+
+    debugPrint('');
+    debugPrint('═══════════════════════════════════════');
+    debugPrint('🎤 TTS: "$trimmed"');
+    debugPrint('═══════════════════════════════════════');
+
+    final completer = Completer<void>();
+    // The socket can drop, or the task can fail, while this method is still
+    // waiting for the connection — before anything listens to this future.
+    // Marking it as observed keeps that from surfacing as an unhandled
+    // asynchronous error; the await further down still rethrows it.
+    completer.future.ignore();
+    _speakCompleter = completer;
+
+    Timer? speakTimeout;
+    try {
+      // Make sure the connection is ready.
+      if (!_isConnected || !_isTaskReady) {
+        debugPrint('⏳ 等待连接就绪...');
+        await _ensureConnection();
+      }
+
+      // Send the text now, or queue it until `task_started` arrives.
+      if (_isTaskReady) {
+        _sendText(trimmed);
+      } else {
+        _pendingText = trimmed;
+      }
+      _onStart?.call();
+
+      // Fail the task when no final frame arrives within 30 seconds.
+      _timeoutTimer?.cancel();
+      speakTimeout = Timer(const Duration(seconds: 30), () {
+        if (_speakCompleter != null && !_speakCompleter!.isCompleted) {
+          _handleTaskError('TTS 超时');
+        }
+      });
+      _timeoutTimer = speakTimeout;
+
+      // Wait for the task to finish.
+      await completer.future;
+    } catch (e) {
+      debugPrint('❌ TTS 异常: $e');
+      _onError?.call(e);
+      rethrow;
+    } finally {
+      // Cancel only this call's timer, on success and failure alike.
+      speakTimeout?.cancel();
+    }
+  }
+
+  /// Converts little-endian 16-bit PCM bytes to signed samples and feeds them
+  /// to the player. A trailing odd byte is dropped. Skipped entirely once the
+  /// engine has been cancelled or disposed.
+  void _feedAudioData(Uint8List pcmBytes) {
+    if (_isCancelled || _isDisposed) return;
+
+    final samples = List<int>.generate(pcmBytes.length ~/ 2, (index) {
+      final low = pcmBytes[index * 2];
+      final high = pcmBytes[index * 2 + 1];
+      // Reinterpret the unsigned 16-bit word as a signed value.
+      return ((high << 8) | low).toSigned(16);
+    });
+
+    FlutterPcmSound.feed(PcmArrayInt16.fromList(samples));
+  }
+
+  /// Decodes a hexadecimal string into bytes.
+  ///
+  /// Surrounding whitespace is ignored and an empty string yields an empty
+  /// list. Throws a [FormatException] when the number of digits is odd or a
+  /// digit pair is not valid hexadecimal.
+  Uint8List _hexToBytes(String hex) {
+    final digits = hex.trim();
+    if (digits.isEmpty) return Uint8List(0);
+    if (digits.length.isOdd) {
+      throw const FormatException('Invalid hex string length');
+    }
+
+    final byteCount = digits.length ~/ 2;
+    final decoded = Uint8List(byteCount);
+    for (var index = 0; index < byteCount; index++) {
+      final start = index * 2;
+      decoded[index] = int.parse(digits.substring(start, start + 2), radix: 16);
+    }
+    return decoded;
+  }
+
+  /// Cancels the socket subscription, closes the channel and clears the
+  /// connection flags. Neither the cancel nor the close is awaited.
+  Future<void> _cleanupConnection() async {
+    final subscription = _wsSub;
+    final channel = _channel;
+
+    _wsSub = null;
+    _channel = null;
+    _isConnected = false;
+    _isTaskReady = false;
+
+    subscription?.cancel();
+    channel?.sink.close();
+  }
+
+  /// Stops playback and abandons the current request.
+  ///
+  /// The PCM player is released and set up again so buffered audio is
+  /// discarded. The WebSocket is kept open for reuse: when a task is active,
+  /// `task_finish` is sent and a new task is requested 100 ms later.
+  ///
+  /// A [speak] call that is still waiting completes normally. This method
+  /// cancels the speak timeout, so without that nothing would be guaranteed
+  /// to complete the call; it could wait indefinitely or fail later with a
+  /// disconnect error for a request the caller had already stopped.
+  @override
+  Future<void> stop() async {
+    if (_isDisposed) return;
+    _isCancelled = true;
+    _isPlaying = false;
+    _isFinalReceived = false;
+    _pendingText = null;
+    _timeoutTimer?.cancel();
+
+    // Release the caller of speak(); stopping is not an error.
+    final speaking = _speakCompleter;
+    if (speaking != null && !speaking.isCompleted) {
+      speaking.complete();
+    }
+
+    // Tear the PCM player down to drop whatever is still buffered.
+    await FlutterPcmSound.release();
+
+    // Set it up again so the next request can play.
+    if (_isInitialized) {
+      await FlutterPcmSound.setup(
+        sampleRate: MinimaxConfig.sampleRate,
+        channelCount: MinimaxConfig.channels,
+      );
+      FlutterPcmSound.setFeedCallback(_onFeedCallback);
+    }
+
+    // Keep the WebSocket open; just end the task that is in progress.
+    final channel = _channel;
+    if (_isTaskReady && channel != null) {
+      channel.sink.add(jsonEncode({'event': 'task_finish'}));
+      _isTaskReady = false;
+      // Request a fresh task for the next speak().
+      Future.delayed(const Duration(milliseconds: 100), () {
+        if (_isConnected && !_isDisposed) {
+          _sendTaskStart();
+        }
+      });
+    }
+
+    debugPrint('🛑 TTS 已停止');
+  }
+
+  /// Registers [handler], invoked from the feed callback once playback of a
+  /// finished task has drained.
+  @override
+  void setCompletionHandler(VoidCallback handler) => _onComplete = handler;
+
+  /// Registers [handler], invoked with the error when a request fails.
+  @override
+  void setErrorHandler(Function(dynamic) handler) => _onError = handler;
+
+  /// Registers [handler], invoked by [speak] after the text has been sent or
+  /// queued.
+  @override
+  void setStartHandler(VoidCallback handler) => _onStart = handler;
+
+  /// Marks the engine as disposed, cancels its timers, closes the socket and
+  /// releases the PCM player. The asynchronous cleanup is not awaited.
+  @override
+  void dispose() {
+    _isDisposed = true;
+    for (final timer in [_keepAliveTimer, _timeoutTimer]) {
+      timer?.cancel();
+    }
+    _cleanupConnection();
+    FlutterPcmSound.release();
+    debugPrint('🗑️ MiniMaxTtsEngine disposed');
+  }
+}
--- a/wei_ai_app/lib/core/services/tts/system_tts_engine.dart
+++ b/wei_ai_app/lib/core/services/tts/system_tts_engine.dart
@@ -0,0 +1,88 @@
+import 'package:flutter/foundation.dart';
+import 'package:flutter_tts/flutter_tts.dart';
+import '../../models/character_model.dart';
+import 'tts_engine.dart';
+
+/// [TtsEngine] implementation that delegates to the `flutter_tts` plugin.
+class SystemTtsEngine implements TtsEngine {
+  final FlutterTts _flutterTts = FlutterTts();
+  bool _isInitialized = false;
+
+  /// Configures the synthesizer: the iOS audio session on iOS, then language,
+  /// pitch and speech rate.
+  ///
+  /// A failure is logged and leaves the engine uninitialized; it is not
+  /// rethrown.
+  @override
+  Future<void> init() async {
+    if (_isInitialized) return;
+
+    try {
+      final onIos = !kIsWeb && defaultTargetPlatform == TargetPlatform.iOS;
+      if (onIos) {
+        await _flutterTts.setSharedInstance(true);
+        await _flutterTts.setIosAudioCategory(
+          IosTextToSpeechAudioCategory.playAndRecord,
+          [
+            IosTextToSpeechAudioCategoryOptions.allowBluetooth,
+            IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
+            IosTextToSpeechAudioCategoryOptions.mixWithOthers,
+            IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
+          ],
+          IosTextToSpeechAudioMode.defaultMode,
+        );
+      }
+
+      await _flutterTts.setLanguage("zh-CN");
+      await _flutterTts.setPitch(1.0);
+      await _flutterTts.setSpeechRate(0.5);
+
+      _isInitialized = true;
+      debugPrint('✅ SystemTtsEngine initialized');
+    } catch (e) {
+      debugPrint('❌ SystemTtsEngine init error: $e');
+    }
+  }
+
+  /// No-op: this engine has no connection to establish in advance.
+  @override
+  Future<void> preconnect() async {}
+
+  /// Speaks [text], ignoring blank input.
+  ///
+  /// When [voiceConfig] is given, its speed is clamped to 0.2–2.0 and its
+  /// pitch to 0.5–2.0 before being applied to the synthesizer.
+  @override
+  Future<void> speak(
+    String text, {
+    AiVoiceConfig? voiceConfig,
+  }) async {
+    if (!_isInitialized) {
+      await init();
+    }
+    if (text.trim().isEmpty) return;
+
+    final config = voiceConfig;
+    if (config != null) {
+      await _flutterTts.setSpeechRate(config.speed.clamp(0.2, 2.0));
+      await _flutterTts.setPitch(config.pitch.clamp(0.5, 2.0));
+    }
+
+    debugPrint('🗣️ SystemTtsEngine Speaking: $text');
+    await _flutterTts.speak(text);
+  }
+
+  /// Stops the current utterance.
+  @override
+  Future<void> stop() async {
+    await _flutterTts.stop();
+  }
+
+  /// Forwards [handler] to the plugin's completion callback.
+  @override
+  void setCompletionHandler(VoidCallback handler) =>
+      _flutterTts.setCompletionHandler(handler);
+
+  /// Forwards [handler] to the plugin's start callback.
+  @override
+  void setStartHandler(VoidCallback handler) =>
+      _flutterTts.setStartHandler(handler);
+
+  /// Forwards [handler] to the plugin's error callback.
+  @override
+  void setErrorHandler(Function(dynamic) handler) =>
+      _flutterTts.setErrorHandler(handler);
+
+  /// Stops any ongoing speech; the result of the stop call is not awaited.
+  @override
+  void dispose() {
+    _flutterTts.stop();
+  }
+}
--- a/wei_ai_app/lib/core/services/tts/tts_engine.dart
+++ b/wei_ai_app/lib/core/services/tts/tts_engine.dart
@@ -0,0 +1,37 @@
+import 'package:flutter/foundation.dart';
+import '../../models/character_model.dart';
+
+/// Common interface of the text-to-speech back ends.
+abstract class TtsEngine {
+  /// Prepares the engine for use.
+  Future<void> init();
+
+  /// Pre-connects ahead of the first request (optional; does nothing by
+  /// default).
+  Future<void> preconnect() async {}
+
+  /// Speaks [text], optionally using the settings in [voiceConfig].
+  Future<void> speak(
+    String text, {
+    AiVoiceConfig? voiceConfig,
+  });
+
+  /// Stops the current speech output.
+  Future<void> stop();
+
+  /// Registers the callback for the start of speech.
+  void setStartHandler(VoidCallback handler);
+
+  /// Registers the callback for the end of speech.
+  void setCompletionHandler(VoidCallback handler);
+
+  /// Registers the callback that receives errors.
+  void setErrorHandler(Function(dynamic) handler);
+
+  /// Releases the resources held by the engine.
+  void dispose();
+}
+
+/// Exception type used by [TtsEngine] implementations to report failures.
+class TtsEngineException implements Exception {
+  /// Description of the failure.
+  final String message;
+
+  /// Whether this exception represents a cancellation; false by default.
+  final bool isCancelled;
+
+  const TtsEngineException(this.message, {this.isCancelled = false});
+
+  /// Creates an exception with the message `cancelled` and [isCancelled] set.
+  factory TtsEngineException.cancelled() =>
+      const TtsEngineException('cancelled', isCancelled: true);
+
+  @override
+  String toString() => 'TtsEngineException($message)';
+}
--- a/wei_ai_app/lib/core/services/tts/ws_client.dart
+++ b/wei_ai_app/lib/core/services/tts/ws_client.dart
@@ -0,0 +1,6 @@
+import 'package:web_socket_channel/web_socket_channel.dart';
+import 'ws_client_io.dart' if (dart.library.html) 'ws_client_web.dart';
+
+/// Opens a WebSocket to [uri] through the platform-specific implementation
+/// selected by the conditional import, passing [headers] along.
+WebSocketChannel connectTtsSocket(Uri uri, Map<String, String> headers) =>
+    createWebSocketChannel(uri, headers);
--- a/wei_ai_app/lib/core/services/tts/ws_client_io.dart
+++ b/wei_ai_app/lib/core/services/tts/ws_client_io.dart
@@ -0,0 +1,9 @@
+import 'package:web_socket_channel/io.dart';
+import 'package:web_socket_channel/web_socket_channel.dart';
+
+/// Connects to [uri] with an [IOWebSocketChannel], sending [headers] with the
+/// connection request.
+WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) =>
+    IOWebSocketChannel.connect(uri, headers: headers);
--- a/wei_ai_app/lib/core/services/tts/ws_client_web.dart
+++ b/wei_ai_app/lib/core/services/tts/ws_client_web.dart
@@ -0,0 +1,6 @@
+import 'package:web_socket_channel/web_socket_channel.dart';
+
+/// Connects to [uri] from the browser.
+///
+/// WebSocket in browser doesn't support custom headers, so [headers] is not
+/// used here.
+WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) =>
+    WebSocketChannel.connect(uri);
--- a/wei_ai_app/lib/core/services/tts_service.dart
+++ b/wei_ai_app/lib/core/services/tts_service.dart
@@ -1,65 +1,115 @@
-import 'package:flutter_tts/flutter_tts.dart';
 import 'package:flutter/foundation.dart';
+import '../config/minimax_config.dart';
+import '../models/character_model.dart';
+import 'tts/minimax_tts_engine.dart';
+import 'tts/system_tts_engine.dart';
+import 'tts/tts_engine.dart';

+/// Singleton facade over the TTS engines.
+///
+/// Holds a MiniMax engine and a system engine, selects the active one with
+/// [_selectEngine] and keeps the registered callbacks so they can be applied
+/// to whichever engine becomes active.
 class TTSService {
   static final TTSService _instance = TTSService._internal();
   factory TTSService() => _instance;
   TTSService._internal();
 
-  final FlutterTts _flutterTts = FlutterTts();
+  final TtsEngine _minimaxEngine = MiniMaxTtsEngine();
+  final TtsEngine _systemEngine = SystemTtsEngine();
+  TtsEngine? _activeEngine;
+
   bool _isInitialized = false;
 
+  // Callbacks remembered so that they can be re-applied by _applyHandlers.
+  VoidCallback? _onStart;
+  VoidCallback? _onComplete;
+  Function(dynamic)? _onError;
+
+  /// Selects the active engine, applies the stored callbacks and initializes
+  /// it. A failure is logged and not rethrown.
   Future<void> init() async {
     if (_isInitialized) return;
 
     try {
-      if (!kIsWeb) {
-        if (defaultTargetPlatform == TargetPlatform.iOS) {
-          await _flutterTts.setSharedInstance(true);
-          await _flutterTts.setIosAudioCategory(
-              IosTextToSpeechAudioCategory.playAndRecord,
-              [
-                IosTextToSpeechAudioCategoryOptions.allowBluetooth,
-                IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
-                IosTextToSpeechAudioCategoryOptions.mixWithOthers,
-                IosTextToSpeechAudioCategoryOptions.defaultToSpeaker
-              ],
-              IosTextToSpeechAudioMode.defaultMode);
-        }
-      }
-
-      await _flutterTts.setLanguage("zh-CN"); // Default to Chinese
-      await _flutterTts.setPitch(1.0);
-      await _flutterTts.setSpeechRate(0.5); // Normal rate
-      
+      _activeEngine = _selectEngine();
+      _applyHandlers(_activeEngine);
+      await _activeEngine!.init();
       _isInitialized = true;
-      debugPrint('✅ TTSService initialized');
+      debugPrint('✅ TTSService initialized (${_activeEngine.runtimeType})');
     } catch (e) {
       debugPrint('❌ TTSService init error: $e');
     }
   }
 
-  Future<void> speak(String text) async {
+  /// Speaks [text] with the active engine; empty text is ignored.
+  ///
+  /// When the active engine is the MiniMax engine and it throws, the request
+  /// is retried through [_fallbackSpeak].
+  Future<void> speak(String text, {AiVoiceConfig? voiceConfig}) async {
     if (!_isInitialized) await init();
     if (text.isEmpty) return;
     
+    if (_activeEngine == null) {
+      _activeEngine = _selectEngine();
+      _applyHandlers(_activeEngine);
+      await _activeEngine!.init();
+    }
+
     debugPrint('🗣️ TTS Speaking: $text');
-    await _flutterTts.speak(text);
+    if (_activeEngine is MiniMaxTtsEngine) {
+      try {
+        await _activeEngine!.speak(text, voiceConfig: voiceConfig);
+        return;
+      } catch (e) {
+        debugPrint('⚠️ MiniMax TTS failed, falling back to system TTS: $e');
+        await _fallbackSpeak(text, voiceConfig: voiceConfig);
+        return;
+      }
+    }
+
+    await _activeEngine!.speak(text, voiceConfig: voiceConfig);
+  }
+
+  /// Pre-connects the TTS service (call when the voice page opens).
+  Future<void> preconnect() async {
+    if (!_isInitialized) await init();
+    await _activeEngine?.preconnect();
   }
 
+  /// Stops the active engine, if there is one.
   Future<void> stop() async {
-    await _flutterTts.stop();
+    await _activeEngine?.stop();
   }
 
+  /// Stores [handler] and registers it on both engines.
   void setCompletionHandler(VoidCallback handler) {
-    _flutterTts.setCompletionHandler(handler);
+    _onComplete = handler;
+    _minimaxEngine.setCompletionHandler(handler);
+    _systemEngine.setCompletionHandler(handler);
   }
   
+  /// Stores [handler] and registers it on both engines.
   void setStartHandler(VoidCallback handler) {
-    _flutterTts.setStartHandler(handler);
+    _onStart = handler;
+    _minimaxEngine.setStartHandler(handler);
+    _systemEngine.setStartHandler(handler);
   }
 
+  /// Stores [handler] and registers it on both engines.
   void setErrorHandler(Function(dynamic) handler) {
-    _flutterTts.setErrorHandler(handler);
+    _onError = handler;
+    _minimaxEngine.setErrorHandler(handler);
+    _systemEngine.setErrorHandler(handler);
+  }
+
+  /// Returns the MiniMax engine when it is enabled and the app is not running
+  /// on the web; otherwise the system engine.
+  TtsEngine _selectEngine() {
+    if (MinimaxConfig.isEnabled && !kIsWeb) {
+      return _minimaxEngine;
+    }
+    return _systemEngine;
+  }
+
+  /// Makes the system engine the active engine and speaks [text] with it.
+  ///
+  /// The switch is not undone afterwards, so later [speak] calls also use the
+  /// system engine. A failure is logged and passed to the error callback.
+  Future<void> _fallbackSpeak(String text, {AiVoiceConfig? voiceConfig}) async {
+    try {
+      _activeEngine = _systemEngine;
+      _applyHandlers(_activeEngine);
+      await _activeEngine!.init();
+      await _activeEngine!.speak(text, voiceConfig: voiceConfig);
+    } catch (e) {
+      debugPrint('❌ System TTS failed: $e');
+      if (_onError != null) _onError!(e);
+    }
+  }
+
+  /// Registers every stored callback on [engine]; does nothing for null.
+  void _applyHandlers(TtsEngine? engine) {
+    if (engine == null) return;
+    if (_onStart != null) engine.setStartHandler(_onStart!);
+    if (_onComplete != null) engine.setCompletionHandler(_onComplete!);
+    if (_onError != null) engine.setErrorHandler(_onError!);
   }
  }
--- a/wei_ai_app/lib/core/services/vad_stt_service.dart
+++ b/wei_ai_app/lib/core/services/vad_stt_service.dart
@@ -0,0 +1,258 @@
+import 'dart:convert';
+import 'package:flutter/foundation.dart';
+import 'package:http/http.dart' as http;
+import 'package:vad/vad.dart';
+
+/// VAD (Voice Activity Detection) + Google Speech-to-Text 服务
+/// 
+/// 使用 Silero VAD 模型检测语音开始/结束
+/// 然后将录制的音频发送给 Google Cloud Speech-to-Text API
+class VadSttService {
+  static final VadSttService _instance = VadSttService._internal();
+  factory VadSttService() => _instance;
+  VadSttService._internal();
+
+  VadHandler? _vadHandler;
+  bool _isInitialized = false;
+  bool _isListening = false;
+
+  // Google Cloud STT configuration.
+  //
+  // The API key is injected at build time, for example
+  // `--dart-define=GOOGLE_STT_API_KEY=<key>`, so that no credential is stored
+  // in source control. Without it the placeholder value is used, which
+  // _transcribeWithGoogle treats as "not configured" and answers with a
+  // simulated result.
+  //
+  // NOTE(review): the key that was previously committed on this line should
+  // be revoked, since it remains visible in the repository history.
+  static const String _googleApiKey = String.fromEnvironment(
+    'GOOGLE_STT_API_KEY',
+    defaultValue: 'YOUR_GOOGLE_API_KEY',
+  );
+  static const String _googleSttUrl = 'https://speech.googleapis.com/v1/speech:recognize';
+
+  // 回调
+  Function(String text)? _onResult;
+  Function(String text)? _onFinalResult;
+  VoidCallback? _onSpeechStart;
+  VoidCallback? _onSpeechEnd;
+
+  // 统计
+  int _speechSegmentCount = 0;
+  final Stopwatch _speechStopwatch = Stopwatch();
+
+  bool get isListening => _isListening;
+
+  /// Creates the VAD handler and attaches its event listeners.
+  ///
+  /// Returns true when the service is ready (including when it already was)
+  /// and false when setup threw; the error is logged.
+  Future<bool> init() async {
+    if (!_isInitialized) {
+      try {
+        _vadHandler = VadHandler.create(isDebug: true);
+
+        // Subscribe to the handler's event streams.
+        _setupEventHandlers();
+
+        _isInitialized = true;
+        debugPrint('✅ VadSttService initialized (Silero VAD + Google STT)');
+      } catch (e) {
+        debugPrint('❌ VadSttService init failed: $e');
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Subscribes to the VAD handler's event streams.
+  ///
+  /// Speech segments are timed with [_speechStopwatch]; a finished segment of
+  /// at least 500 ms is passed to [_transcribeWithGoogle]. Does nothing when
+  /// no handler has been created.
+  void _setupEventHandlers() {
+    if (_vadHandler == null) return;
+
+    // Sound detected (may still turn out to be a false trigger).
+    _vadHandler!.onSpeechStart.listen((_) {
+      debugPrint('🎤 [VAD] 检测到声音...');
+      _speechStopwatch.reset();
+      _speechStopwatch.start();
+    });
+
+    // Confirmed as real speech (not a noise-induced false trigger).
+    _vadHandler!.onRealSpeechStart.listen((_) {
+      _speechSegmentCount++;
+      debugPrint('');
+      debugPrint('═══════════════════════════════════════');
+      debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 开始');
+      debugPrint('═══════════════════════════════════════');
+      
+      if (_onSpeechStart != null) _onSpeechStart!();
+    });
+
+    // Speech ended; the event carries the recorded samples.
+    _vadHandler!.onSpeechEnd.listen((List<double> samples) async {
+      _speechStopwatch.stop();
+      final durationMs = _speechStopwatch.elapsedMilliseconds;
+      
+      final sampleCount = samples.length;
+      final estimatedBytes = sampleCount * 2; // 16-bit = 2 bytes per sample
+      
+      debugPrint('');
+      debugPrint('═══════════════════════════════════════');
+      debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 结束');
+      debugPrint('   时长: ${durationMs}ms');
+      debugPrint('   采样点: $sampleCount');
+      debugPrint('   数据大小: ~${(estimatedBytes / 1024).toStringAsFixed(1)} KB');
+      debugPrint('═══════════════════════════════════════');
+      
+      if (_onSpeechEnd != null) _onSpeechEnd!();
+      
+      // Ignore segments that are too short.
+      if (durationMs < 500) {
+        debugPrint('⚠️ 语音太短，忽略');
+        return;
+      }
+      
+      // Send the segment to Google STT.
+      await _transcribeWithGoogle(samples, durationMs);
+    });
+
+    // False trigger (sound was detected but it was not valid speech).
+    _vadHandler!.onVADMisfire.listen((_) {
+      _speechStopwatch.stop();
+      debugPrint('⚠️ [VAD] 误触发（不是有效语音），忽略');
+    });
+
+    // Errors reported by the handler are only logged.
+    _vadHandler!.onError.listen((String message) {
+      debugPrint('❌ [VAD] 错误: $message');
+    });
+  }
+
+  /// Converts floating-point samples to little-endian 16-bit PCM bytes.
+  ///
+  /// Each sample is scaled by 32767 and clamped to the range -32768..32767
+  /// before being written low byte first.
+  Uint8List _convertSamplesToBytes(List<double> samples) {
+    final pcm = Uint8List(samples.length * 2);
+    var offset = 0;
+    for (final value in samples) {
+      final scaled = (value * 32767).clamp(-32768, 32767).toInt();
+      pcm[offset++] = scaled & 0xFF; // low byte
+      pcm[offset++] = (scaled >> 8) & 0xFF; // high byte
+    }
+    return pcm;
+  }
+
+  /// Sends [samples] to the Google Speech-to-Text API and reports the
+  /// transcript through the final-result callback.
+  ///
+  /// When the API key is still the placeholder, no request is made and a
+  /// simulated result containing [durationMs] is reported instead.
+  ///
+  /// The response lists results for consecutive portions of the audio, so the
+  /// top alternative of every result is concatenated; reading only the first
+  /// result would cut longer utterances short. Entries without a usable
+  /// transcript are skipped. HTTP and parsing failures are logged and not
+  /// rethrown.
+  Future<void> _transcribeWithGoogle(List<double> samples, int durationMs) async {
+    if (_googleApiKey == 'YOUR_GOOGLE_API_KEY') {
+      debugPrint('⚠️ [Google STT] 请先配置 API Key!');
+      debugPrint('📝 [模拟结果] 语音时长 ${durationMs}ms, 采样点 ${samples.length}');
+
+      // Report a simulated result.
+      _onFinalResult?.call('[模拟: ${durationMs}ms 语音]');
+      return;
+    }
+
+    debugPrint('📤 [Google STT] 发送音频数据...');
+
+    try {
+      // Encode the samples as base64 PCM.
+      final audioBytes = _convertSamplesToBytes(samples);
+      final audioBase64 = base64Encode(audioBytes);
+
+      debugPrint('   音频大小: ${(audioBytes.length / 1024).toStringAsFixed(1)} KB');
+
+      final requestBody = {
+        'config': {
+          'encoding': 'LINEAR16',
+          'sampleRateHertz': 16000, // default VAD sample rate
+          'languageCode': 'zh-CN',
+          'enableAutomaticPunctuation': true,
+          // No model is specified, so the service default is used.
+        },
+        'audio': {
+          'content': audioBase64,
+        },
+      };
+
+      final stopwatch = Stopwatch()..start();
+
+      final response = await http.post(
+        Uri.parse('$_googleSttUrl?key=$_googleApiKey'),
+        headers: {'Content-Type': 'application/json'},
+        body: jsonEncode(requestBody),
+      );
+
+      stopwatch.stop();
+      debugPrint('   响应时间: ${stopwatch.elapsedMilliseconds}ms');
+
+      if (response.statusCode != 200) {
+        debugPrint('❌ [Google STT] 请求失败: ${response.statusCode}');
+        debugPrint('   ${response.body}');
+        return;
+      }
+
+      final decoded = jsonDecode(response.body);
+      final results = decoded is Map ? decoded['results'] : null;
+
+      // Collect the top alternative of every result, in order.
+      final parts = <String>[];
+      num? confidence;
+      if (results is List) {
+        for (final entry in results) {
+          final alternatives = entry is Map ? entry['alternatives'] : null;
+          if (alternatives is! List || alternatives.isEmpty) continue;
+
+          final best = alternatives.first;
+          if (best is! Map) continue;
+
+          final text = best['transcript'];
+          if (text is! String || text.isEmpty) continue;
+
+          parts.add(text);
+          // Log the confidence of the first usable result.
+          final value = best['confidence'];
+          if (confidence == null && value is num) confidence = value;
+        }
+      }
+
+      final transcript = parts.join();
+      if (transcript.isEmpty) {
+        debugPrint('⚠️ [Google STT] 没有识别到文字');
+        return;
+      }
+
+      debugPrint('');
+      debugPrint('═══════════════════════════════════════');
+      debugPrint('📝 [Google STT] 识别结果:');
+      debugPrint('   "$transcript"');
+      debugPrint('   置信度: ${((confidence ?? 0.0) * 100).toStringAsFixed(1)}%');
+      debugPrint('═══════════════════════════════════════');
+
+      _onFinalResult?.call(transcript);
+    } catch (e) {
+      debugPrint('❌ [Google STT] 错误: $e');
+    }
+  }
+
+  /// Starts voice-activity detection and stores the given callbacks.
+  ///
+  /// Initializes the service first when needed and gives up when that fails.
+  /// A call made while already listening is logged and otherwise ignored,
+  /// leaving the previously stored callbacks in place. A failure to start is
+  /// logged and not rethrown.
+  Future<void> startListening({
+    Function(String text)? onResult,
+    Function(String text)? onFinalResult,
+    VoidCallback? onSpeechStart,
+    VoidCallback? onSpeechEnd,
+  }) async {
+    if (!_isInitialized && !await init()) return;
+
+    if (_isListening) {
+      debugPrint('⚠️ [VAD] 已经在监听中');
+      return;
+    }
+
+    // Remember the callbacks and restart the segment counter.
+    _speechSegmentCount = 0;
+    _onSpeechEnd = onSpeechEnd;
+    _onSpeechStart = onSpeechStart;
+    _onFinalResult = onFinalResult;
+    _onResult = onResult;
+
+    debugPrint('');
+    debugPrint('🎤 [VAD] 开始监听...');
+
+    try {
+      await _vadHandler?.startListening();
+      _isListening = true;
+      debugPrint('✅ [VAD] 监听已启动，等待语音输入...');
+    } catch (e) {
+      debugPrint('❌ [VAD] 启动监听失败: $e');
+    }
+  }
+
+  /// Stops voice-activity detection when it is running.
+  ///
+  /// The listening flag is cleared only after the handler stopped without
+  /// throwing; a failure is logged and not rethrown.
+  Future<void> stopListening() async {
+    if (_isListening) {
+      debugPrint('🛑 [VAD] 停止监听');
+
+      try {
+        await _vadHandler?.stopListening();
+        _isListening = false;
+      } catch (e) {
+        debugPrint('❌ [VAD] 停止监听失败: $e');
+      }
+    }
+  }
+
+  /// Disposes the VAD handler and resets the service to its uninitialized,
+  /// non-listening state.
+  void dispose() {
+    final handler = _vadHandler;
+    _vadHandler = null;
+    handler?.dispose();
+
+    _isListening = false;
+    _isInitialized = false;
+    debugPrint('🗑️ VadSttService disposed');
+  }
+}