feat: v1.0.0

This commit is contained in:
liqupan
2026-02-09 21:54:32 +08:00
parent 8f19377517
commit 68d25581e8
49 changed files with 1522 additions and 528 deletions

View File

@@ -1,5 +1,5 @@
# Uncomment this line to define a global platform for your project
# platform :ios, '13.0'
# VAD plugin requires iOS 15.1+
platform :ios, '15.1'
# CocoaPods analytics sends network stats synchronously affecting flutter build latency.
ENV['COCOAPODS_DISABLE_STATS'] = 'true'

View File

@@ -1,17 +1,31 @@
PODS:
- app_links (6.4.1):
- Flutter
- audio_session (0.0.1):
- Flutter
- CwlCatchException (2.2.1):
- CwlCatchExceptionSupport (~> 2.2.1)
- CwlCatchExceptionSupport (2.2.1)
- Flutter (1.0.0)
- flutter_pcm_sound (0.0.1):
- Flutter
- flutter_tts (0.0.1):
- Flutter
- just_audio (0.0.1):
- Flutter
- FlutterMacOS
- onnxruntime-c (1.22.0)
- onnxruntime-objc (1.22.0):
- onnxruntime-objc/Core (= 1.22.0)
- onnxruntime-objc/Core (1.22.0):
- onnxruntime-c (= 1.22.0)
- path_provider_foundation (0.0.1):
- Flutter
- FlutterMacOS
- permission_handler_apple (9.3.0):
- Flutter
- record_ios (1.2.0):
- Flutter
- shared_preferences_foundation (0.0.1):
- Flutter
- FlutterMacOS
@@ -21,52 +35,79 @@ PODS:
- FlutterMacOS
- url_launcher_ios (0.0.1):
- Flutter
- vad (0.0.6):
- Flutter
- onnxruntime-objc (= 1.22.0)
DEPENDENCIES:
- app_links (from `.symlinks/plugins/app_links/ios`)
- audio_session (from `.symlinks/plugins/audio_session/ios`)
- Flutter (from `Flutter`)
- flutter_pcm_sound (from `.symlinks/plugins/flutter_pcm_sound/ios`)
- flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
- just_audio (from `.symlinks/plugins/just_audio/darwin`)
- path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
- permission_handler_apple (from `.symlinks/plugins/permission_handler_apple/ios`)
- record_ios (from `.symlinks/plugins/record_ios/ios`)
- shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
- speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
- url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
- vad (from `.symlinks/plugins/vad/ios`)
SPEC REPOS:
trunk:
- CwlCatchException
- CwlCatchExceptionSupport
- onnxruntime-c
- onnxruntime-objc
EXTERNAL SOURCES:
app_links:
:path: ".symlinks/plugins/app_links/ios"
audio_session:
:path: ".symlinks/plugins/audio_session/ios"
Flutter:
:path: Flutter
flutter_pcm_sound:
:path: ".symlinks/plugins/flutter_pcm_sound/ios"
flutter_tts:
:path: ".symlinks/plugins/flutter_tts/ios"
just_audio:
:path: ".symlinks/plugins/just_audio/darwin"
path_provider_foundation:
:path: ".symlinks/plugins/path_provider_foundation/darwin"
permission_handler_apple:
:path: ".symlinks/plugins/permission_handler_apple/ios"
record_ios:
:path: ".symlinks/plugins/record_ios/ios"
shared_preferences_foundation:
:path: ".symlinks/plugins/shared_preferences_foundation/darwin"
speech_to_text:
:path: ".symlinks/plugins/speech_to_text/darwin"
url_launcher_ios:
:path: ".symlinks/plugins/url_launcher_ios/ios"
vad:
:path: ".symlinks/plugins/vad/ios"
SPEC CHECKSUMS:
app_links: 3dbc685f76b1693c66a6d9dd1e9ab6f73d97dc0a
audio_session: 9bb7f6c970f21241b19f5a3658097ae459681ba0
CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
Flutter: cabc95a1d2626b1b06e7179b784ebcf0c0cde467
flutter_pcm_sound: e9c2f6ce580eefcab2af46763f0354484d5c4ac8
flutter_tts: 35ac3c7d42412733e795ea96ad2d7e05d0a75113
just_audio: 4e391f57b79cad2b0674030a00453ca5ce817eed
onnxruntime-c: 7f778680e96145956c0a31945f260321eed2611a
onnxruntime-objc: 83d28b87525bd971259a66e153ea32b5d023de19
path_provider_foundation: bb55f6dbba17d0dccd6737fe6f7f34fbd0376880
permission_handler_apple: 4ed2196e43d0651e8ff7ca3483a069d469701f2d
record_ios: 412daca2350b228e698fffcd08f1f94ceb1e3844
shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb
speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b
vad: 7934867589afe53567f492df66fb1615f2185822
PODFILE CHECKSUM: 3c63482e143d1b91d2d2560aee9fb04ecc74ac7e
PODFILE CHECKSUM: 8af221031d17e57937852c3979a7d2c40538cf89
COCOAPODS: 1.16.2

View File

@@ -45,6 +45,26 @@
<string>We need access to your microphone for voice chat with AI characters.</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>We need speech recognition to convert your voice to text.</string>
<key>NSAppTransportSecurity</key>
<dict>
<key>NSExceptionDomains</key>
<dict>
<key>localhost</key>
<dict>
<key>NSExceptionAllowsInsecureHTTPLoads</key>
<true/>
<key>NSIncludesSubdomains</key>
<true/>
</dict>
<key>127.0.0.1</key>
<dict>
<key>NSExceptionAllowsInsecureHTTPLoads</key>
<true/>
<key>NSIncludesSubdomains</key>
<true/>
</dict>
</dict>
</dict>
<key>CADisableMinimumFrameDurationOnPhone</key>
<true/>
<key>UIApplicationSupportsIndirectInputEvents</key>

View File

@@ -0,0 +1,26 @@
/// MiniMax TTS configuration.
///
/// SECURITY NOTE: connecting to MiniMax directly from the client exposes the
/// API key. Inject it at build time via
/// `--dart-define=MINIMAX_API_KEY=<key>` and rotate the key regularly; the
/// baked-in default below exists only for local debugging and must be
/// revoked/removed before any release build.
class MinimaxConfig {
  // Not instantiable: this class is a pure namespace for constants.
  MinimaxConfig._();

  /// WebSocket endpoint for MiniMax streaming text-to-audio (t2a_v2).
  static const String wsUrl = 'wss://api.minimax.io/ws/v1/t2a_v2';

  /// API key, read from a compile-time define so the debug value can be
  /// overridden without a code change (`--dart-define=MINIMAX_API_KEY=...`).
  static const String apiKey = String.fromEnvironment(
    'MINIMAX_API_KEY',
    defaultValue:
        'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJHcm91cE5hbWUiOiJ2YmlvZGJkcCIsIlVzZXJOYW1lIjoidHNldCIsIkFjY291bnQiOiIiLCJTdWJqZWN0SUQiOiIxOTkyOTAyNTAzMzg5MjA1NDY3IiwiUGhvbmUiOiIiLCJHcm91cElEIjoiMTk5MjkwMjUwMzM4MDgyMDk1NSIsIlBhZ2VOYW1lIjoiIiwiTWFpbCI6InZiaW9kYmRwQGdtYWlsLmNvbSIsIkNyZWF0ZVRpbWUiOiIyMDI1LTEyLTA2IDE1OjQzOjUxIiwiVG9rZW5UeXBlIjoxLCJpc3MiOiJtaW5pbWF4In0.hf1M4cPe27Sz_QeSyYODqM6yrN8aQ68nRwYB7iQ3uO5nu0NSN7qHQRVxAt2tVuoOf503SEx5F-PfYyC85OFJFhWNNhhDuFuxPIz97LVz1oQUlIejZ_BmCMj4iWwGXTUmEugGK1lzcsI6eJz8eRjQHsxOgJJmxPLXWHTPs1gDqtnckAgjOBRQJSadP58Xe9EdI6n-2_SL_ni3Tqm3LuWq9tUPJa5WgDMZX9IDK7XXyZy0i1GoSXmp8P1O1JmIecBVUoCzyYFwWW787BNdYiyEV3UrFjC_4onJ8Tzh-eGq84-rtxBR5FKO2MpNU_I0xI-W3YJxOEl_JPXXGgX5ASTKNw',
  );

  /// Default model.
  static const String model = 'speech-2.6-turbo';

  /// Default voice id (a MiniMax system voice).
  static const String defaultVoiceId = 'Chinese (Mandarin)_BashfulGirl';

  /// Default audio parameters (raw PCM, suited for streaming playback).
  static const int sampleRate = 32000;
  static const int channels = 1;
  static const String format = 'pcm';

  /// Whether MiniMax TTS can be used at all (an API key is present).
  static bool get isEnabled => apiKey.isNotEmpty;
}

View File

@@ -12,16 +12,31 @@ class STTService {
bool get isListening => _isListening;
// 回调
Function(String text)? _onResult;
Function(String text)? _onFinalResult;
Future<bool> init() async {
if (_isInitialized) return true;
try {
_isInitialized = await _speech.initialize(
onError: (error) => debugPrint('❌ STT Error: $error'),
onError: (error) {
debugPrint('⚠️ STT Error: ${error.errorMsg}');
// error_no_match 是常见的"没听到有效语音"错误
// 不应该中断整个流程
if (error.errorMsg == 'error_no_match') {
debugPrint(' (没有匹配到语音,忽略)');
}
},
onStatus: (status) {
debugPrint('🎤 STT Status: $status');
if (status == 'listening') _isListening = true;
if (status == 'notListening') _isListening = false;
if (status == 'listening') {
_isListening = true;
} else if (status == 'notListening' || status == 'done') {
_isListening = false;
}
},
);
debugPrint('✅ STT Initialized: $_isInitialized');
@@ -44,20 +59,28 @@ class STTService {
if (_isListening) await stop();
_onResult = onResult;
_onFinalResult = onFinalResult;
await _speech.listen(
onResult: (result) {
if (result.finalResult) {
onFinalResult(result.recognizedWords);
} else {
onResult(result.recognizedWords);
if (result.recognizedWords.isNotEmpty) {
if (result.finalResult) {
debugPrint('🎤 Final: "${result.recognizedWords}"');
_onFinalResult?.call(result.recognizedWords);
} else {
_onResult?.call(result.recognizedWords);
}
}
},
localeId: localeId,
listenFor: const Duration(seconds: 30),
pauseFor: const Duration(seconds: 3), // Wait 3s of silence to consider "done"
partialResults: true,
cancelOnError: true,
listenMode: ListenMode.dictation,
listenFor: const Duration(seconds: 60), // 最大监听时长
pauseFor: const Duration(milliseconds: 1500), // 1.5秒静音后视为说完
listenOptions: SpeechListenOptions(
partialResults: true,
cancelOnError: false, // 不要因错误取消
listenMode: ListenMode.dictation,
),
);
}

View File

@@ -0,0 +1,502 @@
import 'dart:async';
import 'dart:convert';
import 'package:flutter/foundation.dart';
import 'package:flutter_pcm_sound/flutter_pcm_sound.dart';
import 'package:web_socket_channel/web_socket_channel.dart';
import '../../config/minimax_config.dart';
import '../../models/character_model.dart';
import 'tts_engine.dart';
import 'ws_client.dart';
/// MiniMax TTS engine (streaming PCM variant with pre-connect optimization).
///
/// Optimizations:
/// - Pre-connect: establishes the WebSocket connection ahead of first use.
/// - Connection reuse: one connection serves multiple TTS requests.
/// - Auto-reconnect: reconnects automatically after a disconnect.
class MiniMaxTtsEngine implements TtsEngine {
  // WebSocket channel to the MiniMax t2a_v2 endpoint; null when disconnected.
  WebSocketChannel? _channel;
  StreamSubscription? _wsSub;
  Timer? _timeoutTimer;
  Timer? _keepAliveTimer;
  bool _isInitialized = false;
  bool _isDisposed = false;
  bool _isCancelled = false;
  bool _isPlaying = false;
  // Connection state.
  bool _isConnected = false;
  bool _isTaskReady = false; // true once the server acked task_start (task_started)
  Completer<void>? _connectCompleter;
  Completer<void>? _speakCompleter;
  // Text queued while the task handshake is still in flight.
  String? _pendingText;
  VoidCallback? _onStart;
  VoidCallback? _onComplete;
  Function(dynamic)? _onError;
  // Per-utterance statistics (chunk count / byte count / elapsed time).
  int _chunkCount = 0;
  int _totalBytes = 0;
  final Stopwatch _stopwatch = Stopwatch();
  // Set when the server sends is_final; actual playback completion is then
  // detected in _onFeedCallback once the PCM buffer drains.
  bool _isFinalReceived = false;

  /// Sets up the PCM player and registers the feed callback.
  @override
  Future<void> init() async {
    if (_isInitialized) return;
    // Initialize the PCM player.
    await FlutterPcmSound.setup(
      sampleRate: MinimaxConfig.sampleRate,
      channelCount: MinimaxConfig.channels,
    );
    FlutterPcmSound.setLogLevel(LogLevel.none);
    FlutterPcmSound.setFeedCallback(_onFeedCallback);
    _isInitialized = true;
    debugPrint('✅ MiniMaxTtsEngine initialized (PCM streaming mode)');
  }

  /// Pre-connects the WebSocket.
  /// Call when the voice page opens to establish the connection early.
  @override
  Future<void> preconnect() async {
    if (!_isInitialized) await init();
    if (_isConnected && _isTaskReady) {
      debugPrint('⚡ TTS 连接已就绪,无需重连');
      return;
    }
    debugPrint('🔌 TTS 预连接中...');
    await _ensureConnection();
    debugPrint('⚡ TTS 预连接完成,已就绪');
  }

  /// Ensures the WebSocket connection is established and in the ready state.
  Future<void> _ensureConnection() async {
    if (_isConnected && _isTaskReady) return;
    // Tear down any stale connection first.
    await _cleanupConnection();
    _connectCompleter = Completer<void>();
    try {
      final channel = connectTtsSocket(
        Uri.parse(MinimaxConfig.wsUrl),
        {
          'Authorization': 'Bearer ${MinimaxConfig.apiKey}',
        },
      );
      _channel = channel;
      // Listen for incoming messages.
      _wsSub = channel.stream.listen(
        _handleConnectionMessage,
        onError: (error) {
          debugPrint('❌ TTS WebSocket 错误: $error');
          _handleDisconnect();
        },
        onDone: () {
          debugPrint('⚠️ TTS WebSocket 连接关闭');
          _handleDisconnect();
        },
        cancelOnError: false,
      );
      // Wait until the connection is ready (connected_success + task_started),
      // guarded by a timeout.
      _timeoutTimer?.cancel();
      _timeoutTimer = Timer(const Duration(seconds: 10), () {
        if (_connectCompleter != null && !_connectCompleter!.isCompleted) {
          _connectCompleter!.completeError('连接超时');
          _handleDisconnect();
        }
      });
      await _connectCompleter!.future;
      _timeoutTimer?.cancel();
      // Start the keep-alive watchdog.
      _startKeepAlive();
    } catch (e) {
      debugPrint('❌ TTS 连接失败: $e');
      _handleDisconnect();
      rethrow;
    }
  }

  /// Dispatches a single server message: handshake events, task lifecycle
  /// events, and hex-encoded audio payloads.
  void _handleConnectionMessage(dynamic message) {
    if (_isDisposed) return;
    try {
      final Map<String, dynamic> data = jsonDecode(message as String);
      final String? event = data['event'] as String?;
      if (event == 'connected_success') {
        debugPrint('📥 TTS connected_success');
        _isConnected = true;
        // Send task_start to move the session into the ready state.
        _sendTaskStart();
        return;
      }
      if (event == 'task_started') {
        debugPrint('📥 TTS task_started (就绪)');
        _isTaskReady = true;
        // If this was a pre-connect, complete the connection future.
        if (_connectCompleter != null && !_connectCompleter!.isCompleted) {
          _connectCompleter!.complete();
        }
        // If text was queued while connecting, send it now.
        if (_pendingText != null) {
          _sendText(_pendingText!);
          _pendingText = null;
        }
        return;
      }
      if (event == 'task_continued') {
        // Normal streaming response; nothing to do here.
      }
      if (event == 'task_failed') {
        final errorInfo = data['base_resp'] ?? data;
        debugPrint('❌ TTS task_failed: ${jsonEncode(errorInfo)}');
        _handleTaskError('task_failed: ${jsonEncode(errorInfo)}');
        return;
      }
      // Handle an audio payload (hex string under data.audio).
      if (data['data'] != null && data['data'] is Map<String, dynamic>) {
        final audioData = data['data'] as Map<String, dynamic>;
        final audioHex = audioData['audio'];
        if (audioHex is String && audioHex.isNotEmpty) {
          final bytes = _hexToBytes(audioHex);
          _chunkCount++;
          _totalBytes += bytes.length;
          _feedAudioData(bytes);
          if (_chunkCount % 20 == 0) {
            debugPrint('📥 已接收 $_chunkCount 块 ($_totalBytes bytes)');
          }
        }
      }
      // Server indicates the task is complete.
      if (data['is_final'] == true) {
        _handleTaskComplete();
      }
    } catch (e) {
      debugPrint('❌ TTS 消息解析错误: $e');
    }
  }

  /// Sends the task_start handshake with the configured voice/audio settings.
  void _sendTaskStart() {
    if (_channel == null) return;
    final payload = {
      'event': 'task_start',
      'model': MinimaxConfig.model,
      'voice_setting': {
        'voice_id': MinimaxConfig.defaultVoiceId,
        'speed': 1,
        'vol': 1,
        'pitch': 0,
        'english_normalization': false,
      },
      'audio_setting': {
        'sample_rate': MinimaxConfig.sampleRate,
        'format': MinimaxConfig.format,
        'channel': MinimaxConfig.channels,
      },
    };
    _channel!.sink.add(jsonEncode(payload));
  }

  /// Sends [text] as a task_continue frame; no-op unless the task is ready.
  void _sendText(String text) {
    if (_channel == null || !_isTaskReady) return;
    debugPrint('📤 发送文本: "$text"');
    _channel!.sink.add(jsonEncode({
      'event': 'task_continue',
      'text': text,
    }));
    _isPlaying = true;
  }

  /// Finalizes the current utterance: logs stats, completes the speak future,
  /// and re-arms the connection for the next request.
  void _handleTaskComplete() {
    _stopwatch.stop();
    _isFinalReceived = true;
    _isTaskReady = false;
    debugPrint('');
    debugPrint('═══════════════════════════════════════');
    debugPrint('📊 TTS 完成: $_chunkCount 块, $_totalBytes bytes, ${_stopwatch.elapsedMilliseconds}ms');
    debugPrint('═══════════════════════════════════════');
    // Send task_finish to close out the server-side task.
    _channel?.sink.add(jsonEncode({'event': 'task_finish'}));
    // Complete the in-flight speak() call.
    if (_speakCompleter != null && !_speakCompleter!.isCompleted) {
      _speakCompleter!.complete();
    }
    // Prepare for the next utterance: re-send task_start after a short delay.
    Future.delayed(const Duration(milliseconds: 100), () {
      if (_isConnected && !_isDisposed && !_isCancelled) {
        _sendTaskStart();
      }
    });
  }

  /// Fails the in-flight speak() call and attempts to re-enter the ready state.
  void _handleTaskError(String error) {
    _isTaskReady = false;
    if (_speakCompleter != null && !_speakCompleter!.isCompleted) {
      _speakCompleter!.completeError(TtsEngineException(error));
    }
    if (_onError != null) _onError!(error);
    // Try to get back into the ready state after a short delay.
    Future.delayed(const Duration(milliseconds: 500), () {
      if (_isConnected && !_isDisposed) {
        _sendTaskStart();
      }
    });
  }

  /// Handles a dropped connection: fails pending futures and, if the engine
  /// is still live, silently reconnects for the next TTS request.
  void _handleDisconnect() {
    final wasConnected = _isConnected;
    _isConnected = false;
    _isTaskReady = false;
    _keepAliveTimer?.cancel();
    if (_connectCompleter != null && !_connectCompleter!.isCompleted) {
      _connectCompleter!.completeError('连接断开');
    }
    if (_speakCompleter != null && !_speakCompleter!.isCompleted) {
      _speakCompleter!.completeError(const TtsEngineException('连接断开'));
    }
    // If we were previously connected, reconnect silently in the background
    // (so the next TTS request is fast).
    if (wasConnected && !_isDisposed && !_isCancelled) {
      Future.delayed(const Duration(milliseconds: 500), () {
        if (!_isDisposed && !_isCancelled) {
          debugPrint('🔄 TTS 自动重连...');
          _ensureConnection().then((_) {
            debugPrint('⚡ TTS 重连成功');
          }).catchError((e) {
            debugPrint('⚠️ TTS 重连失败: $e (下次 speak 时会重试)');
          });
        }
      });
    }
  }

  /// Starts a periodic watchdog that reconnects if the socket has dropped.
  void _startKeepAlive() {
    _keepAliveTimer?.cancel();
    // Check the connection state every 30 seconds.
    _keepAliveTimer = Timer.periodic(const Duration(seconds: 30), (_) {
      if (!_isConnected && !_isDisposed) {
        debugPrint('🔄 TTS 重连中...');
        _ensureConnection().catchError((e) {
          debugPrint('❌ TTS 重连失败: $e');
        });
      }
    });
  }

  /// PCM feed callback: fires completion once the final chunk was received
  /// and the player's buffer has fully drained.
  void _onFeedCallback(int remainingFrames) {
    if (_isFinalReceived && remainingFrames == 0 && _isPlaying) {
      _isPlaying = false;
      debugPrint('🔊 PCM 播放完成');
      if (_onComplete != null) {
        _onComplete!();
      }
    }
  }

  /// Speaks [text] over the (possibly pre-connected) WebSocket session.
  ///
  /// NOTE(review): [voiceConfig] is accepted but not applied here — the voice
  /// settings sent in task_start come from [MinimaxConfig]; confirm whether
  /// per-character voice tuning is intended for this engine.
  @override
  Future<void> speak(
    String text, {
    AiVoiceConfig? voiceConfig,
  }) async {
    if (!_isInitialized) await init();
    if (_isDisposed) return;
    final trimmed = text.trim();
    if (trimmed.isEmpty) return;
    if (!MinimaxConfig.isEnabled) {
      throw const TtsEngineException('MiniMax API key is missing');
    }
    // Reset per-utterance state.
    _isCancelled = false;
    _isFinalReceived = false;
    _chunkCount = 0;
    _totalBytes = 0;
    _stopwatch.reset();
    _stopwatch.start();
    debugPrint('');
    debugPrint('═══════════════════════════════════════');
    debugPrint('🎤 TTS: "$trimmed"');
    debugPrint('═══════════════════════════════════════');
    _speakCompleter = Completer<void>();
    try {
      // Make sure the connection is ready.
      if (!_isConnected || !_isTaskReady) {
        debugPrint('⏳ 等待连接就绪...');
        await _ensureConnection();
      }
      // Send the text now, or queue it if the handshake is still in flight.
      if (_isTaskReady) {
        _sendText(trimmed);
        if (_onStart != null) _onStart!();
      } else {
        // Connection still being established; stash the text for later.
        _pendingText = trimmed;
        if (_onStart != null) _onStart!();
      }
      // Arm a timeout for the whole utterance.
      _timeoutTimer?.cancel();
      _timeoutTimer = Timer(const Duration(seconds: 30), () {
        if (_speakCompleter != null && !_speakCompleter!.isCompleted) {
          _handleTaskError('TTS 超时');
        }
      });
      // Wait for the utterance to finish.
      await _speakCompleter!.future;
      _timeoutTimer?.cancel();
    } catch (e) {
      debugPrint('❌ TTS 异常: $e');
      if (_onError != null) _onError!(e);
      rethrow;
    }
  }

  /// Converts little-endian 16-bit PCM bytes into signed samples and feeds
  /// them to the PCM player.
  void _feedAudioData(Uint8List pcmBytes) {
    if (_isCancelled || _isDisposed) return;
    final int16List = <int>[];
    for (var i = 0; i < pcmBytes.length - 1; i += 2) {
      // Reassemble the little-endian 16-bit sample, then sign-extend it.
      final int16Value = (pcmBytes[i + 1] << 8) | pcmBytes[i];
      final signed = int16Value >= 32768 ? int16Value - 65536 : int16Value;
      int16List.add(signed);
    }
    FlutterPcmSound.feed(PcmArrayInt16.fromList(int16List));
  }

  /// Decodes a hex string into raw bytes; throws [FormatException] on an
  /// odd-length input.
  Uint8List _hexToBytes(String hex) {
    final cleaned = hex.trim();
    final len = cleaned.length;
    if (len == 0) return Uint8List(0);
    if (len % 2 != 0) {
      throw const FormatException('Invalid hex string length');
    }
    final bytes = Uint8List(len ~/ 2);
    for (var i = 0; i < len; i += 2) {
      bytes[i ~/ 2] = int.parse(cleaned.substring(i, i + 2), radix: 16);
    }
    return bytes;
  }

  /// Cancels the stream subscription, closes the socket, and resets state.
  Future<void> _cleanupConnection() async {
    _wsSub?.cancel();
    _wsSub = null;
    _channel?.sink.close();
    _channel = null;
    _isConnected = false;
    _isTaskReady = false;
  }

  /// Stops playback and resets the PCM player, but keeps the WebSocket open
  /// for reuse by the next utterance.
  @override
  Future<void> stop() async {
    if (_isDisposed) return;
    _isCancelled = true;
    _isPlaying = false;
    _isFinalReceived = false;
    _pendingText = null;
    _timeoutTimer?.cancel();
    // Stop the PCM player.
    await FlutterPcmSound.release();
    // Re-create the player so the next speak() can feed immediately.
    if (_isInitialized) {
      await FlutterPcmSound.setup(
        sampleRate: MinimaxConfig.sampleRate,
        channelCount: MinimaxConfig.channels,
      );
      FlutterPcmSound.setFeedCallback(_onFeedCallback);
    }
    // Note: the WebSocket is deliberately kept open for reuse.
    // If a task is in progress, finish it cleanly.
    if (_isTaskReady && _channel != null) {
      _channel!.sink.add(jsonEncode({'event': 'task_finish'}));
      _isTaskReady = false;
      // Re-arm for the next utterance.
      Future.delayed(const Duration(milliseconds: 100), () {
        if (_isConnected && !_isDisposed) {
          _sendTaskStart();
        }
      });
    }
    debugPrint('🛑 TTS 已停止');
  }

  @override
  void setCompletionHandler(VoidCallback handler) {
    _onComplete = handler;
  }

  @override
  void setErrorHandler(Function(dynamic) handler) {
    _onError = handler;
  }

  @override
  void setStartHandler(VoidCallback handler) {
    _onStart = handler;
  }

  /// Releases timers, the socket, and the PCM player. The engine cannot be
  /// used after this.
  @override
  void dispose() {
    _isDisposed = true;
    _keepAliveTimer?.cancel();
    _timeoutTimer?.cancel();
    _cleanupConnection();
    FlutterPcmSound.release();
    debugPrint('🗑️ MiniMaxTtsEngine disposed');
  }
}

View File

@@ -0,0 +1,88 @@
import 'package:flutter/foundation.dart';
import 'package:flutter_tts/flutter_tts.dart';
import '../../models/character_model.dart';
import 'tts_engine.dart';
/// [TtsEngine] backed by the platform's built-in speech synthesizer
/// (via the flutter_tts plugin).
class SystemTtsEngine implements TtsEngine {
  final FlutterTts _flutterTts = FlutterTts();
  bool _isInitialized = false;

  /// Configures the platform synthesizer. On iOS the audio session is shared
  /// and set to play-and-record so TTS can coexist with the microphone.
  @override
  Future<void> init() async {
    if (_isInitialized) return;
    try {
      final needsIosAudioSetup =
          !kIsWeb && defaultTargetPlatform == TargetPlatform.iOS;
      if (needsIosAudioSetup) {
        await _flutterTts.setSharedInstance(true);
        await _flutterTts.setIosAudioCategory(
          IosTextToSpeechAudioCategory.playAndRecord,
          [
            IosTextToSpeechAudioCategoryOptions.allowBluetooth,
            IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
            IosTextToSpeechAudioCategoryOptions.mixWithOthers,
            IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
          ],
          IosTextToSpeechAudioMode.defaultMode,
        );
      }
      await _flutterTts.setLanguage("zh-CN");
      await _flutterTts.setPitch(1.0);
      await _flutterTts.setSpeechRate(0.5);
      _isInitialized = true;
      debugPrint('✅ SystemTtsEngine initialized');
    } catch (e) {
      debugPrint('❌ SystemTtsEngine init error: $e');
    }
  }

  /// The system synthesizer needs no network warm-up.
  @override
  Future<void> preconnect() async {}

  /// Speaks [text], optionally applying rate/pitch overrides from
  /// [voiceConfig] (clamped to safe ranges).
  @override
  Future<void> speak(
    String text, {
    AiVoiceConfig? voiceConfig,
  }) async {
    if (!_isInitialized) await init();
    if (text.trim().isEmpty) return;
    final config = voiceConfig;
    if (config != null) {
      await _flutterTts.setSpeechRate(config.speed.clamp(0.2, 2.0));
      await _flutterTts.setPitch(config.pitch.clamp(0.5, 2.0));
    }
    debugPrint('🗣️ SystemTtsEngine Speaking: $text');
    await _flutterTts.speak(text);
  }

  /// Interrupts any in-progress utterance.
  @override
  Future<void> stop() async => _flutterTts.stop();

  // Handlers are forwarded straight to the underlying plugin.
  @override
  void setCompletionHandler(VoidCallback handler) =>
      _flutterTts.setCompletionHandler(handler);

  @override
  void setStartHandler(VoidCallback handler) =>
      _flutterTts.setStartHandler(handler);

  @override
  void setErrorHandler(Function(dynamic) handler) =>
      _flutterTts.setErrorHandler(handler);

  @override
  void dispose() {
    _flutterTts.stop();
  }
}

View File

@@ -0,0 +1,37 @@
import 'package:flutter/foundation.dart';
import '../../models/character_model.dart';
/// Contract implemented by every text-to-speech backend.
abstract class TtsEngine {
  /// Performs one-time setup; call before [speak].
  Future<void> init();

  /// Pre-connect (optional override, used to establish connections early).
  Future<void> preconnect() async {}

  /// Speaks [text]; [voiceConfig] optionally tunes the voice.
  Future<void> speak(
    String text, {
    AiVoiceConfig? voiceConfig,
  });

  /// Stops any in-progress speech.
  Future<void> stop();

  /// Called when speech output begins.
  void setStartHandler(VoidCallback handler);

  /// Called when speech output finishes.
  void setCompletionHandler(VoidCallback handler);

  /// Called when the engine reports an error.
  void setErrorHandler(Function(dynamic) handler);

  /// Releases engine resources; the engine is unusable afterwards.
  void dispose();
}
/// Error raised by [TtsEngine] implementations.
class TtsEngineException implements Exception {
  const TtsEngineException(this.message, {this.isCancelled = false});

  /// Marker exception for a deliberately interrupted utterance.
  factory TtsEngineException.cancelled() =>
      const TtsEngineException('cancelled', isCancelled: true);

  /// Human-readable failure description.
  final String message;

  /// Whether this represents a cancellation rather than a real failure.
  final bool isCancelled;

  @override
  String toString() => 'TtsEngineException($message)';
}

View File

@@ -0,0 +1,6 @@
import 'package:web_socket_channel/web_socket_channel.dart';
import 'ws_client_io.dart' if (dart.library.html) 'ws_client_web.dart';
/// Opens a WebSocket for TTS traffic, delegating to the platform-specific
/// implementation selected by the conditional import (dart:io vs browser).
WebSocketChannel connectTtsSocket(Uri uri, Map<String, String> headers) =>
    createWebSocketChannel(uri, headers);

View File

@@ -0,0 +1,9 @@
import 'package:web_socket_channel/io.dart';
import 'package:web_socket_channel/web_socket_channel.dart';
/// dart:io implementation: connects with custom HTTP [headers]
/// (e.g. Authorization) supported by [IOWebSocketChannel].
WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) =>
    IOWebSocketChannel.connect(uri, headers: headers);

View File

@@ -0,0 +1,6 @@
import 'package:web_socket_channel/web_socket_channel.dart';
/// Browser implementation. The browser WebSocket API cannot send custom
/// headers, so [headers] is deliberately ignored here.
WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) =>
    WebSocketChannel.connect(uri);

View File

@@ -1,65 +1,115 @@
import 'package:flutter_tts/flutter_tts.dart';
import 'package:flutter/foundation.dart';
import '../config/minimax_config.dart';
import '../models/character_model.dart';
import 'tts/minimax_tts_engine.dart';
import 'tts/system_tts_engine.dart';
import 'tts/tts_engine.dart';
/// Singleton facade over the available TTS engines.
///
/// Prefers [MiniMaxTtsEngine] when an API key is configured (and not on web),
/// falling back to [SystemTtsEngine] otherwise or when MiniMax fails at
/// runtime.
///
/// NOTE(review): the block as rendered contained residual deleted-side diff
/// lines (a leftover `_flutterTts` field, a duplicate `speak` signature);
/// this is the reconstructed post-commit version.
class TTSService {
  static final TTSService _instance = TTSService._internal();
  factory TTSService() => _instance;
  TTSService._internal();

  final TtsEngine _minimaxEngine = MiniMaxTtsEngine();
  final TtsEngine _systemEngine = SystemTtsEngine();
  // The engine currently used for speak(); may switch to the system engine
  // after a MiniMax failure.
  TtsEngine? _activeEngine;
  bool _isInitialized = false;

  // Handlers are cached so they can be re-applied when the active engine
  // changes (e.g. on fallback).
  VoidCallback? _onStart;
  VoidCallback? _onComplete;
  Function(dynamic)? _onError;

  /// Selects and initializes the preferred engine. Errors are logged, not
  /// thrown, so a broken TTS stack never crashes the caller.
  Future<void> init() async {
    if (_isInitialized) return;
    try {
      _activeEngine = _selectEngine();
      _applyHandlers(_activeEngine);
      await _activeEngine!.init();
      _isInitialized = true;
      debugPrint('✅ TTSService initialized (${_activeEngine.runtimeType})');
    } catch (e) {
      debugPrint('❌ TTSService init error: $e');
    }
  }

  /// Speaks [text] with the active engine; on a MiniMax failure, falls back
  /// to the system engine for this and subsequent utterances.
  Future<void> speak(String text, {AiVoiceConfig? voiceConfig}) async {
    if (!_isInitialized) await init();
    if (text.isEmpty) return;
    // init() may have failed without setting an engine; retry lazily.
    if (_activeEngine == null) {
      _activeEngine = _selectEngine();
      _applyHandlers(_activeEngine);
      await _activeEngine!.init();
    }
    debugPrint('🗣️ TTS Speaking: $text');
    if (_activeEngine is MiniMaxTtsEngine) {
      try {
        await _activeEngine!.speak(text, voiceConfig: voiceConfig);
        return;
      } catch (e) {
        debugPrint('⚠️ MiniMax TTS failed, falling back to system TTS: $e');
        await _fallbackSpeak(text, voiceConfig: voiceConfig);
        return;
      }
    }
    await _activeEngine!.speak(text, voiceConfig: voiceConfig);
  }

  /// Pre-connects the TTS backend (call when the voice page opens) to cut
  /// first-utterance latency.
  Future<void> preconnect() async {
    if (!_isInitialized) await init();
    await _activeEngine?.preconnect();
  }

  /// Stops any in-progress speech on the active engine.
  Future<void> stop() async {
    await _activeEngine?.stop();
  }

  /// Registers a completion handler on both engines so fallback keeps
  /// callbacks intact.
  void setCompletionHandler(VoidCallback handler) {
    _onComplete = handler;
    _minimaxEngine.setCompletionHandler(handler);
    _systemEngine.setCompletionHandler(handler);
  }

  /// Registers a start handler on both engines.
  void setStartHandler(VoidCallback handler) {
    _onStart = handler;
    _minimaxEngine.setStartHandler(handler);
    _systemEngine.setStartHandler(handler);
  }

  /// Registers an error handler on both engines.
  void setErrorHandler(Function(dynamic) handler) {
    _onError = handler;
    _minimaxEngine.setErrorHandler(handler);
    _systemEngine.setErrorHandler(handler);
  }

  /// Chooses MiniMax when a key is configured and we're not on web;
  /// otherwise the system synthesizer.
  TtsEngine _selectEngine() {
    if (MinimaxConfig.isEnabled && !kIsWeb) {
      return _minimaxEngine;
    }
    return _systemEngine;
  }

  /// Switches to the system engine and retries the utterance; failures are
  /// reported through the error handler rather than thrown.
  Future<void> _fallbackSpeak(String text, {AiVoiceConfig? voiceConfig}) async {
    try {
      _activeEngine = _systemEngine;
      _applyHandlers(_activeEngine);
      await _activeEngine!.init();
      await _activeEngine!.speak(text, voiceConfig: voiceConfig);
    } catch (e) {
      debugPrint('❌ System TTS failed: $e');
      if (_onError != null) _onError!(e);
    }
  }

  /// Re-applies any cached handlers to [engine] (used after engine switches).
  void _applyHandlers(TtsEngine? engine) {
    if (engine == null) return;
    if (_onStart != null) engine.setStartHandler(_onStart!);
    if (_onComplete != null) engine.setCompletionHandler(_onComplete!);
    if (_onError != null) engine.setErrorHandler(_onError!);
  }
}

View File

@@ -0,0 +1,258 @@
import 'dart:convert';
import 'package:flutter/foundation.dart';
import 'package:http/http.dart' as http;
import 'package:vad/vad.dart';
/// VAD (Voice Activity Detection) + Google Speech-to-Text 服务
///
/// 使用 Silero VAD 模型检测语音开始/结束
/// 然后将录制的音频发送给 Google Cloud Speech-to-Text API
class VadSttService {
static final VadSttService _instance = VadSttService._internal();
factory VadSttService() => _instance;
VadSttService._internal();
VadHandler? _vadHandler;
bool _isInitialized = false;
bool _isListening = false;
// Google Cloud STT 配置
// TODO: 替换为你的 API Key
static const String _googleApiKey = 'AIzaSyD7Dg_Goc5Z9c5LzjTCnhCxLuwCVQz89bk';
static const String _googleSttUrl = 'https://speech.googleapis.com/v1/speech:recognize';
// 回调
Function(String text)? _onResult;
Function(String text)? _onFinalResult;
VoidCallback? _onSpeechStart;
VoidCallback? _onSpeechEnd;
// 统计
int _speechSegmentCount = 0;
final Stopwatch _speechStopwatch = Stopwatch();
bool get isListening => _isListening;
Future<bool> init() async {
if (_isInitialized) return true;
try {
_vadHandler = VadHandler.create(isDebug: true);
// 设置事件监听
_setupEventHandlers();
_isInitialized = true;
debugPrint('✅ VadSttService initialized (Silero VAD + Google STT)');
return true;
} catch (e) {
debugPrint('❌ VadSttService init failed: $e');
return false;
}
}
void _setupEventHandlers() {
if (_vadHandler == null) return;
// 检测到开始说话(可能是误触发)
_vadHandler!.onSpeechStart.listen((_) {
debugPrint('🎤 [VAD] 检测到声音...');
_speechStopwatch.reset();
_speechStopwatch.start();
});
// 确认是真正说话(不是噪音误触发)
_vadHandler!.onRealSpeechStart.listen((_) {
_speechSegmentCount++;
debugPrint('');
debugPrint('═══════════════════════════════════════');
debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 开始');
debugPrint('═══════════════════════════════════════');
if (_onSpeechStart != null) _onSpeechStart!();
});
// 说话结束,获取音频数据
_vadHandler!.onSpeechEnd.listen((List<double> samples) async {
_speechStopwatch.stop();
final durationMs = _speechStopwatch.elapsedMilliseconds;
final sampleCount = samples.length;
final estimatedBytes = sampleCount * 2; // 16-bit = 2 bytes per sample
debugPrint('');
debugPrint('═══════════════════════════════════════');
debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 结束');
debugPrint(' 时长: ${durationMs}ms');
debugPrint(' 采样点: $sampleCount');
debugPrint(' 数据大小: ~${(estimatedBytes / 1024).toStringAsFixed(1)} KB');
debugPrint('═══════════════════════════════════════');
if (_onSpeechEnd != null) _onSpeechEnd!();
// 太短的语音忽略
if (durationMs < 500) {
debugPrint('⚠️ 语音太短,忽略');
return;
}
// 发送给 Google STT
await _transcribeWithGoogle(samples, durationMs);
});
// 误触发(检测到声音但不是有效语音)
_vadHandler!.onVADMisfire.listen((_) {
_speechStopwatch.stop();
debugPrint('⚠️ [VAD] 误触发(不是有效语音),忽略');
});
// 错误
_vadHandler!.onError.listen((String message) {
debugPrint('❌ [VAD] 错误: $message');
});
}
/// 将 double 采样点转换为 16-bit PCM 字节
Uint8List _convertSamplesToBytes(List<double> samples) {
final bytes = Uint8List(samples.length * 2);
for (int i = 0; i < samples.length; i++) {
// 将 -1.0 ~ 1.0 转换为 -32768 ~ 32767
int sample = (samples[i] * 32767).clamp(-32768, 32767).toInt();
// Little endian
bytes[i * 2] = sample & 0xFF;
bytes[i * 2 + 1] = (sample >> 8) & 0xFF;
}
return bytes;
}
/// 发送给 Google Speech-to-Text API
Future<void> _transcribeWithGoogle(List<double> samples, int durationMs) async {
if (_googleApiKey == 'YOUR_GOOGLE_API_KEY') {
debugPrint('⚠️ [Google STT] 请先配置 API Key!');
debugPrint('📝 [模拟结果] 语音时长 ${durationMs}ms, 采样点 ${samples.length}');
// 模拟返回结果
if (_onFinalResult != null) {
_onFinalResult!('[模拟: ${durationMs}ms 语音]');
}
return;
}
debugPrint('📤 [Google STT] 发送音频数据...');
try {
// 转换为 PCM 字节
final audioBytes = _convertSamplesToBytes(samples);
final audioBase64 = base64Encode(audioBytes);
debugPrint(' 音频大小: ${(audioBytes.length / 1024).toStringAsFixed(1)} KB');
// 构建请求
final requestBody = {
'config': {
'encoding': 'LINEAR16',
'sampleRateHertz': 16000, // VAD 默认采样率
'languageCode': 'zh-CN', // 中文
'enableAutomaticPunctuation': true,
// 不指定 model使用默认模型
},
'audio': {
'content': audioBase64,
},
};
final stopwatch = Stopwatch()..start();
final response = await http.post(
Uri.parse('$_googleSttUrl?key=$_googleApiKey'),
headers: {'Content-Type': 'application/json'},
body: jsonEncode(requestBody),
);
stopwatch.stop();
debugPrint(' 响应时间: ${stopwatch.elapsedMilliseconds}ms');
if (response.statusCode == 200) {
final result = jsonDecode(response.body);
if (result['results'] != null && (result['results'] as List).isNotEmpty) {
final transcript = result['results'][0]['alternatives'][0]['transcript'] as String;
final confidence = result['results'][0]['alternatives'][0]['confidence'] ?? 0.0;
debugPrint('');
debugPrint('═══════════════════════════════════════');
debugPrint('📝 [Google STT] 识别结果:');
debugPrint(' "$transcript"');
debugPrint(' 置信度: ${(confidence * 100).toStringAsFixed(1)}%');
debugPrint('═══════════════════════════════════════');
if (_onFinalResult != null) {
_onFinalResult!(transcript);
}
} else {
debugPrint('⚠️ [Google STT] 没有识别到文字');
}
} else {
debugPrint('❌ [Google STT] 请求失败: ${response.statusCode}');
debugPrint(' ${response.body}');
}
} catch (e) {
debugPrint('❌ [Google STT] 错误: $e');
}
}
Future<void> startListening({
Function(String text)? onResult,
Function(String text)? onFinalResult,
VoidCallback? onSpeechStart,
VoidCallback? onSpeechEnd,
}) async {
if (!_isInitialized) {
bool success = await init();
if (!success) return;
}
if (_isListening) {
debugPrint('⚠️ [VAD] 已经在监听中');
return;
}
_onResult = onResult;
_onFinalResult = onFinalResult;
_onSpeechStart = onSpeechStart;
_onSpeechEnd = onSpeechEnd;
_speechSegmentCount = 0;
debugPrint('');
debugPrint('🎤 [VAD] 开始监听...');
try {
await _vadHandler?.startListening();
_isListening = true;
debugPrint('✅ [VAD] 监听已启动,等待语音输入...');
} catch (e) {
debugPrint('❌ [VAD] 启动监听失败: $e');
}
}
Future<void> stopListening() async {
if (!_isListening) return;
debugPrint('🛑 [VAD] 停止监听');
try {
await _vadHandler?.stopListening();
_isListening = false;
} catch (e) {
debugPrint('❌ [VAD] 停止监听失败: $e');
}
}
/// Releases the VAD handler and resets all service state.
///
/// Also drops the registered session callbacks so the captured closures
/// (and anything they retain, e.g. UI controllers) can be garbage
/// collected after the service is disposed.
void dispose() {
  _vadHandler?.dispose();
  _vadHandler = null;
  _isInitialized = false;
  _isListening = false;
  // Clear callback references to avoid leaking listener closures.
  _onResult = null;
  _onFinalResult = null;
  _onSpeechStart = null;
  _onSpeechEnd = null;
  debugPrint('🗑️ VadSttService disposed');
}
}

View File

@@ -1,6 +1,7 @@
import 'dart:async';
import 'package:flutter/foundation.dart';
import '../../core/core.dart';
import '../../core/services/vad_stt_service.dart';
import 'package:permission_handler/permission_handler.dart';
enum VoiceState {
@@ -25,8 +26,8 @@ class VoiceSessionController extends ChangeNotifier {
String _aiTypingText = '';
bool _isMicMuted = false;
// Services
final STTService _stt = STTService();
// Services - 使用 VAD 替代系统 STT
final VadSttService _vad = VadSttService();
final TTSService _tts = TTSService();
// State getters
@@ -35,9 +36,7 @@ class VoiceSessionController extends ChangeNotifier {
String get aiTypingText => _aiTypingText;
bool get isMicMuted => _isMicMuted;
// Buffer for sentence completion
String _sentenceBuffer = '';
final List<String> _punctuation = ['', '', '', '.', '?', '!', '\n'];
VoiceSessionController({
required this.character,
@@ -52,13 +51,15 @@ class VoiceSessionController extends ChangeNotifier {
await [Permission.microphone, Permission.speech].request();
// Init services
await _stt.init();
await _vad.init();
await _tts.init();
// 预连接 TTS WebSocket减少首次 TTS 延迟)
_tts.preconnect();
// Setup TTS callbacks
_tts.setStartHandler(() {
debugPrint('🔊 TTS Started');
// Already paused STT in _processSpeakQueue
});
_tts.setCompletionHandler(() {
@@ -96,7 +97,7 @@ class VoiceSessionController extends ChangeNotifier {
_state = VoiceState.listening;
_recognizedText = '';
_lastProcessedLength = 0;
notifyListeners();
// Stop TTS if it's playing (Interruption)
@@ -106,7 +107,18 @@ class VoiceSessionController extends ChangeNotifier {
_isSpeaking = false;
}
await _stt.listen(
// 使用 VAD 监听
await _vad.startListening(
onSpeechStart: () {
// 用户开始说话
_recognizedText = 'Listening...';
notifyListeners();
},
onSpeechEnd: () {
// 用户说完了,等待 STT 处理
_recognizedText = 'Processing...';
notifyListeners();
},
onResult: (text) {
_recognizedText = text;
notifyListeners();
@@ -114,14 +126,14 @@ class VoiceSessionController extends ChangeNotifier {
onFinalResult: (text) {
_recognizedText = text;
notifyListeners();
// 发送给 LLM 处理
_processUserMessage(text);
},
localeId: 'zh-CN', // Make dynamic later if needed
);
}
Future<void> stopListening() async {
await _stt.stop();
await _vad.stopListening();
}
Future<void> _processUserMessage(String text) async {
@@ -131,6 +143,8 @@ class VoiceSessionController extends ChangeNotifier {
return;
}
_state = VoiceState.processing;
onUserMessage(text); // Notify UI to show user message
notifyListeners();
@@ -156,37 +170,31 @@ class VoiceSessionController extends ChangeNotifier {
}
_aiTypingText = '';
_sentenceBuffer = '';
_lastProcessedLength = 0;
try {
final fullResponse = await ChatService.sendMessage(
character: character,
messages: messages,
userMessage: text, // ChatService handles appending this if we use the right method
userMessage: text,
onStream: (content) {
_aiTypingText = content;
_processStreamChunk(content);
notifyListeners();
},
);
// Process any remaining text in buffer
if (_sentenceBuffer.isNotEmpty) {
// Interaction finished, save AI message
final aiMsg = ChatMessage.assistant(fullResponse);
onAiMessage(aiMsg);
// Filter emojis and speak full text
final textToSpeak = _filterEmojis(fullResponse);
if (textToSpeak.isNotEmpty) {
if (_state != VoiceState.speaking) {
_state = VoiceState.speaking;
notifyListeners();
}
await _speak(_sentenceBuffer);
await _speak(textToSpeak);
}
// Interaction finished, save AI message
final aiMsg = ChatMessage.assistant(fullResponse);
onAiMessage(aiMsg);
// Note: We do NOT immediately startListening here.
// We rely on the TTS Completion Handler to trigger startListening
// when the entire queue is drained.
} catch (e) {
debugPrint('❌ Voice Process Error: $e');
@@ -197,65 +205,12 @@ class VoiceSessionController extends ChangeNotifier {
}
}
// Better implementation needs to handle state to avoid infinite loops
int _lastProcessedLength = 0;
void _processStreamChunk(String content) {
if (_state != VoiceState.speaking) {
_state = VoiceState.speaking;
notifyListeners();
}
// Calculate delta (new content only)
if (content.length <= _lastProcessedLength) return;
String delta = content.substring(_lastProcessedLength);
_lastProcessedLength = content.length;
_sentenceBuffer += delta;
// Check for punctuation to split sentences
bool foundPunctuation = false;
for (var p in _punctuation) {
if (_sentenceBuffer.contains(p)) {
foundPunctuation = true;
break;
}
}
if (foundPunctuation) {
_processBufferForSentences();
}
}
void _processBufferForSentences() {
String tempBuffer = _sentenceBuffer;
String keepBuffer = '';
// Simple tokenizer: split by punctuation but keep the punctuation attached to the sentence
// This is a naive implementation.
// "Hello! How are you?" -> ["Hello!", "How are you?"]
// We iterate through chars to find split points
int lastSplitIndex = 0;
for (int i = 0; i < tempBuffer.length; i++) {
String char = tempBuffer[i];
if (_punctuation.contains(char)) {
// Found end of a sentence
String sentence = tempBuffer.substring(lastSplitIndex, i + 1);
if (sentence.trim().isNotEmpty) {
_speak(sentence);
}
lastSplitIndex = i + 1;
}
}
// Keep the remaining part that didn't end with punctuation
if (lastSplitIndex < tempBuffer.length) {
keepBuffer = tempBuffer.substring(lastSplitIndex);
}
_sentenceBuffer = keepBuffer;
String _filterEmojis(String text) {
// Regex matches common emoji ranges
final RegExp emojiRegex = RegExp(
r'(\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])'
);
return text.replaceAll(emojiRegex, '').trim();
}
Future<void> _speak(String text) async {
@@ -275,9 +230,10 @@ class VoiceSessionController extends ChangeNotifier {
debugPrint('🎤 Queue empty, resuming listening...');
_state = VoiceState.listening;
notifyListeners();
// Debounce STT restart to avoid rapid stop/start deadlocks
// 延迟启动 STT让音频会话有时间从播放切换到录音
_silenceTimer?.cancel();
_silenceTimer = Timer(const Duration(milliseconds: 250), () {
_silenceTimer = Timer(const Duration(milliseconds: 800), () {
debugPrint('🎤 延迟后启动 STT...');
startListening();
});
}
@@ -291,12 +247,12 @@ class VoiceSessionController extends ChangeNotifier {
// Ensure STT is paused while speaking
await stopListening();
await _tts.speak(text);
await _tts.speak(text, voiceConfig: character.aiVoiceConfig);
}
@override
void dispose() {
_stt.stop();
_vad.stopListening();
_tts.stop();
super.dispose();
}

View File

@@ -6,16 +6,24 @@ import FlutterMacOS
import Foundation
import app_links
import audio_session
import flutter_pcm_sound
import flutter_tts
import just_audio
import path_provider_foundation
import record_macos
import shared_preferences_foundation
import speech_to_text
import url_launcher_macos
func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) {
AppLinksMacosPlugin.register(with: registry.registrar(forPlugin: "AppLinksMacosPlugin"))
AudioSessionPlugin.register(with: registry.registrar(forPlugin: "AudioSessionPlugin"))
FlutterPcmSoundPlugin.register(with: registry.registrar(forPlugin: "FlutterPcmSoundPlugin"))
FlutterTtsPlugin.register(with: registry.registrar(forPlugin: "FlutterTtsPlugin"))
JustAudioPlugin.register(with: registry.registrar(forPlugin: "JustAudioPlugin"))
PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin"))
RecordMacOsPlugin.register(with: registry.registrar(forPlugin: "RecordMacOsPlugin"))
SharedPreferencesPlugin.register(with: registry.registrar(forPlugin: "SharedPreferencesPlugin"))
SpeechToTextPlugin.register(with: registry.registrar(forPlugin: "SpeechToTextPlugin"))
UrlLauncherPlugin.register(with: registry.registrar(forPlugin: "UrlLauncherPlugin"))

View File

@@ -73,6 +73,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.13.0"
audio_session:
dependency: transitive
description:
name: audio_session
sha256: "8f96a7fecbb718cb093070f868b4cdcb8a9b1053dce342ff8ab2fde10eb9afb7"
url: "https://pub.dev"
source: hosted
version: "0.2.2"
boolean_selector:
dependency: transitive
description:
@@ -230,6 +238,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "6.0.0"
flutter_pcm_sound:
dependency: "direct main"
description:
name: flutter_pcm_sound
sha256: "15c6894da8195122001375084d51449bd77849579c93fca2800c00b615699dc0"
url: "https://pub.dev"
source: hosted
version: "3.3.3"
flutter_riverpod:
dependency: "direct main"
description:
@@ -368,6 +384,30 @@ packages:
url: "https://pub.dev"
source: hosted
version: "4.10.0"
just_audio:
dependency: "direct main"
description:
name: just_audio
sha256: "9694e4734f515f2a052493d1d7e0d6de219ee0427c7c29492e246ff32a219908"
url: "https://pub.dev"
source: hosted
version: "0.10.5"
just_audio_platform_interface:
dependency: transitive
description:
name: just_audio_platform_interface
sha256: "2532c8d6702528824445921c5ff10548b518b13f808c2e34c2fd54793b999a6a"
url: "https://pub.dev"
source: hosted
version: "4.6.0"
just_audio_web:
dependency: transitive
description:
name: just_audio_web
sha256: "6ba8a2a7e87d57d32f0f7b42856ade3d6a9fbe0f1a11fabae0a4f00bb73f0663"
url: "https://pub.dev"
source: hosted
version: "0.4.16"
jwt_decode:
dependency: transitive
description:
@@ -640,6 +680,70 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.7.0"
record:
dependency: transitive
description:
name: record
sha256: d5b6b334f3ab02460db6544e08583c942dbf23e3504bf1e14fd4cbe3d9409277
url: "https://pub.dev"
source: hosted
version: "6.2.0"
record_android:
dependency: transitive
description:
name: record_android
sha256: "3bb3c6abbcb5fc1e86719fc6f0acdee89dfe8078543b92caad11854c487e435a"
url: "https://pub.dev"
source: hosted
version: "1.5.0"
record_ios:
dependency: transitive
description:
name: record_ios
sha256: "8df7c136131bd05efc19256af29b2ba6ccc000ccc2c80d4b6b6d7a8d21a3b5a9"
url: "https://pub.dev"
source: hosted
version: "1.2.0"
record_linux:
dependency: transitive
description:
name: record_linux
sha256: c31a35cc158cd666fc6395f7f56fc054f31685571684be6b97670a27649ce5c7
url: "https://pub.dev"
source: hosted
version: "1.3.0"
record_macos:
dependency: transitive
description:
name: record_macos
sha256: f04d1547ff61ae54b4154e9726f656a17ad993f1a90f8f44bc40de94bafa072f
url: "https://pub.dev"
source: hosted
version: "1.2.0"
record_platform_interface:
dependency: transitive
description:
name: record_platform_interface
sha256: "8a81dbc4e14e1272a285bbfef6c9136d070a47d9b0d1f40aa6193516253ee2f6"
url: "https://pub.dev"
source: hosted
version: "1.5.0"
record_web:
dependency: transitive
description:
name: record_web
sha256: "7e9846981c1f2d111d86f0ae3309071f5bba8b624d1c977316706f08fc31d16d"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
record_windows:
dependency: transitive
description:
name: record_windows
sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
url: "https://pub.dev"
source: hosted
version: "1.0.7"
retry:
dependency: transitive
description:
@@ -861,6 +965,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.12.0"
synchronized:
dependency: transitive
description:
name: synchronized
sha256: c254ade258ec8282947a0acbbc90b9575b4f19673533ee46f2f6e9b3aeefd7c0
url: "https://pub.dev"
source: hosted
version: "3.4.0"
term_glyph:
dependency: transitive
description:
@@ -965,6 +1077,22 @@ packages:
url: "https://pub.dev"
source: hosted
version: "3.1.5"
uuid:
dependency: transitive
description:
name: uuid
sha256: a11b666489b1954e01d992f3d601b1804a33937b5a8fe677bd26b8a9f96f96e8
url: "https://pub.dev"
source: hosted
version: "4.5.2"
vad:
dependency: "direct main"
description:
name: vad
sha256: ef6c8b12c5af7a6a519ff5684f074b8a2ac00c434705f544af379ea77bccd258
url: "https://pub.dev"
source: hosted
version: "0.0.7+1"
vector_math:
dependency: transitive
description:
@@ -1006,7 +1134,7 @@ packages:
source: hosted
version: "1.0.1"
web_socket_channel:
dependency: transitive
dependency: "direct main"
description:
name: web_socket_channel
sha256: d645757fb0f4773d602444000a8131ff5d48c9e47adfe9772652dd1a4f2d45c8

View File

@@ -46,6 +46,10 @@ dependencies:
speech_to_text: ^7.3.0
flutter_tts: ^4.2.5
permission_handler: ^12.0.1
just_audio: ^0.10.3
web_socket_channel: ^3.0.1
flutter_pcm_sound: ^3.3.3
vad: ^0.0.7+1
dev_dependencies:
flutter_test:

85
wei_ai_app/优化.md Normal file
View File

@@ -0,0 +1,85 @@
我已经详细阅读了整个 interaction
模块和相关服务代码,以下是完整的架构分析和优化建议:
---
架构概览
screens/interaction/
├── interaction_screen.dart # 文字聊天 UI
├── voice_mode_overlay.dart # 语音通话 UI
└── voice_session_controller.dart # 语音状态编排
core/services/
├── chat_service.dart # LLM API (SSE 流式)
├── chat_storage_service.dart # SharedPreferences 持久化
├── stt_service.dart # 系统 STT (已废弃)
├── vad_stt_service.dart # Silero VAD + Google Cloud STT
├── tts_service.dart # TTS 门面 + 引擎选择
└── tts/
├── tts_engine.dart # 抽象接口
├── minimax_tts_engine.dart # MiniMax WebSocket 流式 TTS
├── system_tts_engine.dart # 系统 TTS 兜底
└── ws_client_*.dart # 跨平台 WebSocket
整体评价: 服务分层清晰,TTS 的抽象接口设计不错,语音对话的"句子级 TTS +
预连接"体验优化做得挺好。但有几个明显需要处理的问题:
---
需要优化的问题 (按优先级)
P0 - 安全问题
1. API Key 硬编码 — minimax_config.dart 中 MiniMax JWT 和 vad_stt_service.dart
中 Google Cloud STT API Key 直接写在代码里。APK 反编译即可提取,存在严重的
Key 被盗刷风险。
- 建议: 所有第三方 API 调用走后端代理,客户端不存放任何 Key
P1 - 潜在 Bug
2. 消息重复保存 — InteractionScreen._sendMessage() 会保存消息到
storage,语音流程中 VoiceSessionController._processUserMessage()
也通过回调保存。如果两个流程不小心交叉,可能产生重复消息。
- 建议: 统一消息保存入口,只在一处写入
3. 句子缓冲丢失 — _sentenceBuffer
在流式结束或网络中断时,未说完的文本会丢失(TTS 不完整但文字显示完整)。
- 建议: 流结束时 flush 残留 buffer,增加超时机制(如 2s 无标点则强制切句)
4. 语音打断时资源泄漏 — 用户在 TTS 播放中关闭语音界面,_speakCompleter
可能不会被 complete,定时器可能继续运行。
- 建议: dispose() 中强制 complete 所有 Completer,取消所有 Timer
5. WebSocket 连接泄漏 — 退出语音模式后 TTS WebSocket
连接没有显式关闭,可能造成连接泄漏。
- 建议: 退出语音模式时显式调用 disconnect
P2 - 架构优化
6. 状态管理不统一 — InteractionScreen 用 Riverpod + setState
混合,VoiceSessionController 用 ChangeNotifier。风格不一致,维护成本高。
- 建议: 统一用 Riverpod,将 VoiceSessionController 迁移为 StateNotifierProvider
7. 服务紧耦合 — VoiceSessionController 直接 new VadSttService() /
TTSService(),无法做单元测试。
- 建议: 通过依赖注入 (Riverpod Provider) 注入服务实例
8. 存储性能 — SharedPreferences 每次写入都序列化整个 session
JSON,消息多了会卡。
- 建议: 考虑换用 Isar/Hive 等本地数据库,支持增量写入和分页加载
P3 - 体验优化
9. 无重试机制 — Google STT API
调用失败时没有重试逻辑,网络抖动直接导致语音识别失败。
- 建议: 增加 1-2 次重试,带指数退避
10. 未完成功能 — VoiceModeOverlay 的扬声器切换按钮是空实现 (onPressed: ()
{}),波形动画是固定的,不跟实际音频挂钩。
11. 流式响应类型安全 — ChatService._sendStreamRequest() 中 JSON 解析用了
dynamic 类型,API 返回异常格式时可能 crash。
---
做得好的地方
- TTS 引擎抽象 + MiniMax/System 自动降级兜底
- WebSocket 预连接 降低首次语音响应延迟
- 句子级流式 TTS 而非等全文,对话体感自然
- 30s keep-alive ping 保持连接复用
- LLM 流式渲染 + 打字动画,用户反馈及时
---
要我针对某个具体问题开始修复吗?比如先处理 P1 的 bug 或 P2 的架构重构?