feat:v1.0.0
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# Uncomment this line to define a global platform for your project
|
||||
# platform :ios, '13.0'
|
||||
# VAD 插件需要 iOS 15.1+
|
||||
platform :ios, '15.1'
|
||||
|
||||
# CocoaPods analytics sends network stats synchronously affecting flutter build latency.
|
||||
ENV['COCOAPODS_DISABLE_STATS'] = 'true'
|
||||
|
||||
@@ -1,17 +1,31 @@
|
||||
PODS:
|
||||
- app_links (6.4.1):
|
||||
- Flutter
|
||||
- audio_session (0.0.1):
|
||||
- Flutter
|
||||
- CwlCatchException (2.2.1):
|
||||
- CwlCatchExceptionSupport (~> 2.2.1)
|
||||
- CwlCatchExceptionSupport (2.2.1)
|
||||
- Flutter (1.0.0)
|
||||
- flutter_pcm_sound (0.0.1):
|
||||
- Flutter
|
||||
- flutter_tts (0.0.1):
|
||||
- Flutter
|
||||
- just_audio (0.0.1):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- onnxruntime-c (1.22.0)
|
||||
- onnxruntime-objc (1.22.0):
|
||||
- onnxruntime-objc/Core (= 1.22.0)
|
||||
- onnxruntime-objc/Core (1.22.0):
|
||||
- onnxruntime-c (= 1.22.0)
|
||||
- path_provider_foundation (0.0.1):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- permission_handler_apple (9.3.0):
|
||||
- Flutter
|
||||
- record_ios (1.2.0):
|
||||
- Flutter
|
||||
- shared_preferences_foundation (0.0.1):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
@@ -21,52 +35,79 @@ PODS:
|
||||
- FlutterMacOS
|
||||
- url_launcher_ios (0.0.1):
|
||||
- Flutter
|
||||
- vad (0.0.6):
|
||||
- Flutter
|
||||
- onnxruntime-objc (= 1.22.0)
|
||||
|
||||
DEPENDENCIES:
|
||||
- app_links (from `.symlinks/plugins/app_links/ios`)
|
||||
- audio_session (from `.symlinks/plugins/audio_session/ios`)
|
||||
- Flutter (from `Flutter`)
|
||||
- flutter_pcm_sound (from `.symlinks/plugins/flutter_pcm_sound/ios`)
|
||||
- flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
|
||||
- just_audio (from `.symlinks/plugins/just_audio/darwin`)
|
||||
- path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
|
||||
- permission_handler_apple (from `.symlinks/plugins/permission_handler_apple/ios`)
|
||||
- record_ios (from `.symlinks/plugins/record_ios/ios`)
|
||||
- shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
|
||||
- speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
|
||||
- url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
|
||||
- vad (from `.symlinks/plugins/vad/ios`)
|
||||
|
||||
SPEC REPOS:
|
||||
trunk:
|
||||
- CwlCatchException
|
||||
- CwlCatchExceptionSupport
|
||||
- onnxruntime-c
|
||||
- onnxruntime-objc
|
||||
|
||||
EXTERNAL SOURCES:
|
||||
app_links:
|
||||
:path: ".symlinks/plugins/app_links/ios"
|
||||
audio_session:
|
||||
:path: ".symlinks/plugins/audio_session/ios"
|
||||
Flutter:
|
||||
:path: Flutter
|
||||
flutter_pcm_sound:
|
||||
:path: ".symlinks/plugins/flutter_pcm_sound/ios"
|
||||
flutter_tts:
|
||||
:path: ".symlinks/plugins/flutter_tts/ios"
|
||||
just_audio:
|
||||
:path: ".symlinks/plugins/just_audio/darwin"
|
||||
path_provider_foundation:
|
||||
:path: ".symlinks/plugins/path_provider_foundation/darwin"
|
||||
permission_handler_apple:
|
||||
:path: ".symlinks/plugins/permission_handler_apple/ios"
|
||||
record_ios:
|
||||
:path: ".symlinks/plugins/record_ios/ios"
|
||||
shared_preferences_foundation:
|
||||
:path: ".symlinks/plugins/shared_preferences_foundation/darwin"
|
||||
speech_to_text:
|
||||
:path: ".symlinks/plugins/speech_to_text/darwin"
|
||||
url_launcher_ios:
|
||||
:path: ".symlinks/plugins/url_launcher_ios/ios"
|
||||
vad:
|
||||
:path: ".symlinks/plugins/vad/ios"
|
||||
|
||||
SPEC CHECKSUMS:
|
||||
app_links: 3dbc685f76b1693c66a6d9dd1e9ab6f73d97dc0a
|
||||
audio_session: 9bb7f6c970f21241b19f5a3658097ae459681ba0
|
||||
CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
|
||||
CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
|
||||
Flutter: cabc95a1d2626b1b06e7179b784ebcf0c0cde467
|
||||
flutter_pcm_sound: e9c2f6ce580eefcab2af46763f0354484d5c4ac8
|
||||
flutter_tts: 35ac3c7d42412733e795ea96ad2d7e05d0a75113
|
||||
just_audio: 4e391f57b79cad2b0674030a00453ca5ce817eed
|
||||
onnxruntime-c: 7f778680e96145956c0a31945f260321eed2611a
|
||||
onnxruntime-objc: 83d28b87525bd971259a66e153ea32b5d023de19
|
||||
path_provider_foundation: bb55f6dbba17d0dccd6737fe6f7f34fbd0376880
|
||||
permission_handler_apple: 4ed2196e43d0651e8ff7ca3483a069d469701f2d
|
||||
record_ios: 412daca2350b228e698fffcd08f1f94ceb1e3844
|
||||
shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb
|
||||
speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
|
||||
url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b
|
||||
vad: 7934867589afe53567f492df66fb1615f2185822
|
||||
|
||||
PODFILE CHECKSUM: 3c63482e143d1b91d2d2560aee9fb04ecc74ac7e
|
||||
PODFILE CHECKSUM: 8af221031d17e57937852c3979a7d2c40538cf89
|
||||
|
||||
COCOAPODS: 1.16.2
|
||||
|
||||
@@ -45,6 +45,26 @@
|
||||
<string>We need access to your microphone for voice chat with AI characters.</string>
|
||||
<key>NSSpeechRecognitionUsageDescription</key>
|
||||
<string>We need speech recognition to convert your voice to text.</string>
|
||||
<key>NSAppTransportSecurity</key>
|
||||
<dict>
|
||||
<key>NSExceptionDomains</key>
|
||||
<dict>
|
||||
<key>localhost</key>
|
||||
<dict>
|
||||
<key>NSExceptionAllowsInsecureHTTPLoads</key>
|
||||
<true/>
|
||||
<key>NSIncludesSubdomains</key>
|
||||
<true/>
|
||||
</dict>
|
||||
<key>127.0.0.1</key>
|
||||
<dict>
|
||||
<key>NSExceptionAllowsInsecureHTTPLoads</key>
|
||||
<true/>
|
||||
<key>NSIncludesSubdomains</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>CADisableMinimumFrameDurationOnPhone</key>
|
||||
<true/>
|
||||
<key>UIApplicationSupportsIndirectInputEvents</key>
|
||||
|
||||
26
wei_ai_app/lib/core/config/minimax_config.dart
Normal file
26
wei_ai_app/lib/core/config/minimax_config.dart
Normal file
@@ -0,0 +1,26 @@
|
||||
/// Static configuration for the MiniMax text-to-speech integration.
///
/// SECURITY: the API key is no longer embedded in source. Supply it at build
/// time with `--dart-define=MINIMAX_API_KEY=<key>`. Any key that was ever
/// committed to version control must be treated as leaked and rotated.
/// A key shipped inside a client binary can still be extracted, so routing
/// requests through a backend proxy remains the safer long-term option.
class MinimaxConfig {
  MinimaxConfig._();

  /// WebSocket endpoint of the MiniMax text-to-audio service.
  static const String wsUrl = 'wss://api.minimax.io/ws/v1/t2a_v2';

  /// API key, injected at compile time via `--dart-define=MINIMAX_API_KEY=...`.
  ///
  /// Resolves to the empty string when the define is absent, in which case
  /// [isEnabled] is `false`.
  static const String apiKey = String.fromEnvironment('MINIMAX_API_KEY');

  /// Default synthesis model.
  static const String model = 'speech-2.6-turbo';

  /// Default voice (a MiniMax system voice id).
  static const String defaultVoiceId = 'Chinese (Mandarin)_BashfulGirl';

  /// Default audio parameters (raw PCM, used for streaming playback).
  static const int sampleRate = 32000;
  static const int channels = 1;
  static const String format = 'pcm';

  /// Whether an API key has been configured.
  static bool get isEnabled => apiKey.isNotEmpty;
}
|
||||
@@ -12,16 +12,31 @@ class STTService {
|
||||
|
||||
bool get isListening => _isListening;
|
||||
|
||||
// 回调
|
||||
Function(String text)? _onResult;
|
||||
Function(String text)? _onFinalResult;
|
||||
|
||||
Future<bool> init() async {
|
||||
if (_isInitialized) return true;
|
||||
|
||||
try {
|
||||
_isInitialized = await _speech.initialize(
|
||||
onError: (error) => debugPrint('❌ STT Error: $error'),
|
||||
onError: (error) {
|
||||
debugPrint('⚠️ STT Error: ${error.errorMsg}');
|
||||
|
||||
// error_no_match 是常见的"没听到有效语音"错误
|
||||
// 不应该中断整个流程
|
||||
if (error.errorMsg == 'error_no_match') {
|
||||
debugPrint(' (没有匹配到语音,忽略)');
|
||||
}
|
||||
},
|
||||
onStatus: (status) {
|
||||
debugPrint('🎤 STT Status: $status');
|
||||
if (status == 'listening') _isListening = true;
|
||||
if (status == 'notListening') _isListening = false;
|
||||
if (status == 'listening') {
|
||||
_isListening = true;
|
||||
} else if (status == 'notListening' || status == 'done') {
|
||||
_isListening = false;
|
||||
}
|
||||
},
|
||||
);
|
||||
debugPrint('✅ STT Initialized: $_isInitialized');
|
||||
@@ -44,20 +59,28 @@ class STTService {
|
||||
|
||||
if (_isListening) await stop();
|
||||
|
||||
_onResult = onResult;
|
||||
_onFinalResult = onFinalResult;
|
||||
|
||||
await _speech.listen(
|
||||
onResult: (result) {
|
||||
if (result.finalResult) {
|
||||
onFinalResult(result.recognizedWords);
|
||||
} else {
|
||||
onResult(result.recognizedWords);
|
||||
if (result.recognizedWords.isNotEmpty) {
|
||||
if (result.finalResult) {
|
||||
debugPrint('🎤 Final: "${result.recognizedWords}"');
|
||||
_onFinalResult?.call(result.recognizedWords);
|
||||
} else {
|
||||
_onResult?.call(result.recognizedWords);
|
||||
}
|
||||
}
|
||||
},
|
||||
localeId: localeId,
|
||||
listenFor: const Duration(seconds: 30),
|
||||
pauseFor: const Duration(seconds: 3), // Wait 3s of silence to consider "done"
|
||||
partialResults: true,
|
||||
cancelOnError: true,
|
||||
listenMode: ListenMode.dictation,
|
||||
listenFor: const Duration(seconds: 60), // 最大监听时长
|
||||
pauseFor: const Duration(milliseconds: 1500), // 1.5秒静音后视为说完
|
||||
listenOptions: SpeechListenOptions(
|
||||
partialResults: true,
|
||||
cancelOnError: false, // 不要因错误取消
|
||||
listenMode: ListenMode.dictation,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
502
wei_ai_app/lib/core/services/tts/minimax_tts_engine.dart
Normal file
502
wei_ai_app/lib/core/services/tts/minimax_tts_engine.dart
Normal file
@@ -0,0 +1,502 @@
|
||||
import 'dart:async';
|
||||
import 'dart:convert';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter_pcm_sound/flutter_pcm_sound.dart';
|
||||
import 'package:web_socket_channel/web_socket_channel.dart';
|
||||
|
||||
import '../../config/minimax_config.dart';
|
||||
import '../../models/character_model.dart';
|
||||
import 'tts_engine.dart';
|
||||
import 'ws_client.dart';
|
||||
|
||||
/// MiniMax TTS 引擎 (PCM 流式版本 - 带预连接优化)
|
||||
///
|
||||
/// 优化特性:
|
||||
/// - 预连接:提前建立 WebSocket 连接
|
||||
/// - 连接复用:一个连接处理多次 TTS 请求
|
||||
/// - 自动重连:连接断开后自动重连
|
||||
class MiniMaxTtsEngine implements TtsEngine {
|
||||
WebSocketChannel? _channel;
|
||||
StreamSubscription? _wsSub;
|
||||
Timer? _timeoutTimer;
|
||||
Timer? _keepAliveTimer;
|
||||
|
||||
bool _isInitialized = false;
|
||||
bool _isDisposed = false;
|
||||
bool _isCancelled = false;
|
||||
bool _isPlaying = false;
|
||||
|
||||
// 连接状态
|
||||
bool _isConnected = false;
|
||||
bool _isTaskReady = false; // task_started 状态
|
||||
Completer<void>? _connectCompleter;
|
||||
Completer<void>? _speakCompleter;
|
||||
|
||||
// 当前任务的文本
|
||||
String? _pendingText;
|
||||
|
||||
VoidCallback? _onStart;
|
||||
VoidCallback? _onComplete;
|
||||
Function(dynamic)? _onError;
|
||||
|
||||
// 统计信息
|
||||
int _chunkCount = 0;
|
||||
int _totalBytes = 0;
|
||||
final Stopwatch _stopwatch = Stopwatch();
|
||||
|
||||
// 跟踪播放完成
|
||||
bool _isFinalReceived = false;
|
||||
|
||||
/// Sets up the PCM player once; later calls are no-ops.
///
/// Configures the player with the sample rate and channel count from
/// [MinimaxConfig], silences plugin logging and registers
/// [_onFeedCallback] as the feed callback.
@override
Future<void> init() async {
  if (!_isInitialized) {
    await FlutterPcmSound.setup(
      sampleRate: MinimaxConfig.sampleRate,
      channelCount: MinimaxConfig.channels,
    );
    FlutterPcmSound.setLogLevel(LogLevel.none);
    FlutterPcmSound.setFeedCallback(_onFeedCallback);

    _isInitialized = true;
    debugPrint('✅ MiniMaxTtsEngine initialized (PCM streaming mode)');
  }
}
|
||||
|
||||
/// Opens the WebSocket ahead of time so the first [speak] does not pay the
/// connection latency. Intended to be called when the voice page opens.
///
/// Initializes the engine if needed and returns immediately when the
/// connection is already established and a task is ready.
@override
Future<void> preconnect() async {
  if (!_isInitialized) {
    await init();
  }

  final alreadyReady = _isConnected && _isTaskReady;
  if (alreadyReady) {
    debugPrint('⚡ TTS 连接已就绪,无需重连');
    return;
  }

  debugPrint('🔌 TTS 预连接中...');
  await _ensureConnection();
  debugPrint('⚡ TTS 预连接完成,已就绪');
}
|
||||
|
||||
/// Ensures the WebSocket is connected and a task has been started.
///
/// Returns immediately when the connection is already ready. When another
/// connection attempt is still in flight, this call joins that attempt
/// instead of starting a new one: previously a second caller (for example
/// [speak] right after [preconnect]) closed the first socket and replaced
/// `_connectCompleter`, leaving the first caller awaiting a future that
/// never completed.
///
/// Throws whatever error the attempt fails with (timeout or disconnect).
Future<void> _ensureConnection() async {
  if (_isConnected && _isTaskReady) return;

  // Join an attempt that has not finished yet rather than clobbering it.
  final inFlight = _connectCompleter;
  if (inFlight != null && !inFlight.isCompleted) {
    await inFlight.future;
    return;
  }

  // Publish the completer before the first await so that concurrent callers
  // see this attempt as in flight.
  final completer = Completer<void>();
  _connectCompleter = completer;

  try {
    // Drop any previous socket and subscription.
    await _cleanupConnection();

    final channel = connectTtsSocket(
      Uri.parse(MinimaxConfig.wsUrl),
      {
        'Authorization': 'Bearer ${MinimaxConfig.apiKey}',
      },
    );
    _channel = channel;

    // Route incoming frames and treat both errors and closure as disconnects.
    _wsSub = channel.stream.listen(
      _handleConnectionMessage,
      onError: (error) {
        debugPrint('❌ TTS WebSocket 错误: $error');
        _handleDisconnect();
      },
      onDone: () {
        debugPrint('⚠️ TTS WebSocket 连接关闭');
        _handleDisconnect();
      },
      cancelOnError: false,
    );

    // Readiness means connected_success followed by task_started; give up
    // after 10 seconds.
    _timeoutTimer?.cancel();
    _timeoutTimer = Timer(const Duration(seconds: 10), () {
      if (!completer.isCompleted) {
        completer.completeError('连接超时');
        _handleDisconnect();
      }
    });

    await completer.future;
    _timeoutTimer?.cancel();

    // Start the periodic connection check.
    _startKeepAlive();
  } catch (e) {
    debugPrint('❌ TTS 连接失败: $e');
    _handleDisconnect();
    rethrow;
  }
}
|
||||
|
||||
/// Handles one frame received from the TTS WebSocket.
///
/// Frames are JSON text. Lifecycle events (`connected_success`,
/// `task_started`, `task_failed`) are handled and return early; any other
/// frame is inspected for hex-encoded audio under `data.audio` and for the
/// `is_final` flag. Parsing failures are logged and otherwise ignored.
void _handleConnectionMessage(dynamic message) {
  if (_isDisposed) return;

  try {
    final Map<String, dynamic> payload = jsonDecode(message as String);
    final String? event = payload['event'] as String?;

    switch (event) {
      case 'connected_success':
        debugPrint('📥 TTS connected_success');
        _isConnected = true;
        // Ask the server to start a task so the connection becomes ready.
        _sendTaskStart();
        return;

      case 'task_started':
        debugPrint('📥 TTS task_started (就绪)');
        _isTaskReady = true;

        // Resolve a pending connection attempt, if any.
        final connecting = _connectCompleter;
        if (connecting != null && !connecting.isCompleted) {
          connecting.complete();
        }

        // Flush text that was queued while the task was not ready.
        final queued = _pendingText;
        if (queued != null) {
          _sendText(queued);
          _pendingText = null;
        }
        return;

      case 'task_failed':
        final errorInfo = payload['base_resp'] ?? payload;
        final detail = jsonEncode(errorInfo);
        debugPrint('❌ TTS task_failed: $detail');
        _handleTaskError('task_failed: $detail');
        return;
    }

    // Everything else (including `task_continued`) may carry audio.
    final body = payload['data'];
    if (body is Map<String, dynamic>) {
      final audioHex = body['audio'];
      if (audioHex is String && audioHex.isNotEmpty) {
        final bytes = _hexToBytes(audioHex);
        _chunkCount += 1;
        _totalBytes += bytes.length;

        _feedAudioData(bytes);

        // Log progress every 20 chunks.
        if (_chunkCount % 20 == 0) {
          debugPrint('📥 已接收 $_chunkCount 块 ($_totalBytes bytes)');
        }
      }
    }

    // The server marks the last frame of a task with `is_final`.
    if (payload['is_final'] == true) {
      _handleTaskComplete();
    }
  } catch (e) {
    debugPrint('❌ TTS 消息解析错误: $e');
  }
}
|
||||
|
||||
/// Sends a `task_start` frame carrying the default model, voice and audio
/// settings from [MinimaxConfig]. Does nothing when there is no channel.
void _sendTaskStart() {
  final channel = _channel;
  if (channel == null) return;

  final voiceSetting = <String, Object>{
    'voice_id': MinimaxConfig.defaultVoiceId,
    'speed': 1,
    'vol': 1,
    'pitch': 0,
    'english_normalization': false,
  };
  final audioSetting = <String, Object>{
    'sample_rate': MinimaxConfig.sampleRate,
    'format': MinimaxConfig.format,
    'channel': MinimaxConfig.channels,
  };
  final frame = <String, Object>{
    'event': 'task_start',
    'model': MinimaxConfig.model,
    'voice_setting': voiceSetting,
    'audio_setting': audioSetting,
  };

  channel.sink.add(jsonEncode(frame));
}
|
||||
|
||||
/// Sends [text] to the server as a `task_continue` frame and marks the
/// engine as playing. Ignored when there is no channel or no ready task.
void _sendText(String text) {
  final channel = _channel;
  if (channel == null || !_isTaskReady) return;

  debugPrint('📤 发送文本: "$text"');
  final frame = jsonEncode({
    'event': 'task_continue',
    'text': text,
  });
  channel.sink.add(frame);
  _isPlaying = true;
}
|
||||
|
||||
/// Finishes the current synthesis task after the final frame arrived.
///
/// Stops the stopwatch, logs statistics, sends `task_finish`, completes the
/// pending speak future, and after 100 ms sends a new `task_start` so the
/// connection is ready for the next request.
void _handleTaskComplete() {
  _stopwatch.stop();
  _isFinalReceived = true;
  _isTaskReady = false;

  final elapsedMs = _stopwatch.elapsedMilliseconds;
  debugPrint('');
  debugPrint('═══════════════════════════════════════');
  debugPrint('📊 TTS 完成: $_chunkCount 块, $_totalBytes bytes, ${elapsedMs}ms');
  debugPrint('═══════════════════════════════════════');

  // Tell the server this task is over.
  _channel?.sink.add(jsonEncode({'event': 'task_finish'}));

  // Release whoever is awaiting speak().
  final speaking = _speakCompleter;
  if (speaking != null && !speaking.isCompleted) {
    speaking.complete();
  }

  // Re-arm the connection for the next request.
  Future.delayed(const Duration(milliseconds: 100), () {
    final canRestart = _isConnected && !_isDisposed && !_isCancelled;
    if (canRestart) {
      _sendTaskStart();
    }
  });
}
|
||||
|
||||
/// Fails the current task with [error].
///
/// Completes the pending speak future with a [TtsEngineException], notifies
/// the error handler, and after 500 ms tries to start a fresh task if the
/// connection is still up.
void _handleTaskError(String error) {
  _isTaskReady = false;

  final speaking = _speakCompleter;
  if (speaking != null && !speaking.isCompleted) {
    speaking.completeError(TtsEngineException(error));
  }

  _onError?.call(error);

  // Try to get back into the ready state.
  Future.delayed(const Duration(milliseconds: 500), () {
    if (_isDisposed || !_isConnected) return;
    _sendTaskStart();
  });
}
|
||||
|
||||
/// Reacts to a lost or failed connection.
///
/// Clears the connection flags, stops the keep-alive timer and fails any
/// pending connect/speak futures. If a connection had been established and
/// the engine is neither disposed nor cancelled, schedules a silent
/// reconnect after 500 ms.
void _handleDisconnect() {
  final hadLiveConnection = _isConnected;
  _isConnected = false;
  _isTaskReady = false;
  _keepAliveTimer?.cancel();

  final connecting = _connectCompleter;
  if (connecting != null && !connecting.isCompleted) {
    connecting.completeError('连接断开');
  }

  final speaking = _speakCompleter;
  if (speaking != null && !speaking.isCompleted) {
    speaking.completeError(const TtsEngineException('连接断开'));
  }

  final shouldReconnect = hadLiveConnection && !_isDisposed && !_isCancelled;
  if (!shouldReconnect) return;

  // Reconnect quietly so the next request finds a ready connection.
  Future.delayed(const Duration(milliseconds: 500), () async {
    if (_isDisposed || _isCancelled) return;

    debugPrint('🔄 TTS 自动重连...');
    try {
      await _ensureConnection();
      debugPrint('⚡ TTS 重连成功');
    } catch (err) {
      debugPrint('⚠️ TTS 重连失败: $err (下次 speak 时会重试)');
    }
  });
}
|
||||
|
||||
/// Restarts the periodic connection check.
///
/// Every 30 seconds, if the engine is not connected and not disposed, a
/// reconnect is attempted; failures are only logged.
void _startKeepAlive() {
  _keepAliveTimer?.cancel();

  _keepAliveTimer = Timer.periodic(const Duration(seconds: 30), (_) async {
    if (_isConnected || _isDisposed) return;

    debugPrint('🔄 TTS 重连中...');
    try {
      await _ensureConnection();
    } catch (err) {
      debugPrint('❌ TTS 重连失败: $err');
    }
  });
}
|
||||
|
||||
/// Feed callback registered with the PCM player.
///
/// Playback counts as finished once the final frame has been received, the
/// player reports zero remaining frames and the engine was playing; the
/// completion handler is then invoked once.
void _onFeedCallback(int remainingFrames) {
  final playbackDrained =
      _isFinalReceived && remainingFrames == 0 && _isPlaying;
  if (!playbackDrained) return;

  _isPlaying = false;
  debugPrint('🔊 PCM 播放完成');
  _onComplete?.call();
}
|
||||
|
||||
/// Synthesizes [text] and streams the audio to the PCM player.
///
/// The returned future completes when the server reports the final frame.
/// It completes with an error on task failure, disconnect or a 30-second
/// timeout; errors are also reported to the error handler and rethrown.
/// Returns immediately for blank text or a disposed engine.
///
/// Fixes over the previous version:
/// - A request that is still pending when a new one starts is completed
///   normally instead of being orphaned (its caller used to wait forever).
/// - The timeout timer is held in a local variable and cancelled in
///   `finally`, so it is released on error paths and cannot be cancelled by
///   an unrelated request sharing `_timeoutTimer`.
/// - If the request was completed while waiting for the connection, the
///   text is not sent.
///
/// Throws [TtsEngineException] when no API key is configured.
@override
Future<void> speak(
  String text, {
  AiVoiceConfig? voiceConfig,
}) async {
  if (!_isInitialized) await init();
  if (_isDisposed) return;

  final trimmed = text.trim();
  if (trimmed.isEmpty) return;

  if (!MinimaxConfig.isEnabled) {
    throw const TtsEngineException('MiniMax API key is missing');
  }

  // Release a caller that is still awaiting an earlier, superseded request.
  final superseded = _speakCompleter;
  if (superseded != null && !superseded.isCompleted) {
    superseded.complete();
  }

  // Reset per-request state.
  _isCancelled = false;
  _isFinalReceived = false;
  _chunkCount = 0;
  _totalBytes = 0;
  _stopwatch.reset();
  _stopwatch.start();

  debugPrint('');
  debugPrint('═══════════════════════════════════════');
  debugPrint('🎤 TTS: "$trimmed"');
  debugPrint('═══════════════════════════════════════');

  final completer = Completer<void>();
  _speakCompleter = completer;
  Timer? watchdog;

  try {
    // Make sure the connection is ready.
    if (!_isConnected || !_isTaskReady) {
      debugPrint('⏳ 等待连接就绪...');
      await _ensureConnection();
    }

    // The request may have been finished while we were connecting; in that
    // case surface its outcome without sending anything.
    if (completer.isCompleted) {
      await completer.future;
      return;
    }

    if (_isTaskReady) {
      _sendText(trimmed);
    } else {
      // Task not ready yet: queue the text for the next `task_started`.
      _pendingText = trimmed;
    }
    if (_onStart != null) _onStart!();

    // Fail the request if no final frame arrives within 30 seconds.
    _timeoutTimer?.cancel();
    watchdog = Timer(const Duration(seconds: 30), () {
      if (!completer.isCompleted) {
        _handleTaskError('TTS 超时');
      }
    });
    _timeoutTimer = watchdog;

    await completer.future;
  } catch (e) {
    debugPrint('❌ TTS 异常: $e');
    if (_onError != null) _onError!(e);
    rethrow;
  } finally {
    watchdog?.cancel();
  }
}
|
||||
|
||||
/// Converts [pcmBytes] into signed 16-bit samples and feeds them to the PCM
/// player.
///
/// Bytes are paired low byte first; a trailing unpaired byte is ignored.
/// Nothing is fed while the engine is cancelled or disposed.
void _feedAudioData(Uint8List pcmBytes) {
  if (_isCancelled || _isDisposed) return;

  final sampleCount = pcmBytes.length ~/ 2;
  final samples = List<int>.generate(sampleCount, (n) {
    final low = pcmBytes[2 * n];
    final high = pcmBytes[2 * n + 1];
    // Reinterpret the unsigned 16-bit value as two's-complement.
    return ((high << 8) | low).toSigned(16);
  });

  FlutterPcmSound.feed(PcmArrayInt16.fromList(samples));
}
|
||||
|
||||
/// Decodes a hexadecimal string into bytes.
///
/// Surrounding whitespace is trimmed; an empty string yields an empty list.
/// Throws [FormatException] when the length is odd or a digit pair is not
/// valid hexadecimal.
Uint8List _hexToBytes(String hex) {
  final digits = hex.trim();
  if (digits.isEmpty) return Uint8List(0);
  if (digits.length.isOdd) {
    throw const FormatException('Invalid hex string length');
  }

  final decoded = Uint8List(digits.length ~/ 2);
  for (var byteIndex = 0; byteIndex < decoded.length; byteIndex++) {
    final start = byteIndex * 2;
    decoded[byteIndex] =
        int.parse(digits.substring(start, start + 2), radix: 16);
  }
  return decoded;
}
|
||||
|
||||
/// Drops the current socket: cancels the stream subscription, closes the
/// sink and clears the connection flags. Neither the cancel nor the close
/// is awaited.
Future<void> _cleanupConnection() async {
  final subscription = _wsSub;
  _wsSub = null;
  subscription?.cancel();

  final channel = _channel;
  _channel = null;
  channel?.sink.close();

  _isConnected = false;
  _isTaskReady = false;
}
|
||||
|
||||
/// Stops playback and abandons the current request while keeping the
/// WebSocket open for reuse.
///
/// Fix: a pending [speak] future is now completed. Previously this method
/// cancelled the timeout timer but left `_speakCompleter` untouched, so a
/// caller awaiting `speak()` never resumed after `stop()`. The future is
/// completed normally rather than with an error because the visible caller
/// treats any error as a reason to fall back to another engine, which would
/// speak text the user just cancelled.
@override
Future<void> stop() async {
  if (_isDisposed) return;

  _isCancelled = true;
  _isPlaying = false;
  _isFinalReceived = false;
  _pendingText = null;
  _timeoutTimer?.cancel();

  // Release whoever is awaiting the interrupted speak().
  final pending = _speakCompleter;
  if (pending != null && !pending.isCompleted) {
    pending.complete();
  }

  // Stop the PCM player.
  await FlutterPcmSound.release();

  // Set the player up again so the next request can play.
  if (_isInitialized) {
    await FlutterPcmSound.setup(
      sampleRate: MinimaxConfig.sampleRate,
      channelCount: MinimaxConfig.channels,
    );
    FlutterPcmSound.setFeedCallback(_onFeedCallback);
  }

  // The socket stays open. If a task is active, finish it and start a new
  // one shortly after so the connection is ready again.
  if (_isTaskReady && _channel != null) {
    _channel!.sink.add(jsonEncode({'event': 'task_finish'}));
    _isTaskReady = false;
    Future.delayed(const Duration(milliseconds: 100), () {
      if (_isConnected && !_isDisposed) {
        _sendTaskStart();
      }
    });
  }

  debugPrint('🛑 TTS 已停止');
}
|
||||
|
||||
/// Registers the callback invoked when playback of a request has finished.
@override
void setCompletionHandler(VoidCallback handler) => _onComplete = handler;

/// Registers the callback invoked with the error when a request fails.
@override
void setErrorHandler(Function(dynamic) handler) => _onError = handler;

/// Registers the callback invoked when a request has been handed off.
@override
void setStartHandler(VoidCallback handler) => _onStart = handler;
|
||||
|
||||
/// Marks the engine as disposed, cancels both timers, drops the socket and
/// releases the PCM player.
@override
void dispose() {
  _isDisposed = true;

  for (final timer in [_keepAliveTimer, _timeoutTimer]) {
    timer?.cancel();
  }

  _cleanupConnection();
  FlutterPcmSound.release();
  debugPrint('🗑️ MiniMaxTtsEngine disposed');
}
|
||||
}
|
||||
88
wei_ai_app/lib/core/services/tts/system_tts_engine.dart
Normal file
88
wei_ai_app/lib/core/services/tts/system_tts_engine.dart
Normal file
@@ -0,0 +1,88 @@
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter_tts/flutter_tts.dart';
|
||||
import '../../models/character_model.dart';
|
||||
import 'tts_engine.dart';
|
||||
|
||||
/// [TtsEngine] backed by the platform speech synthesizer via `flutter_tts`.
class SystemTtsEngine implements TtsEngine {
  final FlutterTts _flutterTts = FlutterTts();
  bool _isInitialized = false;

  /// Configures the synthesizer once. On iOS (non-web) the audio category is
  /// set to play-and-record with Bluetooth, mixing and speaker options.
  /// Failures are logged and leave the engine uninitialized.
  @override
  Future<void> init() async {
    if (_isInitialized) return;

    try {
      final onIos = !kIsWeb && defaultTargetPlatform == TargetPlatform.iOS;
      if (onIos) {
        await _flutterTts.setSharedInstance(true);
        const categoryOptions = [
          IosTextToSpeechAudioCategoryOptions.allowBluetooth,
          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
          IosTextToSpeechAudioCategoryOptions.mixWithOthers,
          IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
        ];
        await _flutterTts.setIosAudioCategory(
          IosTextToSpeechAudioCategory.playAndRecord,
          categoryOptions,
          IosTextToSpeechAudioMode.defaultMode,
        );
      }

      await _flutterTts.setLanguage('zh-CN');
      await _flutterTts.setPitch(1.0);
      await _flutterTts.setSpeechRate(0.5);

      _isInitialized = true;
      debugPrint('✅ SystemTtsEngine initialized');
    } catch (err) {
      debugPrint('❌ SystemTtsEngine init error: $err');
    }
  }

  /// The system synthesizer needs no connection; nothing to do.
  @override
  Future<void> preconnect() async {}

  /// Speaks [text], applying the speed and pitch from [voiceConfig] (clamped
  /// to 0.2–2.0 and 0.5–2.0 respectively) when provided. Blank text is
  /// ignored.
  @override
  Future<void> speak(
    String text, {
    AiVoiceConfig? voiceConfig,
  }) async {
    if (!_isInitialized) {
      await init();
    }
    if (text.trim().isEmpty) {
      return;
    }

    final config = voiceConfig;
    if (config != null) {
      await _flutterTts.setSpeechRate(config.speed.clamp(0.2, 2.0));
      await _flutterTts.setPitch(config.pitch.clamp(0.5, 2.0));
    }

    debugPrint('🗣️ SystemTtsEngine Speaking: $text');
    await _flutterTts.speak(text);
  }

  /// Stops any ongoing speech.
  @override
  Future<void> stop() async {
    await _flutterTts.stop();
  }

  /// Forwards the completion callback to the synthesizer.
  @override
  void setCompletionHandler(VoidCallback handler) =>
      _flutterTts.setCompletionHandler(handler);

  /// Forwards the start callback to the synthesizer.
  @override
  void setStartHandler(VoidCallback handler) =>
      _flutterTts.setStartHandler(handler);

  /// Forwards the error callback to the synthesizer.
  @override
  void setErrorHandler(Function(dynamic) handler) =>
      _flutterTts.setErrorHandler(handler);

  /// Stops speech; no other resources are released here.
  @override
  void dispose() {
    _flutterTts.stop();
  }
}
|
||||
37
wei_ai_app/lib/core/services/tts/tts_engine.dart
Normal file
37
wei_ai_app/lib/core/services/tts/tts_engine.dart
Normal file
@@ -0,0 +1,37 @@
|
||||
import 'package:flutter/foundation.dart';
|
||||
import '../../models/character_model.dart';
|
||||
|
||||
/// Common interface of the text-to-speech back ends.
abstract class TtsEngine {
  /// Prepares the engine for use.
  Future<void> init();

  /// Optionally establishes connections ahead of the first [speak].
  ///
  /// The default implementation does nothing.
  Future<void> preconnect() async {}

  /// Speaks [text], optionally using the settings in [voiceConfig].
  Future<void> speak(
    String text, {
    AiVoiceConfig? voiceConfig,
  });

  /// Stops the current speech.
  Future<void> stop();

  /// Registers the callback for the start of speech.
  void setStartHandler(VoidCallback handler);

  /// Registers the callback for the end of speech.
  void setCompletionHandler(VoidCallback handler);

  /// Registers the callback for errors.
  void setErrorHandler(Function(dynamic) handler);

  /// Releases the engine's resources.
  void dispose();
}
|
||||
|
||||
/// Error raised by a [TtsEngine].
class TtsEngineException implements Exception {
  /// Creates an exception with [message]; [isCancelled] defaults to `false`.
  const TtsEngineException(this.message, {this.isCancelled = false});

  /// Creates the exception that represents a cancelled request.
  factory TtsEngineException.cancelled() {
    return const TtsEngineException('cancelled', isCancelled: true);
  }

  /// Human-readable description of the failure.
  final String message;

  /// Whether the failure is a cancellation rather than a real error.
  final bool isCancelled;

  @override
  String toString() {
    return 'TtsEngineException($message)';
  }
}
|
||||
6
wei_ai_app/lib/core/services/tts/ws_client.dart
Normal file
6
wei_ai_app/lib/core/services/tts/ws_client.dart
Normal file
@@ -0,0 +1,6 @@
|
||||
import 'package:web_socket_channel/web_socket_channel.dart';
|
||||
import 'ws_client_io.dart' if (dart.library.html) 'ws_client_web.dart';
|
||||
|
||||
/// Opens the TTS WebSocket by delegating to `createWebSocketChannel` from
/// whichever implementation file the conditional import selected.
WebSocketChannel connectTtsSocket(Uri uri, Map<String, String> headers) =>
    createWebSocketChannel(uri, headers);
|
||||
9
wei_ai_app/lib/core/services/tts/ws_client_io.dart
Normal file
9
wei_ai_app/lib/core/services/tts/ws_client_io.dart
Normal file
@@ -0,0 +1,9 @@
|
||||
import 'package:web_socket_channel/io.dart';
|
||||
import 'package:web_socket_channel/web_socket_channel.dart';
|
||||
|
||||
/// `dart:io` implementation: connects to [uri] and sends [headers] with the
/// WebSocket handshake.
WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) =>
    IOWebSocketChannel.connect(uri, headers: headers);
|
||||
6
wei_ai_app/lib/core/services/tts/ws_client_web.dart
Normal file
6
wei_ai_app/lib/core/services/tts/ws_client_web.dart
Normal file
@@ -0,0 +1,6 @@
|
||||
import 'package:web_socket_channel/web_socket_channel.dart';
|
||||
|
||||
/// Browser implementation: connects to [uri] only.
///
/// [headers] is accepted for signature parity but is not used, because the
/// browser WebSocket API does not support custom headers.
WebSocketChannel createWebSocketChannel(Uri uri, Map<String, String> headers) {
  final channel = WebSocketChannel.connect(uri);
  return channel;
}
|
||||
@@ -1,65 +1,115 @@
|
||||
import 'package:flutter_tts/flutter_tts.dart';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import '../config/minimax_config.dart';
|
||||
import '../models/character_model.dart';
|
||||
import 'tts/minimax_tts_engine.dart';
|
||||
import 'tts/system_tts_engine.dart';
|
||||
import 'tts/tts_engine.dart';
|
||||
|
||||
class TTSService {
|
||||
static final TTSService _instance = TTSService._internal();
|
||||
factory TTSService() => _instance;
|
||||
TTSService._internal();
|
||||
|
||||
final FlutterTts _flutterTts = FlutterTts();
|
||||
final TtsEngine _minimaxEngine = MiniMaxTtsEngine();
|
||||
final TtsEngine _systemEngine = SystemTtsEngine();
|
||||
TtsEngine? _activeEngine;
|
||||
|
||||
bool _isInitialized = false;
|
||||
|
||||
VoidCallback? _onStart;
|
||||
VoidCallback? _onComplete;
|
||||
Function(dynamic)? _onError;
|
||||
|
||||
Future<void> init() async {
|
||||
if (_isInitialized) return;
|
||||
|
||||
try {
|
||||
if (!kIsWeb) {
|
||||
if (defaultTargetPlatform == TargetPlatform.iOS) {
|
||||
await _flutterTts.setSharedInstance(true);
|
||||
await _flutterTts.setIosAudioCategory(
|
||||
IosTextToSpeechAudioCategory.playAndRecord,
|
||||
[
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetooth,
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
|
||||
IosTextToSpeechAudioCategoryOptions.mixWithOthers,
|
||||
IosTextToSpeechAudioCategoryOptions.defaultToSpeaker
|
||||
],
|
||||
IosTextToSpeechAudioMode.defaultMode);
|
||||
}
|
||||
}
|
||||
|
||||
await _flutterTts.setLanguage("zh-CN"); // Default to Chinese
|
||||
await _flutterTts.setPitch(1.0);
|
||||
await _flutterTts.setSpeechRate(0.5); // Normal rate
|
||||
|
||||
_activeEngine = _selectEngine();
|
||||
_applyHandlers(_activeEngine);
|
||||
await _activeEngine!.init();
|
||||
_isInitialized = true;
|
||||
debugPrint('✅ TTSService initialized');
|
||||
debugPrint('✅ TTSService initialized (${_activeEngine.runtimeType})');
|
||||
} catch (e) {
|
||||
debugPrint('❌ TTSService init error: $e');
|
||||
}
|
||||
}
|
||||
|
||||
Future<void> speak(String text) async {
|
||||
Future<void> speak(String text, {AiVoiceConfig? voiceConfig}) async {
|
||||
if (!_isInitialized) await init();
|
||||
if (text.isEmpty) return;
|
||||
|
||||
if (_activeEngine == null) {
|
||||
_activeEngine = _selectEngine();
|
||||
_applyHandlers(_activeEngine);
|
||||
await _activeEngine!.init();
|
||||
}
|
||||
|
||||
debugPrint('🗣️ TTS Speaking: $text');
|
||||
await _flutterTts.speak(text);
|
||||
if (_activeEngine is MiniMaxTtsEngine) {
|
||||
try {
|
||||
await _activeEngine!.speak(text, voiceConfig: voiceConfig);
|
||||
return;
|
||||
} catch (e) {
|
||||
debugPrint('⚠️ MiniMax TTS failed, falling back to system TTS: $e');
|
||||
await _fallbackSpeak(text, voiceConfig: voiceConfig);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
await _activeEngine!.speak(text, voiceConfig: voiceConfig);
|
||||
}
|
||||
|
||||
/// Pre-connects the TTS backend (call this when the voice screen opens).
///
/// Runs [init] first if the service has not been initialized yet, then
/// forwards to the active engine's `preconnect()`; nothing is forwarded when
/// no engine is active.
Future<void> preconnect() async {
  if (!_isInitialized) {
    await init();
  }
  final engine = _activeEngine;
  await engine?.preconnect();
}
|
||||
|
||||
Future<void> stop() async {
|
||||
await _flutterTts.stop();
|
||||
await _activeEngine?.stop();
|
||||
}
|
||||
|
||||
void setCompletionHandler(VoidCallback handler) {
|
||||
_flutterTts.setCompletionHandler(handler);
|
||||
_onComplete = handler;
|
||||
_minimaxEngine.setCompletionHandler(handler);
|
||||
_systemEngine.setCompletionHandler(handler);
|
||||
}
|
||||
|
||||
void setStartHandler(VoidCallback handler) {
|
||||
_flutterTts.setStartHandler(handler);
|
||||
_onStart = handler;
|
||||
_minimaxEngine.setStartHandler(handler);
|
||||
_systemEngine.setStartHandler(handler);
|
||||
}
|
||||
|
||||
void setErrorHandler(Function(dynamic) handler) {
|
||||
_flutterTts.setErrorHandler(handler);
|
||||
_onError = handler;
|
||||
_minimaxEngine.setErrorHandler(handler);
|
||||
_systemEngine.setErrorHandler(handler);
|
||||
}
|
||||
|
||||
/// Chooses the engine used for speech output.
///
/// Returns the MiniMax engine when `MinimaxConfig.isEnabled` is true and the
/// app is not running on the web; otherwise returns the system engine.
TtsEngine _selectEngine() {
  final bool preferMinimax = MinimaxConfig.isEnabled && !kIsWeb;
  if (!preferMinimax) {
    return _systemEngine;
  }
  return _minimaxEngine;
}
|
||||
|
||||
/// Speaks [text] through the system engine after the primary engine failed.
///
/// Makes the system engine the active engine, re-applies the registered
/// handlers to it, initializes it and speaks. Any failure is logged and
/// forwarded to the registered error handler (if one is set) rather than
/// rethrown.
Future<void> _fallbackSpeak(String text, {AiVoiceConfig? voiceConfig}) async {
  try {
    final TtsEngine systemEngine = _systemEngine;
    _activeEngine = systemEngine;
    _applyHandlers(systemEngine);
    // Re-read the field after each await, exactly as before, so a concurrent
    // engine switch is still observed.
    await _activeEngine!.init();
    await _activeEngine!.speak(text, voiceConfig: voiceConfig);
  } catch (error) {
    debugPrint('❌ System TTS failed: $error');
    _onError?.call(error);
  }
}
|
||||
|
||||
/// Copies the currently registered start / completion / error handlers onto
/// [engine]. Does nothing when [engine] is null; handlers that have not been
/// registered are skipped.
void _applyHandlers(TtsEngine? engine) {
  if (engine == null) {
    return;
  }

  final onStart = _onStart;
  if (onStart != null) {
    engine.setStartHandler(onStart);
  }

  final onComplete = _onComplete;
  if (onComplete != null) {
    engine.setCompletionHandler(onComplete);
  }

  final onError = _onError;
  if (onError != null) {
    engine.setErrorHandler(onError);
  }
}
|
||||
}
|
||||
|
||||
258
wei_ai_app/lib/core/services/vad_stt_service.dart
Normal file
258
wei_ai_app/lib/core/services/vad_stt_service.dart
Normal file
@@ -0,0 +1,258 @@
|
||||
import 'dart:convert';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:http/http.dart' as http;
|
||||
import 'package:vad/vad.dart';
|
||||
|
||||
/// VAD (voice activity detection) + Google Speech-to-Text service.
///
/// A [VadHandler] (Silero VAD model) detects where speech starts and ends;
/// each finished speech segment is converted to 16-bit PCM and sent to the
/// Google Cloud Speech-to-Text `speech:recognize` endpoint. The recognized
/// text is delivered through the `onFinalResult` callback passed to
/// [startListening].
///
/// The class is a process-wide singleton: `VadSttService()` always returns
/// the same instance.
class VadSttService {
  static final VadSttService _instance = VadSttService._internal();
  factory VadSttService() => _instance;
  VadSttService._internal();

  VadHandler? _vadHandler;
  bool _isInitialized = false;
  bool _isListening = false;

  // --- Google Cloud STT configuration ---

  /// Sentinel meaning "no API key configured"; triggers the simulated result
  /// path in [_transcribeWithGoogle].
  static const String _apiKeyPlaceholder = 'YOUR_GOOGLE_API_KEY';

  /// Google Cloud API key, injected at build time with
  /// `--dart-define=GOOGLE_STT_API_KEY=<key>`.
  ///
  /// The key must not be committed to source control: anything compiled into
  /// the app can be extracted from the binary.
  /// NOTE(review): a real key was previously hard-coded here; it should be
  /// revoked/rotated, and the call ideally proxied through a backend.
  static const String _googleApiKey = String.fromEnvironment(
    'GOOGLE_STT_API_KEY',
    defaultValue: _apiKeyPlaceholder,
  );
  static const String _googleSttUrl = 'https://speech.googleapis.com/v1/speech:recognize';

  /// Upper bound for one recognize request, so a stalled connection cannot
  /// leave a speech segment pending forever.
  static const Duration _requestTimeout = Duration(seconds: 15);

  // --- Callbacks supplied by the caller of startListening ---
  // _onResult is stored but never invoked by this class (no partial results).
  Function(String text)? _onResult;
  Function(String text)? _onFinalResult;
  VoidCallback? _onSpeechStart;
  VoidCallback? _onSpeechEnd;

  // --- Statistics ---
  int _speechSegmentCount = 0;
  final Stopwatch _speechStopwatch = Stopwatch();

  /// Whether the VAD handler is currently listening to the microphone.
  bool get isListening => _isListening;

  /// Creates the VAD handler and wires its event streams.
  ///
  /// Returns `true` on success (or if already initialized) and `false` when
  /// creation fails; the failure is logged, not thrown.
  Future<bool> init() async {
    if (_isInitialized) return true;

    try {
      _vadHandler = VadHandler.create(isDebug: true);

      // Subscribe to VAD events.
      _setupEventHandlers();

      _isInitialized = true;
      debugPrint('✅ VadSttService initialized (Silero VAD + Google STT)');
      return true;
    } catch (e) {
      debugPrint('❌ VadSttService init failed: $e');
      return false;
    }
  }

  /// Subscribes to every event stream exposed by the VAD handler.
  void _setupEventHandlers() {
    if (_vadHandler == null) return;

    // Sound detected (may still turn out to be a misfire).
    _vadHandler!.onSpeechStart.listen((_) {
      debugPrint('🎤 [VAD] 检测到声音...');
      _speechStopwatch.reset();
      _speechStopwatch.start();
    });

    // Confirmed real speech (not a noise misfire).
    _vadHandler!.onRealSpeechStart.listen((_) {
      _speechSegmentCount++;
      debugPrint('');
      debugPrint('═══════════════════════════════════════');
      debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 开始');
      debugPrint('═══════════════════════════════════════');

      if (_onSpeechStart != null) _onSpeechStart!();
    });

    // Speech ended: the event carries the recorded samples.
    _vadHandler!.onSpeechEnd.listen((List<double> samples) async {
      _speechStopwatch.stop();
      final durationMs = _speechStopwatch.elapsedMilliseconds;

      final sampleCount = samples.length;
      final estimatedBytes = sampleCount * 2; // 16-bit = 2 bytes per sample

      debugPrint('');
      debugPrint('═══════════════════════════════════════');
      debugPrint('🎤 [VAD] 语音段 #$_speechSegmentCount 结束');
      debugPrint(' 时长: ${durationMs}ms');
      debugPrint(' 采样点: $sampleCount');
      debugPrint(' 数据大小: ~${(estimatedBytes / 1024).toStringAsFixed(1)} KB');
      debugPrint('═══════════════════════════════════════');

      if (_onSpeechEnd != null) _onSpeechEnd!();

      // Ignore segments that are too short to be an utterance.
      if (durationMs < 500) {
        debugPrint('⚠️ 语音太短,忽略');
        return;
      }

      // Send the segment to Google STT.
      await _transcribeWithGoogle(samples, durationMs);
    });

    // Misfire: sound was detected but it was not valid speech.
    _vadHandler!.onVADMisfire.listen((_) {
      _speechStopwatch.stop();
      debugPrint('⚠️ [VAD] 误触发(不是有效语音),忽略');
    });

    // Errors reported by the VAD handler.
    _vadHandler!.onError.listen((String message) {
      debugPrint('❌ [VAD] 错误: $message');
    });
  }

  /// Converts floating-point samples to little-endian signed 16-bit PCM.
  ///
  /// Each sample is scaled by 32767 and clamped to the int16 range. NaN
  /// samples are encoded as silence (0) instead of throwing.
  Uint8List _convertSamplesToBytes(List<double> samples) {
    final bytes = Uint8List(samples.length * 2);
    for (int i = 0; i < samples.length; i++) {
      final double value = samples[i];
      // NaN survives clamp() and makes toInt() throw, which would discard
      // the whole segment; map it to 0 instead.
      final int sample =
          value.isNaN ? 0 : (value * 32767).clamp(-32768, 32767).toInt();
      // Little endian: low byte first.
      bytes[i * 2] = sample & 0xFF;
      bytes[i * 2 + 1] = (sample >> 8) & 0xFF;
    }
    return bytes;
  }

  /// Sends one speech segment to the Google Speech-to-Text API and reports
  /// the recognized text through `_onFinalResult`.
  ///
  /// When no API key is configured, a simulated result is reported instead.
  /// Network errors, timeouts, non-200 responses and malformed responses are
  /// logged and swallowed; nothing is thrown to the caller.
  Future<void> _transcribeWithGoogle(List<double> samples, int durationMs) async {
    if (_googleApiKey == _apiKeyPlaceholder) {
      debugPrint('⚠️ [Google STT] 请先配置 API Key!');
      debugPrint('📝 [模拟结果] 语音时长 ${durationMs}ms, 采样点 ${samples.length}');

      // Report a simulated result so the rest of the pipeline can be tested.
      if (_onFinalResult != null) {
        _onFinalResult!('[模拟: ${durationMs}ms 语音]');
      }
      return;
    }

    debugPrint('📤 [Google STT] 发送音频数据...');

    try {
      // Encode the samples as base64 PCM.
      final audioBytes = _convertSamplesToBytes(samples);
      final audioBase64 = base64Encode(audioBytes);

      debugPrint(' 音频大小: ${(audioBytes.length / 1024).toStringAsFixed(1)} KB');

      // Build the request.
      final requestBody = {
        'config': {
          'encoding': 'LINEAR16',
          // Assumed to match the VAD output sample rate — TODO confirm
          // against the vad package configuration.
          'sampleRateHertz': 16000,
          'languageCode': 'zh-CN', // Chinese
          'enableAutomaticPunctuation': true,
          // No model specified: the API default model is used.
        },
        'audio': {
          'content': audioBase64,
        },
      };

      final stopwatch = Stopwatch()..start();

      final response = await http
          .post(
            Uri.parse('$_googleSttUrl?key=$_googleApiKey'),
            headers: {'Content-Type': 'application/json'},
            body: jsonEncode(requestBody),
          )
          .timeout(_requestTimeout);

      stopwatch.stop();
      debugPrint(' 响应时间: ${stopwatch.elapsedMilliseconds}ms');

      if (response.statusCode == 200) {
        final decoded = jsonDecode(response.body);
        final results = decoded is Map ? decoded['results'] : null;

        // `results` holds sequential portions of the audio; the full
        // transcript is the concatenation of each portion's top alternative.
        final pieces = <String>[];
        num? confidence;
        if (results is List) {
          for (final result in results) {
            final alternatives = result is Map ? result['alternatives'] : null;
            if (alternatives is! List || alternatives.isEmpty) continue;
            final best = alternatives.first;
            if (best is! Map) continue;
            final piece = best['transcript'];
            if (piece is! String || piece.isEmpty) continue;
            pieces.add(piece);
            // Log the confidence of the first recognized portion.
            final pieceConfidence = best['confidence'];
            if (confidence == null && pieceConfidence is num) {
              confidence = pieceConfidence;
            }
          }
        }

        if (pieces.isNotEmpty) {
          final transcript = pieces.join();
          final num shownConfidence = confidence ?? 0.0;

          debugPrint('');
          debugPrint('═══════════════════════════════════════');
          debugPrint('📝 [Google STT] 识别结果:');
          debugPrint(' "$transcript"');
          debugPrint(' 置信度: ${(shownConfidence * 100).toStringAsFixed(1)}%');
          debugPrint('═══════════════════════════════════════');

          if (_onFinalResult != null) {
            _onFinalResult!(transcript);
          }
        } else {
          debugPrint('⚠️ [Google STT] 没有识别到文字');
        }
      } else {
        debugPrint('❌ [Google STT] 请求失败: ${response.statusCode}');
        debugPrint(' ${response.body}');
      }
    } catch (e) {
      debugPrint('❌ [Google STT] 错误: $e');
    }
  }

  /// Starts VAD listening and registers the callbacks for this session.
  ///
  /// Initializes the service on first use. Does nothing if initialization
  /// fails or if listening is already active. A failure to start is logged,
  /// not thrown, and leaves [isListening] false.
  Future<void> startListening({
    Function(String text)? onResult,
    Function(String text)? onFinalResult,
    VoidCallback? onSpeechStart,
    VoidCallback? onSpeechEnd,
  }) async {
    if (!_isInitialized) {
      bool success = await init();
      if (!success) return;
    }

    if (_isListening) {
      debugPrint('⚠️ [VAD] 已经在监听中');
      return;
    }

    _onResult = onResult;
    _onFinalResult = onFinalResult;
    _onSpeechStart = onSpeechStart;
    _onSpeechEnd = onSpeechEnd;
    _speechSegmentCount = 0;

    debugPrint('');
    debugPrint('🎤 [VAD] 开始监听...');

    try {
      await _vadHandler?.startListening();
      _isListening = true;
      debugPrint('✅ [VAD] 监听已启动,等待语音输入...');
    } catch (e) {
      debugPrint('❌ [VAD] 启动监听失败: $e');
    }
  }

  /// Stops VAD listening. No-op when not listening; failures are logged.
  Future<void> stopListening() async {
    if (!_isListening) return;

    debugPrint('🛑 [VAD] 停止监听');

    try {
      await _vadHandler?.stopListening();
      _isListening = false;
    } catch (e) {
      debugPrint('❌ [VAD] 停止监听失败: $e');
    }
  }

  /// Releases the VAD handler and resets the service so that [init] can be
  /// called again.
  ///
  /// Also drops the registered callbacks: this object is a singleton, so
  /// keeping them would retain (and could later invoke) closures owned by a
  /// caller that has already gone away.
  void dispose() {
    _vadHandler?.dispose();
    _vadHandler = null;
    _isInitialized = false;
    _isListening = false;
    _onResult = null;
    _onFinalResult = null;
    _onSpeechStart = null;
    _onSpeechEnd = null;
    debugPrint('🗑️ VadSttService disposed');
  }
}
|
||||
@@ -1,6 +1,7 @@
|
||||
import 'dart:async';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import '../../core/core.dart';
|
||||
import '../../core/services/vad_stt_service.dart';
|
||||
import 'package:permission_handler/permission_handler.dart';
|
||||
|
||||
enum VoiceState {
|
||||
@@ -25,8 +26,8 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
String _aiTypingText = '';
|
||||
bool _isMicMuted = false;
|
||||
|
||||
// Services
|
||||
final STTService _stt = STTService();
|
||||
// Services - 使用 VAD 替代系统 STT
|
||||
final VadSttService _vad = VadSttService();
|
||||
final TTSService _tts = TTSService();
|
||||
|
||||
// State getters
|
||||
@@ -35,9 +36,7 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
String get aiTypingText => _aiTypingText;
|
||||
bool get isMicMuted => _isMicMuted;
|
||||
|
||||
// Buffer for sentence completion
|
||||
String _sentenceBuffer = '';
|
||||
final List<String> _punctuation = ['。', '?', '!', '.', '?', '!', '\n'];
|
||||
|
||||
|
||||
VoiceSessionController({
|
||||
required this.character,
|
||||
@@ -52,13 +51,15 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
await [Permission.microphone, Permission.speech].request();
|
||||
|
||||
// Init services
|
||||
await _stt.init();
|
||||
await _vad.init();
|
||||
await _tts.init();
|
||||
|
||||
// 预连接 TTS WebSocket(减少首次 TTS 延迟)
|
||||
_tts.preconnect();
|
||||
|
||||
// Setup TTS callbacks
|
||||
_tts.setStartHandler(() {
|
||||
debugPrint('🔊 TTS Started');
|
||||
// Already paused STT in _processSpeakQueue
|
||||
});
|
||||
|
||||
_tts.setCompletionHandler(() {
|
||||
@@ -96,7 +97,7 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
|
||||
_state = VoiceState.listening;
|
||||
_recognizedText = '';
|
||||
_lastProcessedLength = 0;
|
||||
|
||||
notifyListeners();
|
||||
|
||||
// Stop TTS if it's playing (Interruption)
|
||||
@@ -106,7 +107,18 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
_isSpeaking = false;
|
||||
}
|
||||
|
||||
await _stt.listen(
|
||||
// 使用 VAD 监听
|
||||
await _vad.startListening(
|
||||
onSpeechStart: () {
|
||||
// 用户开始说话
|
||||
_recognizedText = 'Listening...';
|
||||
notifyListeners();
|
||||
},
|
||||
onSpeechEnd: () {
|
||||
// 用户说完了,等待 STT 处理
|
||||
_recognizedText = 'Processing...';
|
||||
notifyListeners();
|
||||
},
|
||||
onResult: (text) {
|
||||
_recognizedText = text;
|
||||
notifyListeners();
|
||||
@@ -114,14 +126,14 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
onFinalResult: (text) {
|
||||
_recognizedText = text;
|
||||
notifyListeners();
|
||||
// 发送给 LLM 处理
|
||||
_processUserMessage(text);
|
||||
},
|
||||
localeId: 'zh-CN', // Make dynamic later if needed
|
||||
);
|
||||
}
|
||||
|
||||
Future<void> stopListening() async {
|
||||
await _stt.stop();
|
||||
await _vad.stopListening();
|
||||
}
|
||||
|
||||
Future<void> _processUserMessage(String text) async {
|
||||
@@ -131,6 +143,8 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
_state = VoiceState.processing;
|
||||
onUserMessage(text); // Notify UI to show user message
|
||||
notifyListeners();
|
||||
@@ -156,37 +170,31 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
}
|
||||
|
||||
_aiTypingText = '';
|
||||
_sentenceBuffer = '';
|
||||
_lastProcessedLength = 0;
|
||||
|
||||
try {
|
||||
final fullResponse = await ChatService.sendMessage(
|
||||
character: character,
|
||||
messages: messages,
|
||||
userMessage: text, // ChatService handles appending this if we use the right method
|
||||
userMessage: text,
|
||||
onStream: (content) {
|
||||
_aiTypingText = content;
|
||||
_processStreamChunk(content);
|
||||
notifyListeners();
|
||||
},
|
||||
);
|
||||
|
||||
// Process any remaining text in buffer
|
||||
if (_sentenceBuffer.isNotEmpty) {
|
||||
// Interaction finished, save AI message
|
||||
final aiMsg = ChatMessage.assistant(fullResponse);
|
||||
onAiMessage(aiMsg);
|
||||
|
||||
// Filter emojis and speak full text
|
||||
final textToSpeak = _filterEmojis(fullResponse);
|
||||
if (textToSpeak.isNotEmpty) {
|
||||
if (_state != VoiceState.speaking) {
|
||||
_state = VoiceState.speaking;
|
||||
notifyListeners();
|
||||
}
|
||||
await _speak(_sentenceBuffer);
|
||||
await _speak(textToSpeak);
|
||||
}
|
||||
|
||||
// Interaction finished, save AI message
|
||||
final aiMsg = ChatMessage.assistant(fullResponse);
|
||||
onAiMessage(aiMsg);
|
||||
|
||||
// Note: We do NOT immediately startListening here.
|
||||
// We rely on the TTS Completion Handler to trigger startListening
|
||||
// when the entire queue is drained.
|
||||
|
||||
} catch (e) {
|
||||
debugPrint('❌ Voice Process Error: $e');
|
||||
@@ -197,65 +205,12 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
}
|
||||
}
|
||||
|
||||
// Better implementation needs to handle state to avoid infinite loops
|
||||
int _lastProcessedLength = 0;
|
||||
|
||||
void _processStreamChunk(String content) {
|
||||
if (_state != VoiceState.speaking) {
|
||||
_state = VoiceState.speaking;
|
||||
notifyListeners();
|
||||
}
|
||||
|
||||
// Calculate delta (new content only)
|
||||
if (content.length <= _lastProcessedLength) return;
|
||||
|
||||
String delta = content.substring(_lastProcessedLength);
|
||||
_lastProcessedLength = content.length;
|
||||
_sentenceBuffer += delta;
|
||||
|
||||
// Check for punctuation to split sentences
|
||||
bool foundPunctuation = false;
|
||||
for (var p in _punctuation) {
|
||||
if (_sentenceBuffer.contains(p)) {
|
||||
foundPunctuation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (foundPunctuation) {
|
||||
_processBufferForSentences();
|
||||
}
|
||||
}
|
||||
|
||||
void _processBufferForSentences() {
|
||||
String tempBuffer = _sentenceBuffer;
|
||||
String keepBuffer = '';
|
||||
|
||||
// Simple tokenizer: split by punctuation but keep the punctuation attached to the sentence
|
||||
// This is a naive implementation.
|
||||
// "Hello! How are you?" -> ["Hello!", "How are you?"]
|
||||
|
||||
// We iterate through chars to find split points
|
||||
int lastSplitIndex = 0;
|
||||
|
||||
for (int i = 0; i < tempBuffer.length; i++) {
|
||||
String char = tempBuffer[i];
|
||||
if (_punctuation.contains(char)) {
|
||||
// Found end of a sentence
|
||||
String sentence = tempBuffer.substring(lastSplitIndex, i + 1);
|
||||
if (sentence.trim().isNotEmpty) {
|
||||
_speak(sentence);
|
||||
}
|
||||
lastSplitIndex = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep the remaining part that didn't end with punctuation
|
||||
if (lastSplitIndex < tempBuffer.length) {
|
||||
keepBuffer = tempBuffer.substring(lastSplitIndex);
|
||||
}
|
||||
|
||||
_sentenceBuffer = keepBuffer;
|
||||
/// Removes emoji and pictographic symbols from [text] so they are not read
/// aloud by TTS, then trims surrounding whitespace.
///
/// Only emoji-bearing ranges are matched. Ordinary punctuation (dashes,
/// ellipsis, curly quotes), CJK punctuation such as `。` `、` `「」`, and
/// Hiragana/Katakana are kept, because the speech engine needs them for
/// pauses and for reading the text at all.
String _filterEmojis(String text) {
  // The pattern works on UTF-16 code units (no unicode flag), so astral
  // emoji are matched as high-surrogate + low-surrogate pairs.
  final RegExp emojiRegex = RegExp(
    r'(\u00a9|\u00ae' // © ®
    r'|\u200d|\u20e3|[\ufe00-\ufe0f]' // ZWJ, keycap, variation selectors
    r'|\u203c|\u2049|\u2122|\u2139' // ‼ ⁉ ™ ℹ
    r'|[\u2190-\u21ff]' // arrows
    r'|[\u2300-\u23ff]' // misc technical (⌚ ⏰ …)
    r'|[\u2460-\u24ff]' // enclosed alphanumerics
    r'|[\u25a0-\u27bf]' // geometric shapes, misc symbols, dingbats
    r'|[\u2900-\u297f]|[\u2b00-\u2bff]' // supplemental arrows, stars
    r'|\u3030|\u303d|\u3297|\u3299' // 〰 〽 ㊗ ㊙
    r'|\ud83c[\udc00-\udfff]|\ud83d[\udc00-\udfff]|\ud83e[\udc00-\udfff])',
  );
  return text.replaceAll(emojiRegex, '').trim();
}
|
||||
|
||||
Future<void> _speak(String text) async {
|
||||
@@ -275,9 +230,10 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
debugPrint('🎤 Queue empty, resuming listening...');
|
||||
_state = VoiceState.listening;
|
||||
notifyListeners();
|
||||
// Debounce STT restart to avoid rapid stop/start deadlocks
|
||||
// 延迟启动 STT,让音频会话有时间从播放切换到录音
|
||||
_silenceTimer?.cancel();
|
||||
_silenceTimer = Timer(const Duration(milliseconds: 250), () {
|
||||
_silenceTimer = Timer(const Duration(milliseconds: 800), () {
|
||||
debugPrint('🎤 延迟后启动 STT...');
|
||||
startListening();
|
||||
});
|
||||
}
|
||||
@@ -291,12 +247,12 @@ class VoiceSessionController extends ChangeNotifier {
|
||||
// Ensure STT is paused while speaking
|
||||
await stopListening();
|
||||
|
||||
await _tts.speak(text);
|
||||
await _tts.speak(text, voiceConfig: character.aiVoiceConfig);
|
||||
}
|
||||
|
||||
@override
|
||||
void dispose() {
|
||||
_stt.stop();
|
||||
_vad.stopListening();
|
||||
_tts.stop();
|
||||
super.dispose();
|
||||
}
|
||||
|
||||
@@ -6,16 +6,24 @@ import FlutterMacOS
|
||||
import Foundation
|
||||
|
||||
import app_links
|
||||
import audio_session
|
||||
import flutter_pcm_sound
|
||||
import flutter_tts
|
||||
import just_audio
|
||||
import path_provider_foundation
|
||||
import record_macos
|
||||
import shared_preferences_foundation
|
||||
import speech_to_text
|
||||
import url_launcher_macos
|
||||
|
||||
func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) {
|
||||
AppLinksMacosPlugin.register(with: registry.registrar(forPlugin: "AppLinksMacosPlugin"))
|
||||
AudioSessionPlugin.register(with: registry.registrar(forPlugin: "AudioSessionPlugin"))
|
||||
FlutterPcmSoundPlugin.register(with: registry.registrar(forPlugin: "FlutterPcmSoundPlugin"))
|
||||
FlutterTtsPlugin.register(with: registry.registrar(forPlugin: "FlutterTtsPlugin"))
|
||||
JustAudioPlugin.register(with: registry.registrar(forPlugin: "JustAudioPlugin"))
|
||||
PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin"))
|
||||
RecordMacOsPlugin.register(with: registry.registrar(forPlugin: "RecordMacOsPlugin"))
|
||||
SharedPreferencesPlugin.register(with: registry.registrar(forPlugin: "SharedPreferencesPlugin"))
|
||||
SpeechToTextPlugin.register(with: registry.registrar(forPlugin: "SpeechToTextPlugin"))
|
||||
UrlLauncherPlugin.register(with: registry.registrar(forPlugin: "UrlLauncherPlugin"))
|
||||
|
||||
@@ -73,6 +73,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.13.0"
|
||||
audio_session:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: audio_session
|
||||
sha256: "8f96a7fecbb718cb093070f868b4cdcb8a9b1053dce342ff8ab2fde10eb9afb7"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.2.2"
|
||||
boolean_selector:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -230,6 +238,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.0.0"
|
||||
flutter_pcm_sound:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: flutter_pcm_sound
|
||||
sha256: "15c6894da8195122001375084d51449bd77849579c93fca2800c00b615699dc0"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.3.3"
|
||||
flutter_riverpod:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
@@ -368,6 +384,30 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "4.10.0"
|
||||
just_audio:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: just_audio
|
||||
sha256: "9694e4734f515f2a052493d1d7e0d6de219ee0427c7c29492e246ff32a219908"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.10.5"
|
||||
just_audio_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: just_audio_platform_interface
|
||||
sha256: "2532c8d6702528824445921c5ff10548b518b13f808c2e34c2fd54793b999a6a"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "4.6.0"
|
||||
just_audio_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: just_audio_web
|
||||
sha256: "6ba8a2a7e87d57d32f0f7b42856ade3d6a9fbe0f1a11fabae0a4f00bb73f0663"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.4.16"
|
||||
jwt_decode:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -640,6 +680,70 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.7.0"
|
||||
record:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record
|
||||
sha256: d5b6b334f3ab02460db6544e08583c942dbf23e3504bf1e14fd4cbe3d9409277
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.2.0"
|
||||
record_android:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_android
|
||||
sha256: "3bb3c6abbcb5fc1e86719fc6f0acdee89dfe8078543b92caad11854c487e435a"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.5.0"
|
||||
record_ios:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_ios
|
||||
sha256: "8df7c136131bd05efc19256af29b2ba6ccc000ccc2c80d4b6b6d7a8d21a3b5a9"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.0"
|
||||
record_linux:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_linux
|
||||
sha256: c31a35cc158cd666fc6395f7f56fc054f31685571684be6b97670a27649ce5c7
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.3.0"
|
||||
record_macos:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_macos
|
||||
sha256: f04d1547ff61ae54b4154e9726f656a17ad993f1a90f8f44bc40de94bafa072f
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.0"
|
||||
record_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_platform_interface
|
||||
sha256: "8a81dbc4e14e1272a285bbfef6c9136d070a47d9b0d1f40aa6193516253ee2f6"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.5.0"
|
||||
record_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_web
|
||||
sha256: "7e9846981c1f2d111d86f0ae3309071f5bba8b624d1c977316706f08fc31d16d"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.3.0"
|
||||
record_windows:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_windows
|
||||
sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.7"
|
||||
retry:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -861,6 +965,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.12.0"
|
||||
synchronized:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: synchronized
|
||||
sha256: c254ade258ec8282947a0acbbc90b9575b4f19673533ee46f2f6e9b3aeefd7c0
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.4.0"
|
||||
term_glyph:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -965,6 +1077,22 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.1.5"
|
||||
uuid:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: uuid
|
||||
sha256: a11b666489b1954e01d992f3d601b1804a33937b5a8fe677bd26b8a9f96f96e8
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "4.5.2"
|
||||
vad:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: vad
|
||||
sha256: ef6c8b12c5af7a6a519ff5684f074b8a2ac00c434705f544af379ea77bccd258
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.0.7+1"
|
||||
vector_math:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -1006,7 +1134,7 @@ packages:
|
||||
source: hosted
|
||||
version: "1.0.1"
|
||||
web_socket_channel:
|
||||
dependency: transitive
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: web_socket_channel
|
||||
sha256: d645757fb0f4773d602444000a8131ff5d48c9e47adfe9772652dd1a4f2d45c8
|
||||
|
||||
@@ -46,6 +46,10 @@ dependencies:
|
||||
speech_to_text: ^7.3.0
|
||||
flutter_tts: ^4.2.5
|
||||
permission_handler: ^12.0.1
|
||||
just_audio: ^0.10.3
|
||||
web_socket_channel: ^3.0.1
|
||||
flutter_pcm_sound: ^3.3.3
|
||||
vad: ^0.0.7+1
|
||||
|
||||
dev_dependencies:
|
||||
flutter_test:
|
||||
|
||||
85
wei_ai_app/优化.md
Normal file
85
wei_ai_app/优化.md
Normal file
@@ -0,0 +1,85 @@
|
||||
我已经详细阅读了整个 interaction 模块和相关服务代码,以下是完整的架构分析和优化建议:
|
||||
|
||||
---
|
||||
架构概览
|
||||
|
||||
screens/interaction/
|
||||
├── interaction_screen.dart # 文字聊天 UI
|
||||
├── voice_mode_overlay.dart # 语音通话 UI
|
||||
└── voice_session_controller.dart # 语音状态编排
|
||||
|
||||
core/services/
|
||||
├── chat_service.dart # LLM API (SSE 流式)
|
||||
├── chat_storage_service.dart # SharedPreferences 持久化
|
||||
├── stt_service.dart # 系统 STT (已废弃)
|
||||
├── vad_stt_service.dart # Silero VAD + Google Cloud STT
|
||||
├── tts_service.dart # TTS 门面 + 引擎选择
|
||||
└── tts/
|
||||
├── tts_engine.dart # 抽象接口
|
||||
├── minimax_tts_engine.dart # MiniMax WebSocket 流式 TTS
|
||||
├── system_tts_engine.dart # 系统 TTS 兜底
|
||||
└── ws_client_*.dart # 跨平台 WebSocket
|
||||
|
||||
整体评价: 服务分层清晰,TTS 的抽象接口设计不错,语音对话的"句子级 TTS + 预连接"体验优化做得挺好。但有几个明显需要处理的问题:
|
||||
|
||||
---
|
||||
需要优化的问题 (按优先级)
|
||||
|
||||
P0 - 安全问题
|
||||
|
||||
1. API Key 硬编码 — minimax_config.dart 中 MiniMax JWT 和 vad_stt_service.dart
|
||||
中 Google Cloud STT API Key 直接写在代码里。APK 反编译即可提取,存在严重的
|
||||
Key 被盗刷风险。
|
||||
- 建议: 所有第三方 API 调用走后端代理,客户端不存放任何 Key
|
||||
|
||||
P1 - 潜在 Bug
|
||||
|
||||
2. 消息重复保存 — InteractionScreen._sendMessage() 会保存消息到
|
||||
storage,语音流程中 VoiceSessionController._processUserMessage()
|
||||
也通过回调保存。如果两个流程不小心交叉,可能产生重复消息。
|
||||
- 建议: 统一消息保存入口,只在一处写入
|
||||
3. 句子缓冲丢失 — _sentenceBuffer
|
||||
在流式结束或网络中断时,未说完的文本会丢失(TTS 不完整但文字显示完整)。
|
||||
- 建议: 流结束时 flush 残留 buffer,增加超时机制(如 2s 无标点则强制切句)
|
||||
4. 语音打断时资源泄漏 — 用户在 TTS 播放中关闭语音界面,_speakCompleter
|
||||
可能不会被 complete,定时器可能继续运行。
|
||||
- 建议: dispose() 中强制 complete 所有 Completer,取消所有 Timer
|
||||
5. WebSocket 连接泄漏 — 退出语音模式后 TTS WebSocket
|
||||
连接没有显式关闭,可能造成连接泄漏。
|
||||
- 建议: 退出语音模式时显式调用 disconnect
|
||||
|
||||
P2 - 架构优化
|
||||
|
||||
6. 状态管理不统一 — InteractionScreen 用 Riverpod + setState
|
||||
混合,VoiceSessionController 用 ChangeNotifier。风格不一致,维护成本高。
|
||||
- 建议: 统一用 Riverpod,VoiceSessionController 迁移为 StateNotifierProvider
|
||||
7. 服务紧耦合 — VoiceSessionController 直接 new VadSttService() /
|
||||
TTSService(),无法做单元测试。
|
||||
- 建议: 通过依赖注入 (Riverpod Provider) 注入服务实例
|
||||
8. 存储性能 — SharedPreferences 每次写入都序列化整个 session
|
||||
JSON,消息多了会卡。
|
||||
- 建议: 考虑换用 Isar/Hive 等本地数据库,支持增量写入和分页加载
|
||||
|
||||
P3 - 体验优化
|
||||
|
||||
9. 无重试机制 — Google STT API
|
||||
调用失败时没有重试逻辑,网络抖动直接导致语音识别失败。
|
||||
- 建议: 增加 1-2 次重试,带指数退避
|
||||
10. 未完成功能 — VoiceModeOverlay 的扬声器切换按钮是空实现 (`onPressed: () {}`),波形动画是固定的,不跟实际音频挂钩。
|
||||
11. 流式响应类型安全 — ChatService._sendStreamRequest() 中 JSON 解析用了
|
||||
dynamic 类型,API 返回异常格式时可能 crash。
|
||||
|
||||
---
|
||||
做得好的地方
|
||||
|
||||
- TTS 引擎抽象 + MiniMax/System 自动降级兜底
|
||||
- WebSocket 预连接 降低首次语音响应延迟
|
||||
- 句子级流式 TTS 而非等全文,对话体感自然
|
||||
- 30s keep-alive ping 保持连接复用
|
||||
- LLM 流式渲染 + 打字动画,用户反馈及时
|
||||
|
||||
---
|
||||
要我针对某个具体问题开始修复吗?比如先处理 P1 的 bug 或 P2 的架构重构?
|
||||
Reference in New Issue
Block a user