From 8f19377517110da4450efb6038c4d866931be6ea Mon Sep 17 00:00:00 2001 From: liqupan Date: Tue, 3 Feb 2026 21:41:25 +0800 Subject: [PATCH] feat: mvp viode --- .../android/app/src/main/AndroidManifest.xml | 1 + wei_ai_app/ios/Podfile.lock | 30 ++ .../ios/Runner.xcodeproj/project.pbxproj | 18 ++ wei_ai_app/ios/Runner/Info.plist | 4 + wei_ai_app/lib/core/services/services.dart | 2 + wei_ai_app/lib/core/services/stt_service.dart | 73 +++++ wei_ai_app/lib/core/services/tts_service.dart | 65 ++++ .../interaction/interaction_screen.dart | 46 ++- .../interaction/voice_mode_overlay.dart | 87 +++-- .../interaction/voice_session_controller.dart | 303 ++++++++++++++++++ .../Flutter/GeneratedPluginRegistrant.swift | 4 + wei_ai_app/pubspec.lock | 96 ++++++ wei_ai_app/pubspec.yaml | 3 + 13 files changed, 701 insertions(+), 31 deletions(-) create mode 100644 wei_ai_app/lib/core/services/stt_service.dart create mode 100644 wei_ai_app/lib/core/services/tts_service.dart create mode 100644 wei_ai_app/lib/screens/interaction/voice_session_controller.dart diff --git a/wei_ai_app/android/app/src/main/AndroidManifest.xml b/wei_ai_app/android/app/src/main/AndroidManifest.xml index 9c4967a..9d3c33e 100644 --- a/wei_ai_app/android/app/src/main/AndroidManifest.xml +++ b/wei_ai_app/android/app/src/main/AndroidManifest.xml @@ -1,6 +1,7 @@ + 2.2.1) + - CwlCatchExceptionSupport (2.2.1) - Flutter (1.0.0) + - flutter_tts (0.0.1): + - Flutter - path_provider_foundation (0.0.1): - Flutter - FlutterMacOS + - permission_handler_apple (9.3.0): + - Flutter - shared_preferences_foundation (0.0.1): - Flutter - FlutterMacOS + - speech_to_text (7.2.0): + - CwlCatchException + - Flutter + - FlutterMacOS - url_launcher_ios (0.0.1): - Flutter DEPENDENCIES: - app_links (from `.symlinks/plugins/app_links/ios`) - Flutter (from `Flutter`) + - flutter_tts (from `.symlinks/plugins/flutter_tts/ios`) - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`) + - permission_handler_apple (from 
`.symlinks/plugins/permission_handler_apple/ios`) - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`) + - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`) - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`) +SPEC REPOS: + trunk: + - CwlCatchException + - CwlCatchExceptionSupport + EXTERNAL SOURCES: app_links: :path: ".symlinks/plugins/app_links/ios" Flutter: :path: Flutter + flutter_tts: + :path: ".symlinks/plugins/flutter_tts/ios" path_provider_foundation: :path: ".symlinks/plugins/path_provider_foundation/darwin" + permission_handler_apple: + :path: ".symlinks/plugins/permission_handler_apple/ios" shared_preferences_foundation: :path: ".symlinks/plugins/shared_preferences_foundation/darwin" + speech_to_text: + :path: ".symlinks/plugins/speech_to_text/darwin" url_launcher_ios: :path: ".symlinks/plugins/url_launcher_ios/ios" SPEC CHECKSUMS: app_links: 3dbc685f76b1693c66a6d9dd1e9ab6f73d97dc0a + CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a + CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc Flutter: cabc95a1d2626b1b06e7179b784ebcf0c0cde467 + flutter_tts: 35ac3c7d42412733e795ea96ad2d7e05d0a75113 path_provider_foundation: bb55f6dbba17d0dccd6737fe6f7f34fbd0376880 + permission_handler_apple: 4ed2196e43d0651e8ff7ca3483a069d469701f2d shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb + speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19 url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b PODFILE CHECKSUM: 3c63482e143d1b91d2d2560aee9fb04ecc74ac7e diff --git a/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj b/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj index bed076e..7669818 100644 --- a/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj +++ b/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj @@ -199,6 +199,7 @@ 9705A1C41CF9048500538489 /* Embed Frameworks */, 3B06AD1E1E4923F5004D2608 /* Thin Binary */, E1FBF8521399AC335A04FC96 /* [CP] 
Embed Pods Frameworks */, + DFE1ACAD91639A0D9A85544A /* [CP] Copy Pods Resources */, ); buildRules = ( ); @@ -345,6 +346,23 @@ shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; showEnvVarsInLog = 0; }; + DFE1ACAD91639A0D9A85544A /* [CP] Copy Pods Resources */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + "${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources-${CONFIGURATION}-input-files.xcfilelist", + ); + name = "[CP] Copy Pods Resources"; + outputFileListPaths = ( + "${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources-${CONFIGURATION}-output-files.xcfilelist", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources.sh\"\n"; + showEnvVarsInLog = 0; + }; E1FBF8521399AC335A04FC96 /* [CP] Embed Pods Frameworks */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; diff --git a/wei_ai_app/ios/Runner/Info.plist b/wei_ai_app/ios/Runner/Info.plist index 78fe71f..7eb665f 100644 --- a/wei_ai_app/ios/Runner/Info.plist +++ b/wei_ai_app/ios/Runner/Info.plist @@ -41,6 +41,10 @@ UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight + NSMicrophoneUsageDescription + We need access to your microphone for voice chat with AI characters. + NSSpeechRecognitionUsageDescription + We need speech recognition to convert your voice to text. 
CADisableMinimumFrameDurationOnPhone UIApplicationSupportsIndirectInputEvents diff --git a/wei_ai_app/lib/core/services/services.dart b/wei_ai_app/lib/core/services/services.dart index 1f5c32e..b5ca974 100644 --- a/wei_ai_app/lib/core/services/services.dart +++ b/wei_ai_app/lib/core/services/services.dart @@ -4,3 +4,5 @@ library services; export 'supabase_service.dart'; export 'chat_service.dart'; export 'chat_storage_service.dart'; +export 'stt_service.dart'; +export 'tts_service.dart';
diff --git a/wei_ai_app/lib/core/services/stt_service.dart b/wei_ai_app/lib/core/services/stt_service.dart
new file mode 100644
index 0000000..0c43901
--- /dev/null
+++ b/wei_ai_app/lib/core/services/stt_service.dart
@@ -0,0 +1,102 @@
+import 'package:speech_to_text/speech_to_text.dart';
+import 'package:flutter/foundation.dart';
+
+/// Process-wide wrapper around the `speech_to_text` plugin.
+///
+/// The factory constructor always returns the same instance, so every caller
+/// shares one recognizer and one listening flag.
+class STTService {
+  static final STTService _instance = STTService._internal();
+  factory STTService() => _instance;
+  STTService._internal();
+
+  final SpeechToText _speech = SpeechToText();
+  bool _isInitialized = false;
+  bool _isListening = false;
+
+  /// Whether the plugin's most recent status update was "listening".
+  bool get isListening => _isListening;
+
+  /// Initializes the recognizer; once it has succeeded, later calls return
+  /// `true` immediately.
+  ///
+  /// Returns `false` when the plugin reports failure or throws.
+  Future<bool> init() async {
+    if (_isInitialized) return true;
+
+    try {
+      _isInitialized = await _speech.initialize(
+        onError: (error) => debugPrint('❌ STT Error: $error'),
+        onStatus: (status) {
+          debugPrint('🎤 STT Status: $status');
+          if (status == SpeechToText.listeningStatus) {
+            _isListening = true;
+          } else if (status == SpeechToText.notListeningStatus ||
+              status == SpeechToText.doneStatus) {
+            _isListening = false;
+          }
+        },
+      );
+      debugPrint('✅ STT Initialized: $_isInitialized');
+      return _isInitialized;
+    } catch (e) {
+      debugPrint('❌ STT Init failed: $e');
+      return false;
+    }
+  }
+
+  /// Starts a dictation session in [localeId].
+  ///
+  /// [onResult] receives partial transcripts and [onFinalResult] receives the
+  /// transcript the plugin marks as final. Returns without listening when
+  /// initialization fails. A session already flagged as listening is stopped
+  /// first. Errors thrown by the plugin are logged and rethrown.
+  Future<void> listen({
+    required Function(String text) onResult,
+    required Function(String text) onFinalResult,
+    String localeId = 'zh-CN',
+  }) async {
+    if (!_isInitialized) {
+      final success = await init();
+      if (!success) return;
+    }
+
+    if (_isListening) await stop();
+
+    try {
+      await _speech.listen(
+        onResult: (result) {
+          if (result.finalResult) {
+            onFinalResult(result.recognizedWords);
+          } else {
+            onResult(result.recognizedWords);
+          }
+        },
+        localeId: localeId,
+        listenFor: const Duration(seconds: 30),
+        // Wait 3s of silence to consider "done"
+        pauseFor: const Duration(seconds: 3),
+        partialResults: true,
+        cancelOnError: true,
+        listenMode: ListenMode.dictation,
+      );
+    } catch (e) {
+      // The session did not start, so the flag must not claim otherwise.
+      _isListening = false;
+      debugPrint('❌ STT listen failed: $e');
+      rethrow;
+    }
+  }
+
+  /// Asks the plugin to stop the current session and clears the flag.
+  Future<void> stop() async {
+    await _speech.stop();
+    _isListening = false;
+  }
+
+  /// Asks the plugin to cancel the current session and clears the flag.
+  Future<void> cancel() async {
+    await _speech.cancel();
+    _isListening = false;
+  }
+}
diff --git a/wei_ai_app/lib/core/services/tts_service.dart b/wei_ai_app/lib/core/services/tts_service.dart
new file mode 100644
index 0000000..ad71702
--- /dev/null
+++ b/wei_ai_app/lib/core/services/tts_service.dart
@@ -0,0 +1,74 @@
+import 'package:flutter_tts/flutter_tts.dart';
+import 'package:flutter/foundation.dart';
+
+/// Process-wide wrapper around the `flutter_tts` plugin.
+class TTSService {
+  static final TTSService _instance = TTSService._internal();
+  factory TTSService() => _instance;
+  TTSService._internal();
+
+  final FlutterTts _flutterTts = FlutterTts();
+  bool _isInitialized = false;
+
+  /// Configures the engine once (iOS audio session, language, pitch, rate).
+  ///
+  /// Failures are logged and leave the service uninitialized, so the next
+  /// [speak] call attempts initialization again.
+  Future<void> init() async {
+    if (_isInitialized) return;
+
+    try {
+      if (!kIsWeb && defaultTargetPlatform == TargetPlatform.iOS) {
+        await _flutterTts.setSharedInstance(true);
+        await _flutterTts.setIosAudioCategory(
+          IosTextToSpeechAudioCategory.playAndRecord,
+          [
+            IosTextToSpeechAudioCategoryOptions.allowBluetooth,
+            IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
+            IosTextToSpeechAudioCategoryOptions.mixWithOthers,
+            IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
+          ],
+          IosTextToSpeechAudioMode.defaultMode,
+        );
+      }
+
+      await _flutterTts.setLanguage("zh-CN"); // Default to Chinese
+      await _flutterTts.setPitch(1.0);
+      await _flutterTts.setSpeechRate(0.5); // Normal rate
+
+      _isInitialized = true;
+      debugPrint('✅ TTSService initialized');
+    } catch (e) {
+      debugPrint('❌ TTSService init error: $e');
+    }
+  }
+
+  /// Speaks [text]; empty text is ignored without touching the engine.
+  Future<void> speak(String text) async {
+    if (text.isEmpty) return;
+    if (!_isInitialized) await init();
+
+    debugPrint('🗣️ TTS Speaking: $text');
+    await _flutterTts.speak(text);
+  }
+
+  /// Stops any speech in progress.
+  Future<void> stop() async {
+    await _flutterTts.stop();
+  }
+
+  /// Forwards [handler] to the plugin's completion callback.
+  void setCompletionHandler(VoidCallback handler) {
+    _flutterTts.setCompletionHandler(handler);
+  }
+
+  /// Forwards [handler] to the plugin's start callback.
+  void setStartHandler(VoidCallback handler) {
+    _flutterTts.setStartHandler(handler);
+  }
+
+  /// Forwards [handler] to the plugin's error callback.
+  void setErrorHandler(Function(dynamic) handler) {
+    _flutterTts.setErrorHandler(handler);
+  }
+}
diff --git a/wei_ai_app/lib/screens/interaction/interaction_screen.dart b/wei_ai_app/lib/screens/interaction/interaction_screen.dart index 2dcec93..4b6be27 100644 --- a/wei_ai_app/lib/screens/interaction/interaction_screen.dart +++ b/wei_ai_app/lib/screens/interaction/interaction_screen.dart @@ -5,6 +5,7 @@ import 'package:go_router/go_router.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import '../../core/core.dart'; import 'voice_mode_overlay.dart'; +import 'voice_session_controller.dart'; class InteractionScreen extends ConsumerStatefulWidget { final String characterId; @@ -20,6 +21,7 @@ class _InteractionScreenState extends ConsumerState { List _messages = []; final TextEditingController _controller = TextEditingController(); final ScrollController _scrollController = ScrollController(); + VoiceSessionController? 
_voiceController; bool _isVoiceMode = false; bool _isLoading = false; bool _isTyping = false; @@ -31,8 +33,10 @@ class _InteractionScreenState extends ConsumerState { _loadCharacterAndMessages(); } + @override @override void dispose() { + _voiceController?.dispose(); _controller.dispose(); _scrollController.dispose(); super.dispose(); @@ -81,6 +85,38 @@ class _InteractionScreenState extends ConsumerState { }); } + void _enterVoiceMode() { + FocusScope.of(context).unfocus(); + _voiceController = VoiceSessionController( + character: _character!, + onUserMessage: (text) { + if (!mounted) return; + final userMsg = ChatMessage.user(text); + setState(() { + _messages = [..._messages, userMsg]; + }); + ChatStorageService.addMessage(widget.characterId, userMsg); + _scrollToBottom(); + }, + onAiMessage: (msg) { + if (!mounted) return; + setState(() { + _messages = [..._messages, msg]; + }); + ChatStorageService.addMessage(widget.characterId, msg); + _scrollToBottom(); + }, + ); + + setState(() => _isVoiceMode = true); + } + + void _exitVoiceMode() { + _voiceController?.dispose(); + _voiceController = null; + setState(() => _isVoiceMode = false); + } + Future _sendMessage() async { if (_controller.text.trim().isEmpty || _character == null || _isLoading) return; @@ -273,10 +309,11 @@ class _InteractionScreenState extends ConsumerState { ), - if (_isVoiceMode && _character != null) + if (_isVoiceMode && _character != null && _voiceController != null) VoiceModeOverlay( character: _character!, - onClose: () => setState(() => _isVoiceMode = false), + controller: _voiceController!, + onClose: _exitVoiceMode, ), ], ), @@ -405,10 +442,7 @@ class _InteractionScreenState extends ConsumerState { child: Row( children: [ GestureDetector( - onTap: () { - FocusScope.of(context).unfocus(); - setState(() => _isVoiceMode = true); - }, + onTap: _enterVoiceMode, child: Container( width: 44, height: 44, diff --git a/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart 
b/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart index 4e1adf2..4773fa9 100644 --- a/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart +++ b/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart @@ -2,14 +2,17 @@ import 'dart:ui'; import 'package:flutter/material.dart'; import 'package:lucide_icons/lucide_icons.dart'; import '../../core/core.dart'; +import 'voice_session_controller.dart'; class VoiceModeOverlay extends StatefulWidget { final CharacterModel character; + final VoiceSessionController controller; final VoidCallback onClose; const VoiceModeOverlay({ super.key, required this.character, + required this.controller, required this.onClose, }); @@ -18,8 +21,6 @@ class VoiceModeOverlay extends StatefulWidget { } class _VoiceModeOverlayState extends State with SingleTickerProviderStateMixin { - bool _isMicMuted = false; - bool _isSpeakerOn = true; late AnimationController _controller; String get _avatarUrl => CharacterRepository.getAvatarUrl(widget.character.avatarPath); @@ -30,10 +31,18 @@ class _VoiceModeOverlayState extends State with SingleTickerPr _controller = AnimationController( vsync: this, duration: const Duration(seconds: 2)) ..repeat(reverse: true); + + // Listen to controller changes to update UI + widget.controller.addListener(_onStateChange); + } + + void _onStateChange() { + if (mounted) setState(() {}); } @override void dispose() { + widget.controller.removeListener(_onStateChange); _controller.dispose(); super.dispose(); } @@ -102,13 +111,14 @@ class _VoiceModeOverlayState extends State with SingleTickerPr ), const SizedBox(height: 8), Text( - _isMicMuted ? 
'Mic Muted' : 'Listening...', + _getStatusText(), style: TextStyle( color: Colors.white.withOpacity(0.6), fontSize: 12, letterSpacing: 2, fontWeight: FontWeight.w500, ), + textAlign: TextAlign.center, ), ], ), @@ -122,13 +132,23 @@ class _VoiceModeOverlayState extends State with SingleTickerPr child: Stack( alignment: Alignment.center, children: [ - if (!_isMicMuted) + // Show animation only when listening or speaking + if (widget.controller.state == VoiceState.listening || widget.controller.state == VoiceState.speaking) AnimatedBuilder( animation: _controller, builder: (context, child) { + double scale = 1.0; + if (widget.controller.state == VoiceState.speaking) { + // Faster pulse when AI is speaking + scale = 0.8 + 0.3 * _controller.value; + } else { + // Slower pulse when listening + scale = 0.8 + 0.1 * _controller.value; + } + return Container( - width: 200 * (0.8 + 0.2 * _controller.value), - height: 200 * (0.8 + 0.2 * _controller.value), + width: 200 * scale, + height: 200 * scale, decoration: BoxDecoration( shape: BoxShape.circle, border: Border.all( @@ -178,11 +198,13 @@ class _VoiceModeOverlayState extends State with SingleTickerPr builder: (context, child) { return Container( width: 4, - height: _isMicMuted - ? 4 - : 10 + (20 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)), + height: widget.controller.state == VoiceState.speaking + ? 10 + (30 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Active wave + : widget.controller.state == VoiceState.processing + ? 8 + (5 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Thinking wave + : 4, // Idle decoration: BoxDecoration( - color: Colors.white.withOpacity(_isMicMuted ? 0.2 : 0.8), + color: Colors.white.withOpacity(widget.controller.state != VoiceState.listening ? 
0.8 : 0.4), borderRadius: BorderRadius.circular(2), ), ); @@ -203,14 +225,14 @@ class _VoiceModeOverlayState extends State with SingleTickerPr children: [ // Mic Toggle IconButton( - onPressed: () => setState(() => _isMicMuted = !_isMicMuted), - icon: Icon(_isMicMuted ? LucideIcons.micOff : LucideIcons.mic), + onPressed: widget.controller.toggleMic, + icon: Icon(widget.controller.isMicMuted ? LucideIcons.micOff : LucideIcons.mic), iconSize: 24, style: IconButton.styleFrom( - backgroundColor: _isMicMuted + backgroundColor: widget.controller.isMicMuted ? Colors.white : Colors.white.withOpacity(0.1), - foregroundColor: _isMicMuted + foregroundColor: widget.controller.isMicMuted ? const Color(0xFF2E1065) : Colors.white, padding: const EdgeInsets.all(16), @@ -231,20 +253,14 @@ class _VoiceModeOverlayState extends State with SingleTickerPr ), ), - // Speaker Toggle + // Speaker Toggle - Placeholder for now IconButton( - onPressed: () => setState(() => _isSpeakerOn = !_isSpeakerOn), - icon: Icon( - _isSpeakerOn ? LucideIcons.volume2 : LucideIcons.volumeX - ), + onPressed: () {}, // TODO: Implement speaker toggle in controller + icon: const Icon(LucideIcons.volume2), iconSize: 24, style: IconButton.styleFrom( - backgroundColor: _isSpeakerOn - ? Colors.white.withOpacity(0.1) - : Colors.white.withOpacity(0.05), - foregroundColor: _isSpeakerOn - ? 
Colors.white - : Colors.white.withOpacity(0.5), + backgroundColor: Colors.white.withOpacity(0.1), + foregroundColor: Colors.white, padding: const EdgeInsets.all(16), minimumSize: const Size(64, 64), ), @@ -261,4 +277,25 @@ class _VoiceModeOverlayState extends State with SingleTickerPr ), ); } + + String _getStatusText() { + if (widget.controller.isMicMuted) return 'Microphone Muted'; + + switch (widget.controller.state) { + case VoiceState.listening: + if (widget.controller.recognizedText.isNotEmpty) { + // Show last few words of what user said + String text = widget.controller.recognizedText; + if (text.length > 20) text = '...${text.substring(text.length - 20)}'; + return text; + } + return 'Listening...'; + case VoiceState.processing: + return 'Thinking...'; + case VoiceState.speaking: + return 'Speaking...'; + case VoiceState.error: + return 'Error'; + } + } }
diff --git a/wei_ai_app/lib/screens/interaction/voice_session_controller.dart b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
new file mode 100644
index 0000000..800773d
--- /dev/null
+++ b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
@@ -0,0 +1,384 @@
+import 'dart:async';
+import 'package:flutter/foundation.dart';
+import '../../core/core.dart';
+import 'package:permission_handler/permission_handler.dart';
+
+/// Phases of a voice conversation.
+enum VoiceState {
+  listening, // Waiting for user input
+  processing, // Sending to AI
+  speaking, // AI is talking
+  error,
+}
+
+/// Drives one voice session: speech recognition -> chat request ->
+/// sentence-by-sentence text-to-speech -> speech recognition again.
+///
+/// [onUserMessage] is called with each recognized utterance and [onAiMessage]
+/// with each completed reply. Listeners are notified whenever [state],
+/// [recognizedText], [aiTypingText] or [isMicMuted] changes.
+///
+/// [STTService] and [TTSService] are shared singletons, so callbacks can still
+/// arrive after [dispose]; every entry point checks [_disposed] so a closed
+/// session cannot reopen the microphone or keep talking.
+class VoiceSessionController extends ChangeNotifier {
+  final CharacterModel character;
+  final Function(String) onUserMessage;
+  final Function(ChatMessage) onAiMessage;
+
+  // Speaking Queue
+  final List<String> _speakQueue = [];
+  bool _isSpeaking = false;
+  Timer? _silenceTimer;
+
+  // True while a chat reply is still streaming. The speak queue can run dry
+  // between sentences without the reply being finished.
+  bool _responseInFlight = false;
+  bool _disposed = false;
+
+  VoiceState _state = VoiceState.listening;
+  String _recognizedText = '';
+  String _aiTypingText = '';
+  bool _isMicMuted = false;
+
+  // Services
+  final STTService _stt = STTService();
+  final TTSService _tts = TTSService();
+
+  // State getters
+  VoiceState get state => _state;
+  String get recognizedText => _recognizedText;
+  String get aiTypingText => _aiTypingText;
+  bool get isMicMuted => _isMicMuted;
+
+  // Buffer for sentence completion
+  String _sentenceBuffer = '';
+  int _lastProcessedLength = 0;
+
+  // Sentence terminators. The CJK forms are written as escapes so that text
+  // normalization cannot fold them into their ASCII look-alikes.
+  static const List<String> _punctuation = [
+    '\u3002', // ideographic full stop
+    '\uFF1F', // full-width question mark
+    '\uFF01', // full-width exclamation mark
+    '.',
+    '?',
+    '!',
+    '\n',
+  ];
+
+  VoiceSessionController({
+    required this.character,
+    required this.onUserMessage,
+    required this.onAiMessage,
+  }) {
+    _init();
+  }
+
+  /// Drops notifications once disposed, because late plugin callbacks would
+  /// otherwise notify a disposed [ChangeNotifier].
+  @override
+  void notifyListeners() {
+    if (_disposed) return;
+    super.notifyListeners();
+  }
+
+  /// Requests permissions, prepares both services and starts listening.
+  ///
+  /// Runs unawaited from the constructor, so failures are caught here and
+  /// reported as [VoiceState.error].
+  Future<void> _init() async {
+    try {
+      // Request permissions
+      await [Permission.microphone, Permission.speech].request();
+      if (_disposed) return;
+
+      // Init services
+      final sttReady = await _stt.init();
+      await _tts.init();
+      if (_disposed) return;
+
+      if (!sttReady) {
+        debugPrint('❌ Voice session: speech recognition unavailable');
+        _state = VoiceState.error;
+        notifyListeners();
+        return;
+      }
+
+      // Setup TTS callbacks
+      _tts.setStartHandler(() {
+        debugPrint('🔊 TTS Started');
+        // Already paused STT in _processSpeakQueue
+      });
+
+      _tts.setCompletionHandler(() {
+        debugPrint('✅ TTS Completed');
+        _onUtteranceEnded();
+      });
+
+      _tts.setErrorHandler((msg) {
+        debugPrint('❌ TTS Error: $msg');
+        _onUtteranceEnded();
+      });
+
+      // Start listening immediately
+      await startListening();
+    } catch (e) {
+      debugPrint('❌ Voice session init failed: $e');
+      if (_disposed) return;
+      _state = VoiceState.error;
+      notifyListeners();
+    }
+  }
+
+  /// Shared tail of the TTS completion and error handlers.
+  void _onUtteranceEnded() {
+    if (_disposed) return;
+    _isSpeaking = false;
+    _processSpeakQueue(); // Play next
+  }
+
+  /// Mutes or unmutes the microphone and notifies listeners.
+  void toggleMic() {
+    _isMicMuted = !_isMicMuted;
+    if (_isMicMuted) {
+      stopListening();
+    } else if (_state == VoiceState.listening) {
+      startListening();
+    }
+    notifyListeners();
+  }
+
+  /// Switches to [VoiceState.listening] and opens the recognizer.
+  ///
+  /// Queued or in-progress speech is dropped first, so calling this while the
+  /// AI is talking acts as an interruption. Does nothing while muted or after
+  /// [dispose].
+  Future<void> startListening() async {
+    if (_disposed || _isMicMuted) return;
+
+    _silenceTimer?.cancel();
+    _state = VoiceState.listening;
+    _recognizedText = '';
+    _lastProcessedLength = 0;
+    notifyListeners();
+
+    try {
+      // Stop TTS if it's playing (Interruption)
+      if (_isSpeaking || _speakQueue.isNotEmpty) {
+        _speakQueue.clear();
+        await _tts.stop();
+        _isSpeaking = false;
+        if (_disposed) return;
+      }
+
+      await _stt.listen(
+        onResult: (text) {
+          if (_disposed) return;
+          _recognizedText = text;
+          notifyListeners();
+        },
+        onFinalResult: (text) {
+          // Ignore results that arrive once this turn is already being
+          // processed, so one utterance cannot start two chat requests.
+          if (_disposed || _state != VoiceState.listening) return;
+          _recognizedText = text;
+          notifyListeners();
+          _processUserMessage(text);
+        },
+        localeId: 'zh-CN', // Make dynamic later if needed
+      );
+    } catch (e) {
+      debugPrint('❌ Voice listen failed: $e');
+      if (_disposed) return;
+      _state = VoiceState.error;
+      notifyListeners();
+    }
+  }
+
+  /// Stops the recognizer without changing [state].
+  Future<void> stopListening() async {
+    await _stt.stop();
+  }
+
+  /// Sends one recognized utterance to the AI and speaks the streamed reply.
+  Future<void> _processUserMessage(String text) async {
+    if (_disposed) return;
+    if (text.trim().isEmpty) {
+      // If empty, just listen again
+      startListening();
+      return;
+    }
+
+    _state = VoiceState.processing;
+    onUserMessage(text); // Notify UI to show user message
+    notifyListeners();
+
+    _aiTypingText = '';
+    _sentenceBuffer = '';
+    _lastProcessedLength = 0;
+    _responseInFlight = true;
+
+    try {
+      final session = await ChatStorageService.getSession(character.id);
+      var messages = session.messages;
+
+      // Per the original author's note, ChatService.sendMessage appends
+      // userMessage itself, so drop it from the history if the storage write
+      // has already landed.
+      if (messages.isNotEmpty) {
+        final lastMsg = messages.last;
+        if (lastMsg.isUser && lastMsg.content == text) {
+          messages = messages.sublist(0, messages.length - 1);
+        }
+      }
+
+      final fullResponse = await ChatService.sendMessage(
+        character: character,
+        messages: messages,
+        userMessage: text,
+        onStream: (content) {
+          if (_disposed) return;
+          _aiTypingText = content;
+          _processStreamChunk(content);
+          notifyListeners();
+        },
+      );
+      _responseInFlight = false;
+
+      // Speak whatever is left after the last sentence terminator.
+      final tail = _sentenceBuffer;
+      _sentenceBuffer = '';
+      if (!_disposed && tail.trim().isNotEmpty) {
+        if (_state != VoiceState.speaking) {
+          _state = VoiceState.speaking;
+          notifyListeners();
+        }
+        await _speak(tail);
+      }
+
+      // Interaction finished, save AI message
+      onAiMessage(ChatMessage.assistant(fullResponse));
+
+      // Resume listening now if nothing is being spoken (for example after an
+      // empty reply); otherwise the TTS completion handler does it.
+      _processSpeakQueue();
+    } catch (e) {
+      debugPrint('❌ Voice Process Error: $e');
+      _responseInFlight = false;
+      if (_disposed) return;
+      _state = VoiceState.error;
+      notifyListeners();
+      // Retry listening after error
+      _scheduleListening(const Duration(seconds: 2));
+    }
+  }
+
+  /// Starts listening after [delay], replacing any restart already pending.
+  void _scheduleListening(Duration delay) {
+    _silenceTimer?.cancel();
+    _silenceTimer = Timer(delay, startListening);
+  }
+
+  /// Feeds the streamed reply into the sentence buffer.
+  ///
+  /// [content] is treated as the whole reply so far; only the part beyond
+  /// [_lastProcessedLength] is appended.
+  void _processStreamChunk(String content) {
+    if (_state != VoiceState.speaking) {
+      _state = VoiceState.speaking;
+      notifyListeners();
+    }
+
+    // Calculate delta (new content only)
+    if (content.length <= _lastProcessedLength) return;
+
+    _sentenceBuffer += content.substring(_lastProcessedLength);
+    _lastProcessedLength = content.length;
+
+    if (_punctuation.any((mark) => _sentenceBuffer.contains(mark))) {
+      _processBufferForSentences();
+    }
+  }
+
+  /// Queues every complete sentence in the buffer and keeps the remainder.
+  ///
+  /// A terminator stays attached to the sentence it ends.
+  void _processBufferForSentences() {
+    final buffer = _sentenceBuffer;
+    var sentenceStart = 0;
+
+    for (var i = 0; i < buffer.length; i++) {
+      if (!_punctuation.contains(buffer[i])) continue;
+
+      final sentence = buffer.substring(sentenceStart, i + 1);
+      if (sentence.trim().isNotEmpty) {
+        _speak(sentence);
+      }
+      sentenceStart = i + 1;
+    }
+
+    // Keep the remaining part that didn't end with punctuation
+    _sentenceBuffer = buffer.substring(sentenceStart);
+  }
+
+  /// Queues [text] for speech unless the session has gone back to listening.
+  Future<void> _speak(String text) async {
+    // If we are listening (interrupted), ignore
+    if (_disposed || _state == VoiceState.listening) return;
+
+    _speakQueue.add(text);
+    _processSpeakQueue();
+  }
+
+  /// Speaks the next queued sentence, or resumes listening once the reply is
+  /// finished and nothing is left to say.
+  Future<void> _processSpeakQueue() async {
+    if (_disposed || _isSpeaking) return;
+
+    if (_speakQueue.isEmpty) {
+      // The queue can run dry between sentences while the reply is still
+      // streaming; resuming then would cut the rest of the reply off.
+      if (_responseInFlight) return;
+
+      // All done speaking (or no TTS was produced)
+      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
+        debugPrint('🎤 Queue empty, resuming listening...');
+        _state = VoiceState.listening;
+        notifyListeners();
+        // Debounce STT restart to avoid rapid stop/start deadlocks
+        _scheduleListening(const Duration(milliseconds: 250));
+      }
+      return;
+    }
+
+    // Pop first
+    final text = _speakQueue.removeAt(0);
+    _isSpeaking = true;
+
+    try {
+      // Ensure STT is paused while speaking
+      await stopListening();
+      if (_disposed) return;
+      await _tts.speak(text);
+    } catch (e) {
+      // Do not wait for a completion callback after a failure; move on.
+      debugPrint('❌ TTS speak failed: $e');
+      _isSpeaking = false;
+      _processSpeakQueue();
+    }
+  }
+
+  /// Ends the session: cancels the pending restart, empties the speak queue
+  /// and stops both services.
+  @override
+  void dispose() {
+    _disposed = true;
+    _silenceTimer?.cancel();
+    _speakQueue.clear();
+    _isSpeaking = false;
+    _stt.stop();
+    _tts.stop();
+    super.dispose();
+  }
+}
diff --git a/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift b/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift index 92b6497..496a75c 100644 --- a/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift +++ b/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift @@ -6,13 +6,17 @@ import FlutterMacOS import Foundation import app_links +import flutter_tts 
import path_provider_foundation import shared_preferences_foundation +import speech_to_text import url_launcher_macos func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) { AppLinksMacosPlugin.register(with: registry.registrar(forPlugin: "AppLinksMacosPlugin")) + FlutterTtsPlugin.register(with: registry.registrar(forPlugin: "FlutterTtsPlugin")) PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin")) SharedPreferencesPlugin.register(with: registry.registrar(forPlugin: "SharedPreferencesPlugin")) + SpeechToTextPlugin.register(with: registry.registrar(forPlugin: "SpeechToTextPlugin")) UrlLauncherPlugin.register(with: registry.registrar(forPlugin: "UrlLauncherPlugin")) } diff --git a/wei_ai_app/pubspec.lock b/wei_ai_app/pubspec.lock index 3c05d02..faa455b 100644 --- a/wei_ai_app/pubspec.lock +++ b/wei_ai_app/pubspec.lock @@ -251,6 +251,14 @@ packages: description: flutter source: sdk version: "0.0.0" + flutter_tts: + dependency: "direct main" + description: + name: flutter_tts + sha256: ce5eb209b40e95f2f4a1397116c87ab2fcdff32257d04ed7a764e75894c03775 + url: "https://pub.dev" + source: hosted + version: "4.2.5" flutter_web_plugins: dependency: transitive description: flutter @@ -352,6 +360,14 @@ packages: url: "https://pub.dev" source: hosted version: "0.7.2" + json_annotation: + dependency: transitive + description: + name: json_annotation + sha256: "805fa86df56383000f640384b282ce0cb8431f1a7a2396de92fb66186d8c57df" + url: "https://pub.dev" + source: hosted + version: "4.10.0" jwt_decode: dependency: transitive description: @@ -512,6 +528,62 @@ packages: url: "https://pub.dev" source: hosted version: "2.3.0" + pedantic: + dependency: transitive + description: + name: pedantic + sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602" + url: "https://pub.dev" + source: hosted + version: "1.11.1" + permission_handler: + dependency: "direct main" + description: + name: permission_handler + sha256: 
bc917da36261b00137bbc8896bf1482169cd76f866282368948f032c8c1caae1 + url: "https://pub.dev" + source: hosted + version: "12.0.1" + permission_handler_android: + dependency: transitive + description: + name: permission_handler_android + sha256: "1e3bc410ca1bf84662104b100eb126e066cb55791b7451307f9708d4007350e6" + url: "https://pub.dev" + source: hosted + version: "13.0.1" + permission_handler_apple: + dependency: transitive + description: + name: permission_handler_apple + sha256: f000131e755c54cf4d84a5d8bd6e4149e262cc31c5a8b1d698de1ac85fa41023 + url: "https://pub.dev" + source: hosted + version: "9.4.7" + permission_handler_html: + dependency: transitive + description: + name: permission_handler_html + sha256: "38f000e83355abb3392140f6bc3030660cfaef189e1f87824facb76300b4ff24" + url: "https://pub.dev" + source: hosted + version: "0.1.3+5" + permission_handler_platform_interface: + dependency: transitive + description: + name: permission_handler_platform_interface + sha256: eb99b295153abce5d683cac8c02e22faab63e50679b937fa1bf67d58bb282878 + url: "https://pub.dev" + source: hosted + version: "4.3.0" + permission_handler_windows: + dependency: transitive + description: + name: permission_handler_windows + sha256: "1a790728016f79a41216d88672dbc5df30e686e811ad4e698bfc51f76ad91f1e" + url: "https://pub.dev" + source: hosted + version: "0.2.1" platform: dependency: transitive description: @@ -709,6 +781,30 @@ packages: url: "https://pub.dev" source: hosted version: "1.10.1" + speech_to_text: + dependency: "direct main" + description: + name: speech_to_text + sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04 + url: "https://pub.dev" + source: hosted + version: "7.3.0" + speech_to_text_platform_interface: + dependency: transitive + description: + name: speech_to_text_platform_interface + sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114 + url: "https://pub.dev" + source: hosted + version: "2.3.0" + speech_to_text_windows: + 
dependency: transitive + description: + name: speech_to_text_windows + sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072" + url: "https://pub.dev" + source: hosted + version: "1.0.0+beta.8" stack_trace: dependency: transitive description: diff --git a/wei_ai_app/pubspec.yaml b/wei_ai_app/pubspec.yaml index 8c0af01..a67b4cd 100644 --- a/wei_ai_app/pubspec.yaml +++ b/wei_ai_app/pubspec.yaml @@ -43,6 +43,9 @@ dependencies: supabase_flutter: ^2.12.0 http: ^1.6.0 shared_preferences: ^2.5.4 + speech_to_text: ^7.3.0 + flutter_tts: ^4.2.5 + permission_handler: ^12.0.1 dev_dependencies: flutter_test: