From 8f19377517110da4450efb6038c4d866931be6ea Mon Sep 17 00:00:00 2001
From: liqupan <iqupan@liqupan.cn>
Date: Tue, 3 Feb 2026 21:41:25 +0800
Subject: [PATCH] feat: mvp voice

---
 .../android/app/src/main/AndroidManifest.xml  |   1 +
 wei_ai_app/ios/Podfile.lock                   |  30 ++
 .../ios/Runner.xcodeproj/project.pbxproj      |  18 ++
 wei_ai_app/ios/Runner/Info.plist              |   4 +
 wei_ai_app/lib/core/services/services.dart    |   2 +
 wei_ai_app/lib/core/services/stt_service.dart |  73 +++++
 wei_ai_app/lib/core/services/tts_service.dart |  65 ++++
 .../interaction/interaction_screen.dart       |  46 ++-
 .../interaction/voice_mode_overlay.dart       |  87 +++--
 .../interaction/voice_session_controller.dart | 303 ++++++++++++++++++
 .../Flutter/GeneratedPluginRegistrant.swift   |   4 +
 wei_ai_app/pubspec.lock                       |  96 ++++++
 wei_ai_app/pubspec.yaml                       |   3 +
 13 files changed, 701 insertions(+), 31 deletions(-)
 create mode 100644 wei_ai_app/lib/core/services/stt_service.dart
 create mode 100644 wei_ai_app/lib/core/services/tts_service.dart
 create mode 100644 wei_ai_app/lib/screens/interaction/voice_session_controller.dart
diff --git a/wei_ai_app/android/app/src/main/AndroidManifest.xml b/wei_ai_app/android/app/src/main/AndroidManifest.xml
index 9c4967a..9d3c33e 100644
--- a/wei_ai_app/android/app/src/main/AndroidManifest.xml
+++ b/wei_ai_app/android/app/src/main/AndroidManifest.xml
@@ -1,6 +1,7 @@
 <manifest xmlns:android="http://schemas.android.com/apk/res/android">
     <!-- 网络权限 -->
     <uses-permission android:name="android.permission.INTERNET" />
+    <uses-permission android:name="android.permission.RECORD_AUDIO" />
     
     <application
         android:label="wei_ai_app"
diff --git a/wei_ai_app/ios/Podfile.lock b/wei_ai_app/ios/Podfile.lock
index 54c021e..053a93f 100644
--- a/wei_ai_app/ios/Podfile.lock
+++ b/wei_ai_app/ios/Podfile.lock
@@ -1,40 +1,70 @@
 PODS:
   - app_links (6.4.1):
     - Flutter
+  - CwlCatchException (2.2.1):
+    - CwlCatchExceptionSupport (~> 2.2.1)
+  - CwlCatchExceptionSupport (2.2.1)
   - Flutter (1.0.0)
+  - flutter_tts (0.0.1):
+    - Flutter
   - path_provider_foundation (0.0.1):
     - Flutter
     - FlutterMacOS
+  - permission_handler_apple (9.3.0):
+    - Flutter
   - shared_preferences_foundation (0.0.1):
     - Flutter
     - FlutterMacOS
+  - speech_to_text (7.2.0):
+    - CwlCatchException
+    - Flutter
+    - FlutterMacOS
   - url_launcher_ios (0.0.1):
     - Flutter
 
 DEPENDENCIES:
   - app_links (from `.symlinks/plugins/app_links/ios`)
   - Flutter (from `Flutter`)
+  - flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
   - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
+  - permission_handler_apple (from `.symlinks/plugins/permission_handler_apple/ios`)
   - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
+  - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
   - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
 
+SPEC REPOS:
+  trunk:
+    - CwlCatchException
+    - CwlCatchExceptionSupport
+
 EXTERNAL SOURCES:
   app_links:
     :path: ".symlinks/plugins/app_links/ios"
   Flutter:
     :path: Flutter
+  flutter_tts:
+    :path: ".symlinks/plugins/flutter_tts/ios"
   path_provider_foundation:
     :path: ".symlinks/plugins/path_provider_foundation/darwin"
+  permission_handler_apple:
+    :path: ".symlinks/plugins/permission_handler_apple/ios"
   shared_preferences_foundation:
     :path: ".symlinks/plugins/shared_preferences_foundation/darwin"
+  speech_to_text:
+    :path: ".symlinks/plugins/speech_to_text/darwin"
   url_launcher_ios:
     :path: ".symlinks/plugins/url_launcher_ios/ios"
 
 SPEC CHECKSUMS:
   app_links: 3dbc685f76b1693c66a6d9dd1e9ab6f73d97dc0a
+  CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
+  CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
   Flutter: cabc95a1d2626b1b06e7179b784ebcf0c0cde467
+  flutter_tts: 35ac3c7d42412733e795ea96ad2d7e05d0a75113
   path_provider_foundation: bb55f6dbba17d0dccd6737fe6f7f34fbd0376880
+  permission_handler_apple: 4ed2196e43d0651e8ff7ca3483a069d469701f2d
   shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb
+  speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
   url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b
 
 PODFILE CHECKSUM: 3c63482e143d1b91d2d2560aee9fb04ecc74ac7e
diff --git a/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj b/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj
index bed076e..7669818 100644
--- a/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj
+++ b/wei_ai_app/ios/Runner.xcodeproj/project.pbxproj
@@ -199,6 +199,7 @@
 				9705A1C41CF9048500538489 /* Embed Frameworks */,
 				3B06AD1E1E4923F5004D2608 /* Thin Binary */,
 				E1FBF8521399AC335A04FC96 /* [CP] Embed Pods Frameworks */,
+				DFE1ACAD91639A0D9A85544A /* [CP] Copy Pods Resources */,
 			);
 			buildRules = (
 			);
@@ -345,6 +346,23 @@
 			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
 			showEnvVarsInLog = 0;
 		};
+		DFE1ACAD91639A0D9A85544A /* [CP] Copy Pods Resources */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputFileListPaths = (
+				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources-${CONFIGURATION}-input-files.xcfilelist",
+			);
+			name = "[CP] Copy Pods Resources";
+			outputFileListPaths = (
+				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources-${CONFIGURATION}-output-files.xcfilelist",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-resources.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
 		E1FBF8521399AC335A04FC96 /* [CP] Embed Pods Frameworks */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 2147483647;
diff --git a/wei_ai_app/ios/Runner/Info.plist b/wei_ai_app/ios/Runner/Info.plist
index 78fe71f..7eb665f 100644
--- a/wei_ai_app/ios/Runner/Info.plist
+++ b/wei_ai_app/ios/Runner/Info.plist
@@ -41,6 +41,10 @@
 		<string>UIInterfaceOrientationLandscapeLeft</string>
 		<string>UIInterfaceOrientationLandscapeRight</string>
 	</array>
+	<key>NSMicrophoneUsageDescription</key>
+	<string>We need access to your microphone for voice chat with AI characters.</string>
+	<key>NSSpeechRecognitionUsageDescription</key>
+	<string>We need speech recognition to convert your voice to text.</string>
 	<key>CADisableMinimumFrameDurationOnPhone</key>
 	<true/>
 	<key>UIApplicationSupportsIndirectInputEvents</key>
diff --git a/wei_ai_app/lib/core/services/services.dart b/wei_ai_app/lib/core/services/services.dart
index 1f5c32e..b5ca974 100644
--- a/wei_ai_app/lib/core/services/services.dart
+++ b/wei_ai_app/lib/core/services/services.dart
@@ -4,3 +4,5 @@ library services;
 export 'supabase_service.dart';
 export 'chat_service.dart';
 export 'chat_storage_service.dart';
+export 'stt_service.dart';
+export 'tts_service.dart';
diff --git a/wei_ai_app/lib/core/services/stt_service.dart b/wei_ai_app/lib/core/services/stt_service.dart
new file mode 100644
index 0000000..0c43901
--- /dev/null
+++ b/wei_ai_app/lib/core/services/stt_service.dart
@@ -0,0 +1,89 @@
+import 'package:speech_to_text/speech_to_text.dart';
+import 'package:flutter/foundation.dart';
+
+/// Process-wide singleton wrapping the `speech_to_text` plugin.
+///
+/// Tracks initialization and listening state so callers can start, stop and
+/// cancel a recognition session without touching the plugin directly.
+class STTService {
+  static final STTService _instance = STTService._internal();
+  factory STTService() => _instance;
+  STTService._internal();
+
+  final SpeechToText _speech = SpeechToText();
+  bool _isInitialized = false;
+  bool _isListening = false;
+
+  /// Whether the service currently believes a recognition session is active.
+  bool get isListening => _isListening;
+
+  /// Initializes the speech engine once; later calls reuse a successful result.
+  ///
+  /// Returns `true` when the engine is ready. Failures are logged and reported
+  /// as `false`, so the next call retries the initialization.
+  Future<bool> init() async {
+    if (_isInitialized) return true;
+
+    try {
+      _isInitialized = await _speech.initialize(
+        onError: (error) => debugPrint('❌ STT Error: $error'),
+        onStatus: (status) {
+          debugPrint('🎤 STT Status: $status');
+          if (status == 'listening') _isListening = true;
+          if (status == 'notListening') _isListening = false;
+        },
+      );
+      debugPrint('✅ STT Initialized: $_isInitialized');
+      return _isInitialized;
+    } catch (e) {
+      debugPrint('❌ STT Init failed: $e');
+      return false;
+    }
+  }
+
+  /// Starts a recognition session, stopping first if one is marked active.
+  ///
+  /// [onResult] receives partial transcripts and [onFinalResult] the final
+  /// one. Returns without listening when the engine cannot be initialized.
+  Future<void> listen({
+    required void Function(String text) onResult,
+    required void Function(String text) onFinalResult,
+    String localeId = 'zh-CN',
+  }) async {
+    if (!_isInitialized && !await init()) return;
+
+    if (_isListening) await stop();
+
+    await _speech.listen(
+      onResult: (result) {
+        if (result.finalResult) {
+          onFinalResult(result.recognizedWords);
+        } else {
+          onResult(result.recognizedWords);
+        }
+      },
+      localeId: localeId,
+      listenFor: const Duration(seconds: 30),
+      pauseFor: const Duration(seconds: 3), // Wait 3s of silence to consider "done"
+      // The top-level partialResults/cancelOnError/listenMode arguments are
+      // deprecated in speech_to_text; SpeechListenOptions carries them now.
+      listenOptions: SpeechListenOptions(
+        partialResults: true,
+        cancelOnError: true,
+        listenMode: ListenMode.dictation,
+      ),
+    );
+  }
+
+  /// Asks the plugin to stop and marks the service as not listening.
+  Future<void> stop() async {
+    await _speech.stop();
+    _isListening = false;
+  }
+
+  /// Asks the plugin to cancel and marks the service as not listening.
+  Future<void> cancel() async {
+    await _speech.cancel();
+    _isListening = false;
+  }
+}
diff --git a/wei_ai_app/lib/core/services/tts_service.dart b/wei_ai_app/lib/core/services/tts_service.dart
new file mode 100644
index 0000000..ad71702
--- /dev/null
+++ b/wei_ai_app/lib/core/services/tts_service.dart
@@ -0,0 +1,76 @@
+import 'package:flutter_tts/flutter_tts.dart';
+import 'package:flutter/foundation.dart';
+
+/// Process-wide singleton wrapping the `flutter_tts` plugin.
+class TTSService {
+  TTSService._internal();
+  static final TTSService _instance = TTSService._internal();
+  factory TTSService() => _instance;
+
+  final FlutterTts _flutterTts = FlutterTts();
+  bool _isInitialized = false;
+
+  /// Configures the engine once; errors are logged and leave the service
+  /// uninitialized so that a later call tries again.
+  Future<void> init() async {
+    if (_isInitialized) return;
+
+    try {
+      final bool onIos =
+          !kIsWeb && defaultTargetPlatform == TargetPlatform.iOS;
+      if (onIos) {
+        await _flutterTts.setSharedInstance(true);
+        await _flutterTts.setIosAudioCategory(
+          IosTextToSpeechAudioCategory.playAndRecord,
+          [
+            IosTextToSpeechAudioCategoryOptions.allowBluetooth,
+            IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
+            IosTextToSpeechAudioCategoryOptions.mixWithOthers,
+            IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
+          ],
+          IosTextToSpeechAudioMode.defaultMode,
+        );
+      }
+
+      await _flutterTts.setLanguage("zh-CN"); // Default to Chinese
+      await _flutterTts.setPitch(1.0);
+      await _flutterTts.setSpeechRate(0.5); // Normal rate
+
+      _isInitialized = true;
+      debugPrint('✅ TTSService initialized');
+    } catch (e) {
+      debugPrint('❌ TTSService init error: $e');
+    }
+  }
+
+  /// Speaks [text], attempting initialization first when it has not
+  /// succeeded yet. Empty text is ignored.
+  Future<void> speak(String text) async {
+    if (!_isInitialized) {
+      await init();
+    }
+    if (text.isEmpty) {
+      return;
+    }
+
+    debugPrint('🗣️ TTS Speaking: $text');
+    await _flutterTts.speak(text);
+  }
+
+  /// Asks the plugin to stop speaking.
+  Future<void> stop() async {
+    await _flutterTts.stop();
+  }
+
+  /// Forwards [handler] to the plugin's completion callback.
+  void setCompletionHandler(VoidCallback handler) =>
+      _flutterTts.setCompletionHandler(handler);
+
+  /// Forwards [handler] to the plugin's start callback.
+  void setStartHandler(VoidCallback handler) =>
+      _flutterTts.setStartHandler(handler);
+
+  /// Forwards [handler] to the plugin's error callback.
+  void setErrorHandler(Function(dynamic) handler) =>
+      _flutterTts.setErrorHandler(handler);
+}
diff --git a/wei_ai_app/lib/screens/interaction/interaction_screen.dart b/wei_ai_app/lib/screens/interaction/interaction_screen.dart
index 2dcec93..4b6be27 100644
--- a/wei_ai_app/lib/screens/interaction/interaction_screen.dart
+++ b/wei_ai_app/lib/screens/interaction/interaction_screen.dart
@@ -5,6 +5,7 @@ import 'package:go_router/go_router.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import '../../core/core.dart';
 import 'voice_mode_overlay.dart';
+import 'voice_session_controller.dart';
 
 class InteractionScreen extends ConsumerStatefulWidget {
   final String characterId;
@@ -20,6 +21,7 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
   List<ChatMessage> _messages = [];
   final TextEditingController _controller = TextEditingController();
   final ScrollController _scrollController = ScrollController();
+  VoiceSessionController? _voiceController;
   bool _isVoiceMode = false;
   bool _isLoading = false;
   bool _isTyping = false;
@@ -31,8 +33,10 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
     _loadCharacterAndMessages();
   }
 
+  /// Tears down the active voice session (if any) before the text controllers.
   @override
   void dispose() {
+    _voiceController?.dispose();
     _controller.dispose();
     _scrollController.dispose();
     super.dispose();
@@ -81,6 +85,38 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
     });
   }
 
+  /// Starts a voice session for the loaded character and shows the overlay.
+  void _enterVoiceMode() {
+    // Ignore taps before the character loads or while a session is running.
+    final character = _character;
+    if (character == null || _isVoiceMode) return;
+    FocusScope.of(context).unfocus();
+    _voiceController = VoiceSessionController(
+      character: character,
+      onUserMessage: (text) {
+        if (!mounted) return;
+        final userMsg = ChatMessage.user(text);
+        setState(() => _messages = [..._messages, userMsg]);
+        ChatStorageService.addMessage(widget.characterId, userMsg);
+        _scrollToBottom();
+      },
+      onAiMessage: (msg) {
+        if (!mounted) return;
+        setState(() => _messages = [..._messages, msg]);
+        ChatStorageService.addMessage(widget.characterId, msg);
+        _scrollToBottom();
+      },
+    );
+    setState(() => _isVoiceMode = true);
+  }
+
+  /// Ends the voice session, releases its controller and hides the overlay.
+  void _exitVoiceMode() {
+    _voiceController?.dispose();
+    _voiceController = null;
+    setState(() => _isVoiceMode = false);
+  }
+
   Future<void> _sendMessage() async {
     if (_controller.text.trim().isEmpty || _character == null || _isLoading) return;
 
@@ -273,10 +309,11 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
           ),
 
 
-          if (_isVoiceMode && _character != null)
+          if (_isVoiceMode && _character != null && _voiceController != null)
             VoiceModeOverlay(
               character: _character!,
-              onClose: () => setState(() => _isVoiceMode = false),
+              controller: _voiceController!,
+              onClose: _exitVoiceMode,
             ),
         ],
       ),
@@ -405,10 +442,7 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
           child: Row(
             children: [
               GestureDetector(
-                onTap: () {
-                  FocusScope.of(context).unfocus();
-                  setState(() => _isVoiceMode = true);
-                },
+                onTap: _enterVoiceMode,
                 child: Container(
                   width: 44,
                   height: 44,
diff --git a/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart b/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart
index 4e1adf2..4773fa9 100644
--- a/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart
+++ b/wei_ai_app/lib/screens/interaction/voice_mode_overlay.dart
@@ -2,14 +2,17 @@ import 'dart:ui';
 import 'package:flutter/material.dart';
 import 'package:lucide_icons/lucide_icons.dart';
 import '../../core/core.dart';
+import 'voice_session_controller.dart';
 
 class VoiceModeOverlay extends StatefulWidget {
   final CharacterModel character;
+  final VoiceSessionController controller;
   final VoidCallback onClose;
 
   const VoiceModeOverlay({
     super.key,
     required this.character,
+    required this.controller,
     required this.onClose,
   });
 
@@ -18,8 +21,6 @@ class VoiceModeOverlay extends StatefulWidget {
 }
 
 class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerProviderStateMixin {
-  bool _isMicMuted = false;
-  bool _isSpeakerOn = true;
   late AnimationController _controller;
 
   String get _avatarUrl => CharacterRepository.getAvatarUrl(widget.character.avatarPath);
@@ -30,10 +31,18 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
     _controller = AnimationController(
         vsync: this, duration: const Duration(seconds: 2))
       ..repeat(reverse: true);
+    
+    // Listen to controller changes to update UI
+    widget.controller.addListener(_onStateChange);
+  }
+
+  void _onStateChange() {
+    if (mounted) setState(() {});
   }
 
   @override
   void dispose() {
+    widget.controller.removeListener(_onStateChange);
     _controller.dispose();
     super.dispose();
   }
@@ -102,13 +111,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
                       ),
                       const SizedBox(height: 8),
                       Text(
-                        _isMicMuted ? 'Mic Muted' : 'Listening...',
+                        _getStatusText(),
                         style: TextStyle(
                           color: Colors.white.withOpacity(0.6),
                           fontSize: 12,
                           letterSpacing: 2,
                           fontWeight: FontWeight.w500,
                         ),
+                        textAlign: TextAlign.center,
                       ),
                     ],
                   ),
@@ -122,13 +132,23 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
                     child: Stack(
                       alignment: Alignment.center,
                       children: [
-                        if (!_isMicMuted)
+                        // Show animation only when listening or speaking
+                        if (widget.controller.state == VoiceState.listening || widget.controller.state == VoiceState.speaking)
                           AnimatedBuilder(
                             animation: _controller,
                             builder: (context, child) {
+                              double scale = 1.0;
+                              if (widget.controller.state == VoiceState.speaking) {
+                                // Faster pulse when AI is speaking
+                                scale = 0.8 + 0.3 * _controller.value; 
+                              } else {
+                                // Slower pulse when listening
+                                scale = 0.8 + 0.1 * _controller.value;
+                              }
+                              
                               return Container(
-                                width: 200 * (0.8 + 0.2 * _controller.value),
-                                height: 200 * (0.8 + 0.2 * _controller.value),
+                                width: 200 * scale,
+                                height: 200 * scale,
                                 decoration: BoxDecoration(
                                   shape: BoxShape.circle,
                                   border: Border.all(
@@ -178,11 +198,13 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
                              builder: (context, child) {
                                 return Container(
                                   width: 4,
-                                  height: _isMicMuted 
-                                    ? 4 
-                                    : 10 + (20 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)),
+                                  height: widget.controller.state == VoiceState.speaking
+                                    ? 10 + (30 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Active wave
+                                    : widget.controller.state == VoiceState.processing 
+                                      ? 8 + (5 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Thinking wave
+                                      : 4, // Idle
                                   decoration: BoxDecoration(
-                                    color: Colors.white.withOpacity(_isMicMuted ? 0.2 : 0.8),
+                                    color: Colors.white.withOpacity(widget.controller.state != VoiceState.listening ? 0.8 : 0.4),
                                     borderRadius: BorderRadius.circular(2),
                                   ),
                                 );
@@ -203,14 +225,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
                       children: [
                         // Mic Toggle
                         IconButton(
-                          onPressed: () => setState(() => _isMicMuted = !_isMicMuted),
-                          icon: Icon(_isMicMuted ? LucideIcons.micOff : LucideIcons.mic),
+                          onPressed: widget.controller.toggleMic,
+                          icon: Icon(widget.controller.isMicMuted ? LucideIcons.micOff : LucideIcons.mic),
                           iconSize: 24,
                           style: IconButton.styleFrom(
-                            backgroundColor: _isMicMuted 
+                            backgroundColor: widget.controller.isMicMuted 
                               ? Colors.white 
                               : Colors.white.withOpacity(0.1),
-                            foregroundColor: _isMicMuted 
+                            foregroundColor: widget.controller.isMicMuted 
                               ? const Color(0xFF2E1065) 
                               : Colors.white,
                             padding: const EdgeInsets.all(16),
@@ -231,20 +253,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
                           ),
                         ),
 
-                        // Speaker Toggle
+                        // Speaker Toggle - Placeholder for now
                         IconButton(
-                          onPressed: () => setState(() => _isSpeakerOn = !_isSpeakerOn),
-                          icon: Icon(
-                            _isSpeakerOn ? LucideIcons.volume2 : LucideIcons.volumeX
-                          ),
+                          onPressed: () {}, // TODO: Implement speaker toggle in controller
+                          icon: const Icon(LucideIcons.volume2),
                           iconSize: 24,
                           style: IconButton.styleFrom(
-                            backgroundColor: _isSpeakerOn 
-                               ? Colors.white.withOpacity(0.1)
-                               : Colors.white.withOpacity(0.05),
-                             foregroundColor: _isSpeakerOn
-                               ? Colors.white
-                               : Colors.white.withOpacity(0.5),
+                            backgroundColor: Colors.white.withOpacity(0.1),
+                             foregroundColor: Colors.white,
                             padding: const EdgeInsets.all(16),
                             minimumSize: const Size(64, 64),
                           ),
@@ -261,4 +277,29 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
       ),
     );
   }
+
+  /// Status line for the current voice state.
+  ///
+  /// A muted microphone takes precedence; while listening, the tail of the
+  /// live transcript is shown once there is one.
+  String _getStatusText() {
+    if (widget.controller.isMicMuted) return 'Microphone Muted';
+
+    switch (widget.controller.state) {
+      case VoiceState.listening:
+        final transcript = widget.controller.recognizedText.characters;
+        if (transcript.isEmpty) return 'Listening...';
+        // Truncate by grapheme cluster: cutting UTF-16 code units could split
+        // an emoji or other surrogate pair and render a broken glyph.
+        return transcript.length > 20
+            ? '...${transcript.takeLast(20)}'
+            : transcript.toString();
+      case VoiceState.processing:
+        return 'Thinking...';
+      case VoiceState.speaking:
+        return 'Speaking...';
+      case VoiceState.error:
+        return 'Error';
+    }
+  }
 }
diff --git a/wei_ai_app/lib/screens/interaction/voice_session_controller.dart b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
new file mode 100644
index 0000000..800773d
--- /dev/null
+++ b/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
@@ -0,0 +1,342 @@
+import 'dart:async';
+import 'package:flutter/foundation.dart';
+import '../../core/core.dart';
+import 'package:permission_handler/permission_handler.dart';
+
+/// Phase of the listen -> think -> speak loop of a voice session.
+enum VoiceState {
+  listening, // Waiting for user input
+  processing, // Sending to AI
+  speaking, // AI is talking
+  error,
+}
+
+/// Drives one voice conversation with [character].
+///
+/// Recognized speech is sent to `ChatService`; the streamed reply is split
+/// into sentences that are queued and spoken one at a time, and listening
+/// resumes once the queue has drained. Listeners are notified on every
+/// state or transcript change.
+class VoiceSessionController extends ChangeNotifier {
+  final CharacterModel character;
+  final Function(String) onUserMessage;
+  final Function(ChatMessage) onAiMessage;
+
+  // Speaking Queue
+  final List<String> _speakQueue = [];
+  bool _isSpeaking = false;
+
+  // Pending delayed restart of speech recognition (debounce or error retry).
+  Timer? _silenceTimer;
+
+  // Set by dispose(). STTService and TTSService are singletons that outlive
+  // this controller, so their callbacks can still fire afterwards and must
+  // be ignored instead of touching a disposed ChangeNotifier.
+  bool _disposed = false;
+
+  VoiceState _state = VoiceState.listening;
+  String _recognizedText = '';
+  String _aiTypingText = '';
+  bool _isMicMuted = false;
+
+  // Services
+  final STTService _stt = STTService();
+  final TTSService _tts = TTSService();
+
+  // State getters
+  VoiceState get state => _state;
+  String get recognizedText => _recognizedText;
+  String get aiTypingText => _aiTypingText;
+  bool get isMicMuted => _isMicMuted;
+
+  // Buffer for sentence completion
+  String _sentenceBuffer = '';
+  final List<String> _punctuation = ['。', '？', '！', '.', '?', '!', '\n'];
+
+  // Length of the streamed reply already moved into _sentenceBuffer.
+  int _lastProcessedLength = 0;
+
+  VoiceSessionController({
+    required this.character,
+    required this.onUserMessage,
+    required this.onAiMessage,
+  }) {
+    _init();
+  }
+
+  /// Requests permissions, initializes STT/TTS, wires the TTS callbacks and
+  /// starts listening. A thrown error moves the session to [VoiceState.error].
+  Future<void> _init() async {
+    try {
+      // Request permissions
+      await [Permission.microphone, Permission.speech].request();
+
+      // Init services
+      await _stt.init();
+      await _tts.init();
+    } catch (e) {
+      debugPrint('❌ Voice init failed: $e');
+      _state = VoiceState.error;
+      notifyListeners();
+      return;
+    }
+    // The session may have been closed while the awaits above were pending.
+    if (_disposed) return;
+
+    // Setup TTS callbacks
+    _tts.setStartHandler(() {
+      debugPrint('🔊 TTS Started');
+      // Already paused STT in _processSpeakQueue
+    });
+
+    _tts.setCompletionHandler(() {
+      debugPrint('✅ TTS Completed');
+      _onUtteranceFinished();
+    });
+
+    _tts.setErrorHandler((msg) {
+      debugPrint('❌ TTS Error: $msg');
+      _onUtteranceFinished();
+    });
+
+    // Start listening immediately
+    if (!_isMicMuted) {
+      startListening();
+    }
+  }
+
+  // Advances the speak queue after an utterance ended, successfully or not.
+  void _onUtteranceFinished() {
+    if (_disposed) return;
+    _isSpeaking = false;
+    _processSpeakQueue();
+  }
+
+  @override
+  void notifyListeners() {
+    // Late async callbacks must not notify a disposed ChangeNotifier.
+    if (!_disposed) super.notifyListeners();
+  }
+
+  /// Toggles the microphone mute flag and stops or resumes recognition.
+  void toggleMic() {
+    _isMicMuted = !_isMicMuted;
+    if (_isMicMuted) {
+      stopListening();
+    } else if (_state == VoiceState.listening) {
+      startListening();
+    }
+    notifyListeners();
+  }
+
+  /// Switches to [VoiceState.listening], interrupting any TTS playback, and
+  /// starts a recognition session. Does nothing while muted or after dispose.
+  Future<void> startListening() async {
+    if (_disposed || _isMicMuted) return;
+
+    _state = VoiceState.listening;
+    _recognizedText = '';
+    _lastProcessedLength = 0;
+    notifyListeners();
+
+    // Stop TTS if it's playing (Interruption)
+    if (_isSpeaking || _speakQueue.isNotEmpty) {
+      _speakQueue.clear();
+      await _tts.stop();
+      _isSpeaking = false;
+      if (_disposed) return;
+    }
+
+    await _stt.listen(
+      onResult: (text) {
+        if (_disposed) return;
+        _recognizedText = text;
+        notifyListeners();
+      },
+      onFinalResult: (text) {
+        if (_disposed) return;
+        _recognizedText = text;
+        notifyListeners();
+        _processUserMessage(text);
+      },
+      localeId: 'zh-CN', // Make dynamic later if needed
+    );
+  }
+
+  /// Stops the current recognition session.
+  Future<void> stopListening() async {
+    await _stt.stop();
+  }
+
+  /// Sends a finished utterance to the AI and speaks the streamed reply.
+  Future<void> _processUserMessage(String text) async {
+    if (_disposed) return;
+    if (text.trim().isEmpty) {
+      // If empty, just listen again
+      startListening();
+      return;
+    }
+
+    _state = VoiceState.processing;
+    onUserMessage(text); // Notify UI to show user message
+    notifyListeners();
+
+    _aiTypingText = '';
+    _sentenceBuffer = '';
+    _lastProcessedLength = 0;
+
+    try {
+      // Construct history. Loaded inside the try so a storage failure takes
+      // the error/retry path instead of leaving the session in `processing`.
+      final session = await ChatStorageService.getSession(character.id);
+      var messages = session.messages;
+
+      // ChatService.sendMessage appends the userMessage automatically.
+      // We need to ensure 'messages' doesn't already contain it (if DB write was fast).
+      if (messages.isNotEmpty) {
+        final lastMsg = messages.last;
+        if (lastMsg.isUser && lastMsg.content == text) {
+          messages = List.from(messages)..removeLast();
+        }
+      }
+
+      final fullResponse = await ChatService.sendMessage(
+        character: character,
+        messages: messages,
+        userMessage: text,
+        onStream: (content) {
+          if (_disposed) return;
+          _aiTypingText = content;
+          _processStreamChunk(content);
+          notifyListeners();
+        },
+      );
+      if (_disposed) return;
+
+      // Process any remaining text in buffer
+      final remainder = _sentenceBuffer;
+      _sentenceBuffer = '';
+      if (remainder.trim().isNotEmpty) {
+        if (_state != VoiceState.speaking) {
+          _state = VoiceState.speaking;
+          notifyListeners();
+        }
+        await _speak(remainder);
+      }
+
+      // Interaction finished, save AI message
+      final aiMsg = ChatMessage.assistant(fullResponse);
+      onAiMessage(aiMsg);
+
+      // Listening normally resumes from the TTS completion handler once the
+      // queue has drained. When nothing is playing (e.g. the reply produced
+      // no speakable text) no handler will fire, so resume from here.
+      _processSpeakQueue();
+    } catch (e) {
+      if (_disposed) return;
+      debugPrint('❌ Voice Process Error: $e');
+      _state = VoiceState.error;
+      notifyListeners();
+      // Retry listening after error; a Timer so dispose() can cancel it.
+      _silenceTimer?.cancel();
+      _silenceTimer = Timer(const Duration(seconds: 2), startListening);
+    }
+  }
+
+  /// Feeds the not-yet-seen tail of the accumulated reply [content] into the
+  /// sentence buffer and queues every sentence that is now complete.
+  void _processStreamChunk(String content) {
+    if (_state != VoiceState.speaking) {
+      _state = VoiceState.speaking;
+      notifyListeners();
+    }
+
+    // Calculate delta (new content only)
+    if (content.length <= _lastProcessedLength) return;
+
+    _sentenceBuffer += content.substring(_lastProcessedLength);
+    _lastProcessedLength = content.length;
+
+    // Check for punctuation to split sentences
+    if (_punctuation.any((p) => _sentenceBuffer.contains(p))) {
+      _processBufferForSentences();
+    }
+  }
+
+  /// Queues every complete sentence in the buffer (punctuation stays attached)
+  /// and keeps the unterminated remainder for the next chunk.
+  void _processBufferForSentences() {
+    final buffer = _sentenceBuffer;
+    int lastSplitIndex = 0;
+
+    for (int i = 0; i < buffer.length; i++) {
+      if (!_punctuation.contains(buffer[i])) continue;
+
+      // Found end of a sentence
+      final sentence = buffer.substring(lastSplitIndex, i + 1);
+      if (sentence.trim().isNotEmpty) {
+        _speak(sentence);
+      }
+      lastSplitIndex = i + 1;
+    }
+
+    // Keep the remaining part that didn't end with punctuation
+    _sentenceBuffer = buffer.substring(lastSplitIndex);
+  }
+
+  /// Queues [text] for playback unless the session is listening
+  /// (interrupted) or disposed.
+  Future<void> _speak(String text) async {
+    if (_disposed || _state == VoiceState.listening) return;
+
+    _speakQueue.add(text);
+    _processSpeakQueue();
+  }
+
+  /// Plays the next queued sentence, or resumes listening once the queue is
+  /// empty. Does nothing while an utterance is still in progress.
+  Future<void> _processSpeakQueue() async {
+    if (_disposed || _isSpeaking) return;
+
+    if (_speakQueue.isEmpty) {
+      // All done speaking (or no TTS was produced)
+      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
+        debugPrint('🎤 Queue empty, resuming listening...');
+        _state = VoiceState.listening;
+        notifyListeners();
+        // Debounce STT restart to avoid rapid stop/start deadlocks
+        _silenceTimer?.cancel();
+        _silenceTimer =
+            Timer(const Duration(milliseconds: 250), startListening);
+      }
+      return;
+    }
+
+    // Pop first
+    final text = _speakQueue.removeAt(0);
+    _isSpeaking = true;
+
+    try {
+      // Ensure STT is paused while speaking
+      await stopListening();
+      if (_disposed) return;
+      await _tts.speak(text);
+    } catch (e) {
+      // Without this the queue would stay blocked on _isSpeaking forever.
+      debugPrint('❌ TTS speak failed: $e');
+      _onUtteranceFinished();
+    }
+  }
+
+  @override
+  void dispose() {
+    _disposed = true;
+    // Cancel the pending restart/retry; it would otherwise reopen the
+    // microphone after the session has been closed.
+    _silenceTimer?.cancel();
+    _speakQueue.clear();
+    _stt.stop();
+    _tts.stop();
+    super.dispose();
+  }
+}
diff --git a/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift b/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift
index 92b6497..496a75c 100644
--- a/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift
+++ b/wei_ai_app/macos/Flutter/GeneratedPluginRegistrant.swift
@@ -6,13 +6,17 @@ import FlutterMacOS
 import Foundation
 
 import app_links
+import flutter_tts
 import path_provider_foundation
 import shared_preferences_foundation
+import speech_to_text
 import url_launcher_macos
 
 func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) {
   AppLinksMacosPlugin.register(with: registry.registrar(forPlugin: "AppLinksMacosPlugin"))
+  FlutterTtsPlugin.register(with: registry.registrar(forPlugin: "FlutterTtsPlugin"))
   PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin"))
   SharedPreferencesPlugin.register(with: registry.registrar(forPlugin: "SharedPreferencesPlugin"))
+  SpeechToTextPlugin.register(with: registry.registrar(forPlugin: "SpeechToTextPlugin"))
   UrlLauncherPlugin.register(with: registry.registrar(forPlugin: "UrlLauncherPlugin"))
 }
diff --git a/wei_ai_app/pubspec.lock b/wei_ai_app/pubspec.lock
index 3c05d02..faa455b 100644
--- a/wei_ai_app/pubspec.lock
+++ b/wei_ai_app/pubspec.lock
@@ -251,6 +251,14 @@ packages:
     description: flutter
     source: sdk
     version: "0.0.0"
+  flutter_tts:
+    dependency: "direct main"
+    description:
+      name: flutter_tts
+      sha256: ce5eb209b40e95f2f4a1397116c87ab2fcdff32257d04ed7a764e75894c03775
+      url: "https://pub.dev"
+    source: hosted
+    version: "4.2.5"
   flutter_web_plugins:
     dependency: transitive
     description: flutter
@@ -352,6 +360,14 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "0.7.2"
+  json_annotation:
+    dependency: transitive
+    description:
+      name: json_annotation
+      sha256: "805fa86df56383000f640384b282ce0cb8431f1a7a2396de92fb66186d8c57df"
+      url: "https://pub.dev"
+    source: hosted
+    version: "4.10.0"
   jwt_decode:
     dependency: transitive
     description:
@@ -512,6 +528,62 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "2.3.0"
+  pedantic:
+    dependency: transitive
+    description:
+      name: pedantic
+      sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.11.1"
+  permission_handler:
+    dependency: "direct main"
+    description:
+      name: permission_handler
+      sha256: bc917da36261b00137bbc8896bf1482169cd76f866282368948f032c8c1caae1
+      url: "https://pub.dev"
+    source: hosted
+    version: "12.0.1"
+  permission_handler_android:
+    dependency: transitive
+    description:
+      name: permission_handler_android
+      sha256: "1e3bc410ca1bf84662104b100eb126e066cb55791b7451307f9708d4007350e6"
+      url: "https://pub.dev"
+    source: hosted
+    version: "13.0.1"
+  permission_handler_apple:
+    dependency: transitive
+    description:
+      name: permission_handler_apple
+      sha256: f000131e755c54cf4d84a5d8bd6e4149e262cc31c5a8b1d698de1ac85fa41023
+      url: "https://pub.dev"
+    source: hosted
+    version: "9.4.7"
+  permission_handler_html:
+    dependency: transitive
+    description:
+      name: permission_handler_html
+      sha256: "38f000e83355abb3392140f6bc3030660cfaef189e1f87824facb76300b4ff24"
+      url: "https://pub.dev"
+    source: hosted
+    version: "0.1.3+5"
+  permission_handler_platform_interface:
+    dependency: transitive
+    description:
+      name: permission_handler_platform_interface
+      sha256: eb99b295153abce5d683cac8c02e22faab63e50679b937fa1bf67d58bb282878
+      url: "https://pub.dev"
+    source: hosted
+    version: "4.3.0"
+  permission_handler_windows:
+    dependency: transitive
+    description:
+      name: permission_handler_windows
+      sha256: "1a790728016f79a41216d88672dbc5df30e686e811ad4e698bfc51f76ad91f1e"
+      url: "https://pub.dev"
+    source: hosted
+    version: "0.2.1"
   platform:
     dependency: transitive
     description:
@@ -709,6 +781,30 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "1.10.1"
+  speech_to_text:
+    dependency: "direct main"
+    description:
+      name: speech_to_text
+      sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
+      url: "https://pub.dev"
+    source: hosted
+    version: "7.3.0"
+  speech_to_text_platform_interface:
+    dependency: transitive
+    description:
+      name: speech_to_text_platform_interface
+      sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
+      url: "https://pub.dev"
+    source: hosted
+    version: "2.3.0"
+  speech_to_text_windows:
+    dependency: transitive
+    description:
+      name: speech_to_text_windows
+      sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.0.0+beta.8"
   stack_trace:
     dependency: transitive
     description:
diff --git a/wei_ai_app/pubspec.yaml b/wei_ai_app/pubspec.yaml
index 8c0af01..a67b4cd 100644
--- a/wei_ai_app/pubspec.yaml
+++ b/wei_ai_app/pubspec.yaml
@@ -43,6 +43,9 @@ dependencies:
   supabase_flutter: ^2.12.0
   http: ^1.6.0
   shared_preferences: ^2.5.4
+  speech_to_text: ^7.3.0
+  flutter_tts: ^4.2.5
+  permission_handler: ^12.0.1
 
 dev_dependencies:
   flutter_test: