feat: mvp voice

This commit is contained in:
liqupan
2026-02-03 21:41:25 +08:00
parent dec5748cca
commit 8f19377517
13 changed files with 701 additions and 31 deletions

View File

@@ -5,6 +5,7 @@ import 'package:go_router/go_router.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../core/core.dart';
import 'voice_mode_overlay.dart';
import 'voice_session_controller.dart';
class InteractionScreen extends ConsumerStatefulWidget {
final String characterId;
@@ -20,6 +21,7 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
List<ChatMessage> _messages = [];
final TextEditingController _controller = TextEditingController();
final ScrollController _scrollController = ScrollController();
VoiceSessionController? _voiceController;
bool _isVoiceMode = false;
bool _isLoading = false;
bool _isTyping = false;
@@ -31,8 +33,10 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
_loadCharacterAndMessages();
}
@override
@override
void dispose() {
_voiceController?.dispose();
_controller.dispose();
_scrollController.dispose();
super.dispose();
@@ -81,6 +85,38 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
});
}
/// Switches the screen into voice mode.
///
/// Dismisses the keyboard, creates a [VoiceSessionController] for the loaded
/// character and shows the voice overlay. Messages produced by the session are
/// appended to the visible list and persisted via [ChatStorageService].
///
/// Does nothing beyond dismissing the keyboard when no character is loaded
/// yet or when a voice session already exists.
void _enterVoiceMode() {
  FocusScope.of(context).unfocus();

  // `_character` is nullable; copy it to a local so it promotes instead of
  // asserting with `!`, which would throw if the tap arrives before it is set.
  final character = _character;
  if (character == null) return;

  // Never replace a live session: the previous controller would be left
  // undisposed while still holding its speech services.
  if (_voiceController != null) return;

  _voiceController = VoiceSessionController(
    character: character,
    onUserMessage: (text) {
      if (!mounted) return;
      final userMsg = ChatMessage.user(text);
      setState(() {
        _messages = [..._messages, userMsg];
      });
      ChatStorageService.addMessage(widget.characterId, userMsg);
      _scrollToBottom();
    },
    onAiMessage: (msg) {
      if (!mounted) return;
      setState(() {
        _messages = [..._messages, msg];
      });
      ChatStorageService.addMessage(widget.characterId, msg);
      _scrollToBottom();
    },
  );
  setState(() => _isVoiceMode = true);
}
/// Leaves voice mode: disposes the active voice session (if any), drops the
/// reference to it and hides the overlay.
void _exitVoiceMode() {
  final activeSession = _voiceController;
  if (activeSession != null) {
    activeSession.dispose();
  }
  _voiceController = null;
  setState(() {
    _isVoiceMode = false;
  });
}
Future<void> _sendMessage() async {
if (_controller.text.trim().isEmpty || _character == null || _isLoading) return;
@@ -273,10 +309,11 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
),
if (_isVoiceMode && _character != null)
if (_isVoiceMode && _character != null && _voiceController != null)
VoiceModeOverlay(
character: _character!,
onClose: () => setState(() => _isVoiceMode = false),
controller: _voiceController!,
onClose: _exitVoiceMode,
),
],
),
@@ -405,10 +442,7 @@ class _InteractionScreenState extends ConsumerState<InteractionScreen> {
child: Row(
children: [
GestureDetector(
onTap: () {
FocusScope.of(context).unfocus();
setState(() => _isVoiceMode = true);
},
onTap: _enterVoiceMode,
child: Container(
width: 44,
height: 44,

View File

@@ -2,14 +2,17 @@ import 'dart:ui';
import 'package:flutter/material.dart';
import 'package:lucide_icons/lucide_icons.dart';
import '../../core/core.dart';
import 'voice_session_controller.dart';
class VoiceModeOverlay extends StatefulWidget {
final CharacterModel character;
final VoiceSessionController controller;
final VoidCallback onClose;
const VoiceModeOverlay({
super.key,
required this.character,
required this.controller,
required this.onClose,
});
@@ -18,8 +21,6 @@ class VoiceModeOverlay extends StatefulWidget {
}
class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerProviderStateMixin {
bool _isMicMuted = false;
bool _isSpeakerOn = true;
late AnimationController _controller;
String get _avatarUrl => CharacterRepository.getAvatarUrl(widget.character.avatarPath);
@@ -30,10 +31,18 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
_controller = AnimationController(
vsync: this, duration: const Duration(seconds: 2))
..repeat(reverse: true);
// Listen to controller changes to update UI
widget.controller.addListener(_onStateChange);
}
/// Listener registered on the voice controller; triggers a rebuild so the
/// overlay reflects the controller's latest values.
void _onStateChange() {
  // The listener can fire after this State has been removed from the tree.
  if (!mounted) return;
  setState(() {});
}
/// Detaches from the voice controller and releases the pulse animation.
@override
void dispose() {
// Unregister first so the controller no longer calls back into this State.
widget.controller.removeListener(_onStateChange);
// `_controller` is this State's AnimationController, not the voice controller.
_controller.dispose();
super.dispose();
}
@@ -102,13 +111,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
),
const SizedBox(height: 8),
Text(
_isMicMuted ? 'Mic Muted' : 'Listening...',
_getStatusText(),
style: TextStyle(
color: Colors.white.withOpacity(0.6),
fontSize: 12,
letterSpacing: 2,
fontWeight: FontWeight.w500,
),
textAlign: TextAlign.center,
),
],
),
@@ -122,13 +132,23 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
child: Stack(
alignment: Alignment.center,
children: [
if (!_isMicMuted)
// Show animation only when listening or speaking
if (widget.controller.state == VoiceState.listening || widget.controller.state == VoiceState.speaking)
AnimatedBuilder(
animation: _controller,
builder: (context, child) {
double scale = 1.0;
if (widget.controller.state == VoiceState.speaking) {
// Faster pulse when AI is speaking
scale = 0.8 + 0.3 * _controller.value;
} else {
// Slower pulse when listening
scale = 0.8 + 0.1 * _controller.value;
}
return Container(
width: 200 * (0.8 + 0.2 * _controller.value),
height: 200 * (0.8 + 0.2 * _controller.value),
width: 200 * scale,
height: 200 * scale,
decoration: BoxDecoration(
shape: BoxShape.circle,
border: Border.all(
@@ -178,11 +198,13 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
builder: (context, child) {
return Container(
width: 4,
height: _isMicMuted
? 4
: 10 + (20 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)),
height: widget.controller.state == VoiceState.speaking
? 10 + (30 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Active wave
: widget.controller.state == VoiceState.processing
? 8 + (5 * (index % 2 == 0 ? _controller.value : 1 - _controller.value)) // Thinking wave
: 4, // Idle
decoration: BoxDecoration(
color: Colors.white.withOpacity(_isMicMuted ? 0.2 : 0.8),
color: Colors.white.withOpacity(widget.controller.state != VoiceState.listening ? 0.8 : 0.4),
borderRadius: BorderRadius.circular(2),
),
);
@@ -203,14 +225,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
children: [
// Mic Toggle
IconButton(
onPressed: () => setState(() => _isMicMuted = !_isMicMuted),
icon: Icon(_isMicMuted ? LucideIcons.micOff : LucideIcons.mic),
onPressed: widget.controller.toggleMic,
icon: Icon(widget.controller.isMicMuted ? LucideIcons.micOff : LucideIcons.mic),
iconSize: 24,
style: IconButton.styleFrom(
backgroundColor: _isMicMuted
backgroundColor: widget.controller.isMicMuted
? Colors.white
: Colors.white.withOpacity(0.1),
foregroundColor: _isMicMuted
foregroundColor: widget.controller.isMicMuted
? const Color(0xFF2E1065)
: Colors.white,
padding: const EdgeInsets.all(16),
@@ -231,20 +253,14 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
),
),
// Speaker Toggle
// Speaker Toggle - Placeholder for now
IconButton(
onPressed: () => setState(() => _isSpeakerOn = !_isSpeakerOn),
icon: Icon(
_isSpeakerOn ? LucideIcons.volume2 : LucideIcons.volumeX
),
onPressed: () {}, // TODO: Implement speaker toggle in controller
icon: const Icon(LucideIcons.volume2),
iconSize: 24,
style: IconButton.styleFrom(
backgroundColor: _isSpeakerOn
? Colors.white.withOpacity(0.1)
: Colors.white.withOpacity(0.05),
foregroundColor: _isSpeakerOn
? Colors.white
: Colors.white.withOpacity(0.5),
backgroundColor: Colors.white.withOpacity(0.1),
foregroundColor: Colors.white,
padding: const EdgeInsets.all(16),
minimumSize: const Size(64, 64),
),
@@ -261,4 +277,25 @@ class _VoiceModeOverlayState extends State<VoiceModeOverlay> with SingleTickerPr
),
);
}
/// Returns the status line text for the controller's current state.
///
/// A muted microphone takes precedence over every state. While listening,
/// the tail of the recognized text is shown once there is any.
String _getStatusText() {
  final session = widget.controller;
  if (session.isMicMuted) return 'Microphone Muted';

  switch (session.state) {
    case VoiceState.listening:
      final heard = session.recognizedText;
      if (heard.isEmpty) return 'Listening...';
      // Keep only the trailing 20 characters, prefixed with an ellipsis.
      return heard.length > 20
          ? '...${heard.substring(heard.length - 20)}'
          : heard;
    case VoiceState.processing:
      return 'Thinking...';
    case VoiceState.speaking:
      return 'Speaking...';
    case VoiceState.error:
      return 'Error';
  }
}
}

View File

@@ -0,0 +1,303 @@
import 'dart:async';
import 'package:flutter/foundation.dart';
import '../../core/core.dart';
import 'package:permission_handler/permission_handler.dart';
/// Phases of a voice conversation turn, as tracked by [VoiceSessionController].
enum VoiceState {
listening, // Waiting for user input
processing, // Sending to AI
speaking, // AI is talking
error, // Set when handling a user message throws
}
/// Runs one hands-free voice conversation with [character].
///
/// The controller loops through: listen with speech-to-text, send the final
/// transcript to [ChatService], speak the streamed reply sentence by sentence
/// with text-to-speech, then listen again. Listeners are notified whenever
/// [state], [recognizedText], [aiTypingText] or [isMicMuted] change.
///
/// After [dispose] every pending timer, stream callback and speech callback
/// becomes a no-op, so late asynchronous events cannot touch a disposed
/// notifier or restart the microphone.
class VoiceSessionController extends ChangeNotifier {
  /// The character the user is talking to.
  final CharacterModel character;

  /// Called with the final transcript of each non-empty user utterance.
  final Function(String) onUserMessage;

  /// Called with the assistant message once the full reply has been received.
  final Function(ChatMessage) onAiMessage;

  /// Characters that end a sentence and therefore trigger speaking it.
  ///
  /// Must never contain an empty string: `String.contains('')` is always true
  /// and an empty entry can never equal a single character of the buffer.
  // NOTE(review): the three full-width terminators are restored on the
  // assumption that the original literals were lost to an encoding problem
  // (the recognizer locale is zh-CN) — confirm against the intended list.
  static const Set<String> _punctuation = {
    '。',
    '?',
    '!',
    '.',
    '?',
    '!',
    '\n',
  };

  // Sentences waiting to be spoken, in order.
  final List<String> _speakQueue = [];

  // True from the moment a sentence is handed to TTS until TTS reports
  // completion or an error.
  bool _isSpeaking = false;

  // True while a reply is still being received from ChatService. While set,
  // an empty speak queue does not mean the turn is over.
  bool _isStreaming = false;

  // Set by dispose(); checked after every await and in every callback.
  bool _disposed = false;

  // Delays the return to listening (after speaking, or after an error).
  Timer? _silenceTimer;

  VoiceState _state = VoiceState.listening;
  String _recognizedText = '';
  String _aiTypingText = '';
  bool _isMicMuted = false;

  // Reply text received so far that has not yet formed a complete sentence.
  String _sentenceBuffer = '';

  // Length of the streamed reply that has already been moved into the buffer.
  int _lastProcessedLength = 0;

  // Services
  final STTService _stt = STTService();
  final TTSService _tts = TTSService();

  /// Current phase of the conversation.
  VoiceState get state => _state;

  /// Latest (partial or final) transcript of the user's speech.
  String get recognizedText => _recognizedText;

  /// Reply text received from the assistant so far in the current turn.
  String get aiTypingText => _aiTypingText;

  /// Whether the microphone is muted by the user.
  bool get isMicMuted => _isMicMuted;

  /// Creates the controller and starts initialization in the background.
  VoiceSessionController({
    required this.character,
    required this.onUserMessage,
    required this.onAiMessage,
  }) {
    unawaited(_init());
  }

  /// Suppresses notifications once the controller has been disposed.
  @override
  void notifyListeners() {
    if (_disposed) return;
    super.notifyListeners();
  }

  /// Requests permissions, initializes STT/TTS, wires the TTS callbacks and
  /// starts listening. A failure puts the controller into [VoiceState.error].
  Future<void> _init() async {
    try {
      await [Permission.microphone, Permission.speech].request();
      await _stt.init();
      await _tts.init();
    } catch (e) {
      debugPrint('❌ Voice Init Error: $e');
      if (_disposed) return;
      _state = VoiceState.error;
      notifyListeners();
      return;
    }
    // The overlay may have been closed while initialization was in flight.
    if (_disposed) return;

    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
      // STT has already been paused in _processSpeakQueue.
    });
    _tts.setCompletionHandler(() {
      debugPrint('✅ TTS Completed');
      if (_disposed) return;
      _isSpeaking = false;
      unawaited(_processSpeakQueue()); // Play next
    });
    _tts.setErrorHandler((msg) {
      debugPrint('❌ TTS Error: $msg');
      if (_disposed) return;
      _isSpeaking = false;
      unawaited(_processSpeakQueue());
    });

    // Start listening immediately (startListening is a no-op while muted).
    unawaited(startListening());
  }

  /// Toggles the microphone mute flag.
  ///
  /// Muting stops recognition; unmuting restarts it only when the controller
  /// is in the listening phase.
  void toggleMic() {
    _isMicMuted = !_isMicMuted;
    if (_isMicMuted) {
      unawaited(stopListening());
    } else if (_state == VoiceState.listening) {
      unawaited(startListening());
    }
    notifyListeners();
  }

  /// Enters the listening phase and starts speech recognition.
  ///
  /// Any speech that is playing or queued is dropped first (interruption).
  /// Does nothing while the mic is muted or after [dispose].
  Future<void> startListening() async {
    if (_disposed || _isMicMuted) return;

    // A pending delayed restart would otherwise start recognition twice.
    _silenceTimer?.cancel();

    _state = VoiceState.listening;
    _recognizedText = '';
    _lastProcessedLength = 0;
    notifyListeners();

    // Stop TTS if it's playing (Interruption)
    if (_isSpeaking || _speakQueue.isNotEmpty) {
      _speakQueue.clear();
      await _tts.stop();
      _isSpeaking = false;
      // Mute or dispose may have happened while TTS was stopping.
      if (_disposed || _isMicMuted) return;
    }

    await _stt.listen(
      onResult: (text) {
        if (_disposed) return;
        _recognizedText = text;
        notifyListeners();
      },
      onFinalResult: (text) {
        if (_disposed) return;
        _recognizedText = text;
        notifyListeners();
        unawaited(_processUserMessage(text));
      },
      localeId: 'zh-CN', // Make dynamic later if needed
    );
  }

  /// Stops speech recognition.
  Future<void> stopListening() async {
    await _stt.stop();
  }

  /// Handles a final transcript: reports it, requests the reply, speaks the
  /// reply as it streams in and reports the finished assistant message.
  Future<void> _processUserMessage(String text) async {
    if (_disposed) return;
    if (text.trim().isEmpty) {
      // Nothing was said; just listen again.
      unawaited(startListening());
      return;
    }

    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show user message
    notifyListeners();

    _aiTypingText = '';
    _sentenceBuffer = '';
    _lastProcessedLength = 0;
    _isStreaming = true;

    try {
      // Loaded inside the try so a storage failure is handled like any other
      // error of this turn instead of leaving the state at `processing`.
      final session = await ChatStorageService.getSession(character.id);
      var messages = session.messages;

      // ChatService.sendMessage appends the user message itself, so drop it
      // from the history if the storage write has already landed.
      if (messages.isNotEmpty) {
        final lastMsg = messages.last;
        if (lastMsg.isUser && lastMsg.content == text) {
          messages = List.from(messages)..removeLast();
        }
      }

      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
        userMessage: text,
        onStream: (content) {
          if (_disposed) return;
          _aiTypingText = content;
          _processStreamChunk(content);
          notifyListeners();
        },
      );

      // Speak whatever is left that did not end with punctuation.
      if (!_disposed && _sentenceBuffer.trim().isNotEmpty) {
        final tail = _sentenceBuffer;
        _sentenceBuffer = '';
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
        _speak(tail);
      }

      // Delivered even after dispose so the reply to an already reported
      // user message is not lost; the callback owns its own lifecycle checks.
      onAiMessage(ChatMessage.assistant(fullResponse));
    } catch (e) {
      _isStreaming = false;
      debugPrint('❌ Voice Process Error: $e');
      if (_disposed) return;
      _state = VoiceState.error;
      notifyListeners();
      // Retry listening after the error; a Timer so dispose() can cancel it.
      _silenceTimer?.cancel();
      _silenceTimer = Timer(const Duration(seconds: 2), startListening);
      return;
    }

    _isStreaming = false;
    // Resumes listening when nothing is being spoken, which also covers a
    // reply that produced no speech at all. While TTS is still playing this
    // is a no-op and the completion handler takes over.
    if (!_disposed) unawaited(_processSpeakQueue());
  }

  /// Feeds newly streamed reply text into the sentence buffer and queues every
  /// sentence that has become complete.
  // NOTE(review): treats [content] as the cumulative reply so far, not a
  // delta — this matches how it is assigned to `_aiTypingText`; confirm
  // against ChatService.
  void _processStreamChunk(String content) {
    if (_state != VoiceState.speaking) {
      _state = VoiceState.speaking;
      notifyListeners();
    }

    // Only the part beyond what was already consumed is new.
    if (content.length <= _lastProcessedLength) return;
    _sentenceBuffer += content.substring(_lastProcessedLength);
    _lastProcessedLength = content.length;

    _processBufferForSentences();
  }

  /// Queues every complete sentence in the buffer (terminator included) and
  /// keeps the unterminated remainder for the next chunk.
  void _processBufferForSentences() {
    final buffer = _sentenceBuffer;
    var sentenceStart = 0;

    for (var i = 0; i < buffer.length; i++) {
      if (!_punctuation.contains(buffer[i])) continue;
      final sentence = buffer.substring(sentenceStart, i + 1);
      if (sentence.trim().isNotEmpty) {
        _speak(sentence);
      }
      sentenceStart = i + 1;
    }

    _sentenceBuffer = buffer.substring(sentenceStart);
  }

  /// Queues [text] for speaking unless the user has interrupted (the
  /// controller is listening again) or the controller is disposed.
  void _speak(String text) {
    if (_disposed || _state == VoiceState.listening) return;
    _speakQueue.add(text);
    unawaited(_processSpeakQueue());
  }

  /// Speaks the next queued sentence, or returns to listening when the turn
  /// is over (queue empty and the reply fully received).
  Future<void> _processSpeakQueue() async {
    if (_disposed || _isSpeaking) return;

    if (_speakQueue.isEmpty) {
      // More sentences may still arrive; resuming now would start the
      // recognizer in the middle of the reply.
      if (_isStreaming) return;

      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
        debugPrint('🎤 Queue empty, resuming listening...');
        _state = VoiceState.listening;
        notifyListeners();

        // Debounce STT restart to avoid rapid stop/start deadlocks
        _silenceTimer?.cancel();
        _silenceTimer = Timer(
          const Duration(milliseconds: 250),
          startListening,
        );
      }
      return;
    }

    final text = _speakQueue.removeAt(0);
    _isSpeaking = true;

    try {
      // Ensure STT is paused while speaking
      await stopListening();
      // startListening() clears `_isSpeaking` when the user interrupts.
      if (_disposed || !_isSpeaking) return;
      await _tts.speak(text);
    } catch (e) {
      // Without this the flag would stay set and the queue would stall.
      debugPrint('❌ TTS Error: $e');
      if (_disposed) return;
      _isSpeaking = false;
      unawaited(_processSpeakQueue());
    }
  }

  /// Stops recognition and speech and cancels pending timers.
  @override
  void dispose() {
    _disposed = true;
    _silenceTimer?.cancel();
    _speakQueue.clear();
    _stt.stop();
    _tts.stop();
    super.dispose();
  }
}