Files
app/wei_ai_app/lib/screens/interaction/voice_session_controller.dart
2026-02-03 21:41:25 +08:00

304 lines
8.3 KiB
Dart
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import 'dart:async';
import 'package:flutter/foundation.dart';
import '../../core/core.dart';
import 'package:permission_handler/permission_handler.dart';
/// High-level phases of a voice conversation turn.
enum VoiceState {
  /// Waiting for user input via speech recognition.
  listening,

  /// Sending the recognized text to the AI.
  processing,

  /// The AI reply is being played back.
  speaking,

  /// An error occurred during processing.
  error,
}
/// Orchestrates a hands-free voice conversation with an AI [character].
///
/// Lifecycle per turn:
///   1. `listening`  — STT captures the user's speech.
///   2. `processing` — the final transcript is sent to [ChatService].
///   3. `speaking`   — the streamed reply is split into sentences and
///      queued for TTS playback (STT is paused meanwhile).
///   4. When the speak queue drains, listening resumes automatically.
///
/// Finalized messages are reported via [onUserMessage] and [onAiMessage];
/// partial/streaming text is exposed through [recognizedText] and
/// [aiTypingText] for the UI.
class VoiceSessionController extends ChangeNotifier {
  final CharacterModel character;

  /// Invoked with the final recognized user utterance.
  final Function(String) onUserMessage;

  /// Invoked once the complete AI reply has been received.
  final Function(ChatMessage) onAiMessage;

  // Speaking queue: sentences waiting for TTS playback, in arrival order.
  final List<String> _speakQueue = [];
  bool _isSpeaking = false;
  Timer? _silenceTimer;

  // FIX: guards async callbacks (debounce timer, delayed error retry) that
  // could otherwise fire after dispose() and call notifyListeners() on a
  // disposed notifier.
  bool _disposed = false;

  VoiceState _state = VoiceState.listening;
  String _recognizedText = '';
  String _aiTypingText = '';
  bool _isMicMuted = false;

  // Services
  final STTService _stt = STTService();
  final TTSService _tts = TTSService();

  // State getters
  VoiceState get state => _state;
  String get recognizedText => _recognizedText;
  String get aiTypingText => _aiTypingText;
  bool get isMicMuted => _isMicMuted;

  // Accumulates streamed reply text until a sentence boundary is seen.
  String _sentenceBuffer = '';

  // Length of the streamed content already consumed into _sentenceBuffer;
  // stream callbacks deliver cumulative text, so this yields the delta.
  int _lastProcessedLength = 0;

  // Sentence-ending punctuation (CJK full-width + ASCII).
  // FIX: the original list held three empty/invisible strings (garbled
  // Unicode per the file warning); `contains('')` is always true, so the
  // intended zh-CN sentence marks never actually split sentences.
  // Restored to the intended CJK marks.
  final List<String> _punctuation = ['。', '！', '？', '.', '?', '!', '\n'];

  VoiceSessionController({
    required this.character,
    required this.onUserMessage,
    required this.onAiMessage,
  }) {
    _init();
  }

  /// Requests permissions, initializes STT/TTS, wires the TTS callbacks
  /// and starts the first listening round.
  Future<void> _init() async {
    // Request permissions
    await [Permission.microphone, Permission.speech].request();
    // Init services
    await _stt.init();
    await _tts.init();
    // Setup TTS callbacks
    _tts.setStartHandler(() {
      debugPrint('🔊 TTS Started');
      // Already paused STT in _processSpeakQueue
    });
    _tts.setCompletionHandler(() {
      debugPrint('✅ TTS Completed');
      _isSpeaking = false;
      _processSpeakQueue(); // Play next
    });
    _tts.setErrorHandler((msg) {
      debugPrint('❌ TTS Error: $msg');
      // Treat an error like completion so the queue keeps draining.
      _isSpeaking = false;
      _processSpeakQueue();
    });
    // Start listening immediately
    if (!_isMicMuted) {
      startListening();
    }
  }

  /// Toggles the microphone. Muting stops STT; unmuting resumes it when we
  /// are currently in the listening state.
  void toggleMic() {
    _isMicMuted = !_isMicMuted;
    if (_isMicMuted) {
      stopListening();
    } else if (_state == VoiceState.listening) {
      startListening();
    }
    notifyListeners();
  }

  /// Starts (or restarts) speech recognition, interrupting any ongoing or
  /// queued TTS playback (barge-in).
  Future<void> startListening() async {
    if (_disposed || _isMicMuted) return;
    _state = VoiceState.listening;
    _recognizedText = '';
    _lastProcessedLength = 0;
    notifyListeners();
    // Stop TTS if it's playing (Interruption)
    if (_isSpeaking || _speakQueue.isNotEmpty) {
      _speakQueue.clear();
      await _tts.stop();
      _isSpeaking = false;
    }
    await _stt.listen(
      onResult: (text) {
        // Partial hypothesis — surface it to the UI as it evolves.
        _recognizedText = text;
        notifyListeners();
      },
      onFinalResult: (text) {
        _recognizedText = text;
        notifyListeners();
        _processUserMessage(text);
      },
      localeId: 'zh-CN', // Make dynamic later if needed
    );
  }

  /// Stops speech recognition. Does not change [state].
  Future<void> stopListening() async {
    await _stt.stop();
  }

  /// Sends the finalized user [text] to the AI and pipes the streamed reply
  /// into the sentence/TTS pipeline. Empty input simply restarts listening.
  Future<void> _processUserMessage(String text) async {
    if (text.trim().isEmpty) {
      // If empty, just listen again
      startListening();
      return;
    }
    _state = VoiceState.processing;
    onUserMessage(text); // Notify UI to show user message
    notifyListeners();
    // Build conversation context from persisted history. InteractionScreen
    // remains the source of truth for messages; here we only need enough
    // context for the model call.
    final session = await ChatStorageService.getSession(character.id);
    var messages = session.messages;
    // ChatService.sendMessage appends the user message itself, so drop a
    // trailing duplicate in case the DB write already landed.
    if (messages.isNotEmpty) {
      final lastMsg = messages.last;
      if (lastMsg.isUser && lastMsg.content == text) {
        messages = List.from(messages)..removeLast();
      }
    }
    _aiTypingText = '';
    _sentenceBuffer = '';
    _lastProcessedLength = 0;
    try {
      final fullResponse = await ChatService.sendMessage(
        character: character,
        messages: messages,
        userMessage: text, // ChatService handles appending this
        onStream: (content) {
          // `content` is the cumulative reply so far.
          _aiTypingText = content;
          _processStreamChunk(content);
          notifyListeners();
        },
      );
      // Flush any trailing text that never hit a punctuation mark.
      if (_sentenceBuffer.isNotEmpty) {
        if (_state != VoiceState.speaking) {
          _state = VoiceState.speaking;
          notifyListeners();
        }
        await _speak(_sentenceBuffer);
      }
      // Interaction finished; hand the full reply to the UI layer.
      final aiMsg = ChatMessage.assistant(fullResponse);
      onAiMessage(aiMsg);
      // Note: We do NOT immediately startListening here.
      // The TTS completion handler triggers startListening once the
      // entire speak queue is drained.
    } catch (e) {
      debugPrint('❌ Voice Process Error: $e');
      _state = VoiceState.error;
      notifyListeners();
      // Retry listening after error (startListening no-ops if disposed).
      Future.delayed(const Duration(seconds: 2), startListening);
    }
  }

  /// Consumes a cumulative stream snapshot, appends the unseen delta to the
  /// sentence buffer and emits complete sentences to TTS.
  void _processStreamChunk(String content) {
    if (_state != VoiceState.speaking) {
      _state = VoiceState.speaking;
      notifyListeners();
    }
    // Only the new tail of the cumulative content matters.
    if (content.length <= _lastProcessedLength) return;
    final delta = content.substring(_lastProcessedLength);
    _lastProcessedLength = content.length;
    _sentenceBuffer += delta;
    // Split off complete sentences as soon as a boundary appears.
    if (_punctuation.any(_sentenceBuffer.contains)) {
      _processBufferForSentences();
    }
  }

  /// Splits [_sentenceBuffer] at punctuation marks (kept attached to their
  /// sentence) and queues each complete sentence for speech; any unfinished
  /// remainder stays in the buffer.
  void _processBufferForSentences() {
    final tempBuffer = _sentenceBuffer;
    // Naive tokenizer: "Hello! How are you?" -> ["Hello!", "How are you?"]
    var lastSplitIndex = 0;
    for (var i = 0; i < tempBuffer.length; i++) {
      if (_punctuation.contains(tempBuffer[i])) {
        // Found end of a sentence
        final sentence = tempBuffer.substring(lastSplitIndex, i + 1);
        if (sentence.trim().isNotEmpty) {
          _speak(sentence); // fire-and-forget: queued, not awaited
        }
        lastSplitIndex = i + 1;
      }
    }
    // Keep the remaining part that didn't end with punctuation.
    _sentenceBuffer = lastSplitIndex < tempBuffer.length
        ? tempBuffer.substring(lastSplitIndex)
        : '';
  }

  /// Enqueues [text] for TTS playback unless the user has already
  /// interrupted (state flipped back to listening).
  Future<void> _speak(String text) async {
    if (_state == VoiceState.listening) return;
    _speakQueue.add(text);
    _processSpeakQueue();
  }

  /// Plays the next queued sentence, or — once the queue is empty — resumes
  /// listening after a short debounce.
  void _processSpeakQueue() async {
    if (_disposed || _isSpeaking) return;
    if (_speakQueue.isEmpty) {
      // All done speaking (or no TTS was produced)
      if (_state == VoiceState.speaking || _state == VoiceState.processing) {
        debugPrint('🎤 Queue empty, resuming listening...');
        _state = VoiceState.listening;
        notifyListeners();
        // Debounce STT restart to avoid rapid stop/start deadlocks
        _silenceTimer?.cancel();
        _silenceTimer = Timer(const Duration(milliseconds: 250), () {
          startListening();
        });
      }
      return;
    }
    // Pop the oldest sentence and speak it with STT paused.
    final text = _speakQueue.removeAt(0);
    _isSpeaking = true;
    await stopListening();
    await _tts.speak(text);
  }

  @override
  void dispose() {
    _disposed = true;
    // FIX: the restart timer was leaked — it could fire startListening()
    // and notifyListeners() on a disposed controller.
    _silenceTimer?.cancel();
    _stt.stop();
    _tts.stop();
    super.dispose();
  }
}