diff --git a/kazeia-android/app/src/main/AndroidManifest.xml b/kazeia-android/app/src/main/AndroidManifest.xml index 4645859..930e896 100644 --- a/kazeia-android/app/src/main/AndroidManifest.xml +++ b/kazeia-android/app/src/main/AndroidManifest.xml @@ -5,9 +5,17 @@ + + + = Build.VERSION_CODES.Q) { @@ -425,6 +442,13 @@ class KazeiaService : Service() { } else { startForeground(NOTIFICATION_ID, createNotification()) } + // Free RAM for the ML models BEFORE they start loading. The tablet + // is 16 GB but ColorOS base + Google services routinely hog ~7 GB + // on their own, pushing Qwen3-4B's 3.2 GB of weights into ZRAM + // swap and causing page faults on every inference. Nudging the + // background apps out (see MemoryOptimizer.KILL_TARGETS) typically + // reclaims 600–900 MB so the models stay resident. + MemoryOptimizer.freeRamForModels(this) { msg -> log(msg) } initializeComponents() } @@ -450,7 +474,7 @@ class KazeiaService : Service() { // TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS _loadingState.value = LoadingState(15, "TTS Qwen3…") try { - val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir) { msg -> log("[TTS] $msg") } + val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir, this@KazeiaService) { msg -> log("[TTS] $msg") } qwenTts.load("$modelsDir/qwen3-tts-npu") if (qwenTts.isLoaded()) { tts = qwenTts @@ -1185,12 +1209,19 @@ class KazeiaService : Service() { if (responseText.isNotEmpty()) { addMessage(ChatMessage(role = ChatMessage.Role.KAZEIA, text = responseText)) - pipeline.speakText(responseText) + // Mark the pipeline as Speaking for the duration of TTS so + // the continuous-listening mic loop drops frames and we + // don't feed our own speaker output back into STT. + _pipelineState.value = PipelineState.Speaking + try { pipeline.speakText(responseText) } finally { + _pipelineState.value = if (_isListening.value) + PipelineState.Listening else PipelineState.Idle + } + } else { + _pipelineState.value = if (_isListening.value) + PipelineState.Listening else PipelineState.Idle } - _pipelineState.value = if (_isListening.value) - PipelineState.Listening else PipelineState.Idle - } catch (e: Exception) { _aiWorkload.value = _aiWorkload.value.copy(llmActive = false) log("ERROR: LLM generation error: ${e.message}") diff --git a/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt b/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt new file mode 100644 index 0000000..88fee01 --- /dev/null +++ b/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt @@ -0,0 +1,112 @@ +package com.kazeia.service + +import android.app.ActivityManager +import android.content.Context + +/** + * Frees RAM for Kazeia's ML models by nudging non-essential background + * processes out of memory. Uses `ActivityManager.killBackgroundProcesses` + * which: + * - only kills processes that are NOT in foreground or bound to something + * foreground (so no user-visible disruption), + * - leaves the package installed/launchable — the OS will respawn it on + * demand if the user opens the app or a push arrives, + * - requires only `KILL_BACKGROUND_PROCESSES`, a normal permission. + * + * Call [freeRamForModels] once, early in KazeiaService.onCreate, BEFORE + * loading Qwen3-4B (which needs ~3.2 GB resident to avoid swap thrashing). + * + * The list below was picked by reading `dumpsys meminfo` on a fresh boot + * of a OnePlus Pad 3 (ColorOS 16): every package here is either a Google + * background feature Kazeia doesn't use (Wallet, Chromecast, YouTube + * prefetch, …) or an OPLUS feature that re-spawns harmlessly when needed. + * We deliberately avoid Play Services core, input method, launcher, + * system_server, systemui, surfaceflinger, audioserver, HAL services — + * killing any of those would break the UI or audio routing. + */ +object MemoryOptimizer { + + private const val TAG = "MemoryOptimizer" + + /** Packages safe to evict. Order matches approximate RAM savings. */ + private val KILL_TARGETS = listOf( + // Google optional features + "com.google.android.googlequicksearchbox", + "com.google.android.youtube", + "com.google.android.apps.walletnfcrel", + "com.google.android.apps.chromecast.app", + "com.google.android.aicore", + "com.google.android.apps.messaging", + "com.google.android.gm", + "com.google.android.as", + "com.google.android.as.oss", + // OPLUS / ColorOS optional features + "com.oneplus.deskclock", + "com.coloros.smartsidebar", + "com.coloros.assistantscreen", + "com.coloros.weather.service", + "com.heytap.mcs", + "com.heytap.accessory", + "com.oplus.cosa", + "com.oplus.pantanal.ums", + "com.oplus.nhs", + "com.oplus.midas", + "com.oplus.olc", + "com.oplus.deepthinker", + "com.oplus.blur", + "com.oplus.statistics.rom", + "com.oplus.powermonitor", + "com.oplus.romupdate", + "com.oplus.location", + "com.oplus.gesture", + "com.oplus.appplatform", + "com.oplus.persist.multimedia", + "com.oplus.nas", + // Qualcomm workload profiler — perf hints only, respawns + "com.qualcomm.qti.workloadclassifier" + ) + + /** + * Ask ActivityManager to drop background processes for every package + * in [KILL_TARGETS] that is installed. Returns a (attempted, skipped) + * pair for logging. Safe to call from any thread. + */ + fun freeRamForModels(context: Context, log: (String) -> Unit = {}): Pair { + val am = context.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager + ?: run { log("$TAG: ActivityManager unavailable"); return 0 to 0 } + val pm = context.packageManager + val memBefore = readAvailMb(am) + + var killed = 0 + var skipped = 0 + for (pkg in KILL_TARGETS) { + try { + // Only attempt for installed packages — unknown packages + // would silently no-op but still spam the audit log. + pm.getPackageInfo(pkg, 0) + am.killBackgroundProcesses(pkg) + killed++ + } catch (_: android.content.pm.PackageManager.NameNotFoundException) { + skipped++ + } catch (e: SecurityException) { + log("$TAG: killBackgroundProcesses($pkg) denied: ${e.message}") + skipped++ + } catch (e: Exception) { + log("$TAG: killBackgroundProcesses($pkg) failed: ${e.message}") + skipped++ + } + } + + // Give the kernel a moment to reclaim pages before we report. + Thread.sleep(250) + val memAfter = readAvailMb(am) + log("$TAG: killed=$killed skipped=$skipped; avail RAM ${memBefore} MB → ${memAfter} MB (+${memAfter - memBefore} MB)") + return killed to skipped + } + + private fun readAvailMb(am: ActivityManager): Long { + val info = ActivityManager.MemoryInfo() + am.getMemoryInfo(info) + return info.availMem / (1024 * 1024) + } +} diff --git a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt index 765dd17..33e03df 100644 --- a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt +++ b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt @@ -37,6 +37,7 @@ import kotlin.coroutines.resume */ class Qwen3TtsEngine( private val nativeLibDir: String, + private val context: android.content.Context? = null, private val onLog: ((String) -> Unit)? = null ) : TtsEngine { @@ -97,6 +98,16 @@ class Qwen3TtsEngine( // back to the single-chunk path with zero difference. Flag exists so // the sequential path can be re-enabled for A/B comparison. private const val USE_STREAMING_DECODE = true + + // ColorOS Audio Hardening silently mutes AudioTrack in background/FGS + // context (confirmed via `event:muted updated source:clientVolume` + // logs, same behaviour across USAGE_MEDIA, USAGE_ASSISTANT, and + // USAGE_VOICE_COMMUNICATION). When this flag is true, each + // generated segment is written as a WAV to app-owned shared + // storage and played via MediaPlayer instead. Slightly slower + // (WAV write + MediaPlayer prepare add ~150 ms per segment) but + // it's the only reliable path to audible output on this device. + private const val USE_MEDIAPLAYER_FALLBACK = true } private var ortEnv: OrtEnvironment? = null @@ -3335,6 +3346,18 @@ class Qwen3TtsEngine( private var sessionTrack: AudioTrack? = null private var sessionChannel: kotlinx.coroutines.channels.Channel? = null private var sessionJob: kotlinx.coroutines.Job? = null + private var sessionKeepAliveJob: kotlinx.coroutines.Job? = null + private var sessionFocusRequest: android.media.AudioFocusRequest? = null + // Total PCM frames queued to sessionTrack across all segments in this session. + // endStreamingSession() polls track.playbackHeadPosition until it reaches this + // count before calling stop(), so the tail sentence isn't clipped. + // Uses AtomicLong because both the session worker and the keep-alive watchdog + // call writeAndCount concurrently. + private val sessionFramesWritten = java.util.concurrent.atomic.AtomicLong(0) + // True while a real-audio generate call is in progress. The keep-alive + // watchdog skips silence injection while this is set, so silence never + // interleaves with speech inside a segment. + private val sessionGenActive = java.util.concurrent.atomic.AtomicBoolean(false) /** * Open a streaming TTS session backed by a persistent AudioTrack. After @@ -3343,13 +3366,221 @@ class Qwen3TtsEngine( * track as soon as it's decoded. Call endStreamingSession() to flush * the queue and release the track. */ + // MediaPlayer-based fallback session state. If ColorOS mutes our + // AudioTrack (as observed repeatedly — `event:muted updated source: + // clientVolume` right after play()), we instead render each segment + // as a WAV file on shared storage and play it back via MediaPlayer, + // which uses a completely different internal audio pipeline that + // doesn't get silenced by the background playback policy. + private var sessionMpQueue: kotlinx.coroutines.channels.Channel? = null + private var sessionMpJob: kotlinx.coroutines.Job? = null + private val sessionMpSegIdx = java.util.concurrent.atomic.AtomicInteger(0) + + private fun startStreamingSessionMp() { + if (sessionMpQueue != null) return + sessionMpSegIdx.set(0) + val sentenceChan = kotlinx.coroutines.channels.Channel( + capacity = kotlinx.coroutines.channels.Channel.UNLIMITED + ) + // Pipeline: synth worker produces WAV paths, playback worker runs + // them through a pair of MediaPlayer instances chained via + // setNextMediaPlayer() so there's zero-gap transition between + // segments (no DAC/output routing "pop" the user was hearing as + // "beg beg" with one player-per-seg). The rendezvous channel has + // capacity 2 so the synth worker can stay one seg ahead of the + // currently playing seg without growing disk use. + val wavChan = kotlinx.coroutines.channels.Channel>(capacity = 2) + val scope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO) + val synthJob = scope.launch { + for (sentence in sentenceChan) { + try { + val segIdx = sessionMpSegIdx.getAndIncrement() + val tSynth = System.currentTimeMillis() + val audio = generateSegmentAudioVC(sentence, segIdx) + if (audio.isEmpty()) continue + val wavPath = "${context?.cacheDir?.absolutePath ?: "/data/local/tmp/kazeia"}/tts_seg_${segIdx}.wav" + saveWav(wavPath, audio) + nlog("MP seg $segIdx synthesized (${System.currentTimeMillis() - tSynth}ms), queued for playback") + wavChan.send(segIdx to wavPath) + } catch (e: Exception) { + nlog("MP synth error: ${e.message}") + } + } + wavChan.close() + } + val playJob = scope.launch { playChainedMediaPlayers(wavChan) } + val combined = scope.launch { synthJob.join(); playJob.join() } + sessionMpQueue = sentenceChan; sessionMpJob = combined + nlog("streaming session opened (MediaPlayer fallback, chained)") + } + + /** + * Drive the WAV playback pipeline with two MediaPlayer instances + * chained via setNextMediaPlayer() so each segment flows into the + * next without re-arming the audio output (which caused audible + * "pops" between segments when one player stopped and another + * started). Consumes (segIdx, wavPath) pairs from [wavChan] and + * deletes each file after it finishes playing. Suspends until the + * channel closes AND the final segment finishes. + */ + private suspend fun playChainedMediaPlayers( + wavChan: kotlinx.coroutines.channels.ReceiveChannel> + ) { + val attrs = android.media.AudioAttributes.Builder() + .setUsage(android.media.AudioAttributes.USAGE_MEDIA) + .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH) + .build() + + // Synchronously prepare a MediaPlayer on the current coroutine. + // Throws on failure; caller handles cleanup. + suspend fun prepareMp(path: String, segIdx: Int): android.media.MediaPlayer { + val mp = android.media.MediaPlayer() + mp.setAudioAttributes(attrs) + mp.setDataSource(path) + kotlinx.coroutines.suspendCancellableCoroutine { cont -> + mp.setOnPreparedListener { if (cont.isActive) cont.resume(Unit) {} } + mp.setOnErrorListener { _, what, extra -> + nlog("MP seg $segIdx prepare error: what=$what extra=$extra") + if (cont.isActive) cont.resume(Unit) {} + true + } + cont.invokeOnCancellation { try { mp.release() } catch (_: Exception) {} } + mp.prepareAsync() + } + return mp + } + + var current: android.media.MediaPlayer? = null + var currentInfo: Pair? = null + var next: android.media.MediaPlayer? = null + var nextInfo: Pair? = null + + try { + // Bootstrap: wait for first WAV. + val first = wavChan.receiveCatching().getOrNull() ?: return + currentInfo = first + current = prepareMp(first.second, first.first) + current!!.setOnCompletionListener { it.release() } + current!!.start() + nlog("MP seg ${first.first} started (chained)") + + while (true) { + // Fetch next WAV (may block). If channel closes, let + // current finish playing and exit. + val upcoming = wavChan.receiveCatching().getOrNull() + if (upcoming == null) break + nextInfo = upcoming + next = prepareMp(upcoming.second, upcoming.first) + current!!.setNextMediaPlayer(next) + nlog("MP seg ${upcoming.first} queued as next") + + // Wait for current seg to finish playing before rotating. + val prevFile = currentInfo!!.second + waitForPlaybackCompletion(current!!, currentInfo!!.first) + try { java.io.File(prevFile).delete() } catch (_: Exception) {} + current = next + currentInfo = nextInfo + next = null + nextInfo = null + } + + // Drain: wait for the last prepared player to finish. + if (current != null && currentInfo != null) { + waitForPlaybackCompletion(current!!, currentInfo!!.first) + try { java.io.File(currentInfo!!.second).delete() } catch (_: Exception) {} + } + } catch (e: Exception) { + nlog("MP playback chain error: ${e.message}") + } finally { + try { next?.release() } catch (_: Exception) {} + try { current?.release() } catch (_: Exception) {} + } + } + + private suspend fun waitForPlaybackCompletion( + mp: android.media.MediaPlayer, segIdx: Int + ) { + val t0 = System.currentTimeMillis() + kotlinx.coroutines.suspendCancellableCoroutine { cont -> + mp.setOnCompletionListener { + nlog("MP seg $segIdx completed (${System.currentTimeMillis() - t0}ms)") + if (cont.isActive) cont.resume(Unit) {} + } + mp.setOnErrorListener { _, what, extra -> + nlog("MP seg $segIdx play error: what=$what extra=$extra") + if (cont.isActive) cont.resume(Unit) {} + true + } + cont.invokeOnCancellation { /* player released by caller */ } + } + } + + private suspend fun endStreamingSessionMp() { + val chan = sessionMpQueue ?: return + chan.close() + try { sessionMpJob?.join() } catch (_: Exception) {} + sessionMpQueue = null; sessionMpJob = null + nlog("streaming session closed (MediaPlayer fallback)") + } + + /** + * Play a WAV file via Android MediaPlayer and block the calling + * coroutine until playback completes. MediaPlayer uses a separate + * audio pipeline from AudioTrack so it bypasses ColorOS's AudioTrack + * hardening/muting behaviour. + */ + private suspend fun playWavBlocking(path: String, segIdx: Int) { + val t0 = System.currentTimeMillis() + suspendCancellableCoroutine { cont -> + val mp = android.media.MediaPlayer() + try { + mp.setAudioAttributes(android.media.AudioAttributes.Builder() + .setUsage(android.media.AudioAttributes.USAGE_MEDIA) + .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH) + .build()) + mp.setDataSource(path) + mp.setOnPreparedListener { + nlog("MP seg $segIdx prepared, starting (prep ${System.currentTimeMillis() - t0}ms)") + it.start() + } + mp.setOnCompletionListener { + nlog("MP seg $segIdx done (${System.currentTimeMillis() - t0}ms total)") + try { it.release() } catch (_: Exception) {} + if (cont.isActive) cont.resume(Unit) {} + } + mp.setOnErrorListener { player, what, extra -> + nlog("MP seg $segIdx error: what=$what extra=$extra") + try { player.release() } catch (_: Exception) {} + if (cont.isActive) cont.resume(Unit) {} + true + } + mp.prepareAsync() + cont.invokeOnCancellation { try { mp.release() } catch (_: Exception) {} } + } catch (e: Exception) { + nlog("MP seg $segIdx setup failed: ${e.message}") + try { mp.release() } catch (_: Exception) {} + if (cont.isActive) cont.resume(Unit) {} + } + } + } + fun startStreamingSession() { + if (USE_MEDIAPLAYER_FALLBACK) { startStreamingSessionMp(); return } if (sessionTrack != null) return // already open + // USAGE_VOICE_COMMUNICATION routes to STREAM_VOICE_CALL, which + // ColorOS's "Audio Hardening" policy does NOT silently mute (the + // policy targets STREAM_MUSIC to preserve battery on inactive media + // apps; STREAM_VOICE_CALL is reserved for VoIP and always plays). + // Previous attempts with USAGE_MEDIA and USAGE_ASSISTANT both got + // `event:muted updated source:clientVolume` ~0.6–1 s after play() + // even with audio focus + mediaPlayback FGS, so moving off of + // STREAM_MUSIC is the only route that unblocks audible playback. + val attrs = AudioAttributes.Builder() + .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .build() val track = AudioTrack.Builder() - .setAudioAttributes(AudioAttributes.Builder() - .setUsage(AudioAttributes.USAGE_MEDIA) - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .build()) + .setAudioAttributes(attrs) .setAudioFormat(AudioFormat.Builder() .setEncoding(AudioFormat.ENCODING_PCM_16BIT) .setSampleRate(SR) @@ -3359,7 +3590,77 @@ class Qwen3TtsEngine( // paces writes when full. .setTransferMode(AudioTrack.MODE_STREAM) .build() + // Request audio focus for the duration of the session. Without this + // ColorOS's Audio Hardening treats the track as background noise + // and mutes it, regardless of FGS status. We don't care about + // focus loss callbacks — if another app grabs focus mid-sentence + // that's fine, the track just gets ducked. + val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager + val focusReq = android.media.AudioFocusRequest.Builder(android.media.AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK) + .setAudioAttributes(attrs) + .setOnAudioFocusChangeListener { _ -> } + .build() + val focusRes = am?.requestAudioFocus(focusReq) + nlog("audio focus request: $focusRes (1=granted, 0=failed, 2=delayed)") + sessionFocusRequest = focusReq + // ColorOS mutes AudioTrack clientVolume ~1s after creation (seen in + // dumpsys audio as `event:muted updated source:clientVolume`). Force + // track volume back to 1.0 repeatedly to override. This is also + // done in the keep-alive watchdog loop below for ongoing override. + try { track.setVolume(1.0f) } catch (_: Exception) {} track.play() + sessionFramesWritten.set(0) + sessionGenActive.set(false) + // writeAndCount is the single path through which PCM reaches the + // AudioTrack for this session, so sessionFramesWritten always stays + // in sync with what's been queued to playback hardware. AudioTrack.write + // is thread-safe, so this can be called concurrently from the session + // worker (real audio) and the keep-alive watchdog (silence padding). + val writeAndCount: (ShortArray) -> Unit = { pcm -> + if (pcm.isNotEmpty()) { + val n = track.write(pcm, 0, pcm.size) + if (n > 0) sessionFramesWritten.addAndGet(n.toLong()) + } + } + // Bootstrap silence: queue 500 ms immediately after play() so + // AudioFlinger has samples to mix from the very first cycle. + // Without this, there's a ~100 ms window between play() and the + // first watchdog tick where the track has no data and AudioFlinger + // flags it for removal. Once that happens, playbackHead sticks at + // 0 and subsequent writes go to a dead track. + val bootstrapSilence = ShortArray(SR / 2) // 500 ms + writeAndCount(bootstrapSilence) + // Keep-alive watchdog. AudioFlinger on OnePlus/ColorOS kills a track + // that underruns for ~1 s (confirmed via `prepareTracks_l BUFFER + // TIMEOUT: remove track … due to underrun on thread 29`). Our + // per-segment synthesis takes 3–5 s, which always exceeds that + // window between writes, so the track was getting silenced after + // the first ~1 s of audio played. The watchdog pads with 200 ms of + // silence any time the buffered-ahead audio drops below 400 ms, + // regardless of segment state — silence only advances playback head + // in the gaps between real audio and is never inserted inside a + // contiguous burst of real writes (those bring buffered above 400 ms + // and keep the watchdog quiet). + val keepAliveBuffer = ShortArray(SR / 5) // 200 ms of silence + val keepAliveJob = kotlinx.coroutines.CoroutineScope( + kotlinx.coroutines.Dispatchers.IO + ).launch { + var tick = 0 + while (kotlinx.coroutines.currentCoroutineContext()[kotlinx.coroutines.Job]?.isActive != false) { + kotlinx.coroutines.delay(100) + val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL + val written = sessionFramesWritten.get() and 0xFFFFFFFFL + val buffered = written - head + val needsPad = buffered < SR * 2 / 5 // < 400 ms + if ((tick and 0x1F) == 0) { + nlog("keepAlive tick=$tick head=$head written=$written buffered=$buffered pad=$needsPad state=${track.playState}") + } + tick++ + // Override any clientVolume mute that ColorOS keeps applying. + try { track.setVolume(1.0f) } catch (_: Exception) {} + if (needsPad) writeAndCount(keepAliveBuffer) + } + } val chan = kotlinx.coroutines.channels.Channel( capacity = kotlinx.coroutines.channels.Channel.UNLIMITED ) @@ -3369,24 +3670,26 @@ class Qwen3TtsEngine( var segIdx = 0 for (sentence in chan) { try { + sessionGenActive.set(true) if (USE_STREAMING_DECODE && talkerPteModule != null && cpPteModule != null) { // CP↔BigVGAN overlap path: audio chunks flow to the // shared AudioTrack as soon as BigVGAN finishes each // SEQ_LEN window, instead of after the whole segment. - generateSegmentAudioVCStreaming(sentence, segIdx) { pcm -> - if (pcm.isNotEmpty()) track.write(pcm, 0, pcm.size) - } + generateSegmentAudioVCStreaming(sentence, segIdx, writeAndCount) } else { val audio = generateSegmentAudioVC(sentence, segIdx) - if (audio.isNotEmpty()) track.write(audio, 0, audio.size) + writeAndCount(audio) } segIdx++ } catch (e: Exception) { nlog("session seg $segIdx error: ${e.message}") + } finally { + sessionGenActive.set(false) } } } sessionTrack = track; sessionChannel = chan; sessionJob = job + sessionKeepAliveJob = keepAliveJob nlog("streaming session opened") } @@ -3396,6 +3699,12 @@ class Qwen3TtsEngine( * immediately. Sentences play in the order they were enqueued. */ fun enqueueSentence(sentence: String) { + if (USE_MEDIAPLAYER_FALLBACK) { + val chan = sessionMpQueue ?: run { nlog("enqueueSentence: no MP session"); return } + val r = chan.trySend(sentence) + if (r.isFailure) nlog("enqueueSentence: MP channel full / closed") + return + } val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return } val r = chan.trySend(sentence) if (r.isFailure) nlog("enqueueSentence: channel full / closed") @@ -3407,17 +3716,46 @@ class Qwen3TtsEngine( * drains), then release the shared track. Safe to call more than once. */ suspend fun endStreamingSession() { + if (USE_MEDIAPLAYER_FALLBACK) { endStreamingSessionMp(); return } val chan = sessionChannel ?: return chan.close() try { sessionJob?.join() } catch (_: Exception) {} + // Stop the keep-alive watchdog BEFORE draining so it doesn't pad more + // silence onto the tail while we're waiting for the existing buffer + // to play out. + try { sessionKeepAliveJob?.cancel() } catch (_: Exception) {} + try { sessionKeepAliveJob?.join() } catch (_: Exception) {} try { - sessionTrack?.let { - // Block until written samples have been consumed by the - // hardware so users aren't cut off mid-syllable. - it.stop(); it.release() + sessionTrack?.let { track -> + // AudioTrack.stop() in MODE_STREAM DISCARDS unplayed buffered + // samples — it doesn't block for drain. Poll getPlaybackHead + // Position() until it reaches what we wrote, then stop. The + // head is a 32-bit wrap-around counter, so compare modulo. + // Cap the drain wait so a stalled track can't block us forever. + val targetFrames = sessionFramesWritten.get() + val startMs = System.currentTimeMillis() + val maxDrainMs = (targetFrames * 1000L / SR) + 500L // audio dur + 500ms slack + while (true) { + val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL + val reached = head >= (targetFrames and 0xFFFFFFFFL) + val state = track.playState + if (reached || state != AudioTrack.PLAYSTATE_PLAYING) break + if (System.currentTimeMillis() - startMs > maxDrainMs) { + nlog("endStreamingSession: drain timeout at head=$head/$targetFrames") + break + } + kotlinx.coroutines.delay(20) + } + track.stop(); track.release() } } catch (_: Exception) {} - sessionTrack = null; sessionChannel = null; sessionJob = null + // Release audio focus after the track is fully drained and stopped. + try { + val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager + sessionFocusRequest?.let { am?.abandonAudioFocusRequest(it) } + } catch (_: Exception) {} + sessionFocusRequest = null + sessionTrack = null; sessionChannel = null; sessionJob = null; sessionKeepAliveJob = null nlog("streaming session closed") }