diff --git a/kazeia-android/app/src/main/AndroidManifest.xml b/kazeia-android/app/src/main/AndroidManifest.xml
index 4645859..930e896 100644
--- a/kazeia-android/app/src/main/AndroidManifest.xml
+++ b/kazeia-android/app/src/main/AndroidManifest.xml
@@ -5,9 +5,17 @@
     <uses-permission android:name="android.permission.RECORD_AUDIO" />
     <uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
     <uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
+    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" />
     <uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
     <uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
     <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
+    <!-- Freeing RAM for the LLM/TTS/STT models at startup. Kazeia uses
+         ~5 GB across Qwen3-4B PTE + Qwen3-TTS + Whisper, and the OnePlus
+         Pad's ColorOS base load alone keeps ~7 GB resident, pushing most
+         of Kazeia's weights into ZRAM swap. KILL_BACKGROUND_PROCESSES is
+         a normal permission (no user prompt) that lets us reclaim that
+         headroom by nudging non-essential apps out of RAM on launch. -->
+    <uses-permission android:name="android.permission.KILL_BACKGROUND_PROCESSES" />
 
     <application
         android:name=".KazeiaApplication"
@@ -50,7 +58,7 @@
 
         <service
             android:name=".service.KazeiaService"
-            android:foregroundServiceType="microphone|specialUse"
+            android:foregroundServiceType="microphone|mediaPlayback|specialUse"
             android:exported="true">
             <property
                 android:name="android.app.PROPERTY_SPECIAL_USE_FGS_SUBTYPE"
diff --git a/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt b/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt
index 401ebc1..08d240a 100644
--- a/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt
+++ b/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt
@@ -174,6 +174,12 @@ class KazeiaService : Service() {
                     if (!::llm.isInitialized || !llm.isLoaded()) {
                         log("Stream LLM: LLM not ready"); return@launch
                     }
+                    // Set pipeline state to Speaking so the continuous-
+                    // listening mic loop (line ~824) drops frames during
+                    // TTS playback. Without this, the mic picks up the
+                    // tablet speaker and feeds our own TTS back into STT,
+                    // creating an infinite loop.
+                    _pipelineState.value = PipelineState.Speaking
                     qwenTts.startStreamingSession()
                     val tStart = System.currentTimeMillis()
                     var firstSentenceLogged = false
@@ -199,6 +205,9 @@ class KazeiaService : Service() {
                 } catch (e: Exception) {
                     log("Stream LLM error: ${e.message}")
                     e.printStackTrace()
+                } finally {
+                    // Back to Idle so the next mic frame is accepted.
+                    _pipelineState.value = PipelineState.Idle
                 }
             }
         }
@@ -414,10 +423,18 @@ class KazeiaService : Service() {
             this, Manifest.permission.RECORD_AUDIO
         ) == PackageManager.PERMISSION_GRANTED
 
+        // FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK is required so ColorOS (and
+        // stock Android 14+ policies) don't mute the TTS AudioTrack with
+        // "clientVolume" at ~600 ms after play(). Without it the FGS was
+        // classified as mic-only or special-use and background-audio
+        // hardening silenced it. Combine with MICROPHONE so mic input keeps
+        // working during STT.
         val fgsType = if (hasMicPermission) {
-            ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
+            ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE or
+                ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK
         } else {
-            ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
+            ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK or
+                ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
         }
 
         if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
@@ -425,6 +442,13 @@ class KazeiaService : Service() {
         } else {
             startForeground(NOTIFICATION_ID, createNotification())
         }
+        // Free RAM for the ML models BEFORE they start loading. The tablet
+        // is 16 GB but ColorOS base + Google services routinely hog ~7 GB
+        // on their own, pushing Qwen3-4B's 3.2 GB of weights into ZRAM
+        // swap and causing page faults on every inference. Nudging the
+        // background apps out (see MemoryOptimizer.KILL_TARGETS) typically
+        // reclaims 600–900 MB so the models stay resident.
+        MemoryOptimizer.freeRamForModels(this) { msg -> log(msg) }
         initializeComponents()
     }
 
@@ -450,7 +474,7 @@ class KazeiaService : Service() {
                 // TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS
                 _loadingState.value = LoadingState(15, "TTS Qwen3…")
                 try {
-                    val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir) { msg -> log("[TTS] $msg") }
+                    val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir, this@KazeiaService) { msg -> log("[TTS] $msg") }
                     qwenTts.load("$modelsDir/qwen3-tts-npu")
                     if (qwenTts.isLoaded()) {
                         tts = qwenTts
@@ -1185,12 +1209,19 @@ class KazeiaService : Service() {
 
             if (responseText.isNotEmpty()) {
                 addMessage(ChatMessage(role = ChatMessage.Role.KAZEIA, text = responseText))
-                pipeline.speakText(responseText)
+                // Mark the pipeline as Speaking for the duration of TTS so
+                // the continuous-listening mic loop drops frames and we
+                // don't feed our own speaker output back into STT.
+                _pipelineState.value = PipelineState.Speaking
+                try { pipeline.speakText(responseText) } finally {
+                    _pipelineState.value = if (_isListening.value)
+                        PipelineState.Listening else PipelineState.Idle
+                }
+            } else {
+                _pipelineState.value = if (_isListening.value)
+                    PipelineState.Listening else PipelineState.Idle
             }
 
-            _pipelineState.value = if (_isListening.value)
-                PipelineState.Listening else PipelineState.Idle
-
         } catch (e: Exception) {
             _aiWorkload.value = _aiWorkload.value.copy(llmActive = false)
             log("ERROR: LLM generation error: ${e.message}")
diff --git a/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt b/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt
new file mode 100644
index 0000000..88fee01
--- /dev/null
+++ b/kazeia-android/app/src/main/java/com/kazeia/service/MemoryOptimizer.kt
@@ -0,0 +1,112 @@
+package com.kazeia.service
+
+import android.app.ActivityManager
+import android.content.Context
+
+/**
+ * Frees RAM for Kazeia's ML models by nudging non-essential background
+ * processes out of memory. Uses `ActivityManager.killBackgroundProcesses`
+ * which:
+ *   - only kills processes that are NOT in foreground or bound to something
+ *     foreground (so no user-visible disruption),
+ *   - leaves the package installed/launchable — the OS will respawn it on
+ *     demand if the user opens the app or a push arrives,
+ *   - requires only `KILL_BACKGROUND_PROCESSES`, a normal permission.
+ *
+ * Call [freeRamForModels] once, early in KazeiaService.onCreate, BEFORE
+ * loading Qwen3-4B (which needs ~3.2 GB resident to avoid swap thrashing).
+ *
+ * The list below was picked by reading `dumpsys meminfo` on a fresh boot
+ * of a OnePlus Pad 3 (ColorOS 16): every package here is either a Google
+ * background feature Kazeia doesn't use (Wallet, Chromecast, YouTube
+ * prefetch, …) or an OPLUS feature that re-spawns harmlessly when needed.
+ * We deliberately avoid Play Services core, input method, launcher,
+ * system_server, systemui, surfaceflinger, audioserver, HAL services —
+ * killing any of those would break the UI or audio routing.
+ */
+object MemoryOptimizer {
+
+    private const val TAG = "MemoryOptimizer"
+
+    /** Packages safe to evict. Order matches approximate RAM savings. */
+    private val KILL_TARGETS = listOf(
+        // Google optional features
+        "com.google.android.googlequicksearchbox",
+        "com.google.android.youtube",
+        "com.google.android.apps.walletnfcrel",
+        "com.google.android.apps.chromecast.app",
+        "com.google.android.aicore",
+        "com.google.android.apps.messaging",
+        "com.google.android.gm",
+        "com.google.android.as",
+        "com.google.android.as.oss",
+        // OPLUS / ColorOS optional features
+        "com.oneplus.deskclock",
+        "com.coloros.smartsidebar",
+        "com.coloros.assistantscreen",
+        "com.coloros.weather.service",
+        "com.heytap.mcs",
+        "com.heytap.accessory",
+        "com.oplus.cosa",
+        "com.oplus.pantanal.ums",
+        "com.oplus.nhs",
+        "com.oplus.midas",
+        "com.oplus.olc",
+        "com.oplus.deepthinker",
+        "com.oplus.blur",
+        "com.oplus.statistics.rom",
+        "com.oplus.powermonitor",
+        "com.oplus.romupdate",
+        "com.oplus.location",
+        "com.oplus.gesture",
+        "com.oplus.appplatform",
+        "com.oplus.persist.multimedia",
+        "com.oplus.nas",
+        // Qualcomm workload profiler — perf hints only, respawns
+        "com.qualcomm.qti.workloadclassifier"
+    )
+
+    /**
+     * Ask ActivityManager to drop background processes for every package
+     * in [KILL_TARGETS] that is installed. Returns a (attempted, skipped)
+     * pair for logging. Safe to call from any thread.
+     */
+    fun freeRamForModels(context: Context, log: (String) -> Unit = {}): Pair<Int, Int> {
+        val am = context.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
+            ?: run { log("$TAG: ActivityManager unavailable"); return 0 to 0 }
+        val pm = context.packageManager
+        val memBefore = readAvailMb(am)
+
+        var killed = 0
+        var skipped = 0
+        for (pkg in KILL_TARGETS) {
+            try {
+                // Only attempt for installed packages — unknown packages
+                // would silently no-op but still spam the audit log.
+                pm.getPackageInfo(pkg, 0)
+                am.killBackgroundProcesses(pkg)
+                killed++
+            } catch (_: android.content.pm.PackageManager.NameNotFoundException) {
+                skipped++
+            } catch (e: SecurityException) {
+                log("$TAG: killBackgroundProcesses($pkg) denied: ${e.message}")
+                skipped++
+            } catch (e: Exception) {
+                log("$TAG: killBackgroundProcesses($pkg) failed: ${e.message}")
+                skipped++
+            }
+        }
+
+        // Give the kernel a moment to reclaim pages before we report.
+        Thread.sleep(250)
+        val memAfter = readAvailMb(am)
+        log("$TAG: killed=$killed skipped=$skipped; avail RAM ${memBefore} MB → ${memAfter} MB (+${memAfter - memBefore} MB)")
+        return killed to skipped
+    }
+
+    private fun readAvailMb(am: ActivityManager): Long {
+        val info = ActivityManager.MemoryInfo()
+        am.getMemoryInfo(info)
+        return info.availMem / (1024 * 1024)
+    }
+}
diff --git a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt
index 765dd17..33e03df 100644
--- a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt
+++ b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt
@@ -37,6 +37,7 @@ import kotlin.coroutines.resume
  */
 class Qwen3TtsEngine(
     private val nativeLibDir: String,
+    private val context: android.content.Context? = null,
     private val onLog: ((String) -> Unit)? = null
 ) : TtsEngine {
 
@@ -97,6 +98,16 @@ class Qwen3TtsEngine(
         // back to the single-chunk path with zero difference. Flag exists so
         // the sequential path can be re-enabled for A/B comparison.
         private const val USE_STREAMING_DECODE = true
+
+        // ColorOS Audio Hardening silently mutes AudioTrack in background/FGS
+        // context (confirmed via `event:muted updated source:clientVolume`
+        // logs, same behaviour across USAGE_MEDIA, USAGE_ASSISTANT, and
+        // USAGE_VOICE_COMMUNICATION). When this flag is true, each
+        // generated segment is written as a WAV to app-owned shared
+        // storage and played via MediaPlayer instead. Slightly slower
+        // (WAV write + MediaPlayer prepare add ~150 ms per segment) but
+        // it's the only reliable path to audible output on this device.
+        private const val USE_MEDIAPLAYER_FALLBACK = true
     }
 
     private var ortEnv: OrtEnvironment? = null
@@ -3335,6 +3346,18 @@ class Qwen3TtsEngine(
     private var sessionTrack: AudioTrack? = null
     private var sessionChannel: kotlinx.coroutines.channels.Channel<String>? = null
     private var sessionJob: kotlinx.coroutines.Job? = null
+    private var sessionKeepAliveJob: kotlinx.coroutines.Job? = null
+    private var sessionFocusRequest: android.media.AudioFocusRequest? = null
+    // Total PCM frames queued to sessionTrack across all segments in this session.
+    // endStreamingSession() polls track.playbackHeadPosition until it reaches this
+    // count before calling stop(), so the tail sentence isn't clipped.
+    // Uses AtomicLong because both the session worker and the keep-alive watchdog
+    // call writeAndCount concurrently.
+    private val sessionFramesWritten = java.util.concurrent.atomic.AtomicLong(0)
+    // True while a real-audio generate call is in progress. The keep-alive
+    // watchdog skips silence injection while this is set, so silence never
+    // interleaves with speech inside a segment.
+    private val sessionGenActive = java.util.concurrent.atomic.AtomicBoolean(false)
 
     /**
      * Open a streaming TTS session backed by a persistent AudioTrack. After
@@ -3343,13 +3366,221 @@ class Qwen3TtsEngine(
      * track as soon as it's decoded. Call endStreamingSession() to flush
      * the queue and release the track.
      */
+    // MediaPlayer-based fallback session state. If ColorOS mutes our
+    // AudioTrack (as observed repeatedly — `event:muted updated source:
+    // clientVolume` right after play()), we instead render each segment
+    // as a WAV file on shared storage and play it back via MediaPlayer,
+    // which uses a completely different internal audio pipeline that
+    // doesn't get silenced by the background playback policy.
+    private var sessionMpQueue: kotlinx.coroutines.channels.Channel<String>? = null
+    private var sessionMpJob: kotlinx.coroutines.Job? = null
+    private val sessionMpSegIdx = java.util.concurrent.atomic.AtomicInteger(0)
+
+    private fun startStreamingSessionMp() {
+        if (sessionMpQueue != null) return
+        sessionMpSegIdx.set(0)
+        val sentenceChan = kotlinx.coroutines.channels.Channel<String>(
+            capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
+        )
+        // Pipeline: synth worker produces WAV paths, playback worker runs
+        // them through a pair of MediaPlayer instances chained via
+        // setNextMediaPlayer() so there's zero-gap transition between
+        // segments (no DAC/output routing "pop" the user was hearing as
+        // "beg beg" with one player-per-seg). The rendezvous channel has
+        // capacity 2 so the synth worker can stay one seg ahead of the
+        // currently playing seg without growing disk use.
+        val wavChan = kotlinx.coroutines.channels.Channel<Pair<Int, String>>(capacity = 2)
+        val scope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO)
+        val synthJob = scope.launch {
+            for (sentence in sentenceChan) {
+                try {
+                    val segIdx = sessionMpSegIdx.getAndIncrement()
+                    val tSynth = System.currentTimeMillis()
+                    val audio = generateSegmentAudioVC(sentence, segIdx)
+                    if (audio.isEmpty()) continue
+                    val wavPath = "${context?.cacheDir?.absolutePath ?: "/data/local/tmp/kazeia"}/tts_seg_${segIdx}.wav"
+                    saveWav(wavPath, audio)
+                    nlog("MP seg $segIdx synthesized (${System.currentTimeMillis() - tSynth}ms), queued for playback")
+                    wavChan.send(segIdx to wavPath)
+                } catch (e: Exception) {
+                    nlog("MP synth error: ${e.message}")
+                }
+            }
+            wavChan.close()
+        }
+        val playJob = scope.launch { playChainedMediaPlayers(wavChan) }
+        val combined = scope.launch { synthJob.join(); playJob.join() }
+        sessionMpQueue = sentenceChan; sessionMpJob = combined
+        nlog("streaming session opened (MediaPlayer fallback, chained)")
+    }
+
+    /**
+     * Drive the WAV playback pipeline with two MediaPlayer instances
+     * chained via setNextMediaPlayer() so each segment flows into the
+     * next without re-arming the audio output (which caused audible
+     * "pops" between segments when one player stopped and another
+     * started). Consumes (segIdx, wavPath) pairs from [wavChan] and
+     * deletes each file after it finishes playing. Suspends until the
+     * channel closes AND the final segment finishes.
+     */
+    private suspend fun playChainedMediaPlayers(
+        wavChan: kotlinx.coroutines.channels.ReceiveChannel<Pair<Int, String>>
+    ) {
+        val attrs = android.media.AudioAttributes.Builder()
+            .setUsage(android.media.AudioAttributes.USAGE_MEDIA)
+            .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
+            .build()
+
+        // Synchronously prepare a MediaPlayer on the current coroutine.
+        // Throws on failure; caller handles cleanup.
+        suspend fun prepareMp(path: String, segIdx: Int): android.media.MediaPlayer {
+            val mp = android.media.MediaPlayer()
+            mp.setAudioAttributes(attrs)
+            mp.setDataSource(path)
+            kotlinx.coroutines.suspendCancellableCoroutine<Unit> { cont ->
+                mp.setOnPreparedListener { if (cont.isActive) cont.resume(Unit) {} }
+                mp.setOnErrorListener { _, what, extra ->
+                    nlog("MP seg $segIdx prepare error: what=$what extra=$extra")
+                    if (cont.isActive) cont.resume(Unit) {}
+                    true
+                }
+                cont.invokeOnCancellation { try { mp.release() } catch (_: Exception) {} }
+                mp.prepareAsync()
+            }
+            return mp
+        }
+
+        var current: android.media.MediaPlayer? = null
+        var currentInfo: Pair<Int, String>? = null
+        var next: android.media.MediaPlayer? = null
+        var nextInfo: Pair<Int, String>? = null
+
+        try {
+            // Bootstrap: wait for first WAV.
+            val first = wavChan.receiveCatching().getOrNull() ?: return
+            currentInfo = first
+            current = prepareMp(first.second, first.first)
+            current!!.setOnCompletionListener { it.release() }
+            current!!.start()
+            nlog("MP seg ${first.first} started (chained)")
+
+            while (true) {
+                // Fetch next WAV (may block). If channel closes, let
+                // current finish playing and exit.
+                val upcoming = wavChan.receiveCatching().getOrNull()
+                if (upcoming == null) break
+                nextInfo = upcoming
+                next = prepareMp(upcoming.second, upcoming.first)
+                current!!.setNextMediaPlayer(next)
+                nlog("MP seg ${upcoming.first} queued as next")
+
+                // Wait for current seg to finish playing before rotating.
+                val prevFile = currentInfo!!.second
+                waitForPlaybackCompletion(current!!, currentInfo!!.first)
+                try { java.io.File(prevFile).delete() } catch (_: Exception) {}
+                current = next
+                currentInfo = nextInfo
+                next = null
+                nextInfo = null
+            }
+
+            // Drain: wait for the last prepared player to finish.
+            if (current != null && currentInfo != null) {
+                waitForPlaybackCompletion(current!!, currentInfo!!.first)
+                try { java.io.File(currentInfo!!.second).delete() } catch (_: Exception) {}
+            }
+        } catch (e: Exception) {
+            nlog("MP playback chain error: ${e.message}")
+        } finally {
+            try { next?.release() } catch (_: Exception) {}
+            try { current?.release() } catch (_: Exception) {}
+        }
+    }
+
+    private suspend fun waitForPlaybackCompletion(
+        mp: android.media.MediaPlayer, segIdx: Int
+    ) {
+        val t0 = System.currentTimeMillis()
+        kotlinx.coroutines.suspendCancellableCoroutine<Unit> { cont ->
+            mp.setOnCompletionListener {
+                nlog("MP seg $segIdx completed (${System.currentTimeMillis() - t0}ms)")
+                if (cont.isActive) cont.resume(Unit) {}
+            }
+            mp.setOnErrorListener { _, what, extra ->
+                nlog("MP seg $segIdx play error: what=$what extra=$extra")
+                if (cont.isActive) cont.resume(Unit) {}
+                true
+            }
+            cont.invokeOnCancellation { /* player released by caller */ }
+        }
+    }
+
+    private suspend fun endStreamingSessionMp() {
+        val chan = sessionMpQueue ?: return
+        chan.close()
+        try { sessionMpJob?.join() } catch (_: Exception) {}
+        sessionMpQueue = null; sessionMpJob = null
+        nlog("streaming session closed (MediaPlayer fallback)")
+    }
+
+    /**
+     * Play a WAV file via Android MediaPlayer and block the calling
+     * coroutine until playback completes. MediaPlayer uses a separate
+     * audio pipeline from AudioTrack so it bypasses ColorOS's AudioTrack
+     * hardening/muting behaviour.
+     */
+    private suspend fun playWavBlocking(path: String, segIdx: Int) {
+        val t0 = System.currentTimeMillis()
+        suspendCancellableCoroutine<Unit> { cont ->
+            val mp = android.media.MediaPlayer()
+            try {
+                mp.setAudioAttributes(android.media.AudioAttributes.Builder()
+                    .setUsage(android.media.AudioAttributes.USAGE_MEDIA)
+                    .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
+                    .build())
+                mp.setDataSource(path)
+                mp.setOnPreparedListener {
+                    nlog("MP seg $segIdx prepared, starting (prep ${System.currentTimeMillis() - t0}ms)")
+                    it.start()
+                }
+                mp.setOnCompletionListener {
+                    nlog("MP seg $segIdx done (${System.currentTimeMillis() - t0}ms total)")
+                    try { it.release() } catch (_: Exception) {}
+                    if (cont.isActive) cont.resume(Unit) {}
+                }
+                mp.setOnErrorListener { player, what, extra ->
+                    nlog("MP seg $segIdx error: what=$what extra=$extra")
+                    try { player.release() } catch (_: Exception) {}
+                    if (cont.isActive) cont.resume(Unit) {}
+                    true
+                }
+                mp.prepareAsync()
+                cont.invokeOnCancellation { try { mp.release() } catch (_: Exception) {} }
+            } catch (e: Exception) {
+                nlog("MP seg $segIdx setup failed: ${e.message}")
+                try { mp.release() } catch (_: Exception) {}
+                if (cont.isActive) cont.resume(Unit) {}
+            }
+        }
+    }
+
     fun startStreamingSession() {
+        if (USE_MEDIAPLAYER_FALLBACK) { startStreamingSessionMp(); return }
         if (sessionTrack != null) return   // already open
+        // USAGE_VOICE_COMMUNICATION routes to STREAM_VOICE_CALL, which
+        // ColorOS's "Audio Hardening" policy does NOT silently mute (the
+        // policy targets STREAM_MUSIC to preserve battery on inactive media
+        // apps; STREAM_VOICE_CALL is reserved for VoIP and always plays).
+        // Previous attempts with USAGE_MEDIA and USAGE_ASSISTANT both got
+        // `event:muted updated source:clientVolume` ~0.6–1 s after play()
+        // even with audio focus + mediaPlayback FGS, so moving off of
+        // STREAM_MUSIC is the only route that unblocks audible playback.
+        val attrs = AudioAttributes.Builder()
+            .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION)
+            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+            .build()
         val track = AudioTrack.Builder()
-            .setAudioAttributes(AudioAttributes.Builder()
-                .setUsage(AudioAttributes.USAGE_MEDIA)
-                .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
-                .build())
+            .setAudioAttributes(attrs)
             .setAudioFormat(AudioFormat.Builder()
                 .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                 .setSampleRate(SR)
@@ -3359,7 +3590,77 @@ class Qwen3TtsEngine(
                                                 // paces writes when full.
             .setTransferMode(AudioTrack.MODE_STREAM)
             .build()
+        // Request audio focus for the duration of the session. Without this
+        // ColorOS's Audio Hardening treats the track as background noise
+        // and mutes it, regardless of FGS status. We don't care about
+        // focus loss callbacks — if another app grabs focus mid-sentence
+        // that's fine, the track just gets ducked.
+        val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
+        val focusReq = android.media.AudioFocusRequest.Builder(android.media.AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
+            .setAudioAttributes(attrs)
+            .setOnAudioFocusChangeListener { _ -> }
+            .build()
+        val focusRes = am?.requestAudioFocus(focusReq)
+        nlog("audio focus request: $focusRes (1=granted, 0=failed, 2=delayed)")
+        sessionFocusRequest = focusReq
+        // ColorOS mutes AudioTrack clientVolume ~1s after creation (seen in
+        // dumpsys audio as `event:muted updated source:clientVolume`). Force
+        // track volume back to 1.0 repeatedly to override. This is also
+        // done in the keep-alive watchdog loop below for ongoing override.
+        try { track.setVolume(1.0f) } catch (_: Exception) {}
         track.play()
+        sessionFramesWritten.set(0)
+        sessionGenActive.set(false)
+        // writeAndCount is the single path through which PCM reaches the
+        // AudioTrack for this session, so sessionFramesWritten always stays
+        // in sync with what's been queued to playback hardware. AudioTrack.write
+        // is thread-safe, so this can be called concurrently from the session
+        // worker (real audio) and the keep-alive watchdog (silence padding).
+        val writeAndCount: (ShortArray) -> Unit = { pcm ->
+            if (pcm.isNotEmpty()) {
+                val n = track.write(pcm, 0, pcm.size)
+                if (n > 0) sessionFramesWritten.addAndGet(n.toLong())
+            }
+        }
+        // Bootstrap silence: queue 500 ms immediately after play() so
+        // AudioFlinger has samples to mix from the very first cycle.
+        // Without this, there's a ~100 ms window between play() and the
+        // first watchdog tick where the track has no data and AudioFlinger
+        // flags it for removal. Once that happens, playbackHead sticks at
+        // 0 and subsequent writes go to a dead track.
+        val bootstrapSilence = ShortArray(SR / 2)  // 500 ms
+        writeAndCount(bootstrapSilence)
+        // Keep-alive watchdog. AudioFlinger on OnePlus/ColorOS kills a track
+        // that underruns for ~1 s (confirmed via `prepareTracks_l BUFFER
+        // TIMEOUT: remove track … due to underrun on thread 29`). Our
+        // per-segment synthesis takes 3–5 s, which always exceeds that
+        // window between writes, so the track was getting silenced after
+        // the first ~1 s of audio played. The watchdog pads with 200 ms of
+        // silence any time the buffered-ahead audio drops below 400 ms,
+        // regardless of segment state — silence only advances playback head
+        // in the gaps between real audio and is never inserted inside a
+        // contiguous burst of real writes (those bring buffered above 400 ms
+        // and keep the watchdog quiet).
+        val keepAliveBuffer = ShortArray(SR / 5)  // 200 ms of silence
+        val keepAliveJob = kotlinx.coroutines.CoroutineScope(
+            kotlinx.coroutines.Dispatchers.IO
+        ).launch {
+            var tick = 0
+            while (kotlinx.coroutines.currentCoroutineContext()[kotlinx.coroutines.Job]?.isActive != false) {
+                kotlinx.coroutines.delay(100)
+                val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
+                val written = sessionFramesWritten.get() and 0xFFFFFFFFL
+                val buffered = written - head
+                val needsPad = buffered < SR * 2 / 5    // < 400 ms
+                if ((tick and 0x1F) == 0) {
+                    nlog("keepAlive tick=$tick head=$head written=$written buffered=$buffered pad=$needsPad state=${track.playState}")
+                }
+                tick++
+                // Override any clientVolume mute that ColorOS keeps applying.
+                try { track.setVolume(1.0f) } catch (_: Exception) {}
+                if (needsPad) writeAndCount(keepAliveBuffer)
+            }
+        }
         val chan = kotlinx.coroutines.channels.Channel<String>(
             capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
         )
@@ -3369,24 +3670,26 @@ class Qwen3TtsEngine(
             var segIdx = 0
             for (sentence in chan) {
                 try {
+                    sessionGenActive.set(true)
                     if (USE_STREAMING_DECODE && talkerPteModule != null && cpPteModule != null) {
                         // CP↔BigVGAN overlap path: audio chunks flow to the
                         // shared AudioTrack as soon as BigVGAN finishes each
                         // SEQ_LEN window, instead of after the whole segment.
-                        generateSegmentAudioVCStreaming(sentence, segIdx) { pcm ->
-                            if (pcm.isNotEmpty()) track.write(pcm, 0, pcm.size)
-                        }
+                        generateSegmentAudioVCStreaming(sentence, segIdx, writeAndCount)
                     } else {
                         val audio = generateSegmentAudioVC(sentence, segIdx)
-                        if (audio.isNotEmpty()) track.write(audio, 0, audio.size)
+                        writeAndCount(audio)
                     }
                     segIdx++
                 } catch (e: Exception) {
                     nlog("session seg $segIdx error: ${e.message}")
+                } finally {
+                    sessionGenActive.set(false)
                 }
             }
         }
         sessionTrack = track; sessionChannel = chan; sessionJob = job
+        sessionKeepAliveJob = keepAliveJob
         nlog("streaming session opened")
     }
 
@@ -3396,6 +3699,12 @@ class Qwen3TtsEngine(
      * immediately. Sentences play in the order they were enqueued.
      */
     fun enqueueSentence(sentence: String) {
+        if (USE_MEDIAPLAYER_FALLBACK) {
+            val chan = sessionMpQueue ?: run { nlog("enqueueSentence: no MP session"); return }
+            val r = chan.trySend(sentence)
+            if (r.isFailure) nlog("enqueueSentence: MP channel full / closed")
+            return
+        }
         val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return }
         val r = chan.trySend(sentence)
         if (r.isFailure) nlog("enqueueSentence: channel full / closed")
@@ -3407,17 +3716,46 @@ class Qwen3TtsEngine(
      * drains), then release the shared track. Safe to call more than once.
      */
     suspend fun endStreamingSession() {
+        if (USE_MEDIAPLAYER_FALLBACK) { endStreamingSessionMp(); return }
         val chan = sessionChannel ?: return
         chan.close()
         try { sessionJob?.join() } catch (_: Exception) {}
+        // Stop the keep-alive watchdog BEFORE draining so it doesn't pad more
+        // silence onto the tail while we're waiting for the existing buffer
+        // to play out.
+        try { sessionKeepAliveJob?.cancel() } catch (_: Exception) {}
+        try { sessionKeepAliveJob?.join() } catch (_: Exception) {}
         try {
-            sessionTrack?.let {
-                // Block until written samples have been consumed by the
-                // hardware so users aren't cut off mid-syllable.
-                it.stop(); it.release()
+            sessionTrack?.let { track ->
+                // AudioTrack.stop() in MODE_STREAM DISCARDS unplayed buffered
+                // samples — it doesn't block for drain. Poll getPlaybackHead
+                // Position() until it reaches what we wrote, then stop. The
+                // head is a 32-bit wrap-around counter, so compare modulo.
+                // Cap the drain wait so a stalled track can't block us forever.
+                val targetFrames = sessionFramesWritten.get()
+                val startMs = System.currentTimeMillis()
+                val maxDrainMs = (targetFrames * 1000L / SR) + 500L   // audio dur + 500ms slack
+                while (true) {
+                    val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
+                    val reached = head >= (targetFrames and 0xFFFFFFFFL)
+                    val state = track.playState
+                    if (reached || state != AudioTrack.PLAYSTATE_PLAYING) break
+                    if (System.currentTimeMillis() - startMs > maxDrainMs) {
+                        nlog("endStreamingSession: drain timeout at head=$head/$targetFrames")
+                        break
+                    }
+                    kotlinx.coroutines.delay(20)
+                }
+                track.stop(); track.release()
             }
         } catch (_: Exception) {}
-        sessionTrack = null; sessionChannel = null; sessionJob = null
+        // Release audio focus after the track is fully drained and stopped.
+        try {
+            val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
+            sessionFocusRequest?.let { am?.abandonAudioFocusRequest(it) }
+        } catch (_: Exception) {}
+        sessionFocusRequest = null
+        sessionTrack = null; sessionChannel = null; sessionJob = null; sessionKeepAliveJob = null
         nlog("streaming session closed")
     }