diff --git a/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt b/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt index 1522f50..6730c42 100644 --- a/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt +++ b/kazeia-android/app/src/main/java/com/kazeia/service/KazeiaService.kt @@ -79,6 +79,7 @@ class KazeiaService : Service() { val aiWorkload: StateFlow = _aiWorkload private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.Default) + private var currentPipelineJob: kotlinx.coroutines.Job? = null private val _isListening = MutableStateFlow(false) val isListening: StateFlow = _isListening @@ -108,12 +109,18 @@ class KazeiaService : Service() { // Auto-pipeline trigger via: am startservice -n com.kazeia/.service.KazeiaService --ez run_pipeline true if (intent?.getBooleanExtra("run_pipeline", false) == true) { log("Auto-pipeline triggered via intent") - serviceScope.launch { + // Cancel any running pipeline to avoid blocking + currentPipelineJob?.cancel() + currentPipelineJob = serviceScope.launch(Dispatchers.IO) { // Wait for TTS to be loaded while (!::tts.isInitialized || tts !is com.kazeia.tts.Qwen3TtsEngine) { - kotlinx.coroutines.delay(1000) + kotlinx.coroutines.delay(500) } - processTextInput("pipeline") + val embedsPath = "/data/local/tmp/kazeia/models/qwen3-tts-npu/full_pipeline_embeds.bin" + val qwenTts = tts as? com.kazeia.tts.Qwen3TtsEngine ?: return@launch + val audio = qwenTts.generateFromEmbeds(embedsPath) + log("Pipeline done: ${audio.size} samples (${audio.size/24000f}s)") + // Audio is played by the TTS engine internally } } intent?.getStringExtra("full_pipeline")?.let { embedsPath -> @@ -912,8 +919,10 @@ class KazeiaService : Service() { fun processTextInput(text: String) { log("processTextInput: '$text'") serviceScope.launch { - // Special test commands + // Special test commands — cancel previous pipeline first if (text.trim().lowercase().let { it.startsWith("pipeline") || it.startsWith("!pipeline") || it.startsWith("\\!pipeline") || it == "go" }) { + currentPipelineJob?.cancel() + currentPipelineJob = kotlin.coroutines.coroutineContext[kotlinx.coroutines.Job] val embedsPath = "/data/local/tmp/kazeia/models/qwen3-tts-npu/full_pipeline_embeds.bin" val wavPath = "/data/local/tmp/kazeia/tts_output.wav" addMessage(ChatMessage(role = ChatMessage.Role.SYSTEM, text = "Running full pipeline...")) diff --git a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt index edd4e0a..b378674 100644 --- a/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt +++ b/kazeia-android/app/src/main/java/com/kazeia/tts/Qwen3TtsEngine.kt @@ -265,11 +265,47 @@ class Qwen3TtsEngine( nlog("Talker .pte JNI loaded+compiled: ${System.currentTimeMillis() - t0}ms, result=$lm") if (lm != 0) { nlog("Talker .pte loadMethod failed"); talkerPteModule = null } else { - // Load rotary tables for talker .pte val path = "/data/local/tmp/kazeia/models" talkerPteRotaryCos = loadNpy("$path/talker_pte_rotary_cos.npy") talkerPteRotarySin = loadNpy("$path/talker_pte_rotary_sin.npy") nlog("Talker .pte rotary: ${talkerPteRotaryCos?.size} floats") + + // Warmup both models: first forward() triggers QNN DSP compilation (~7s) + // Better to pay this cost at init than at first pipeline run + val tw = System.currentTimeMillis() + try { + val dE = FloatArray(TALKER_DIM) + val dM = FloatArray(TALKER_PTE_KV_LEN) { -1e9f }; dM[TALKER_PTE_KV_LEN - 1] = 0f + val dC = FloatArray(TALKER_HEAD_DIM) { 1f } + val dS = FloatArray(TALKER_HEAD_DIM) + val tkvSz = TALKER_HEADS * TALKER_PTE_KV_LEN * TALKER_HEAD_DIM + val ins = mutableListOf( + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(dE, longArrayOf(1,1,TALKER_DIM.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(dM, longArrayOf(1,1,1,TALKER_PTE_KV_LEN.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(dC, longArrayOf(1,1,TALKER_HEAD_DIM.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(dS, longArrayOf(1,1,TALKER_HEAD_DIM.toLong()))) + ) + for (i in 0 until TALKER_LAYERS * 2) ins.add(org.pytorch.executorch.EValue.from( + org.pytorch.executorch.Tensor.fromBlob(FloatArray(tkvSz), longArrayOf(1,TALKER_HEADS.toLong(),TALKER_PTE_KV_LEN.toLong(),TALKER_HEAD_DIM.toLong())))) + talkerPteModule!!.forward(*ins.toTypedArray()) + nlog("Talker warmup: ${System.currentTimeMillis() - tw}ms") + } catch (e: Exception) { nlog("Talker warmup failed: ${e.message}") } + + // CP warmup + val cw = System.currentTimeMillis() + try { + val ckvSz = CP_KV_HEADS * CP_KV_LEN * CP_HEAD_DIM + val cIns = mutableListOf( + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(FloatArray(TALKER_DIM), longArrayOf(1,1,TALKER_DIM.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(FloatArray(CP_KV_LEN){-1e9f}.also{it[CP_KV_LEN-1]=0f}, longArrayOf(1,1,1,CP_KV_LEN.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(FloatArray(CP_HEAD_DIM){1f}, longArrayOf(1,1,CP_HEAD_DIM.toLong()))), + org.pytorch.executorch.EValue.from(org.pytorch.executorch.Tensor.fromBlob(FloatArray(CP_HEAD_DIM), longArrayOf(1,1,CP_HEAD_DIM.toLong()))) + ) + for (i in 0 until CP_LAYERS * 2) cIns.add(org.pytorch.executorch.EValue.from( + org.pytorch.executorch.Tensor.fromBlob(FloatArray(ckvSz), longArrayOf(1,CP_KV_HEADS.toLong(),CP_KV_LEN.toLong(),CP_HEAD_DIM.toLong())))) + cpPteModule!!.forward(*cIns.toTypedArray()) + nlog("CP warmup: ${System.currentTimeMillis() - cw}ms") + } catch (e: Exception) { nlog("CP warmup failed: ${e.message}") } } } catch (e: Exception) { nlog("Talker .pte JNI failed: ${e.message}")