Compare commits
21 Commits
backup/pre
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
db281002d9 | |
|
|
c2f7859dfe | |
|
|
b5b13780f7 | |
|
|
2fe46e0f15 | |
|
|
06dcd76dcb | |
|
|
8939c680b2 | |
|
|
f17131aefb | |
|
|
6a958c1a10 | |
|
|
751e3e0868 | |
|
|
39babcb158 | |
|
|
0632db1ee0 | |
|
|
10fd10fd90 | |
|
|
67de8d4767 | |
|
|
a41619ed67 | |
|
|
f4b15a72a7 | |
|
|
3d435f9cdd | |
|
|
7dc6704e95 | |
|
|
6c7746c5d0 | |
|
|
b57719fa5e | |
|
|
f32b5ddfdd | |
|
|
809a6d4fed |
|
|
@ -0,0 +1,72 @@
|
||||||
|
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
|
||||||
|
index e93731e..4951e1d 100644
|
||||||
|
--- a/backends/qualcomm/CMakeLists.txt
|
||||||
|
+++ b/backends/qualcomm/CMakeLists.txt
|
||||||
|
@@ -308,8 +308,8 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
-# QNN pybind
|
||||||
|
-if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
|
||||||
|
+# QNN pybind — host Python bindings, not for Android cross-compile
|
||||||
|
+if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64" AND NOT ANDROID)
|
||||||
|
add_subdirectory(
|
||||||
|
${EXECUTORCH_SOURCE_DIR}/third-party/pybind11
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/pybind11
|
||||||
|
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
|
||||||
|
index 45f2414..ae3d79f 100644
|
||||||
|
--- a/extension/android/jni/jni_layer_llama.cpp
|
||||||
|
+++ b/extension/android/jni/jni_layer_llama.cpp
|
||||||
|
@@ -171,14 +171,44 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
|
||||||
|
model_path->toStdString().c_str(),
|
||||||
|
data_files_vector,
|
||||||
|
executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors);
|
||||||
|
- std::string decoder_model = "llama3"; // use llama3 for now
|
||||||
|
- runner_ = std::make_unique<example::Runner<uint16_t>>( // QNN runner
|
||||||
|
- std::move(module),
|
||||||
|
- decoder_model.c_str(),
|
||||||
|
- model_path->toStdString().c_str(),
|
||||||
|
- tokenizer_path->toStdString().c_str(),
|
||||||
|
- "",
|
||||||
|
- "");
|
||||||
|
+ std::string decoder_model = "qwen3"; // Kazeia: our .pte was exported with --decoder_model qwen3-4b
|
||||||
|
+
|
||||||
|
+ // Mirror qnn_llama_runner.cpp main(): pick the Runner<T> template based
|
||||||
|
+ // on the model's get_kv_io_bit_width metadata. The 16-bit KV I/O models
|
||||||
|
+ // were introduced after the 8-bit ones, and using the wrong T treats
|
||||||
|
+ // KV-cache bytes as the wrong width → garbage logits → gibberish output.
|
||||||
|
+ example::KvBitWidth kv_bitwidth = example::KvBitWidth::kWidth8;
|
||||||
|
+ if (module->method_names()->count("get_kv_io_bit_width") > 0) {
|
||||||
|
+ kv_bitwidth = static_cast<example::KvBitWidth>(
|
||||||
|
+ module->get("get_kv_io_bit_width").get().toScalar().to<int64_t>());
|
||||||
|
+ }
|
||||||
|
+ // Auto-detect eval_mode: kv-only (0) if the .pte only carries
|
||||||
|
+ // kv_forward, hybrid (1) if it also has prefill_forward (which lets the
|
||||||
|
+ // runner batch the prompt prefill — TTFT drops from ~52 ms/token to
|
||||||
|
+ // sub-ms after the one-shot prefill graph). Same JNI binary works with
|
||||||
|
+ // both export modes, no code change needed when the .pte is upgraded.
|
||||||
|
+ int eval_mode = 0;
|
||||||
|
+ if (module->method_names()->count("prefill_forward") > 0) {
|
||||||
|
+ eval_mode = 1; // EvalMode::kHybrid
|
||||||
|
+ }
|
||||||
|
+ auto make_runner = [&](auto sample) -> std::unique_ptr<llm::IRunner> {
|
||||||
|
+ using T = decltype(sample);
|
||||||
|
+ return std::make_unique<example::Runner<T>>(
|
||||||
|
+ std::move(module),
|
||||||
|
+ decoder_model.c_str(),
|
||||||
|
+ model_path->toStdString().c_str(),
|
||||||
|
+ tokenizer_path->toStdString().c_str(),
|
||||||
|
+ /* performance_output_path */ "",
|
||||||
|
+ /* dump_logits_path */ "",
|
||||||
|
+ /* temperature */ 0.0f, // greedy
|
||||||
|
+ eval_mode,
|
||||||
|
+ /* shared_buffer */ true);
|
||||||
|
+ };
|
||||||
|
+ if (kv_bitwidth == example::KvBitWidth::kWidth16) {
|
||||||
|
+ runner_ = make_runner(uint16_t{0});
|
||||||
|
+ } else {
|
||||||
|
+ runner_ = make_runner(uint8_t{0});
|
||||||
|
+ }
|
||||||
|
model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
|
||||||
|
#endif
|
||||||
|
#if defined(EXECUTORCH_BUILD_MEDIATEK)
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
diff --git a/examples/qualcomm/oss_scripts/llama/__init__.py b/examples/qualcomm/oss_scripts/llama/__init__.py
|
diff --git a/examples/qualcomm/oss_scripts/llama/__init__.py b/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
index 963db6e..953dc4c 100644
|
index 963db6e..9ccfdd0 100644
|
||||||
--- a/examples/qualcomm/oss_scripts/llama/__init__.py
|
--- a/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
+++ b/examples/qualcomm/oss_scripts/llama/__init__.py
|
+++ b/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
@@ -25,9 +25,14 @@ from executorch.examples.models.granite import (
|
@@ -25,9 +25,14 @@ from executorch.examples.models.granite import (
|
||||||
|
|
@ -20,7 +20,7 @@ index 963db6e..953dc4c 100644
|
||||||
from executorch.examples.models.qwen2_5 import (
|
from executorch.examples.models.qwen2_5 import (
|
||||||
convert_weights as convert_qwen2_5_weights,
|
convert_weights as convert_qwen2_5_weights,
|
||||||
)
|
)
|
||||||
@@ -479,6 +484,34 @@ class Qwen3_1_7B(LLMModelConfig):
|
@@ -479,6 +484,37 @@ class Qwen3_1_7B(LLMModelConfig):
|
||||||
quant_recipe = Qwen3_1_7BQuantRecipe
|
quant_recipe = Qwen3_1_7BQuantRecipe
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -40,10 +40,13 @@ index 963db6e..953dc4c 100644
|
||||||
+ convert_weights = convert_qwen3_weights
|
+ convert_weights = convert_qwen3_weights
|
||||||
+ transform_weight = False
|
+ transform_weight = False
|
||||||
+ instruct_model = True
|
+ instruct_model = True
|
||||||
+ # Bumped to 2 to halve peak host RAM during QNN compile (4B at sharding=1
|
+ # num_sharding=1 for hybrid mode: sharding=2 produces a multi-context
|
||||||
+ # OOMed on a 62 GB box, peak anon-rss 46 GB). At sharding=2 each shard
|
+ # .pte (2 graphs × 2 shards = 4 contexts) that the LlmModule load path
|
||||||
+ # compile fits comfortably; runner stitches them at load time.
|
+ # can't restore (error 5010 "Context group 1 does not exist"). With
|
||||||
+ num_sharding = 2
|
+ # sharding=1 the hybrid export needs ~46 GB RAM peak — the 192 GB swap
|
||||||
|
+ # on /swapfile handles this; compile takes ~80 min wall but completes
|
||||||
|
+ # cleanly. Single-context .pte loads fine through the JNI runner.
|
||||||
|
+ num_sharding = 1
|
||||||
+ masked_softmax = True
|
+ masked_softmax = True
|
||||||
+ seq_mse_candidates = 0
|
+ seq_mse_candidates = 0
|
||||||
+ r1 = False
|
+ r1 = False
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
||||||
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
|
||||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
||||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
||||||
|
|
@ -50,7 +51,7 @@
|
||||||
|
|
||||||
<service
|
<service
|
||||||
android:name=".service.KazeiaService"
|
android:name=".service.KazeiaService"
|
||||||
android:foregroundServiceType="microphone|specialUse"
|
android:foregroundServiceType="microphone|mediaPlayback|specialUse"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<property
|
<property
|
||||||
android:name="android.app.PROPERTY_SPECIAL_USE_FGS_SUBTYPE"
|
android:name="android.app.PROPERTY_SPECIAL_USE_FGS_SUBTYPE"
|
||||||
|
|
|
||||||
|
|
@ -1,43 +1,49 @@
|
||||||
package com.kazeia.llm
|
package com.kazeia.llm
|
||||||
|
|
||||||
|
import android.content.Context
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import com.kazeia.core.*
|
import com.kazeia.core.*
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.withContext
|
import kotlinx.coroutines.withContext
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
import org.pytorch.executorch.extension.llm.LlmCallback
|
||||||
|
import org.pytorch.executorch.extension.llm.LlmModule
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* LLM Engine using ExecuTorch + QNN backend via subprocess.
|
* LLM Engine using ExecuTorch LlmModule in-process — **no root required**.
|
||||||
* Calls qnn_llama_runner binary with root access (Magisk su).
|
*
|
||||||
|
* Runs Qwen3-4B via `org.pytorch.executorch.extension.llm.LlmModule`, which
|
||||||
|
* wraps the same C++ TextLlmRunner as the standalone qnn_llama_runner binary
|
||||||
|
* but inside the app's own process. The QNN HTP backend works because the
|
||||||
|
* DSP fastrpc service accepts the Zygote-forked app process (unlike
|
||||||
|
* ProcessBuilder-spawned subprocesses which lose supplementary GIDs on exec
|
||||||
|
* and get rejected by the fastrpc credential checks).
|
||||||
|
*
|
||||||
|
* Model + tokenizer live in /data/local/tmp/kazeia-et/ (readable by the app
|
||||||
|
* on this device's permissive SELinux policy). libexecutorch.so + QNN libs
|
||||||
|
* are bundled in jniLibs.
|
||||||
*
|
*
|
||||||
* Current tablet config: Qwen3-4B KV-mode, ~18-22 tok/s on Hexagon V79
|
* Current tablet config: Qwen3-4B KV-mode, ~18-22 tok/s on Hexagon V79
|
||||||
* (Snapdragon 8 Elite), TTFT 0.9 s, RSS 1.76 GB.
|
* (Snapdragon 8 Elite), TTFT 0.9 s, RSS 1.76 GB.
|
||||||
*
|
|
||||||
* Why root: the runner binary plus its QNN v2.42 .so deps live in
|
|
||||||
* /data/local/tmp/kazeia-et/ (shell_data_file SELinux context). Untrusted
|
|
||||||
* apps can't exec binaries from there. The Hexagon DSP fastrpc service also
|
|
||||||
* refuses to load the v2.42 Skel from the app's own files dir — only from
|
|
||||||
* nativeLibraryDir — but that dir already holds the TTS stack's v2.31 Skel
|
|
||||||
* (same filename, different version, can't coexist). Rebuilding everything
|
|
||||||
* against one QNN version would eliminate the conflict, but would require
|
|
||||||
* re-exporting the TTS .pte with the new runtime (tooling currently broken
|
|
||||||
* on the flatc schema/dataclass mismatch in the qnn_venv).
|
|
||||||
*/
|
*/
|
||||||
class ExecuTorchLlmEngine(
|
class ExecuTorchLlmEngine(
|
||||||
|
private val context: Context,
|
||||||
private val onLog: ((String) -> Unit)? = null
|
private val onLog: ((String) -> Unit)? = null
|
||||||
) : LlmEngine {
|
) : LlmEngine {
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
private const val TAG = "ExecuTorchLLM"
|
private const val TAG = "ExecuTorchLLM"
|
||||||
private const val RUNNER_DIR = "/data/local/tmp/kazeia-et"
|
// /no_think disables Qwen3's chain-of-thought block. Compact wording
|
||||||
// /no_think disables Qwen3's chain-of-thought block so the full token
|
// keeps prefill cost low: this prompt is ~25 tokens vs ~55 in the
|
||||||
// budget goes to the actual answer (without it, 120-200 tokens get
|
// earlier verbose version → saves ~1.5 s of TTFT in kv-only mode.
|
||||||
// consumed by <think>…</think> leaving nothing to speak).
|
private const val SYSTEM_PROMPT = "Tu es Kazeia, à l'écoute en français. Réponds en 1-2 phrases courtes, sans raisonnement. /no_think"
|
||||||
// Short-response directive keeps TTS latency manageable — each sentence
|
|
||||||
// costs ~3-5 s on the .pte path, so 1-2 sentences is the sweet spot.
|
private const val MODEL_DIR = "/data/local/tmp/kazeia-et"
|
||||||
private const val SYSTEM_PROMPT = "Tu es Kazeia, un compagnon bienveillant d'écoute émotionnelle. Réponds toujours en français, en 1 ou 2 phrases courtes (40 mots maximum). Pas de raisonnement, donne directement la réponse. /no_think"
|
private const val MODEL_PATH = "$MODEL_DIR/hybrid_llama_qnn.pte"
|
||||||
|
private const val TOKENIZER_PATH = "$MODEL_DIR/tokenizer.json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private var llmModule: LlmModule? = null
|
||||||
private var modelName = ""
|
private var modelName = ""
|
||||||
private var loaded = false
|
private var loaded = false
|
||||||
|
|
||||||
|
|
@ -48,77 +54,152 @@ class ExecuTorchLlmEngine(
|
||||||
|
|
||||||
override suspend fun load(modelPath: String, config: LlmConfig) {
|
override suspend fun load(modelPath: String, config: LlmConfig) {
|
||||||
withContext(Dispatchers.IO) {
|
withContext(Dispatchers.IO) {
|
||||||
val check = execRoot("ls $RUNNER_DIR/qnn_llama_runner $RUNNER_DIR/hybrid_llama_qnn.pte $RUNNER_DIR/tokenizer.json 2>&1")
|
if (!File(MODEL_PATH).exists()) {
|
||||||
if (check.contains("No such file")) {
|
nlog("ERROR: model not found at $MODEL_PATH")
|
||||||
nlog("ERROR: runner or model not found in $RUNNER_DIR")
|
return@withContext
|
||||||
|
}
|
||||||
|
if (!File(TOKENIZER_PATH).exists()) {
|
||||||
|
nlog("ERROR: tokenizer not found at $TOKENIZER_PATH")
|
||||||
return@withContext
|
return@withContext
|
||||||
}
|
}
|
||||||
|
|
||||||
deployRunnerScript()
|
try {
|
||||||
|
val t0 = System.currentTimeMillis()
|
||||||
|
// MODEL_TYPE_QNN_LLAMA=4 selects the Qualcomm runner path in
|
||||||
|
// jni_layer_llama.cpp, which uses example::Runner (same code
|
||||||
|
// as the qnn_llama_runner binary) instead of the generic
|
||||||
|
// TextLLMRunner. Our .pte was exported with
|
||||||
|
// --decoder_model qwen3-4b which requires this path.
|
||||||
|
val MODEL_TYPE_QNN_LLAMA = 4
|
||||||
|
llmModule = LlmModule(MODEL_TYPE_QNN_LLAMA, MODEL_PATH, TOKENIZER_PATH, 0.7f)
|
||||||
|
nlog("LlmModule instantiated in ${System.currentTimeMillis() - t0}ms")
|
||||||
|
|
||||||
writeFileRoot("$RUNNER_DIR/outputs/prompt.b64",
|
// Load the PTE into QNN HTP (calls the native load()).
|
||||||
android.util.Base64.encodeToString("Bonjour".toByteArray(), android.util.Base64.NO_WRAP))
|
val loadResult = llmModule!!.load()
|
||||||
if (SYSTEM_PROMPT.isNotEmpty()) {
|
if (loadResult != 0) {
|
||||||
writeFileRoot("$RUNNER_DIR/outputs/system.b64",
|
nlog("ERROR: LlmModule.load() returned $loadResult")
|
||||||
android.util.Base64.encodeToString(SYSTEM_PROMPT.toByteArray(), android.util.Base64.NO_WRAP))
|
llmModule = null
|
||||||
} else {
|
return@withContext
|
||||||
execRoot("rm -f $RUNNER_DIR/outputs/system.b64")
|
|
||||||
}
|
}
|
||||||
val test = execRoot("sh $RUNNER_DIR/run_llm.sh 0.0 80 2>&1")
|
nlog("LlmModule loaded in ${System.currentTimeMillis() - t0}ms total")
|
||||||
|
|
||||||
if (test.contains("Generated Tokens") || test.contains("Rate:")) {
|
|
||||||
loaded = true
|
loaded = true
|
||||||
val rateMatch = Regex("Generated \\d+ tokens:.*Rate:\\s+([\\d.]+)").find(test)
|
modelName = "Qwen3-4B LlmModule"
|
||||||
val rate = rateMatch?.groupValues?.get(1) ?: "?"
|
|
||||||
modelName = "Qwen3 (${rate} tok/s NPU)"
|
|
||||||
nlog("Ready: $modelName")
|
nlog("Ready: $modelName")
|
||||||
} else {
|
} catch (e: Throwable) {
|
||||||
nlog("ERROR: test failed: ${test.takeLast(200)}")
|
nlog("ERROR: LlmModule init failed: ${e.javaClass.simpleName}: ${e.message}")
|
||||||
|
llmModule = null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun isLoaded(): Boolean = loaded
|
override fun isLoaded(): Boolean = loaded && llmModule != null
|
||||||
|
|
||||||
override suspend fun generate(
|
override suspend fun generate(
|
||||||
prompt: String,
|
prompt: String,
|
||||||
params: SamplingParams,
|
params: SamplingParams,
|
||||||
onToken: ((String) -> Boolean)?
|
onToken: ((String) -> Boolean)?
|
||||||
): GenerationResult = withContext(Dispatchers.IO) {
|
): GenerationResult = withContext(Dispatchers.IO) {
|
||||||
if (!loaded) throw IllegalStateException("Model not loaded")
|
val mod = llmModule ?: throw IllegalStateException("Model not loaded")
|
||||||
|
|
||||||
val startTime = System.currentTimeMillis()
|
val startTime = System.currentTimeMillis()
|
||||||
|
val fullPrompt = buildChatTemplate(prompt)
|
||||||
writeFileRoot("$RUNNER_DIR/outputs/prompt.b64",
|
|
||||||
android.util.Base64.encodeToString(prompt.toByteArray(), android.util.Base64.NO_WRAP))
|
|
||||||
if (SYSTEM_PROMPT.isNotEmpty()) {
|
|
||||||
writeFileRoot("$RUNNER_DIR/outputs/system.b64",
|
|
||||||
android.util.Base64.encodeToString(SYSTEM_PROMPT.toByteArray(), android.util.Base64.NO_WRAP))
|
|
||||||
} else {
|
|
||||||
execRoot("rm -f $RUNNER_DIR/outputs/system.b64")
|
|
||||||
}
|
|
||||||
|
|
||||||
nlog("Prompt: '${prompt.take(80)}'")
|
nlog("Prompt: '${prompt.take(80)}'")
|
||||||
|
|
||||||
|
val responseBuilder = StringBuilder()
|
||||||
|
var firstTokenMs = -1L
|
||||||
|
// Track whether we're inside a <think>…</think> block so the upstream
|
||||||
|
// SentenceStreamer / TTS doesn't get fed reasoning tokens. Even with
|
||||||
|
// /no_think in the system prompt Qwen3 still emits empty <think></think>
|
||||||
|
// wrappers for ~3 tokens before the real answer.
|
||||||
|
var inThink = false
|
||||||
|
val tokenScan = StringBuilder() // small lookahead to spot tag boundaries
|
||||||
|
|
||||||
|
// Singleton special tokens that should never reach the TTS streamer
|
||||||
|
// (they leak when the model wraps its reply or signals end-of-turn).
|
||||||
|
val stripTokens = listOf("<|im_start|>", "<|im_end|>", "<|endoftext|>")
|
||||||
|
val maxTagLen = listOf("<think>", "</think>", "<|im_start|>", "<|im_end|>", "<|endoftext|>")
|
||||||
|
.maxOf { it.length }
|
||||||
|
|
||||||
|
val cb = object : LlmCallback {
|
||||||
|
override fun onResult(result: String) {
|
||||||
|
if (firstTokenMs < 0) firstTokenMs = System.currentTimeMillis() - startTime
|
||||||
|
responseBuilder.append(result)
|
||||||
|
|
||||||
|
// Forward to caller only outside <think> blocks, and strip
|
||||||
|
// singleton special tokens. We accumulate a tiny lookahead buffer
|
||||||
|
// so tag tokens that arrive split ("<thi", "nk>") still match.
|
||||||
|
tokenScan.append(result)
|
||||||
|
while (true) {
|
||||||
|
if (!inThink) {
|
||||||
|
val open = tokenScan.indexOf("<think>")
|
||||||
|
if (open < 0) {
|
||||||
|
// No <think> open pending — strip any singleton tokens
|
||||||
|
// that fully landed in the buffer, then flush prose
|
||||||
|
// up to a safe point preserving lookahead.
|
||||||
|
for (tok in stripTokens) {
|
||||||
|
var idx = tokenScan.indexOf(tok)
|
||||||
|
while (idx >= 0) {
|
||||||
|
tokenScan.delete(idx, idx + tok.length)
|
||||||
|
idx = tokenScan.indexOf(tok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val safe = tokenScan.length - maxTagLen
|
||||||
|
if (safe > 0) {
|
||||||
|
onToken?.invoke(tokenScan.substring(0, safe))
|
||||||
|
tokenScan.delete(0, safe)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Flush the prose before the <think> tag, then enter think mode.
|
||||||
|
if (open > 0) onToken?.invoke(tokenScan.substring(0, open))
|
||||||
|
tokenScan.delete(0, open + "<think>".length)
|
||||||
|
inThink = true
|
||||||
|
} else {
|
||||||
|
val close = tokenScan.indexOf("</think>")
|
||||||
|
if (close < 0) {
|
||||||
|
// Drop all buffered chars except a small tail in case
|
||||||
|
// the closing tag is split across tokens.
|
||||||
|
val keep = "</think>".length - 1
|
||||||
|
if (tokenScan.length > keep) tokenScan.delete(0, tokenScan.length - keep)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
tokenScan.delete(0, close + "</think>".length)
|
||||||
|
inThink = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
override fun onStats(stats: String) {
|
||||||
|
nlog("stats: ${stats.take(200)}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
val seqLen = minOf(params.maxNewTokens, 512)
|
val seqLen = minOf(params.maxNewTokens, 512)
|
||||||
val output = execRoot("sh $RUNNER_DIR/run_llm.sh ${params.temperature} $seqLen 2>&1")
|
val rc = try {
|
||||||
|
// echo=false so onResult() only receives the generated completion,
|
||||||
|
// not the prompt tokens echoed back — otherwise the sentence
|
||||||
|
// streamer would feed '<|im_start|>user …' to the TTS.
|
||||||
|
mod.generate(fullPrompt, seqLen, cb, /* echo */ false)
|
||||||
|
} catch (e: Throwable) {
|
||||||
|
nlog("generate() threw: ${e.message}")
|
||||||
|
-1
|
||||||
|
}
|
||||||
|
|
||||||
val tokenCount = Regex("Generated Tokens:\\s+(\\d+)").find(output)
|
// Drain any leftover prose buffered during <think>-suppression so the
|
||||||
?.groupValues?.get(1)?.toIntOrNull() ?: 0
|
// last sentence reaches the TTS even if it ran past the closing tag.
|
||||||
val rate = Regex("Generated \\d+ tokens:.*Rate:\\s+([\\d.]+)").find(output)
|
if (!inThink && tokenScan.isNotEmpty()) {
|
||||||
?.groupValues?.get(1)?.toFloatOrNull() ?: 0f
|
onToken?.invoke(tokenScan.toString())
|
||||||
val ttft = Regex("Time to first generated token:\\s+([\\d.]+)").find(output)
|
tokenScan.clear()
|
||||||
?.groupValues?.get(1)?.toFloatOrNull() ?: 0f
|
}
|
||||||
|
|
||||||
val responseRaw = execRoot("cat $RUNNER_DIR/outputs/response.txt 2>/dev/null")
|
|
||||||
nlog("RAW: ${responseRaw.take(300)}")
|
|
||||||
val responseText = extractResponse(responseRaw)
|
|
||||||
|
|
||||||
val elapsed = System.currentTimeMillis() - startTime
|
val elapsed = System.currentTimeMillis() - startTime
|
||||||
nlog("Response: '$responseText'")
|
val rawText = responseBuilder.toString()
|
||||||
nlog("Stats: ${tokenCount}tok ${rate}tok/s TTFT=${ttft}s ${elapsed}ms")
|
val responseText = cleanResponse(rawText)
|
||||||
|
val tokenCount = rawText.length / 4 // rough estimate without a tokenizer
|
||||||
|
val rate = if (elapsed > 0) (tokenCount * 1000f) / elapsed else 0f
|
||||||
|
|
||||||
onToken?.invoke(responseText)
|
nlog("Response: '${responseText.take(80)}'")
|
||||||
|
nlog("Stats: rc=$rc ~${tokenCount}tok ~${"%.1f".format(rate)}tok/s TTFT=${firstTokenMs}ms total=${elapsed}ms")
|
||||||
|
|
||||||
GenerationResult(
|
GenerationResult(
|
||||||
text = responseText,
|
text = responseText,
|
||||||
|
|
@ -128,20 +209,32 @@ class ExecuTorchLlmEngine(
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun extractResponse(raw: String): String {
|
/**
|
||||||
|
* Qwen3 chat template matching qnn_llama_runner.cpp's get_formatted_prompt()
|
||||||
|
* for DecoderModelVersion::kQwen3. Note the user-first-then-system ordering
|
||||||
|
* (quirky but required — the runner binary produces the same layout and our
|
||||||
|
* .pte was trained with it). Terminates with `<|im_start|>assistant` with
|
||||||
|
* no trailing newline, matching the binary exactly.
|
||||||
|
*/
|
||||||
|
private fun buildChatTemplate(userInput: String): String {
|
||||||
|
val sb = StringBuilder()
|
||||||
|
sb.append("<|im_start|>user\n").append(userInput).append("<|im_end|>\n")
|
||||||
|
if (SYSTEM_PROMPT.isNotEmpty()) {
|
||||||
|
sb.append("<|im_start|>system\n").append(SYSTEM_PROMPT).append("<|im_end|>\n")
|
||||||
|
}
|
||||||
|
sb.append("<|im_start|>assistant")
|
||||||
|
return sb.toString()
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Strip <think>…</think>, special tokens, and leading/trailing whitespace. */
|
||||||
|
private fun cleanResponse(raw: String): String {
|
||||||
var text = raw
|
var text = raw
|
||||||
val thinkEnd = text.indexOf("</think>")
|
val thinkEnd = text.indexOf("</think>")
|
||||||
if (thinkEnd >= 0) {
|
if (thinkEnd >= 0) {
|
||||||
text = text.substring(thinkEnd + "</think>".length)
|
text = text.substring(thinkEnd + "</think>".length)
|
||||||
} else {
|
} else if (text.indexOf("<think>") >= 0) {
|
||||||
val thinkStart = text.indexOf("<think>")
|
nlog("WARN: <think> block never closed")
|
||||||
val assistantTag = text.indexOf("assistant")
|
|
||||||
if (thinkStart >= 0) {
|
|
||||||
nlog("WARN: <think> block never closed, no response generated")
|
|
||||||
return ""
|
return ""
|
||||||
} else if (assistantTag >= 0) {
|
|
||||||
text = text.substring(assistantTag + "assistant".length)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return text
|
return text
|
||||||
.replace("<|im_start|>", "")
|
.replace("<|im_start|>", "")
|
||||||
|
|
@ -152,82 +245,9 @@ class ExecuTorchLlmEngine(
|
||||||
.trim()
|
.trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun deployRunnerScript() {
|
|
||||||
val script = """
|
|
||||||
#!/bin/sh
|
|
||||||
cd $RUNNER_DIR
|
|
||||||
export LD_LIBRARY_PATH=$RUNNER_DIR
|
|
||||||
export ADSP_LIBRARY_PATH=$RUNNER_DIR
|
|
||||||
|
|
||||||
TEMP=${'$'}1
|
|
||||||
SEQ_LEN=${'$'}2
|
|
||||||
|
|
||||||
PROMPT=${'$'}(base64 -d $RUNNER_DIR/outputs/prompt.b64)
|
|
||||||
|
|
||||||
rm -f $RUNNER_DIR/outputs/response.txt
|
|
||||||
|
|
||||||
SYSTEM_ARGS=""
|
|
||||||
if [ -s $RUNNER_DIR/outputs/system.b64 ]; then
|
|
||||||
SYSTEM=${'$'}(base64 -d $RUNNER_DIR/outputs/system.b64)
|
|
||||||
SYSTEM_ARGS="--system_prompt"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "${'$'}SYSTEM_ARGS" ]; then
|
|
||||||
exec ./qnn_llama_runner \
|
|
||||||
--model_path hybrid_llama_qnn.pte \
|
|
||||||
--tokenizer_path tokenizer.json \
|
|
||||||
--decoder_model_version qwen3 \
|
|
||||||
--output_path $RUNNER_DIR/outputs/response.txt \
|
|
||||||
--performance_output_path $RUNNER_DIR/outputs/perf.txt \
|
|
||||||
--shared_buffer \
|
|
||||||
--system_prompt "${'$'}SYSTEM" \
|
|
||||||
--prompt "${'$'}PROMPT" \
|
|
||||||
--temperature ${'$'}TEMP \
|
|
||||||
--seq_len ${'$'}SEQ_LEN \
|
|
||||||
--eval_mode 0
|
|
||||||
else
|
|
||||||
exec ./qnn_llama_runner \
|
|
||||||
--model_path hybrid_llama_qnn.pte \
|
|
||||||
--tokenizer_path tokenizer.json \
|
|
||||||
--decoder_model_version qwen3 \
|
|
||||||
--output_path $RUNNER_DIR/outputs/response.txt \
|
|
||||||
--performance_output_path $RUNNER_DIR/outputs/perf.txt \
|
|
||||||
--shared_buffer \
|
|
||||||
--prompt "${'$'}PROMPT" \
|
|
||||||
--temperature ${'$'}TEMP \
|
|
||||||
--seq_len ${'$'}SEQ_LEN \
|
|
||||||
--eval_mode 0
|
|
||||||
fi
|
|
||||||
""".trimIndent()
|
|
||||||
|
|
||||||
writeFileRoot("$RUNNER_DIR/run_llm.sh", script)
|
|
||||||
execRoot("chmod 755 $RUNNER_DIR/run_llm.sh")
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun release() {
|
override fun release() {
|
||||||
|
try { llmModule?.resetNative() } catch (_: Throwable) {}
|
||||||
|
llmModule = null
|
||||||
loaded = false
|
loaded = false
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun writeFileRoot(path: String, content: String) {
|
|
||||||
try {
|
|
||||||
val process = Runtime.getRuntime().exec(arrayOf("su", "-c", "cat > $path"))
|
|
||||||
process.outputStream.bufferedWriter().use { it.write(content) }
|
|
||||||
process.waitFor()
|
|
||||||
} catch (e: Exception) {
|
|
||||||
Log.e(TAG, "writeFileRoot failed: ${e.message}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun execRoot(cmd: String): String {
|
|
||||||
return try {
|
|
||||||
val process = Runtime.getRuntime().exec(arrayOf("su", "-c", cmd))
|
|
||||||
val result = process.inputStream.bufferedReader().readText()
|
|
||||||
val error = process.errorStream.bufferedReader().readText()
|
|
||||||
process.waitFor()
|
|
||||||
if (error.isNotEmpty() && result.isEmpty()) error else result
|
|
||||||
} catch (e: Exception) {
|
|
||||||
Log.e(TAG, "execRoot failed: ${e.message}")
|
|
||||||
""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -142,14 +142,36 @@ class KazeiaPipeline {
|
||||||
* the echo-mode playback through the same path — otherwise each TTS
|
* the echo-mode playback through the same path — otherwise each TTS
|
||||||
* site reimplemented the "streaming-or-fallback" dispatch.
|
* site reimplemented the "streaming-or-fallback" dispatch.
|
||||||
*/
|
*/
|
||||||
suspend fun speakText(text: String) {
|
suspend fun speakText(
|
||||||
|
text: String,
|
||||||
|
// Fires the instant each synthesized sentence starts playing
|
||||||
|
// through the speaker, with the sentence text, audio duration,
|
||||||
|
// and a per-ENVELOPE_WINDOW_MS RMS envelope. Used by
|
||||||
|
// processLlmResponse to defer the KAZEIA chat bubble appearance
|
||||||
|
// until sound is audible, pace word-by-word reveal inside the
|
||||||
|
// bubble, and drive the AudioVisualizerView orb.
|
||||||
|
onSegmentPlaying: ((
|
||||||
|
sentence: String,
|
||||||
|
durationMs: Long,
|
||||||
|
rmsEnvelope: FloatArray,
|
||||||
|
spectrogram: Array<FloatArray>
|
||||||
|
) -> Unit)? = null
|
||||||
|
) {
|
||||||
val ttsEngine = tts ?: return
|
val ttsEngine = tts ?: return
|
||||||
_pipelineState.value = PipelineState.Speaking
|
_pipelineState.value = PipelineState.Speaking
|
||||||
try {
|
try {
|
||||||
val qwen = ttsEngine as? com.kazeia.tts.Qwen3TtsEngine
|
val qwen = ttsEngine as? com.kazeia.tts.Qwen3TtsEngine
|
||||||
if (qwen != null) {
|
if (qwen != null) {
|
||||||
|
qwen.onSegmentPlaying = onSegmentPlaying
|
||||||
qwen.startStreamingSession()
|
qwen.startStreamingSession()
|
||||||
val streamer = com.kazeia.tts.SentenceStreamer { s -> qwen.enqueueSentence(s) }
|
val streamer = com.kazeia.tts.SentenceStreamer { raw ->
|
||||||
|
// Strip emoji / non-speakable pictographs before TTS
|
||||||
|
// so a standalone "😊" doesn't become its own noisy
|
||||||
|
// segment. The chat bubble keeps the original text —
|
||||||
|
// only the audio path sees the cleaned version.
|
||||||
|
val spoken = stripNonSpeakable(raw).trim()
|
||||||
|
if (spoken.isNotEmpty()) qwen.enqueueSentence(spoken)
|
||||||
|
}
|
||||||
streamer.append(text)
|
streamer.append(text)
|
||||||
streamer.flush()
|
streamer.flush()
|
||||||
qwen.endStreamingSession()
|
qwen.endStreamingSession()
|
||||||
|
|
@ -168,6 +190,41 @@ class KazeiaPipeline {
|
||||||
_messages.value = _messages.value + msg
|
_messages.value = _messages.value + msg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Drop emoji + dingbat + pictographic characters so the TTS engine
|
||||||
|
* doesn't try to synthesize them. Covers the main Unicode emoji
|
||||||
|
* blocks (Miscellaneous Symbols, Dingbats, Emoticons, Transport,
|
||||||
|
* Supplemental Symbols and Pictographs, etc.) plus variation
|
||||||
|
* selectors and zero-width joiners that tag emoji sequences.
|
||||||
|
* Keeps everything in the Basic Latin / Latin-1 / Latin Extended
|
||||||
|
* ranges + common French punctuation untouched.
|
||||||
|
*/
|
||||||
|
private fun stripNonSpeakable(text: String): String {
|
||||||
|
val sb = StringBuilder(text.length)
|
||||||
|
var i = 0
|
||||||
|
while (i < text.length) {
|
||||||
|
val cp = text.codePointAt(i)
|
||||||
|
val skip = when {
|
||||||
|
cp in 0x2600..0x27BF -> true // misc symbols + dingbats
|
||||||
|
cp in 0x1F300..0x1F5FF -> true // pictographs
|
||||||
|
cp in 0x1F600..0x1F64F -> true // emoticons
|
||||||
|
cp in 0x1F680..0x1F6FF -> true // transport
|
||||||
|
cp in 0x1F700..0x1F77F -> true // alchemical
|
||||||
|
cp in 0x1F780..0x1F7FF -> true // geometric extended
|
||||||
|
cp in 0x1F800..0x1F8FF -> true // supplemental arrows-c
|
||||||
|
cp in 0x1F900..0x1F9FF -> true // supplemental pictographs
|
||||||
|
cp in 0x1FA00..0x1FAFF -> true // symbols & pictographs extended-A
|
||||||
|
cp == 0x200D -> true // zero-width joiner
|
||||||
|
cp in 0xFE00..0xFE0F -> true // variation selectors
|
||||||
|
cp in 0x1F1E6..0x1F1FF -> true // regional indicators (flags)
|
||||||
|
else -> false
|
||||||
|
}
|
||||||
|
if (!skip) sb.appendCodePoint(cp)
|
||||||
|
i += Character.charCount(cp)
|
||||||
|
}
|
||||||
|
return sb.toString()
|
||||||
|
}
|
||||||
|
|
||||||
fun log(msg: String) {
|
fun log(msg: String) {
|
||||||
Log.i(TAG, msg)
|
Log.i(TAG, msg)
|
||||||
val time = java.text.SimpleDateFormat("HH:mm:ss.SSS", java.util.Locale.FRANCE)
|
val time = java.text.SimpleDateFormat("HH:mm:ss.SSS", java.util.Locale.FRANCE)
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,34 @@ class KazeiaService : Service() {
|
||||||
private val _isListening = MutableStateFlow(false)
|
private val _isListening = MutableStateFlow(false)
|
||||||
val isListening: StateFlow<Boolean> = _isListening
|
val isListening: StateFlow<Boolean> = _isListening
|
||||||
|
|
||||||
|
// Drives the AudioVisualizerView orb. Pushed from the VAD loop
|
||||||
|
// during mic capture (mic RMS, normalized) and from the TTS engine's
|
||||||
|
// onSegmentPlaying callback (TTS RMS envelope per-segment). The view
|
||||||
|
// reads this via collectLatest in ChatActivity; the signals carry
|
||||||
|
// their own state so the visualizer knows whether it's idle, tracking
|
||||||
|
// the mic, or rendering a TTS segment.
|
||||||
|
sealed class VisualizerSignal {
|
||||||
|
object Idle : VisualizerSignal()
|
||||||
|
data class Listening(val micRms: Float) : VisualizerSignal()
|
||||||
|
data class Speaking(
|
||||||
|
val rmsEnvelope: FloatArray,
|
||||||
|
val spectrogram: Array<FloatArray>,
|
||||||
|
val durationMs: Long
|
||||||
|
) : VisualizerSignal()
|
||||||
|
}
|
||||||
|
private val _visualizerSignal = MutableStateFlow<VisualizerSignal>(VisualizerSignal.Idle)
|
||||||
|
val visualizerSignal: StateFlow<VisualizerSignal> = _visualizerSignal
|
||||||
|
|
||||||
|
// Kazeia's orb color is bound to the selected voice so the user
|
||||||
|
// visually associates a palette with the speaker they picked. UI
|
||||||
|
// sets this whenever the voice spinner changes; the orb view
|
||||||
|
// listens via the StateFlow and tweens the current → target color.
|
||||||
|
private val _voiceColor = MutableStateFlow(0xFFBCA4E8.toInt()) // lavender = Damien default
|
||||||
|
val voiceColor: StateFlow<Int> = _voiceColor
|
||||||
|
|
||||||
|
/** Called by the UI whenever the voice selector changes. */
|
||||||
|
fun setVoiceColor(color: Int) { _voiceColor.value = color }
|
||||||
|
|
||||||
private val _debugMode = MutableStateFlow(false)
|
private val _debugMode = MutableStateFlow(false)
|
||||||
val debugMode: StateFlow<Boolean> = _debugMode
|
val debugMode: StateFlow<Boolean> = _debugMode
|
||||||
|
|
||||||
|
|
@ -174,6 +202,12 @@ class KazeiaService : Service() {
|
||||||
if (!::llm.isInitialized || !llm.isLoaded()) {
|
if (!::llm.isInitialized || !llm.isLoaded()) {
|
||||||
log("Stream LLM: LLM not ready"); return@launch
|
log("Stream LLM: LLM not ready"); return@launch
|
||||||
}
|
}
|
||||||
|
// Set pipeline state to Speaking so the continuous-
|
||||||
|
// listening mic loop (line ~824) drops frames during
|
||||||
|
// TTS playback. Without this, the mic picks up the
|
||||||
|
// tablet speaker and feeds our own TTS back into STT,
|
||||||
|
// creating an infinite loop.
|
||||||
|
_pipelineState.value = PipelineState.Speaking
|
||||||
qwenTts.startStreamingSession()
|
qwenTts.startStreamingSession()
|
||||||
val tStart = System.currentTimeMillis()
|
val tStart = System.currentTimeMillis()
|
||||||
var firstSentenceLogged = false
|
var firstSentenceLogged = false
|
||||||
|
|
@ -199,6 +233,9 @@ class KazeiaService : Service() {
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
log("Stream LLM error: ${e.message}")
|
log("Stream LLM error: ${e.message}")
|
||||||
e.printStackTrace()
|
e.printStackTrace()
|
||||||
|
} finally {
|
||||||
|
// Back to Idle so the next mic frame is accepted.
|
||||||
|
_pipelineState.value = PipelineState.Idle
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -414,9 +451,17 @@ class KazeiaService : Service() {
|
||||||
this, Manifest.permission.RECORD_AUDIO
|
this, Manifest.permission.RECORD_AUDIO
|
||||||
) == PackageManager.PERMISSION_GRANTED
|
) == PackageManager.PERMISSION_GRANTED
|
||||||
|
|
||||||
|
// FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK is required so ColorOS (and
|
||||||
|
// stock Android 14+ policies) don't mute the TTS AudioTrack with
|
||||||
|
// "clientVolume" at ~600 ms after play(). Without it the FGS was
|
||||||
|
// classified as mic-only or special-use and background-audio
|
||||||
|
// hardening silenced it. Combine with MICROPHONE so mic input keeps
|
||||||
|
// working during STT.
|
||||||
val fgsType = if (hasMicPermission) {
|
val fgsType = if (hasMicPermission) {
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE or
|
||||||
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK
|
||||||
} else {
|
} else {
|
||||||
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK or
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -450,7 +495,7 @@ class KazeiaService : Service() {
|
||||||
// TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS
|
// TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS
|
||||||
_loadingState.value = LoadingState(15, "TTS Qwen3…")
|
_loadingState.value = LoadingState(15, "TTS Qwen3…")
|
||||||
try {
|
try {
|
||||||
val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir) { msg -> log("[TTS] $msg") }
|
val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir, this@KazeiaService) { msg -> log("[TTS] $msg") }
|
||||||
qwenTts.load("$modelsDir/qwen3-tts-npu")
|
qwenTts.load("$modelsDir/qwen3-tts-npu")
|
||||||
if (qwenTts.isLoaded()) {
|
if (qwenTts.isLoaded()) {
|
||||||
tts = qwenTts
|
tts = qwenTts
|
||||||
|
|
@ -518,7 +563,7 @@ class KazeiaService : Service() {
|
||||||
|
|
||||||
// LLM: Qwen3-4B on Hexagon V79 via qnn_llama_runner.
|
// LLM: Qwen3-4B on Hexagon V79 via qnn_llama_runner.
|
||||||
_loadingState.value = LoadingState(50, "LLM Qwen3-4B NPU…")
|
_loadingState.value = LoadingState(50, "LLM Qwen3-4B NPU…")
|
||||||
llm = ExecuTorchLlmEngine { msg -> log(msg) }
|
llm = ExecuTorchLlmEngine(this@KazeiaService) { msg -> log(msg) }
|
||||||
try {
|
try {
|
||||||
llm.load("${KazeiaApplication.MODELS_DIR}/qwen3-4b", com.kazeia.core.LlmConfig())
|
llm.load("${KazeiaApplication.MODELS_DIR}/qwen3-4b", com.kazeia.core.LlmConfig())
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
|
|
@ -583,6 +628,16 @@ class KazeiaService : Service() {
|
||||||
if (chatterbox != null) {
|
if (chatterbox != null) {
|
||||||
chatterbox.setVoice(voicePath)
|
chatterbox.setVoice(voicePath)
|
||||||
log("Voice set to: $voicePath")
|
log("Voice set to: $voicePath")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
val qwen = tts as? com.kazeia.tts.Qwen3TtsEngine
|
||||||
|
if (qwen != null) {
|
||||||
|
// Hot-swap prefix/suffix embeddings — no model reload. Takes
|
||||||
|
// effect from the NEXT synthesized segment (current in-flight
|
||||||
|
// one, if any, finishes with the old voice since the arrays
|
||||||
|
// are already in its closure).
|
||||||
|
qwen.setVoice(voicePath)
|
||||||
|
log("Voice set to: $voicePath")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -835,6 +890,14 @@ class KazeiaService : Service() {
|
||||||
for (s in frame) sumSq += s.toLong() * s.toLong()
|
for (s in frame) sumSq += s.toLong() * s.toLong()
|
||||||
val rms = Math.sqrt(sumSq.toDouble() / frameSize).toInt()
|
val rms = Math.sqrt(sumSq.toDouble() / frameSize).toInt()
|
||||||
|
|
||||||
|
// Drive the visualizer orb. Normalize with the same
|
||||||
|
// sqrt squashing used for TTS so loud peaks don't
|
||||||
|
// saturate and quiet speech is still visible. The
|
||||||
|
// visualizer stays in Listening mode; it will swap
|
||||||
|
// to Speaking or Idle when pipelineState moves on.
|
||||||
|
val rmsNorm = kotlin.math.sqrt((rms / 6000f).coerceIn(0f, 1f))
|
||||||
|
_visualizerSignal.value = VisualizerSignal.Listening(rmsNorm)
|
||||||
|
|
||||||
// Log RMS every second for calibration
|
// Log RMS every second for calibration
|
||||||
if (frameCount % 10 == 0) {
|
if (frameCount % 10 == 0) {
|
||||||
Log.d(TAG, "VAD RMS=$rms (threshold=$silenceThreshold)")
|
Log.d(TAG, "VAD RMS=$rms (threshold=$silenceThreshold)")
|
||||||
|
|
@ -1184,12 +1247,99 @@ class KazeiaService : Service() {
|
||||||
log("LLM stats: ${result.tokenCount} tokens in ${result.timeMs}ms (${result.tokensPerSecond} tok/s)")
|
log("LLM stats: ${result.tokenCount} tokens in ${result.timeMs}ms (${result.tokensPerSecond} tok/s)")
|
||||||
|
|
||||||
if (responseText.isNotEmpty()) {
|
if (responseText.isNotEmpty()) {
|
||||||
addMessage(ChatMessage(role = ChatMessage.Role.KAZEIA, text = responseText))
|
// Mark the pipeline as Speaking for the duration of TTS so
|
||||||
pipeline.speakText(responseText)
|
// the continuous-listening mic loop drops frames and we
|
||||||
|
// don't feed our own speaker output back into STT.
|
||||||
|
_pipelineState.value = PipelineState.Speaking
|
||||||
|
// Create a KAZEIA bubble up-front. Until the first TTS
|
||||||
|
// segment actually starts playing the bubble shows an
|
||||||
|
// animated "." → ".." → "..." typing indicator so the
|
||||||
|
// user knows Kazeia is thinking/synthesising; once the
|
||||||
|
// first segment plays the dots are cleared and the
|
||||||
|
// per-sentence word reveal takes over.
|
||||||
|
val bubble = ChatMessage(role = ChatMessage.Role.KAZEIA, text = ".")
|
||||||
|
addMessage(bubble)
|
||||||
|
val revealScope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.Default)
|
||||||
|
var revealedSoFar = ""
|
||||||
|
val revealJobs = mutableListOf<kotlinx.coroutines.Job>()
|
||||||
|
val firstSegmentSeen = java.util.concurrent.atomic.AtomicBoolean(false)
|
||||||
|
val typingJob = revealScope.launch {
|
||||||
|
var tick = 0
|
||||||
|
while (!firstSegmentSeen.get()) {
|
||||||
|
val dots = ".".repeat(1 + (tick % 3)) // . → .. → ...
|
||||||
|
updateMessageText(bubble.id, dots)
|
||||||
|
tick++
|
||||||
|
kotlinx.coroutines.delay(400)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
pipeline.speakText(responseText) { sentence, durationMs, envelope, spectrogram ->
|
||||||
|
// First segment: stop the typing indicator and
|
||||||
|
// reset the bubble to empty so the word reveal
|
||||||
|
// doesn't collide with the dots.
|
||||||
|
if (firstSegmentSeen.compareAndSet(false, true)) {
|
||||||
|
try { typingJob.cancel() } catch (_: Exception) {}
|
||||||
|
updateMessageText(bubble.id, "")
|
||||||
|
}
|
||||||
|
// Push the envelope + spectrogram to the
|
||||||
|
// visualizer at the same moment the MediaPlayer
|
||||||
|
// starts playing so the orb reacts to this
|
||||||
|
// segment's actual energy and the in-sphere
|
||||||
|
// spectrum bars match the audio content.
|
||||||
|
_visualizerSignal.value =
|
||||||
|
VisualizerSignal.Speaking(envelope, spectrogram, durationMs)
|
||||||
|
// Start a coroutine that appends one word at a time
|
||||||
|
// over the segment's audio duration. Words are
|
||||||
|
// separated on whitespace; punctuation rides with
|
||||||
|
// the trailing word. The prefix (= text already
|
||||||
|
// revealed from previous sentences) carries over so
|
||||||
|
// earlier sentences stay on screen.
|
||||||
|
val prefix = revealedSoFar
|
||||||
|
val words = sentence.split(Regex("\\s+")).filter { it.isNotBlank() }
|
||||||
|
revealedSoFar =
|
||||||
|
if (prefix.isEmpty()) sentence
|
||||||
|
else "$prefix $sentence"
|
||||||
|
if (words.isEmpty()) return@speakText
|
||||||
|
val perWordMs = (durationMs / words.size).coerceAtLeast(40L)
|
||||||
|
val job = revealScope.launch {
|
||||||
|
val sb = StringBuilder(prefix)
|
||||||
|
if (prefix.isNotEmpty()) sb.append(' ')
|
||||||
|
// Immediately reveal the first word so there's
|
||||||
|
// no visible gap between audio start and text.
|
||||||
|
sb.append(words[0])
|
||||||
|
updateMessageText(bubble.id, sb.toString())
|
||||||
|
for (i in 1 until words.size) {
|
||||||
|
kotlinx.coroutines.delay(perWordMs)
|
||||||
|
sb.append(' ').append(words[i])
|
||||||
|
updateMessageText(bubble.id, sb.toString())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
revealJobs.add(job)
|
||||||
|
}
|
||||||
|
// After all segments finished playing, ensure the full
|
||||||
|
// text is visible even if a reveal job was racing.
|
||||||
|
revealJobs.forEach { try { it.join() } catch (_: Exception) {} }
|
||||||
|
updateMessageText(bubble.id, responseText)
|
||||||
|
} finally {
|
||||||
|
// Defensive: cancel the typing dots in case no
|
||||||
|
// segment ever fired (e.g. the response was entirely
|
||||||
|
// emojis and got stripped empty).
|
||||||
|
firstSegmentSeen.set(true)
|
||||||
|
try { typingJob.cancel() } catch (_: Exception) {}
|
||||||
_pipelineState.value = if (_isListening.value)
|
_pipelineState.value = if (_isListening.value)
|
||||||
PipelineState.Listening else PipelineState.Idle
|
PipelineState.Listening else PipelineState.Idle
|
||||||
|
// If we're going back to mic listening, the VAD loop
|
||||||
|
// will keep pushing Listening signals; otherwise drop
|
||||||
|
// to Idle so the orb settles back to its breathing
|
||||||
|
// baseline.
|
||||||
|
if (!_isListening.value) {
|
||||||
|
_visualizerSignal.value = VisualizerSignal.Idle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
_pipelineState.value = if (_isListening.value)
|
||||||
|
PipelineState.Listening else PipelineState.Idle
|
||||||
|
}
|
||||||
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
_aiWorkload.value = _aiWorkload.value.copy(llmActive = false)
|
_aiWorkload.value = _aiWorkload.value.copy(llmActive = false)
|
||||||
|
|
@ -1207,6 +1357,19 @@ class KazeiaService : Service() {
|
||||||
_messages.value = _messages.value + message
|
_messages.value = _messages.value + message
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Replace the text of an existing message (identified by id) in the
|
||||||
|
* message list. Used by the progressive-reveal flow to grow a
|
||||||
|
* KAZEIA message word-by-word as TTS audio plays. */
|
||||||
|
private fun updateMessageText(id: Long, newText: String) {
|
||||||
|
val current = _messages.value
|
||||||
|
val idx = current.indexOfLast { it.id == id }
|
||||||
|
if (idx < 0) return
|
||||||
|
val m = current[idx]
|
||||||
|
_messages.value = current.toMutableList().also {
|
||||||
|
it[idx] = m.copy(text = newText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private fun createNotification(): Notification {
|
private fun createNotification(): Notification {
|
||||||
val intent = Intent(this, ChatActivity::class.java)
|
val intent = Intent(this, ChatActivity::class.java)
|
||||||
val pendingIntent = PendingIntent.getActivity(
|
val pendingIntent = PendingIntent.getActivity(
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import kotlin.coroutines.resume
|
||||||
*/
|
*/
|
||||||
class Qwen3TtsEngine(
|
class Qwen3TtsEngine(
|
||||||
private val nativeLibDir: String,
|
private val nativeLibDir: String,
|
||||||
|
private val context: android.content.Context? = null,
|
||||||
private val onLog: ((String) -> Unit)? = null
|
private val onLog: ((String) -> Unit)? = null
|
||||||
) : TtsEngine {
|
) : TtsEngine {
|
||||||
|
|
||||||
|
|
@ -88,6 +89,38 @@ class Qwen3TtsEngine(
|
||||||
private const val TOKEN_USER = 872
|
private const val TOKEN_USER = 872
|
||||||
private const val TOKEN_ASSISTANT = 1042
|
private const val TOKEN_ASSISTANT = 1042
|
||||||
private const val TOKEN_NEWLINE = 198
|
private const val TOKEN_NEWLINE = 198
|
||||||
|
|
||||||
|
// Streaming decode: when true, BigVGAN dispatches a chunk's audio as
|
||||||
|
// soon as SEQ_LEN codes are ready from the talker/CP loop rather than
|
||||||
|
// waiting for all tokens. For long segments this overlaps the final
|
||||||
|
// BigVGAN passes with ongoing talker/CP work on Hexagon, cutting the
|
||||||
|
// first-audio latency by ~4 s. Short segments (<SEQ_LEN codes) fall
|
||||||
|
// back to the single-chunk path with zero difference. Flag exists so
|
||||||
|
// the sequential path can be re-enabled for A/B comparison.
|
||||||
|
private const val USE_STREAMING_DECODE = true
|
||||||
|
|
||||||
|
// ColorOS Audio Hardening silently mutes AudioTrack in background/FGS
|
||||||
|
// context (confirmed via `event:muted updated source:clientVolume`
|
||||||
|
// logs, same behaviour across USAGE_MEDIA, USAGE_ASSISTANT, and
|
||||||
|
// USAGE_VOICE_COMMUNICATION). When this flag is true, each
|
||||||
|
// generated segment is written as a WAV to app-owned shared
|
||||||
|
// storage and played via MediaPlayer instead. Slightly slower
|
||||||
|
// (WAV write + MediaPlayer prepare add ~150 ms per segment) but
|
||||||
|
// it's the only reliable path to audible output on this device.
|
||||||
|
private const val USE_MEDIAPLAYER_FALLBACK = true
|
||||||
|
|
||||||
|
// Window size for the TTS→visualizer RMS sidecar. 50 ms at 24 kHz
|
||||||
|
// = 1200 samples/window — small enough for a 60 fps visualizer to
|
||||||
|
// track formants, large enough to run at negligible CPU cost.
|
||||||
|
const val ENVELOPE_WINDOW_MS = 50
|
||||||
|
// FFT size for the spectrum-in-sphere sidecar. 1024 samples at
|
||||||
|
// 24 kHz = 43 ms — slightly narrower than the hop so each frame
|
||||||
|
// gives a clean snapshot centered on its hop boundary.
|
||||||
|
private const val FFT_SIZE = 1024
|
||||||
|
// Number of log-spaced bands 120 Hz–4 kHz rendered as vertical
|
||||||
|
// bars inside the sphere during Speaking. 12 feels like a real
|
||||||
|
// spectrometer without cluttering at smaller sphere sizes.
|
||||||
|
const val SPECTRUM_BANDS = 12
|
||||||
}
|
}
|
||||||
|
|
||||||
private var ortEnv: OrtEnvironment? = null
|
private var ortEnv: OrtEnvironment? = null
|
||||||
|
|
@ -243,7 +276,12 @@ class Qwen3TtsEngine(
|
||||||
return session
|
return session
|
||||||
}
|
}
|
||||||
|
|
||||||
// Speech decoder V2 on CPU (HTP tested: BigVGAN convolutions too slow to compile)
|
// Speech decoder V2 on CPU. Two paths tried, both worse than CPU:
|
||||||
|
// - HTP: BigVGAN convolutions too slow to compile (timeout)
|
||||||
|
// - GPU Adreno via QNN GPU EP: model loads but per-phrase
|
||||||
|
// inference is ~3.5 s vs ~2 s on CPU (GPU/CPU memory transfer
|
||||||
|
// overhead dominates for this conv-heavy model)
|
||||||
|
// CPU 8-thread stays the practical optimum.
|
||||||
val v2Path = "$path/v2_pre_conv"
|
val v2Path = "$path/v2_pre_conv"
|
||||||
if (File("$v2Path/model.onnx").exists()) {
|
if (File("$v2Path/model.onnx").exists()) {
|
||||||
nlog("Loading V2 speech decoder (CPU ONNX)...")
|
nlog("Loading V2 speech decoder (CPU ONNX)...")
|
||||||
|
|
@ -570,8 +608,53 @@ class Qwen3TtsEngine(
|
||||||
|
|
||||||
override fun isLoaded(): Boolean = loaded
|
override fun isLoaded(): Boolean = loaded
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hot-swap the speaker prefix/suffix embeddings used for voice
|
||||||
|
* conditioning. [voicePath] is a WAV path like
|
||||||
|
* `/…/voix/elodie.wav` — we derive the voice id from its basename
|
||||||
|
* and look for matching `<id>_voice_prefix.bin` + `<id>_voice_suffix.bin`
|
||||||
|
* in the model dir. If both files exist they replace the current
|
||||||
|
* [damienVoicePrefix] / [damienVoiceSuffix] arrays so the next
|
||||||
|
* segment generated uses the new voice. If either file is missing
|
||||||
|
* we log a warning and keep the current voice — per-voice
|
||||||
|
* prefix/suffix files are offline-generated via
|
||||||
|
* scripts/prepare_tts_native.py; run once per voice WAV and
|
||||||
|
* `adb push` into the model dir to enable.
|
||||||
|
*
|
||||||
|
* Thread-safety: the arrays are read by the synth worker on
|
||||||
|
* Dispatchers.IO; replacing a reference via a volatile var is
|
||||||
|
* atomic on the JVM so a mid-segment replacement just takes
|
||||||
|
* effect on the next segment boundary.
|
||||||
|
*/
|
||||||
fun setVoice(voicePath: String) {
|
fun setVoice(voicePath: String) {
|
||||||
nlog("Voice: $voicePath")
|
val modelDir = "/data/local/tmp/kazeia/models/qwen3-tts-npu"
|
||||||
|
val id = java.io.File(voicePath).nameWithoutExtension.lowercase()
|
||||||
|
val prefixFile = java.io.File("$modelDir/${id}_voice_prefix.bin")
|
||||||
|
val suffixFile = java.io.File("$modelDir/${id}_voice_suffix.bin")
|
||||||
|
if (!prefixFile.exists() || !suffixFile.exists()) {
|
||||||
|
nlog("Voice '$id' not available (missing ${prefixFile.name} or ${suffixFile.name}); keeping current voice. " +
|
||||||
|
"Run scripts/prepare_tts_native.py with this WAV to generate the files.")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
val pBytes = prefixFile.readBytes()
|
||||||
|
val pHead = java.nio.ByteBuffer.wrap(pBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN)
|
||||||
|
val nPref = pHead.int; val dimPref = pHead.int
|
||||||
|
if (dimPref != TALKER_DIM) throw IllegalStateException("prefix dim $dimPref != $TALKER_DIM")
|
||||||
|
val newPrefix = Array(nPref) { FloatArray(TALKER_DIM).also { arr -> for (j in 0 until TALKER_DIM) arr[j] = pHead.float } }
|
||||||
|
|
||||||
|
val sBytes = suffixFile.readBytes()
|
||||||
|
val sHead = java.nio.ByteBuffer.wrap(sBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN)
|
||||||
|
val nSuf = sHead.int; val dimSuf = sHead.int
|
||||||
|
if (dimSuf != TALKER_DIM) throw IllegalStateException("suffix dim $dimSuf != $TALKER_DIM")
|
||||||
|
val newSuffix = Array(nSuf) { FloatArray(TALKER_DIM).also { arr -> for (j in 0 until TALKER_DIM) arr[j] = sHead.float } }
|
||||||
|
|
||||||
|
damienVoicePrefix = newPrefix
|
||||||
|
damienVoiceSuffix = newSuffix
|
||||||
|
nlog("Voice switched to '$id' ($nPref prefix + $nSuf suffix embeds)")
|
||||||
|
} catch (e: Exception) {
|
||||||
|
nlog("Voice swap failed for '$id': ${e.message}")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override suspend fun synthesize(text: String, language: String): TtsResult {
|
override suspend fun synthesize(text: String, language: String): TtsResult {
|
||||||
|
|
@ -2669,7 +2752,11 @@ class Qwen3TtsEngine(
|
||||||
|
|
||||||
/** PTE pipeline from pre-computed embeddings (prefill + trailing). */
|
/** PTE pipeline from pre-computed embeddings (prefill + trailing). */
|
||||||
private fun runInterleavedPteFromEmbeds(
|
private fun runInterleavedPteFromEmbeds(
|
||||||
prefillEmbeds: List<FloatArray>, trailingEmbeds: List<FloatArray>, maxGenTokens: Int
|
prefillEmbeds: List<FloatArray>, trailingEmbeds: List<FloatArray>, maxGenTokens: Int,
|
||||||
|
// Invoked synchronously after each generated step with (stepIdx, 16-codebook codes).
|
||||||
|
// Streaming callers use it to dispatch SEQ_LEN-sized chunks to the BigVGAN pipeline
|
||||||
|
// as soon as they are ready. null preserves the original batch behaviour.
|
||||||
|
onCodeStep: ((step: Int, codes: IntArray) -> Unit)? = null
|
||||||
): Array<IntArray> {
|
): Array<IntArray> {
|
||||||
val talkerMod = talkerPteModule ?: return emptyArray()
|
val talkerMod = talkerPteModule ?: return emptyArray()
|
||||||
val cpMod = cpPteModule ?: return emptyArray()
|
val cpMod = cpPteModule ?: return emptyArray()
|
||||||
|
|
@ -2747,6 +2834,7 @@ class Qwen3TtsEngine(
|
||||||
totalCpMs += System.currentTimeMillis() - tCp0
|
totalCpMs += System.currentTimeMillis() - tCp0
|
||||||
for (cb in 1 until NUM_CODEBOOKS) codes[cb] = cpCodes[cb - 1]
|
for (cb in 1 until NUM_CODEBOOKS) codes[cb] = cpCodes[cb - 1]
|
||||||
allCodes.add(codes); generatedCb0.add(currentCb0)
|
allCodes.add(codes); generatedCb0.add(currentCb0)
|
||||||
|
onCodeStep?.invoke(genStep, codes)
|
||||||
|
|
||||||
if (genStep < 3) nlog("Step ${genStep+1}: cb0=$currentCb0 cb1=${codes[1]}")
|
if (genStep < 3) nlog("Step ${genStep+1}: cb0=$currentCb0 cb1=${codes[1]}")
|
||||||
|
|
||||||
|
|
@ -3316,6 +3404,18 @@ class Qwen3TtsEngine(
|
||||||
private var sessionTrack: AudioTrack? = null
|
private var sessionTrack: AudioTrack? = null
|
||||||
private var sessionChannel: kotlinx.coroutines.channels.Channel<String>? = null
|
private var sessionChannel: kotlinx.coroutines.channels.Channel<String>? = null
|
||||||
private var sessionJob: kotlinx.coroutines.Job? = null
|
private var sessionJob: kotlinx.coroutines.Job? = null
|
||||||
|
private var sessionKeepAliveJob: kotlinx.coroutines.Job? = null
|
||||||
|
private var sessionFocusRequest: android.media.AudioFocusRequest? = null
|
||||||
|
// Total PCM frames queued to sessionTrack across all segments in this session.
|
||||||
|
// endStreamingSession() polls track.playbackHeadPosition until it reaches this
|
||||||
|
// count before calling stop(), so the tail sentence isn't clipped.
|
||||||
|
// Uses AtomicLong because both the session worker and the keep-alive watchdog
|
||||||
|
// call writeAndCount concurrently.
|
||||||
|
private val sessionFramesWritten = java.util.concurrent.atomic.AtomicLong(0)
|
||||||
|
// True while a real-audio generate call is in progress. The keep-alive
|
||||||
|
// watchdog skips silence injection while this is set, so silence never
|
||||||
|
// interleaves with speech inside a segment.
|
||||||
|
private val sessionGenActive = java.util.concurrent.atomic.AtomicBoolean(false)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open a streaming TTS session backed by a persistent AudioTrack. After
|
* Open a streaming TTS session backed by a persistent AudioTrack. After
|
||||||
|
|
@ -3324,13 +3424,403 @@ class Qwen3TtsEngine(
|
||||||
* track as soon as it's decoded. Call endStreamingSession() to flush
|
* track as soon as it's decoded. Call endStreamingSession() to flush
|
||||||
* the queue and release the track.
|
* the queue and release the track.
|
||||||
*/
|
*/
|
||||||
fun startStreamingSession() {
|
// MediaPlayer-based fallback session state. If ColorOS mutes our
|
||||||
if (sessionTrack != null) return // already open
|
// AudioTrack (as observed repeatedly — `event:muted updated source:
|
||||||
val track = AudioTrack.Builder()
|
// clientVolume` right after play()), we instead render each segment
|
||||||
.setAudioAttributes(AudioAttributes.Builder()
|
// as a WAV file on shared storage and play it back via MediaPlayer,
|
||||||
.setUsage(AudioAttributes.USAGE_MEDIA)
|
// which uses a completely different internal audio pipeline that
|
||||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
// doesn't get silenced by the background playback policy.
|
||||||
|
private var sessionMpQueue: kotlinx.coroutines.channels.Channel<String>? = null
|
||||||
|
private var sessionMpJob: kotlinx.coroutines.Job? = null
|
||||||
|
private val sessionMpSegIdx = java.util.concurrent.atomic.AtomicInteger(0)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fires the moment a synthesized segment starts playing through the
|
||||||
|
* speaker. Carries the sentence text, audio duration, per-window RMS
|
||||||
|
* envelope (for orb amplitude) and per-window log-spaced band
|
||||||
|
* spectrogram (for the spectrum-in-sphere visualizer). All three
|
||||||
|
* share the same time axis — one entry per [ENVELOPE_WINDOW_MS].
|
||||||
|
*/
|
||||||
|
var onSegmentPlaying: ((
|
||||||
|
sentence: String,
|
||||||
|
durationMs: Long,
|
||||||
|
rmsEnvelope: FloatArray,
|
||||||
|
spectrogram: Array<FloatArray>
|
||||||
|
) -> Unit)? = null
|
||||||
|
|
||||||
|
private fun startStreamingSessionMp() {
|
||||||
|
if (sessionMpQueue != null) return
|
||||||
|
sessionMpSegIdx.set(0)
|
||||||
|
val sentenceChan = kotlinx.coroutines.channels.Channel<String>(
|
||||||
|
capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
|
||||||
|
)
|
||||||
|
// Pipeline: synth worker produces WAV paths, playback worker runs
|
||||||
|
// them through a pair of MediaPlayer instances chained via
|
||||||
|
// setNextMediaPlayer() so there's zero-gap transition between
|
||||||
|
// segments (no DAC/output routing "pop" the user was hearing as
|
||||||
|
// "beg beg" with one player-per-seg). The rendezvous channel has
|
||||||
|
// capacity 2 so the synth worker can stay one seg ahead of the
|
||||||
|
// currently playing seg without growing disk use.
|
||||||
|
// Carry (segIdx, wavPath, sentence, durationMs) together so the
|
||||||
|
// playback worker can invoke onSegmentPlaying with the matching
|
||||||
|
// text and audio length when the segment actually starts playing.
|
||||||
|
val wavChan = kotlinx.coroutines.channels.Channel<SegmentReady>(capacity = 2)
|
||||||
|
val scope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO)
|
||||||
|
val synthJob = scope.launch {
|
||||||
|
for (sentence in sentenceChan) {
|
||||||
|
try {
|
||||||
|
val segIdx = sessionMpSegIdx.getAndIncrement()
|
||||||
|
val tSynth = System.currentTimeMillis()
|
||||||
|
val audio = generateSegmentAudioVC(sentence, segIdx)
|
||||||
|
if (audio.isEmpty()) continue
|
||||||
|
val wavPath = "${context?.cacheDir?.absolutePath ?: "/data/local/tmp/kazeia"}/tts_seg_${segIdx}.wav"
|
||||||
|
saveWav(wavPath, audio)
|
||||||
|
val durationMs = audio.size * 1000L / SR
|
||||||
|
val envelope = computeRmsEnvelope(audio)
|
||||||
|
val spectrogram = computeSpectrogram(audio)
|
||||||
|
nlog("MP seg $segIdx synthesized (${System.currentTimeMillis() - tSynth}ms, ${durationMs}ms audio, ${envelope.size} env × ${SPECTRUM_BANDS} bands), queued for playback")
|
||||||
|
wavChan.send(SegmentReady(segIdx, wavPath, sentence, durationMs, envelope, spectrogram))
|
||||||
|
} catch (e: Exception) {
|
||||||
|
nlog("MP synth error: ${e.message}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wavChan.close()
|
||||||
|
}
|
||||||
|
val playJob = scope.launch { playChainedMediaPlayers(wavChan) }
|
||||||
|
val combined = scope.launch { synthJob.join(); playJob.join() }
|
||||||
|
sessionMpQueue = sentenceChan; sessionMpJob = combined
|
||||||
|
nlog("streaming session opened (MediaPlayer fallback, chained)")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Drive the WAV playback pipeline with two MediaPlayer instances
|
||||||
|
* chained via setNextMediaPlayer() so each segment flows into the
|
||||||
|
* next without re-arming the audio output (which caused audible
|
||||||
|
* "pops" between segments when one player stopped and another
|
||||||
|
     * started). Consumes [SegmentReady] items from [wavChan] and
|
||||||
|
* deletes each file after it finishes playing. Suspends until the
|
||||||
|
* channel closes AND the final segment finishes.
|
||||||
|
*/
|
||||||
|
private suspend fun playChainedMediaPlayers(
    wavChan: kotlinx.coroutines.channels.ReceiveChannel<SegmentReady>
) {
    // Flow: receive a segment, prepare a MediaPlayer for it, link it to the
    // currently playing one with setNextMediaPlayer, wait for the current one
    // to finish, delete its WAV, rotate. A segment whose player cannot be
    // prepared is skipped (and its file deleted) instead of stalling the chain.
    // Cancellation of the calling coroutine is propagated, never swallowed.
    val attrs = android.media.AudioAttributes.Builder()
        .setUsage(android.media.AudioAttributes.USAGE_MEDIA)
        .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
        .build()

    fun releaseQuietly(mp: android.media.MediaPlayer) {
        try { mp.release() } catch (_: Exception) {}
    }

    fun deleteQuietly(path: String) {
        try { java.io.File(path).delete() } catch (_: Exception) {}
    }

    // UI notification is best-effort: a throwing callback must not stop audio.
    fun notifyPlaying(seg: SegmentReady) {
        try {
            onSegmentPlaying?.invoke(seg.sentence, seg.durationMs, seg.rmsEnvelope, seg.spectrogram)
        } catch (_: Exception) {}
    }

    // Prepare a MediaPlayer for [path], suspending until it is ready.
    // Throws on any failure (setup exception or OnError during prepare) and
    // releases the player before throwing, so the caller never receives a
    // player that is in the Error state.
    suspend fun prepareMp(path: String, segIdx: Int): android.media.MediaPlayer {
        val mp = android.media.MediaPlayer()
        try {
            mp.setAudioAttributes(attrs)
            mp.setDataSource(path)
            kotlinx.coroutines.suspendCancellableCoroutine<Unit> { cont ->
                mp.setOnPreparedListener { if (cont.isActive) cont.resume(Unit) {} }
                mp.setOnErrorListener { _, what, extra ->
                    nlog("MP seg $segIdx prepare error: what=$what extra=$extra")
                    if (cont.isActive) {
                        cont.resumeWith(
                            Result.failure(
                                java.io.IOException("MediaPlayer prepare failed: what=$what extra=$extra")
                            )
                        )
                    }
                    true
                }
                cont.invokeOnCancellation { releaseQuietly(mp) }
                mp.prepareAsync()
            }
        } catch (t: Throwable) {
            releaseQuietly(mp)
            throw t
        }
        return mp
    }

    // Per-player book-keeping. `done` completes when the player's
    // OnCompletionListener or OnErrorListener fires, so the loop can tell
    // *before* calling setNextMediaPlayer whether the chain will actually
    // trigger (setNextMediaPlayer on a player already in the Completed state
    // is a silent no-op).
    class Live(
        val mp: android.media.MediaPlayer,
        val info: SegmentReady,
        val done: kotlinx.coroutines.CompletableDeferred<Unit>
    )

    // Attach the playback listeners. Called right after prepare and before the
    // player can start, so a completion or error can never be missed.
    fun arm(info: SegmentReady, mp: android.media.MediaPlayer): Live {
        val done = kotlinx.coroutines.CompletableDeferred<Unit>()
        mp.setOnCompletionListener {
            releaseQuietly(it)
            done.complete(Unit)
        }
        mp.setOnErrorListener { _, what, extra ->
            nlog("MP seg ${info.segIdx} play error: what=$what extra=$extra")
            done.complete(Unit)
            true
        }
        return Live(mp, info, done)
    }

    // Receive segments until one prepares successfully. Returns null once the
    // channel is closed and drained.
    suspend fun receivePrepared(): Live? {
        while (true) {
            val seg = wavChan.receiveCatching().getOrNull() ?: return null
            try {
                return arm(seg, prepareMp(seg.wavPath, seg.segIdx))
            } catch (e: kotlinx.coroutines.CancellationException) {
                throw e
            } catch (e: Exception) {
                nlog("MP seg ${seg.segIdx} skipped, prepare failed: ${e.message}")
                deleteQuietly(seg.wavPath)
            }
        }
    }

    var current: Live? = null
    var pending: Live? = null

    try {
        // Bootstrap with the first playable segment.
        val first = receivePrepared() ?: return
        current = first
        first.mp.start()
        notifyPlaying(first.info)
        nlog("MP seg ${first.info.segIdx} started (${first.info.durationMs}ms)")

        var playing: Live = first
        while (true) {
            val next = receivePrepared() ?: break
            pending = next

            // Try to chain so Android auto-starts `next` when `playing`
            // finishes (gapless hand-over). Skipped if `playing` has already
            // completed; the explicit start() below covers that case.
            var chained = false
            try {
                if (!playing.done.isCompleted) {
                    playing.mp.setNextMediaPlayer(next.mp)
                    chained = true
                }
            } catch (e: Exception) {
                nlog("MP seg ${next.info.segIdx} setNext failed: ${e.message}")
            }

            // Wait for current playback to finish before rotating.
            playing.done.await()
            releaseQuietly(playing.mp) // also covers the error path, which does not release
            deleteQuietly(playing.info.wavPath)

            // `next` may already be over (very short clip, or it errored).
            val finishedEarly = next.done.isCompleted
            val autoStarted = !finishedEarly && try {
                chained && (next.mp.isPlaying || next.mp.currentPosition > 0)
            } catch (_: Exception) {
                false
            }
            when {
                finishedEarly -> nlog("MP seg ${next.info.segIdx} already finished before rotation")
                autoStarted -> nlog("MP seg ${next.info.segIdx} auto-chained")
                else -> try {
                    next.mp.start()
                    nlog("MP seg ${next.info.segIdx} started manually (chain missed)")
                } catch (e: Exception) {
                    nlog("MP seg ${next.info.segIdx} manual start failed: ${e.message}")
                    // The player will never report completion; mark it done so
                    // the next rotation does not wait forever.
                    next.done.complete(Unit)
                }
            }

            playing = next
            current = next
            pending = null
            notifyPlaying(next.info)
        }

        // Drain: wait for the last player to finish.
        playing.done.await()
        deleteQuietly(playing.info.wavPath)
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (e: Exception) {
        nlog("MP playback chain error: ${e.message}")
    } finally {
        // Release whatever is still held: the active player and, if we were
        // interrupted mid-rotation, the prepared-but-not-yet-current one.
        current?.let { releaseQuietly(it.mp) }
        pending?.let { releaseQuietly(it.mp) }
    }
}
|
||||||
|
|
||||||
|
/**
 * Payload handed from the synth worker to the playback worker so the UI can
 * be notified with matching text + duration when each segment starts playing.
 *
 * Note: as a data class holding arrays, the generated equals/hashCode compare
 * [rmsEnvelope] and [spectrogram] by reference, not by content.
 *
 * @property segIdx index of the segment, used in playback log messages.
 * @property wavPath path of the WAV file to play; the playback worker deletes
 *   the file once the segment has finished playing.
 * @property sentence text of the segment, forwarded to the UI callback.
 * @property durationMs duration of the segment, forwarded to the UI callback.
 * @property rmsEnvelope sidecar array of per-ENVELOPE_WINDOW_MS RMS values
 *   normalized to [0, 1] that drives the audio-reactive orb visualizer without
 *   having to read PCM back from MediaPlayer.
 * @property spectrogram per-window band magnitudes forwarded to the UI
 *   callback (presumably the output of computeSpectrogram; confirm at the
 *   producer).
 */
private data class SegmentReady(
    val segIdx: Int,
    val wavPath: String,
    val sentence: String,
    val durationMs: Long,
    val rmsEnvelope: FloatArray,
    val spectrogram: Array<FloatArray>
)
|
||||||
|
|
||||||
|
/**
 * Builds a loudness envelope for a mono 16-bit PCM buffer sampled at [SR]:
 * one value per ENVELOPE_WINDOW_MS window, each the square root of the
 * window's RMS relative to full scale (32767), clamped to [0, 1].
 *
 * The last window may be shorter than the others; it is averaged over the
 * samples it actually contains. An empty input yields an empty array.
 * Single pass over the audio; called once per segment right after synthesis.
 */
private fun computeRmsEnvelope(audio: ShortArray): FloatArray {
    if (audio.isEmpty()) return FloatArray(0)

    val samplesPerWindow = SR * ENVELOPE_WINDOW_MS / 1000
    // Ceiling division so a trailing partial window still gets a slot.
    val windowCount = (audio.size + samplesPerWindow - 1) / samplesPerWindow

    return FloatArray(windowCount) { w ->
        val lo = w * samplesPerWindow
        val hi = minOf(lo + samplesPerWindow, audio.size)
        var energy = 0.0
        for (i in lo until hi) {
            val v = audio[i].toDouble()
            energy += v * v
        }
        val rms = kotlin.math.sqrt(energy / (hi - lo))
        // The outer sqrt squashes the upper range so quiet speech still
        // produces visible motion without saturating on loud peaks.
        kotlin.math.sqrt((rms / 32767.0).coerceIn(0.0, 1.0)).toFloat()
    }
}
|
||||||
|
|
||||||
|
/**
 * Computes the band spectrogram used by the spectrum-in-sphere visualizer.
 *
 * The time axis is aligned with the RMS envelope: one column per
 * ENVELOPE_WINDOW_MS hop. Each column is taken from an FFT_SIZE-sample frame
 * (the original note gives 1024 samples, ~43 ms at 24 kHz), Hann-windowed and
 * centered on the hop midpoint; samples outside the buffer are treated as
 * zero. The spectrum is reduced to [SPECTRUM_BANDS] log-spaced bands between
 * 120 Hz and 4 kHz, which covers the vocal formant range without spending
 * visual space on sub-100 Hz or fricative >4 kHz content.
 *
 * @return one FloatArray of SPECTRUM_BANDS values in [0, 1] per hop; empty
 *   when [audio] is empty.
 */
private fun computeSpectrogram(audio: ShortArray): Array<FloatArray> {
    if (audio.isEmpty()) return emptyArray()

    val n = FFT_SIZE
    val hop = SR * ENVELOPE_WINDOW_MS / 1000
    val frameCount = (audio.size + hop - 1) / hop

    // Band boundaries expressed as FFT bin indices (DC and Nyquist excluded).
    val hzPerBin = SR.toDouble() / n
    val lowHz = 120.0
    val highHz = 4000.0
    val edges = IntArray(SPECTRUM_BANDS + 1) { e ->
        val hz = lowHz * Math.pow(highHz / lowHz, e.toDouble() / SPECTRUM_BANDS)
        (hz / hzPerBin).toInt().coerceIn(1, n / 2 - 1)
    }

    // Hann window: reduces spectral leakage, gives cleaner bars.
    val window = FloatArray(n) { i ->
        (0.5 - 0.5 * Math.cos(2.0 * Math.PI * i / (n - 1))).toFloat()
    }

    // Scratch buffers reused for every frame.
    val re = FloatArray(n)
    val im = FloatArray(n)

    return Array(frameCount) { frame ->
        // First sample of a frame centered on this hop's midpoint.
        val first = frame * hop + hop / 2 - n / 2
        for (i in 0 until n) {
            val idx = first + i
            val sample = if (idx in audio.indices) audio[idx].toFloat() / 32768f else 0f
            re[i] = sample * window[i]
            im[i] = 0f
        }
        fftInPlace(re, im)

        FloatArray(SPECTRUM_BANDS) { b ->
            val from = edges[b]
            // Guarantee at least one bin per band when edges collapse.
            val to = edges[b + 1].coerceAtLeast(from + 1)
            var power = 0.0
            for (k in from until to) {
                val r = re[k].toDouble()
                val q = im[k].toDouble()
                power += r * r + q * q
            }
            val mag = Math.sqrt(power / (to - from))
            // Log-compress + normalize so typical speech lands in a visible
            // range; values are clamped to [0, 1].
            (Math.log10(1.0 + mag * 80) / Math.log10(7.0))
                .toFloat().coerceIn(0f, 1f)
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * In-place radix-2 decimation-in-time Cooley–Tukey FFT (forward transform,
 * no scaling).
 *
 * @param re real parts; overwritten with the real parts of the spectrum.
 * @param im imaginary parts; overwritten with the imaginary parts of the
 *   spectrum. Must have the same length as [re].
 * @throws IllegalArgumentException if the arrays differ in length or the
 *   length is not a power of two (lengths 0 and 1 are accepted as no-ops).
 */
private fun fftInPlace(re: FloatArray, im: FloatArray) {
    val n = re.size
    require(im.size == n) { "re and im must have the same length" }
    require((n and (n - 1)) == 0) { "FFT size must be a power of 2, was $n" }

    // Bit-reversal permutation.
    var j = 0
    for (i in 1 until n) {
        var bit = n shr 1
        while ((j and bit) != 0) {
            j = j xor bit
            bit = bit shr 1
        }
        j = j or bit
        if (i < j) {
            val tr = re[i]; re[i] = re[j]; re[j] = tr
            val ti = im[i]; im[i] = im[j]; im[j] = ti
        }
    }

    // Butterflies. At the stage of width `size`, the twiddle factor for
    // position p within a half-block is exp(-2*pi*i * p / size). It depends
    // only on the stage and p, so it is computed once per (stage, p) and
    // applied to every block of that stage.
    var size = 2
    while (size <= n) {
        val half = size / 2
        val angleStep = -2.0 * Math.PI / size
        for (p in 0 until half) {
            val angle = angleStep * p
            val c = kotlin.math.cos(angle).toFloat()
            val s = kotlin.math.sin(angle).toFloat()
            var lo = p
            while (lo < n) {
                val hi = lo + half
                val tRe = re[hi] * c - im[hi] * s
                val tIm = re[hi] * s + im[hi] * c
                re[hi] = re[lo] - tRe
                im[hi] = im[lo] - tIm
                re[lo] = re[lo] + tRe
                im[lo] = im[lo] + tIm
                lo += size
            }
        }
        size *= 2
    }
}
|
||||||
|
|
||||||
|
// Closes the MediaPlayer-fallback session: closes the sentence queue, waits
// for the session job to finish, then clears the session state and the
// segment-playing callback. Does nothing when no MP session is open.
private suspend fun endStreamingSessionMp() {
    val queue = sessionMpQueue
    if (queue == null) return

    // Closing the queue signals end-of-input to the session job.
    queue.close()
    try {
        sessionMpJob?.join()
    } catch (_: Exception) {
    }

    sessionMpQueue = null
    sessionMpJob = null
    onSegmentPlaying = null
    nlog("streaming session closed (MediaPlayer fallback)")
}
|
||||||
|
|
||||||
|
/**
 * Plays one WAV file through Android MediaPlayer and suspends the calling
 * coroutine until playback completes, fails, or setup throws. Failures are
 * logged and the coroutine resumes normally; nothing is thrown to the caller.
 * Cancelling the coroutine releases the player.
 *
 * MediaPlayer uses a separate audio pipeline from AudioTrack so it bypasses
 * ColorOS's AudioTrack hardening/muting behaviour.
 *
 * @param path path of the WAV file to play.
 * @param segIdx segment index, used only in log messages.
 */
private suspend fun playWavBlocking(path: String, segIdx: Int) {
    val t0 = System.currentTimeMillis()
    suspendCancellableCoroutine<Unit> { cont ->
        val player = android.media.MediaPlayer()

        // Each exit path funnels through here; resumes only while still active.
        fun finish() {
            if (cont.isActive) cont.resume(Unit) {}
        }

        fun releaseQuietly(target: android.media.MediaPlayer) {
            try {
                target.release()
            } catch (_: Exception) {
            }
        }

        try {
            val speechAttrs = android.media.AudioAttributes.Builder()
                .setUsage(android.media.AudioAttributes.USAGE_MEDIA)
                .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
                .build()
            player.setAudioAttributes(speechAttrs)
            player.setDataSource(path)

            player.setOnPreparedListener { ready ->
                nlog("MP seg $segIdx prepared, starting (prep ${System.currentTimeMillis() - t0}ms)")
                ready.start()
            }
            player.setOnCompletionListener { finished ->
                nlog("MP seg $segIdx done (${System.currentTimeMillis() - t0}ms total)")
                releaseQuietly(finished)
                finish()
            }
            player.setOnErrorListener { failed, what, extra ->
                nlog("MP seg $segIdx error: what=$what extra=$extra")
                releaseQuietly(failed)
                finish()
                true
            }

            player.prepareAsync()
            cont.invokeOnCancellation { releaseQuietly(player) }
        } catch (e: Exception) {
            nlog("MP seg $segIdx setup failed: ${e.message}")
            releaseQuietly(player)
            finish()
        }
    }
}
|
||||||
|
|
||||||
|
fun startStreamingSession() {
|
||||||
|
if (USE_MEDIAPLAYER_FALLBACK) { startStreamingSessionMp(); return }
|
||||||
|
if (sessionTrack != null) return // already open
|
||||||
|
// USAGE_VOICE_COMMUNICATION routes to STREAM_VOICE_CALL, which
|
||||||
|
// ColorOS's "Audio Hardening" policy does NOT silently mute (the
|
||||||
|
// policy targets STREAM_MUSIC to preserve battery on inactive media
|
||||||
|
// apps; STREAM_VOICE_CALL is reserved for VoIP and always plays).
|
||||||
|
// Previous attempts with USAGE_MEDIA and USAGE_ASSISTANT both got
|
||||||
|
// `event:muted updated source:clientVolume` ~0.6–1 s after play()
|
||||||
|
// even with audio focus + mediaPlayback FGS, so moving off of
|
||||||
|
// STREAM_MUSIC is the only route that unblocks audible playback.
|
||||||
|
val attrs = AudioAttributes.Builder()
|
||||||
|
.setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION)
|
||||||
|
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||||
|
.build()
|
||||||
|
val track = AudioTrack.Builder()
|
||||||
|
.setAudioAttributes(attrs)
|
||||||
.setAudioFormat(AudioFormat.Builder()
|
.setAudioFormat(AudioFormat.Builder()
|
||||||
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
||||||
.setSampleRate(SR)
|
.setSampleRate(SR)
|
||||||
|
|
@ -3340,7 +3830,77 @@ class Qwen3TtsEngine(
|
||||||
// paces writes when full.
|
// paces writes when full.
|
||||||
.setTransferMode(AudioTrack.MODE_STREAM)
|
.setTransferMode(AudioTrack.MODE_STREAM)
|
||||||
.build()
|
.build()
|
||||||
|
// Request audio focus for the duration of the session. Without this
|
||||||
|
// ColorOS's Audio Hardening treats the track as background noise
|
||||||
|
// and mutes it, regardless of FGS status. We don't care about
|
||||||
|
// focus loss callbacks — if another app grabs focus mid-sentence
|
||||||
|
// that's fine, the track just gets ducked.
|
||||||
|
val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
|
||||||
|
val focusReq = android.media.AudioFocusRequest.Builder(android.media.AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
|
||||||
|
.setAudioAttributes(attrs)
|
||||||
|
.setOnAudioFocusChangeListener { _ -> }
|
||||||
|
.build()
|
||||||
|
val focusRes = am?.requestAudioFocus(focusReq)
|
||||||
|
nlog("audio focus request: $focusRes (1=granted, 0=failed, 2=delayed)")
|
||||||
|
sessionFocusRequest = focusReq
|
||||||
|
// ColorOS mutes AudioTrack clientVolume ~1s after creation (seen in
|
||||||
|
// dumpsys audio as `event:muted updated source:clientVolume`). Force
|
||||||
|
// track volume back to 1.0 repeatedly to override. This is also
|
||||||
|
// done in the keep-alive watchdog loop below for ongoing override.
|
||||||
|
try { track.setVolume(1.0f) } catch (_: Exception) {}
|
||||||
track.play()
|
track.play()
|
||||||
|
sessionFramesWritten.set(0)
|
||||||
|
sessionGenActive.set(false)
|
||||||
|
// writeAndCount is the single path through which PCM reaches the
|
||||||
|
// AudioTrack for this session, so sessionFramesWritten always stays
|
||||||
|
// in sync with what's been queued to playback hardware. AudioTrack.write
|
||||||
|
// is thread-safe, so this can be called concurrently from the session
|
||||||
|
// worker (real audio) and the keep-alive watchdog (silence padding).
|
||||||
|
val writeAndCount: (ShortArray) -> Unit = { pcm ->
|
||||||
|
if (pcm.isNotEmpty()) {
|
||||||
|
val n = track.write(pcm, 0, pcm.size)
|
||||||
|
if (n > 0) sessionFramesWritten.addAndGet(n.toLong())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Bootstrap silence: queue 500 ms immediately after play() so
|
||||||
|
// AudioFlinger has samples to mix from the very first cycle.
|
||||||
|
// Without this, there's a ~100 ms window between play() and the
|
||||||
|
// first watchdog tick where the track has no data and AudioFlinger
|
||||||
|
// flags it for removal. Once that happens, playbackHead sticks at
|
||||||
|
// 0 and subsequent writes go to a dead track.
|
||||||
|
val bootstrapSilence = ShortArray(SR / 2) // 500 ms
|
||||||
|
writeAndCount(bootstrapSilence)
|
||||||
|
// Keep-alive watchdog. AudioFlinger on OnePlus/ColorOS kills a track
|
||||||
|
// that underruns for ~1 s (confirmed via `prepareTracks_l BUFFER
|
||||||
|
// TIMEOUT: remove track … due to underrun on thread 29`). Our
|
||||||
|
// per-segment synthesis takes 3–5 s, which always exceeds that
|
||||||
|
// window between writes, so the track was getting silenced after
|
||||||
|
// the first ~1 s of audio played. The watchdog pads with 200 ms of
|
||||||
|
// silence any time the buffered-ahead audio drops below 400 ms,
|
||||||
|
// regardless of segment state — silence only advances playback head
|
||||||
|
// in the gaps between real audio and is never inserted inside a
|
||||||
|
// contiguous burst of real writes (those bring buffered above 400 ms
|
||||||
|
// and keep the watchdog quiet).
|
||||||
|
val keepAliveBuffer = ShortArray(SR / 5) // 200 ms of silence
|
||||||
|
val keepAliveJob = kotlinx.coroutines.CoroutineScope(
|
||||||
|
kotlinx.coroutines.Dispatchers.IO
|
||||||
|
).launch {
|
||||||
|
var tick = 0
|
||||||
|
while (kotlinx.coroutines.currentCoroutineContext()[kotlinx.coroutines.Job]?.isActive != false) {
|
||||||
|
kotlinx.coroutines.delay(100)
|
||||||
|
val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
|
||||||
|
val written = sessionFramesWritten.get() and 0xFFFFFFFFL
|
||||||
|
val buffered = written - head
|
||||||
|
val needsPad = buffered < SR * 2 / 5 // < 400 ms
|
||||||
|
if ((tick and 0x1F) == 0) {
|
||||||
|
nlog("keepAlive tick=$tick head=$head written=$written buffered=$buffered pad=$needsPad state=${track.playState}")
|
||||||
|
}
|
||||||
|
tick++
|
||||||
|
// Override any clientVolume mute that ColorOS keeps applying.
|
||||||
|
try { track.setVolume(1.0f) } catch (_: Exception) {}
|
||||||
|
if (needsPad) writeAndCount(keepAliveBuffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
val chan = kotlinx.coroutines.channels.Channel<String>(
|
val chan = kotlinx.coroutines.channels.Channel<String>(
|
||||||
capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
|
capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
|
||||||
)
|
)
|
||||||
|
|
@ -3350,15 +3910,26 @@ class Qwen3TtsEngine(
|
||||||
var segIdx = 0
|
var segIdx = 0
|
||||||
for (sentence in chan) {
|
for (sentence in chan) {
|
||||||
try {
|
try {
|
||||||
|
sessionGenActive.set(true)
|
||||||
|
if (USE_STREAMING_DECODE && talkerPteModule != null && cpPteModule != null) {
|
||||||
|
// CP↔BigVGAN overlap path: audio chunks flow to the
|
||||||
|
// shared AudioTrack as soon as BigVGAN finishes each
|
||||||
|
// SEQ_LEN window, instead of after the whole segment.
|
||||||
|
generateSegmentAudioVCStreaming(sentence, segIdx, writeAndCount)
|
||||||
|
} else {
|
||||||
val audio = generateSegmentAudioVC(sentence, segIdx)
|
val audio = generateSegmentAudioVC(sentence, segIdx)
|
||||||
if (audio.isNotEmpty()) track.write(audio, 0, audio.size)
|
writeAndCount(audio)
|
||||||
|
}
|
||||||
segIdx++
|
segIdx++
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
nlog("session seg $segIdx error: ${e.message}")
|
nlog("session seg $segIdx error: ${e.message}")
|
||||||
|
} finally {
|
||||||
|
sessionGenActive.set(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sessionTrack = track; sessionChannel = chan; sessionJob = job
|
sessionTrack = track; sessionChannel = chan; sessionJob = job
|
||||||
|
sessionKeepAliveJob = keepAliveJob
|
||||||
nlog("streaming session opened")
|
nlog("streaming session opened")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3368,6 +3939,12 @@ class Qwen3TtsEngine(
|
||||||
* immediately. Sentences play in the order they were enqueued.
|
* immediately. Sentences play in the order they were enqueued.
|
||||||
*/
|
*/
|
||||||
fun enqueueSentence(sentence: String) {
|
fun enqueueSentence(sentence: String) {
|
||||||
|
if (USE_MEDIAPLAYER_FALLBACK) {
|
||||||
|
val chan = sessionMpQueue ?: run { nlog("enqueueSentence: no MP session"); return }
|
||||||
|
val r = chan.trySend(sentence)
|
||||||
|
if (r.isFailure) nlog("enqueueSentence: MP channel full / closed")
|
||||||
|
return
|
||||||
|
}
|
||||||
val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return }
|
val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return }
|
||||||
val r = chan.trySend(sentence)
|
val r = chan.trySend(sentence)
|
||||||
if (r.isFailure) nlog("enqueueSentence: channel full / closed")
|
if (r.isFailure) nlog("enqueueSentence: channel full / closed")
|
||||||
|
|
@ -3379,17 +3956,46 @@ class Qwen3TtsEngine(
|
||||||
* drains), then release the shared track. Safe to call more than once.
|
* drains), then release the shared track. Safe to call more than once.
|
||||||
*/
|
*/
|
||||||
suspend fun endStreamingSession() {
|
suspend fun endStreamingSession() {
|
||||||
|
if (USE_MEDIAPLAYER_FALLBACK) { endStreamingSessionMp(); return }
|
||||||
val chan = sessionChannel ?: return
|
val chan = sessionChannel ?: return
|
||||||
chan.close()
|
chan.close()
|
||||||
try { sessionJob?.join() } catch (_: Exception) {}
|
try { sessionJob?.join() } catch (_: Exception) {}
|
||||||
|
// Stop the keep-alive watchdog BEFORE draining so it doesn't pad more
|
||||||
|
// silence onto the tail while we're waiting for the existing buffer
|
||||||
|
// to play out.
|
||||||
|
try { sessionKeepAliveJob?.cancel() } catch (_: Exception) {}
|
||||||
|
try { sessionKeepAliveJob?.join() } catch (_: Exception) {}
|
||||||
try {
|
try {
|
||||||
sessionTrack?.let {
|
sessionTrack?.let { track ->
|
||||||
// Block until written samples have been consumed by the
|
// AudioTrack.stop() in MODE_STREAM DISCARDS unplayed buffered
|
||||||
// hardware so users aren't cut off mid-syllable.
|
// samples — it doesn't block for drain. Poll getPlaybackHead
|
||||||
it.stop(); it.release()
|
// Position() until it reaches what we wrote, then stop. The
|
||||||
|
// head is a 32-bit wrap-around counter, so compare modulo.
|
||||||
|
// Cap the drain wait so a stalled track can't block us forever.
|
||||||
|
val targetFrames = sessionFramesWritten.get()
|
||||||
|
val startMs = System.currentTimeMillis()
|
||||||
|
val maxDrainMs = (targetFrames * 1000L / SR) + 500L // audio dur + 500ms slack
|
||||||
|
while (true) {
|
||||||
|
val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
|
||||||
|
val reached = head >= (targetFrames and 0xFFFFFFFFL)
|
||||||
|
val state = track.playState
|
||||||
|
if (reached || state != AudioTrack.PLAYSTATE_PLAYING) break
|
||||||
|
if (System.currentTimeMillis() - startMs > maxDrainMs) {
|
||||||
|
nlog("endStreamingSession: drain timeout at head=$head/$targetFrames")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
kotlinx.coroutines.delay(20)
|
||||||
|
}
|
||||||
|
track.stop(); track.release()
|
||||||
}
|
}
|
||||||
} catch (_: Exception) {}
|
} catch (_: Exception) {}
|
||||||
sessionTrack = null; sessionChannel = null; sessionJob = null
|
// Release audio focus after the track is fully drained and stopped.
|
||||||
|
try {
|
||||||
|
val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
|
||||||
|
sessionFocusRequest?.let { am?.abandonAudioFocusRequest(it) }
|
||||||
|
} catch (_: Exception) {}
|
||||||
|
sessionFocusRequest = null
|
||||||
|
sessionTrack = null; sessionChannel = null; sessionJob = null; sessionKeepAliveJob = null
|
||||||
nlog("streaming session closed")
|
nlog("streaming session closed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3446,6 +4052,177 @@ class Qwen3TtsEngine(
|
||||||
return fadeOut(decodeChunked(codebooks, n), 40)
|
return fadeOut(decodeChunked(codebooks, n), 40)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------- Streaming decode (CP ↔ BigVGAN overlap) ----------
|
||||||
|
|
||||||
|
/**
 * Carrier from the talker/CP producer to the BigVGAN consumer.
 *
 * @property codebooks code matrix for one decode window, indexed
 *   `[codebook][token]`.
 * @property realTokens number of leading token positions that hold generated
 *   codes; the consumer trims the decoded audio to this many tokens.
 */
private class ChunkMsg(
    val codebooks: Array<IntArray>,
    val realTokens: Int
)
|
||||||
|
|
||||||
|
/**
 * Incremental counterpart of decodeChunked: chunks are accumulated and
 * crossfaded into one growing buffer, and every sample that can no longer be
 * touched by a later crossfade (everything before the last `overlapSamples`)
 * is handed to [onAudio] as soon as it becomes stable.
 *
 * [flushFinal] emits whatever is still held back, with the trailing fadeOut
 * applied. The stated intent is that the concatenation of all emitted slices
 * equals what decodeChunked followed by fadeOut would have produced.
 */
private inner class StreamingCrossfader(private val onAudio: (ShortArray) -> Unit) {
    private val overlapSamples = CHUNK_OVERLAP * SAMPLES_PER_TOKEN
    private var assembled = ShortArray(0)
    private var emitted = 0
    private var awaitingFirst = true

    /**
     * Adds one decoded chunk. Only the first `realTokens * SAMPLES_PER_TOKEN`
     * samples of [chunkAudio] are used; the remainder is treated as padding.
     */
    fun feedChunk(chunkAudio: ShortArray, realTokens: Int) {
        val usable = minOf(realTokens * SAMPLES_PER_TOKEN, chunkAudio.size)
        val chunk = if (usable < chunkAudio.size) chunkAudio.copyOf(usable) else chunkAudio

        if (awaitingFirst) {
            // Copy so later in-place crossfades never write into the caller's array.
            assembled = chunk.copyOf()
            awaitingFirst = false
        } else {
            crossfadeAndAppend(chunk)
        }
        emitStablePrefix()
    }

    /** Emit any remaining buffered samples with the trailing fadeOut. */
    fun flushFinal() {
        if (emitted >= assembled.size) return
        val tail = assembled.copyOfRange(emitted, assembled.size)
        onAudio(fadeOut(tail, 40))
        emitted = assembled.size
    }

    // Linear crossfade of the chunk's head into the buffer's tail, then append
    // the part of the chunk that lies beyond the overlap.
    private fun crossfadeAndAppend(chunk: ShortArray) {
        val fadeLen = minOf(overlapSamples, assembled.size, chunk.size)
        val fadeStart = assembled.size - fadeLen
        for (i in 0 until fadeLen) {
            val alpha = i.toFloat() / fadeLen
            val mixed = ((1f - alpha) * assembled[fadeStart + i] + alpha * chunk[i]).toInt()
                .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort()
            assembled[fadeStart + i] = mixed
        }
        if (fadeLen < chunk.size) {
            val oldSize = assembled.size
            val grown = assembled.copyOf(oldSize + (chunk.size - fadeLen))
            chunk.copyInto(grown, oldSize, fadeLen, chunk.size)
            assembled = grown
        }
    }

    // Hold back the last `overlapSamples` so the next chunk's crossfade can
    // still mutate them; emit everything before that which is not yet emitted.
    private fun emitStablePrefix() {
        val stableEnd = maxOf(assembled.size - overlapSamples, emitted)
        if (stableEnd <= emitted) return
        onAudio(assembled.copyOfRange(emitted, stableEnd))
        emitted = stableEnd
    }
}
|
||||||
|
|
||||||
|
/**
 * Streaming variant of generateSegmentAudioVC. As the talker/CP loop
 * produces codes step by step, BigVGAN chunks are dispatched to a
 * background coroutine the moment SEQ_LEN codes are accumulated. For a
 * 75-token segment this overlaps the last BigVGAN pass with the final
 * ~20 talker/CP steps, cutting first-audio latency by ~4 s vs the
 * sequential `generateSegmentAudioVC` path.
 *
 * Short segments (<SEQ_LEN codes) emit a single chunk at end-of-gen,
 * matching the legacy single-chunk path with no perceptible difference.
 *
 * The producer blocks on `bvChan.send` if the BigVGAN consumer is behind
 * (channel capacity 4). If the consumer fails, it cancels the channel so a
 * blocked or later `send` throws instead of waiting forever on a channel
 * nobody drains; the producer then logs the error and the function returns
 * once the consumer job has finished.
 *
 * @param segText text of the segment to synthesize.
 * @param segIdx segment index, used only in log messages.
 * @param onAudio receives PCM slices in playback order; invoked from the
 *   consumer coroutine (Dispatchers.IO), not from the caller's context.
 */
private suspend fun generateSegmentAudioVCStreaming(
    segText: String, segIdx: Int, onAudio: (ShortArray) -> Unit
) {
    if (bpeTokenizer == null || textEmbedsFullBuf == null || damienVoicePrefix == null || damienVoiceSuffix == null) {
        nlog("generateSegmentAudioVCStreaming: Stage 2 assets missing"); return
    }
    if (talkerPteModule == null || cpPteModule == null) {
        nlog("generateSegmentAudioVCStreaming: PTE talker/CP not loaded"); return
    }
    val prefix = damienVoicePrefix!!
    val suffix = damienVoiceSuffix!!
    val codecPadEmb = codecEmb(CODEC_PAD)
    val ids = bpeTokenizer!!.encode(segText)
    nlog("session seg $segIdx (stream) '${segText.take(60)}' → ${ids.size} tokens")

    // Prefill = voice prefix + one embedding per text token + voice suffix.
    val prefill = ArrayList<FloatArray>(prefix.size + ids.size + suffix.size)
    for (e in prefix) prefill.add(e)
    for (id in ids) prefill.add(sumEmb(textEmbFromFull(id), codecPadEmb))
    for (e in suffix) prefill.add(e)

    // Generation budget: 2.4 steps per text token, with 50% + 10 steps of
    // headroom, capped by the context size.
    val expectedSteps = (ids.size * 24) / 10
    val maxGen = minOf(expectedSteps * 3 / 2 + 10, MAX_CONTEXT - 15)

    val tStart = System.currentTimeMillis()
    var firstAudioLogged = false
    val bvChan = kotlinx.coroutines.channels.Channel<ChunkMsg>(capacity = 4)
    val cfader = StreamingCrossfader { pcm ->
        if (!firstAudioLogged) {
            nlog("streaming seg $segIdx first audio at ${System.currentTimeMillis() - tStart}ms (${pcm.size} samples)")
            firstAudioLogged = true
        }
        onAudio(pcm)
    }

    // Consumer: decode each chunk with BigVGAN and feed the crossfader.
    val consumerJob = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO).launch {
        try {
            for (msg in bvChan) {
                val quant = vqDecode(msg.codebooks)
                val audio = runSpeechDecoderV2(quant)
                cfader.feedChunk(audio, msg.realTokens)
            }
            cfader.flushFinal()
        } catch (e: Exception) {
            nlog("streaming seg $segIdx consumer error: ${e.message}")
            // Nobody will drain the channel any more: cancel it so the
            // producer's send() fails fast instead of blocking once the
            // buffer is full.
            bvChan.cancel()
        }
    }

    // Producer: run the interleaved talker/CP loop and dispatch each
    // SEQ_LEN-aligned window of codes immediately. The consumer's
    // crossfader holds back the last `overlapSamples` of audio per
    // chunk, so the in-flight chunk's tail can still be mutated by the
    // next chunk before being emitted; flushFinal() at end emits the
    // last tail with fadeOut. End-of-stream is signalled by closing
    // bvChan after the trailing partial chunk is sent.
    val collected = mutableListOf<IntArray>()
    var nextChunkStart = 0

    // Build a SEQ_LEN-wide code matrix starting at `start`; positions beyond
    // `real` tokens and out-of-range code values are filled with 0.
    fun buildChunkCb(start: Int, real: Int): Array<IntArray> = Array(NUM_CODEBOOKS) { cb ->
        IntArray(SEQ_LEN) { t ->
            val src = start + t
            if (src < start + real) {
                val v = collected[src][cb]
                if (v in 0 until CODEBOOK_SIZE) v else 0
            } else 0
        }
    }

    try {
        runInterleavedPteFromEmbeds(prefill, emptyList(), maxGen) { _, codes ->
            collected.add(codes)
            while (collected.size >= nextChunkStart + SEQ_LEN) {
                val cb = buildChunkCb(nextChunkStart, SEQ_LEN)
                kotlinx.coroutines.runBlocking { bvChan.send(ChunkMsg(cb, SEQ_LEN)) }
                nextChunkStart += EFFECTIVE_CHUNK
            }
        }
    } catch (e: Exception) {
        nlog("streaming seg $segIdx producer error: ${e.message}")
    }

    // Trailing chunk: any remaining tokens after the last full window
    // (covers both the medium-segment partial-tail case and the
    // short-segment <SEQ_LEN single-chunk case where nextChunkStart=0).
    val total = collected.size
    if (total > nextChunkStart) {
        val trailing = total - nextChunkStart
        val cb = buildChunkCb(nextChunkStart, trailing)
        try {
            kotlinx.coroutines.runBlocking { bvChan.send(ChunkMsg(cb, trailing)) }
        } catch (e: Exception) {
            // Only reachable when the consumer already failed and cancelled
            // the channel; there is nobody left to play this chunk.
            nlog("streaming seg $segIdx trailing chunk dropped: ${e.message}")
        }
    }
    bvChan.close()
    consumerJob.join()
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run the Hexagon talker + CP generation loop with a fully pre-built
|
* Run the Hexagon talker + CP generation loop with a fully pre-built
|
||||||
* prefill (voice prefix + all text tokens). Same decode recipe as
|
* prefill (voice prefix + all text tokens). Same decode recipe as
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,548 @@
|
||||||
|
package com.kazeia.ui
|
||||||
|
|
||||||
|
import android.content.Context
|
||||||
|
import android.graphics.Canvas
|
||||||
|
import android.graphics.Color
|
||||||
|
import android.graphics.Paint
|
||||||
|
import android.graphics.Path
|
||||||
|
import android.graphics.RadialGradient
|
||||||
|
import android.graphics.Shader
|
||||||
|
import android.util.AttributeSet
|
||||||
|
import android.view.Choreographer
|
||||||
|
import android.view.View
|
||||||
|
import kotlin.math.PI
|
||||||
|
import kotlin.math.cos
|
||||||
|
import kotlin.math.max
|
||||||
|
import kotlin.math.min
|
||||||
|
import kotlin.math.sin
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Large, central orb visualizer — Kazeia's visual "face". Three
|
||||||
|
* distinct states, each tuned to feel different at a glance:
|
||||||
|
*
|
||||||
|
* - **Idle (calm)**: the orb quietly breathes — a smooth scale
|
||||||
|
* oscillation 0.88 ↔ 1.0 over a 5 s cycle with a soft halo that
|
||||||
|
* pulses in phase. No high-frequency motion. Suggests "waiting,
|
||||||
|
* listening, not anxious".
|
||||||
|
*
|
||||||
|
* - **Listening (attentive)**: the orb settles slightly larger, a
|
||||||
|
* warmer bright ring appears around it, and its outline deforms
|
||||||
|
* organically with the live mic RMS (blob-like wobble, 8 Fourier
|
||||||
|
* modes, gain-mapped from the RMS). Micro-ripples emit
|
||||||
|
* continuously while speech is present. Feels alive and engaged
|
||||||
|
* — clearly different from Idle's static breathing.
|
||||||
|
*
|
||||||
|
* - **Speaking (active)**: the orb outline itself acts as the
* spectrometer. Each of the SPECTRUM_BANDS band energies from a
* pre-computed band-energy sidecar drives one Fourier mode of the
* perimeter, so the whole shape distorts with the voice content;
* there are no internal bars. The orb size and halo pulse with the
* overall RMS envelope, and strong envelope peaks release outward
* ripple waves.
|
||||||
|
*
|
||||||
|
* The whole palette (core, halo, ring, bars, ripples) is re-derived
|
||||||
|
* from a single [voiceColor] setter so each speaker gets a distinct
|
||||||
|
* visual identity.
|
||||||
|
*/
|
||||||
|
class AudioVisualizerView @JvmOverloads constructor(
|
||||||
|
context: Context,
|
||||||
|
attrs: AttributeSet? = null,
|
||||||
|
defStyleAttr: Int = 0
|
||||||
|
) : View(context, attrs, defStyleAttr), Choreographer.FrameCallback {
|
||||||
|
|
||||||
|
companion object {
|
||||||
|
/** Must match Qwen3TtsEngine.SPECTRUM_BANDS: drawSpeaking reads this many bands from every spectrogram row given to startSpeaking. */
|
||||||
|
private const val SPECTRUM_BANDS = 12
|
||||||
|
/** Listening-mode outline deformation modes (even = smooth blobs). */
|
||||||
|
private const val BLOB_MODES = 8
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- State ----------
|
||||||
|
/** What the orb is currently rendering; the active instance is held in [state]. */
private sealed class State {
    /** Breathing animation, no audio input. */
    object Idle : State()

    /**
     * Tracking the microphone. [micRms] is overwritten in place by setListening;
     * [phaseSeed] is handed to buildBlobPath as a per-session phase offset.
     */
    data class Listening(var micRms: Float, var phaseSeed: Float) : State()

    /**
     * Rendering one spoken segment. [envelope] and [spectrogram] are sampled by
     * the time elapsed since [startedAtMs], scaled over [durationMs].
     */
    data class Speaking(
        val envelope: FloatArray,
        val spectrogram: Array<FloatArray>,
        val durationMs: Long,
        val startedAtMs: Long
    ) : State()
}
|
||||||
|
|
||||||
|
@Volatile private var state: State = State.Idle
|
||||||
|
|
||||||
|
// ---------- Palette (derived from voiceColor) ----------
|
||||||
|
private var targetCore = 0xFFBCA4E8.toInt() // default: lavender
|
||||||
|
private var currentCore = targetCore
|
||||||
|
private var currentHalo = deriveHalo(currentCore)
|
||||||
|
private var currentAccent = deriveAccent(currentCore)
|
||||||
|
|
||||||
|
/**
 * Set the colour the palette eases toward. The alpha channel of [color] is
 * overwritten with 0xFF so the target is always opaque.
 */
fun setVoiceColor(color: Int) {
    val opaqueMask = 0xFF000000.toInt()
    targetCore = opaqueMask or color
    scheduleFrame()
}
|
||||||
|
|
||||||
|
// ---------- Animation state ----------
|
||||||
|
private var frameStartNs = 0L
|
||||||
|
private var smoothedAmp = 0f // 0..1 orb-size pulsation (all states)
|
||||||
|
private var smoothedBars = FloatArray(SPECTRUM_BANDS)
|
||||||
|
private var listeningRingPhase = 0f // rotating shimmer on listening ring
|
||||||
|
private val ripples = ArrayList<Ripple>()
|
||||||
|
private var lastSpectroIdx = -1
|
||||||
|
|
||||||
|
// ---------- Paints ----------
|
||||||
|
private val corePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply { style = Paint.Style.FILL }
|
||||||
|
private val haloPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply { style = Paint.Style.FILL }
|
||||||
|
private val ringPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
|
||||||
|
style = Paint.Style.STROKE
|
||||||
|
}
|
||||||
|
private val ripplePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
|
||||||
|
style = Paint.Style.STROKE
|
||||||
|
strokeWidth = 3f
|
||||||
|
}
|
||||||
|
private val barPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
|
||||||
|
style = Paint.Style.FILL_AND_STROKE
|
||||||
|
}
|
||||||
|
private val blobOutlinePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
|
||||||
|
style = Paint.Style.STROKE
|
||||||
|
}
|
||||||
|
private val blobPath = Path()
|
||||||
|
private val spherePath = Path()
|
||||||
|
|
||||||
|
init {
|
||||||
|
setLayerType(LAYER_TYPE_HARDWARE, null)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ==================== Public API ====================
|
||||||
|
|
||||||
|
/**
 * Switch to the idle animation and request a frame. When the view is already
 * idle the state and the ripple-peak cursor are left untouched.
 */
fun setIdle() {
    val alreadyIdle = state is State.Idle
    if (!alreadyIdle) {
        state = State.Idle
        lastSpectroIdx = -1
    }
    scheduleFrame()
}
|
||||||
|
|
||||||
|
/**
 * Enter (or stay in) the listening state with the given microphone level.
 *
 * @param micRms microphone RMS; clamped to 0..1 before use.
 *
 * If the view is already listening, only the level is updated so the blob's
 * phase seed is preserved. Otherwise a new Listening state is created with a
 * seed taken from the low 16 bits of System.nanoTime().
 */
fun setListening(micRms: Float) {
    val level = micRms.coerceIn(0f, 1f)
    when (val current = state) {
        is State.Listening -> current.micRms = level
        else -> {
            val seed = (System.nanoTime() and 0xFFFF) / 65535f
            state = State.Listening(level, seed)
        }
    }
    scheduleFrame()
}
|
||||||
|
|
||||||
|
/**
 * Start rendering a spoken segment.
 *
 * @param envelope RMS envelope samples, spread evenly over [durationMs].
 * @param spectrogram per-time-step band energies; every row must contain at
 *   least SPECTRUM_BANDS values because drawSpeaking indexes that many bands.
 * @param durationMs playback duration of the segment in milliseconds.
 *
 * Unusable input (empty arrays, non-positive duration, or a spectrogram row
 * narrower than SPECTRUM_BANDS) falls back to the idle state instead of
 * letting the draw pass fail with an index error.
 */
fun startSpeaking(
    envelope: FloatArray,
    spectrogram: Array<FloatArray>,
    durationMs: Long
) {
    val unusable = envelope.isEmpty() || spectrogram.isEmpty() || durationMs <= 0 ||
        spectrogram.any { row -> row.size < SPECTRUM_BANDS }
    if (unusable) {
        setIdle()
        return
    }
    state = State.Speaking(envelope, spectrogram, durationMs, System.currentTimeMillis())
    lastSpectroIdx = -1
    // Reset band energies so the outline grows from rest rather than from
    // whatever a previous segment left in the smoothing buffer.
    smoothedBars.fill(0f)
    scheduleFrame()
}
|
||||||
|
|
||||||
|
// ==================== Lifecycle / scheduling ====================
|
||||||
|
|
||||||
|
/** Records the animation start time (System.nanoTime) and requests the first frame. */
override fun onAttachedToWindow() {
    super.onAttachedToWindow()
    System.nanoTime().also { attachedAtNs -> frameStartNs = attachedAtNs }
    scheduleFrame()
}
|
||||||
|
|
||||||
|
/**
 * Stops the frame loop when the view leaves the window.
 *
 * doFrame re-posts itself and leaves `frameScheduled` set to true, so the flag
 * must be cleared here along with the pending callback. Otherwise
 * scheduleFrame() would refuse to post after a later re-attach and the orb
 * would stay frozen.
 */
override fun onDetachedFromWindow() {
    super.onDetachedFromWindow()
    Choreographer.getInstance().removeFrameCallback(this)
    frameScheduled = false
}
|
||||||
|
|
||||||
|
private var frameScheduled = false
|
||||||
|
/**
 * Posts this view as a Choreographer frame callback unless one is already
 * pending or the view is not attached to a window.
 */
private fun scheduleFrame() {
    if (frameScheduled || !isAttachedToWindow) return
    frameScheduled = true
    Choreographer.getInstance().postFrameCallback(this)
}
|
||||||
|
|
||||||
|
/**
 * Choreographer tick: eases the palette, advances per-state animation values,
 * re-posts the next frame and invalidates the view.
 *
 * Scheduling per state:
 * - Idle: next frame after a 40 ms delay (about 25 fps).
 * - Listening: next display frame; the ring phase advances by 0.015 turns.
 * - Speaking: next display frame until 300 ms after the segment's duration,
 *   then the state falls back to Idle.
 *
 * @param frameTimeNanos frame timestamp supplied by Choreographer (unused).
 */
override fun doFrame(frameTimeNanos: Long) {
    frameScheduled = false

    // Ease the palette toward the target (voice change tween).
    currentCore = lerpColor(currentCore, targetCore, 0.12f)
    currentHalo = deriveHalo(currentCore)
    currentAccent = deriveAccent(currentCore)

    val choreographer = Choreographer.getInstance()
    when (val s = state) {
        is State.Idle -> choreographer.postFrameCallbackDelayed(this, 40)
        is State.Listening -> {
            // Keep the phase in [0, 1). It is only ever used as a fraction of a
            // turn, and an unbounded accumulator loses float precision once it
            // is multiplied by 360.
            listeningRingPhase = (listeningRingPhase + 0.015f) % 1f
            choreographer.postFrameCallback(this)
        }
        is State.Speaking -> {
            val elapsed = System.currentTimeMillis() - s.startedAtMs
            if (elapsed >= s.durationMs + 300) {
                state = State.Idle
                lastSpectroIdx = -1
                choreographer.postFrameCallbackDelayed(this, 40)
            } else {
                choreographer.postFrameCallback(this)
            }
        }
    }
    // Every branch above posted a callback.
    frameScheduled = true
    invalidate()
}
|
||||||
|
|
||||||
|
// ==================== Drawing ====================
|
||||||
|
|
||||||
|
/**
 * Renders the orb for the current state, centred in the view. The base radius
 * is 39% of the shorter side (78% diameter), leaving an 11% margin per side so
 * ripples and the ring are not clipped.
 */
override fun onDraw(canvas: Canvas) {
    super.onDraw(canvas)
    val viewW = width.toFloat()
    val viewH = height.toFloat()
    if (viewW <= 0f || viewH <= 0f) return

    val centerX = viewW / 2f
    val centerY = viewH / 2f
    val baseRadius = min(viewW, viewH) * 0.39f
    val nowMs = System.currentTimeMillis()

    when (val current = state) {
        is State.Speaking -> drawSpeaking(canvas, centerX, centerY, baseRadius, nowMs, current)
        is State.Listening -> drawListening(canvas, centerX, centerY, baseRadius, nowMs, current)
        is State.Idle -> drawIdle(canvas, centerX, centerY, baseRadius, nowMs)
    }
}
|
||||||
|
|
||||||
|
// ---------- Idle ----------
|
||||||
|
/**
 * Idle rendering: a round core that "breathes" between 0.88 and 1.00 of [maxR]
 * on a 5 s raised-cosine cycle, with a halo scaled in phase and a static
 * top-left highlight.
 *
 * [now] is wall-clock milliseconds while `frameStartNs` comes from
 * System.nanoTime(); their difference only fixes an arbitrary starting phase
 * for the cycle.
 */
private fun drawIdle(canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long) {
    val cycleMs = 5000L
    val attachMs = frameStartNs / 1_000_000
    val cyclePos = ((now - attachMs) % cycleMs) / 5000f
    val breath = 0.5f - 0.5f * cos((cyclePos * 2.0 * PI).toFloat()) // 0..1
    val scale = 0.88f + 0.12f * breath
    val radius = maxR * scale
    smoothedAmp += ((breath * 0.5f) - smoothedAmp) * 0.1f

    // Halo first, then the undeformed core on top of it.
    drawHalo(canvas, cx, cy, maxR * 1.15f * scale, alphaBase = 60, alphaGain = 70)
    drawCore(canvas, cx, cy, radius, shimmer = 0f)

    // Soft white highlight offset toward the top-left of the core.
    val highlightShader = RadialGradient(
        cx - radius * 0.25f, cy - radius * 0.25f, radius * 0.9f,
        Color.argb(60, 255, 255, 255),
        Color.argb(0, 255, 255, 255),
        Shader.TileMode.CLAMP
    )
    val highlightPaint = Paint(Paint.ANTI_ALIAS_FLAG)
    highlightPaint.style = Paint.Style.FILL
    highlightPaint.shader = highlightShader
    canvas.drawCircle(cx, cy, radius, highlightPaint)
}
|
||||||
|
|
||||||
|
// ---------- Listening ----------
|
||||||
|
/**
 * Listening rendering: halo, blob-shaped core clipped to the deformed outline,
 * outline stroke, rotating shimmer ring, and micro-ripples.
 *
 * Size, halo gain, outline width and ripple strength all scale with
 * `s.micRms`. Ripples are spawned only while the level exceeds 0.12 and only
 * during one 90 ms slot out of every three.
 */
private fun drawListening(
    canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, s: State.Listening
) {
    // Slightly larger than the idle orb so the state change is visible.
    val sizeFactor = 0.93f + 0.08f * s.micRms
    val orbRadius = maxR * sizeFactor
    smoothedAmp += (s.micRms - smoothedAmp) * 0.25f

    val haloGain = (130 * s.micRms).toInt().coerceIn(0, 160)
    drawHalo(canvas, cx, cy, maxR * 1.22f * sizeFactor, alphaBase = 90, alphaGain = haloGain)

    // Deformed outline shared by the clip, the fill and the stroke below.
    buildBlobPath(blobPath, cx, cy, orbRadius, s.micRms, s.phaseSeed, now)

    corePaint.shader = RadialGradient(
        cx - orbRadius * 0.15f, cy - orbRadius * 0.25f, orbRadius * 1.1f,
        currentCore, deriveCoreEdge(currentCore),
        Shader.TileMode.CLAMP
    )
    canvas.save()
    canvas.clipPath(blobPath)
    canvas.drawCircle(cx, cy, orbRadius * 1.3f, corePaint)
    canvas.restore()

    blobOutlinePaint.strokeWidth = 2f + 2f * s.micRms
    blobOutlinePaint.color = withAlpha(currentAccent, 180)
    canvas.drawPath(blobPath, blobOutlinePaint)

    drawListeningRing(canvas, cx, cy, orbRadius * 1.08f, s.micRms)

    val loudEnough = s.micRms > 0.12f
    val inEmitSlot = (now / 90) % 3 == 0L
    if (loudEnough && inEmitSlot) {
        ripples.add(Ripple(bornAtMs = now, peak = s.micRms))
    }
    drawRipples(canvas, cx, cy, maxR, now, listeningMode = true)
}
|
||||||
|
|
||||||
|
/**
 * Draws the rotating shimmer on the listening orb: a main arc plus a dimmer,
 * half-length tail arc that starts 8 degrees after it. Nothing is drawn when
 * [rms] is below 0.04. The start angle comes from `listeningRingPhase`
 * (fraction of a turn).
 */
private fun drawListeningRing(
    canvas: Canvas, cx: Float, cy: Float, radius: Float, rms: Float
) {
    if (rms < 0.04f) return

    val left = cx - radius
    val top = cy - radius
    val right = cx + radius
    val bottom = cy + radius
    val sweep = 60f + 80f * rms
    val start = (listeningRingPhase * 360f) % 360f

    ringPaint.strokeWidth = 2.5f + 3f * rms

    val mainAlpha = (140 + 110 * rms).toInt().coerceIn(0, 250)
    ringPaint.color = withAlpha(currentAccent, mainAlpha)
    canvas.drawArc(left, top, right, bottom, start, sweep, false, ringPaint)

    val tailAlpha = (60 + 60 * rms).toInt().coerceIn(0, 160)
    ringPaint.color = withAlpha(currentAccent, tailAlpha)
    canvas.drawArc(left, top, right, bottom, start + sweep + 8f, sweep * 0.5f, false, ringPaint)
}
|
||||||
|
|
||||||
|
// ---------- Speaking ----------
|
||||||
|
/**
 * Speaking rendering. In order:
 *  1. sample the RMS envelope at the elapsed playback position and smooth it
 *     into `smoothedAmp`;
 *  2. sample the spectrogram the same way and smooth each band into
 *     `smoothedBars`;
 *  3. draw the halo, spawn a ripple on a local envelope peak above 0.45, and
 *     draw live ripples;
 *  4. build the deformed outline, fill it, add a clipped highlight, and stroke
 *     the outline.
 *
 * Both sidecars are linearly interpolated between neighbouring samples.
 */
private fun drawSpeaking(
    canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, s: State.Speaking
) {
    val elapsed = now - s.startedAtMs

    // --- 1. envelope ---
    val envelope = s.envelope
    val lastEnv = envelope.size - 1
    val envPos = elapsed.toFloat() * envelope.size / s.durationMs
    val envIdx = envPos.toInt().coerceIn(0, lastEnv)
    val envFrac = (envPos - envIdx).coerceIn(0f, 1f)
    val env = lerp(envelope[envIdx], envelope[min(envIdx + 1, lastEnv)], envFrac)
    smoothedAmp += (env - smoothedAmp) * 0.30f

    // --- 2. per-band energies (consumed by buildSpeakingBlobPath) ---
    val frames = s.spectrogram
    val lastFrame = frames.size - 1
    val framePos = elapsed.toFloat() * frames.size / s.durationMs
    val frameIdx = framePos.toInt().coerceIn(0, lastFrame)
    val frameFrac = (framePos - frameIdx).coerceIn(0f, 1f)
    val bandsNow = frames[frameIdx]
    val bandsNext = frames[min(frameIdx + 1, lastFrame)]
    for (band in 0 until SPECTRUM_BANDS) {
        val target = lerp(bandsNow[band], bandsNext[band], frameFrac)
        smoothedBars[band] += (target - smoothedBars[band]) * 0.35f
    }

    val sizeFactor = 0.92f + 0.14f * smoothedAmp
    val orbRadius = maxR * sizeFactor

    // --- 3. halo and ripples ---
    val haloGain = (160 * smoothedAmp).toInt().coerceIn(0, 220)
    drawHalo(canvas, cx, cy, maxR * 1.30f * sizeFactor, alphaBase = 90, alphaGain = haloGain)

    // At most one peak test per envelope sample; missing neighbours count as 0.
    if (envIdx != lastSpectroIdx && env > 0.45f) {
        val before = envelope.getOrElse(envIdx - 1) { 0f }
        val after = envelope.getOrElse(envIdx + 1) { 0f }
        if (env >= before && env >= after) {
            ripples.add(Ripple(bornAtMs = now, peak = env))
        }
        lastSpectroIdx = envIdx
    }
    drawRipples(canvas, cx, cy, maxR, now, listeningMode = false)

    // --- 4. deformed sphere: fill, highlight, outline ---
    buildSpeakingBlobPath(spherePath, cx, cy, orbRadius, now)

    corePaint.shader = RadialGradient(
        cx - orbRadius * 0.25f, cy - orbRadius * 0.30f, orbRadius * 1.25f,
        currentCore, deriveCoreEdge(currentCore),
        Shader.TileMode.CLAMP
    )
    canvas.drawPath(spherePath, corePaint)

    canvas.save()
    canvas.clipPath(spherePath)
    val glossPaint = Paint(Paint.ANTI_ALIAS_FLAG)
    glossPaint.style = Paint.Style.FILL
    glossPaint.shader = RadialGradient(
        cx - orbRadius * 0.28f, cy - orbRadius * 0.30f, orbRadius * 0.9f,
        Color.argb(75, 255, 255, 255),
        Color.argb(0, 255, 255, 255),
        Shader.TileMode.CLAMP
    )
    canvas.drawCircle(cx, cy, orbRadius * 1.2f, glossPaint)
    canvas.restore()

    // Outline thickness follows the smoothed amplitude.
    blobOutlinePaint.strokeWidth = 2.5f + 3.5f * smoothedAmp
    blobOutlinePaint.color = withAlpha(currentAccent, 230)
    canvas.drawPath(spherePath, blobOutlinePaint)
}
|
||||||
|
|
||||||
|
/**
 * Build the speaking-state perimeter into [path]: a circle of [radius] around
 * ([cx], [cy]) displaced radially by a sum of sine modes, one per spectrogram
 * band. Band `b` drives angular mode `b + 2`, so modes 0 and 1 are never
 * excited. Each mode's phase drifts with time at `0.45 + 0.22 * b` rad/s, so
 * higher bands move faster.
 *
 * Displacement per band is `0.22 * radius * smoothedBars[b] * envWeight`,
 * where `envWeight` is `0.5 + 0.5 * smoothedAmp` clamped to 0..1. The outline
 * is sampled at 96 points and closed.
 *
 * @param now wall-clock time in milliseconds; only used as the phase clock.
 *
 * The phase arithmetic is done in Double. [now] is an epoch timestamp, and as
 * a Float its seconds value has a resolution of roughly two minutes, which
 * would both freeze the drift and swallow the `mode * theta` term.
 */
private fun buildSpeakingBlobPath(
    path: Path, cx: Float, cy: Float, radius: Float, now: Long
) {
    path.rewind()
    val steps = 96
    val tSec = now / 1000.0
    val modeGain = radius * 0.22f
    // Envelope weight: quieter passages deform the outline less.
    val envWeight = (0.5f + 0.5f * smoothedAmp).coerceIn(0f, 1f)
    val fullTurn = 2.0 * PI

    for (i in 0..steps) {
        // i == steps wraps to angle 0 so the last point meets the first.
        val theta = (i % steps).toDouble() / steps * fullTurn
        var wobble = 0.0
        for (b in 0 until SPECTRUM_BANDS) {
            val mode = b + 2
            val phase = tSec * (0.45 + 0.22 * b)
            wobble += smoothedBars[b] * sin(mode * theta + phase)
        }
        val r = radius + (modeGain * envWeight * wobble).toFloat()
        val x = cx + r * cos(theta).toFloat()
        val y = cy + r * sin(theta).toFloat()
        if (i == 0) path.moveTo(x, y) else path.lineTo(x, y)
    }
    path.close()
}
|
||||||
|
|
||||||
|
|
||||||
|
// ---------- Helpers: halo / ripples / blob ----------
|
||||||
|
/**
 * Draws a radial glow of radius [r] centred on ([cx], [cy]). It fades from
 * the halo colour at alpha `alphaBase + alphaGain` (clamped to 0..255) at the
 * centre to fully transparent at the edge.
 */
private fun drawHalo(
    canvas: Canvas, cx: Float, cy: Float, r: Float,
    alphaBase: Int, alphaGain: Int
) {
    val centerAlpha = (alphaBase + alphaGain).coerceIn(0, 255)
    val gradientColors = intArrayOf(
        withAlpha(currentHalo, centerAlpha),
        withAlpha(currentHalo, 0)
    )
    val gradientStops = floatArrayOf(0f, 1f)
    haloPaint.shader = RadialGradient(cx, cy, r, gradientColors, gradientStops, Shader.TileMode.CLAMP)
    canvas.drawCircle(cx, cy, r, haloPaint)
}
|
||||||
|
|
||||||
|
/**
 * Fills a circle of [radius] with a radial gradient from the core colour to
 * its darker edge colour, with the gradient centre offset up and to the left.
 *
 * @param shimmer accepted for call-site compatibility; not used here.
 */
private fun drawCore(canvas: Canvas, cx: Float, cy: Float, radius: Float, shimmer: Float) {
    val gradientCx = cx - radius * 0.2f
    val gradientCy = cy - radius * 0.3f
    val edgeColor = deriveCoreEdge(currentCore)
    corePaint.shader = RadialGradient(
        gradientCx, gradientCy, radius * 1.15f,
        currentCore, edgeColor,
        Shader.TileMode.CLAMP
    )
    canvas.drawCircle(cx, cy, radius, corePaint)
}
|
||||||
|
|
||||||
|
/**
 * Drops expired ripples and draws the remaining ones as expanding, fading
 * rings. A ripple lives 700 ms in listening mode and 900 ms otherwise. Over
 * its life the ring grows from 0.58 to 1.20 of [maxR], while its alpha and
 * stroke width shrink.
 */
private fun drawRipples(
    canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, listeningMode: Boolean
) {
    if (ripples.isEmpty()) return
    val lifetimeMs = if (listeningMode) 700f else 900f

    // Expired ripples are removed first; survivors keep their order.
    ripples.removeAll { ripple -> (now - ripple.bornAtMs) / lifetimeMs >= 1f }

    for (ripple in ripples) {
        val age = (now - ripple.bornAtMs) / lifetimeMs
        val fade = 1f - age
        val ringRadius = maxR * (0.58f + 0.62f * age)
        val ringAlpha = (fade * 150f * ripple.peak).toInt().coerceIn(0, 200)
        ripplePaint.color = withAlpha(currentAccent, ringAlpha)
        ripplePaint.strokeWidth = max(1.2f, fade * 4f)
        canvas.drawCircle(cx, cy, ringRadius, ripplePaint)
    }
}
|
||||||
|
|
||||||
|
/**
 * Build an organic blob outline into [path] by displacing a circle of
 * [radius] around ([cx], [cy]) with BLOB_MODES sine modes. Mode `m` has
 * amplitude `amp / m`, where `amp = radius * (0.02 + 0.08 * rms)`. Its phase
 * is `phaseSeed * 6.28` plus a drift of `0.3 + 0.05 * m` rad/s. The outline
 * is sampled at 72 points and closed.
 *
 * @param rms current level; scales the displacement amplitude.
 * @param phaseSeed per-session offset applied to every mode's phase.
 * @param now wall-clock time in milliseconds; only used as the phase clock.
 *
 * The phase arithmetic is done in Double. [now] is an epoch timestamp, and as
 * a Float its seconds value has a resolution of roughly two minutes, which
 * would both freeze the drift and swallow the `m * theta` term.
 */
private fun buildBlobPath(
    path: Path, cx: Float, cy: Float, radius: Float,
    rms: Float, phaseSeed: Float, now: Long
) {
    path.rewind()
    val steps = 72
    val tSec = now / 1000.0
    val amp = radius * (0.02f + 0.08f * rms)
    val seedPhase = phaseSeed * 6.28f
    val fullTurn = 2.0 * PI

    for (i in 0..steps) {
        // i == steps wraps to angle 0 so the last point meets the first.
        val theta = (i % steps).toDouble() / steps * fullTurn
        var wobble = 0.0
        for (m in 1..BLOB_MODES) {
            val phase = seedPhase + tSec * (0.3 + 0.05 * m)
            wobble += sin(m * theta + phase) / m
        }
        val r = radius + (amp * wobble).toFloat()
        val x = cx + r * cos(theta).toFloat()
        val y = cy + r * sin(theta).toFloat()
        if (i == 0) path.moveTo(x, y) else path.lineTo(x, y)
    }
    path.close()
}
|
||||||
|
|
||||||
|
// ---------- Color helpers ----------
|
||||||
|
private fun deriveHalo(core: Int): Int = darken(core, 0.18f)
|
||||||
|
private fun deriveAccent(core: Int): Int = brighten(core, 0.12f)
|
||||||
|
private fun deriveCoreEdge(core: Int): Int = darken(core, 0.12f)
|
||||||
|
|
||||||
|
/**
 * Returns [c] with each RGB channel moved toward 255 by the fraction [frac];
 * the alpha channel is preserved. Channels are clamped to 0..255.
 */
private fun brighten(c: Int, frac: Float): Int {
    fun lift(channel: Int): Int =
        (channel + (255 - channel) * frac).toInt().coerceIn(0, 255)

    return Color.argb(
        Color.alpha(c),
        lift(Color.red(c)),
        lift(Color.green(c)),
        lift(Color.blue(c))
    )
}
|
||||||
|
|
||||||
|
/**
 * Returns [c] with each RGB channel scaled by `1 - frac`; the alpha channel is
 * preserved. Channels are clamped to 0..255.
 */
private fun darken(c: Int, frac: Float): Int {
    fun dim(channel: Int): Int =
        (channel * (1 - frac)).toInt().coerceIn(0, 255)

    return Color.argb(
        Color.alpha(c),
        dim(Color.red(c)),
        dim(Color.green(c)),
        dim(Color.blue(c))
    )
}
|
||||||
|
|
||||||
|
/** Returns the RGB of [c] combined with [alpha], which is clamped to 0..255. */
private fun withAlpha(c: Int, alpha: Int): Int {
    val clampedAlpha = alpha.coerceIn(0, 255)
    return Color.argb(clampedAlpha, Color.red(c), Color.green(c), Color.blue(c))
}
|
||||||
|
|
||||||
|
/** Linear interpolation: returns [a] at `t = 0` and [b] at `t = 1`. */
private fun lerp(a: Float, b: Float, t: Float): Float {
    val span = b - a
    return a + span * t
}
|
||||||
|
|
||||||
|
/**
 * Moves each RGB channel of [from] toward [to] by the fraction [t] and returns
 * the result as a fully opaque colour (alpha is always 255).
 *
 * The interpolated value is truncated to an integer. With a small [t] that
 * truncation would stop a rising channel a few levels short of the target
 * forever, so when `t > 0` and truncation produced no movement the channel is
 * nudged one level toward [to]. Repeated calls therefore always reach [to].
 */
private fun lerpColor(from: Int, to: Int, t: Float): Int {
    fun ease(start: Int, end: Int): Int {
        val eased = lerp(start.toFloat(), end.toFloat(), t).toInt()
        val stalled = t > 0f && eased == start && start != end
        val next = if (stalled) start + (if (end > start) 1 else -1) else eased
        return next.coerceIn(0, 255)
    }

    return Color.argb(
        255,
        ease(Color.red(from), Color.red(to)),
        ease(Color.green(from), Color.green(to)),
        ease(Color.blue(from), Color.blue(to))
    )
}
|
||||||
|
|
||||||
|
private class Ripple(val bornAtMs: Long, val peak: Float)
|
||||||
|
}
|
||||||
|
|
@ -187,6 +187,21 @@ class ChatActivity : AppCompatActivity() {
|
||||||
"Amir", "Didier", "Sid", "Zelda"
|
"Amir", "Didier", "Sid", "Zelda"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/** One color per speaker — derived palette (core + halo + bars) is
|
||||||
|
* generated inside AudioVisualizerView. Chosen to be calm,
|
||||||
|
* perceptually distinct, and consistent in saturation so switching
|
||||||
|
* voices changes *hue* rather than *mood*. */
|
||||||
|
private val voiceColors = listOf(
|
||||||
|
0xFFBCA4E8.toInt(), // Damien — lavender
|
||||||
|
0xFFE8A4CC.toInt(), // Elodie — rose
|
||||||
|
0xFF82D5D0.toInt(), // Jerome — aqua
|
||||||
|
0xFFE8BFA4.toInt(), // Richard — amber sand
|
||||||
|
0xFF95D5A6.toInt(), // Amir — emerald
|
||||||
|
0xFF8FA2D4.toInt(), // Didier — indigo
|
||||||
|
0xFFE8B89A.toInt(), // Sid — peach
|
||||||
|
0xFFA4BEE8.toInt() // Zelda — periwinkle
|
||||||
|
)
|
||||||
|
|
||||||
private fun setupResourceMonitoring() {
|
private fun setupResourceMonitoring() {
|
||||||
val graphCpu = findViewById<MiniGraphView>(R.id.graphCpu)
|
val graphCpu = findViewById<MiniGraphView>(R.id.graphCpu)
|
||||||
val graphGpu = findViewById<MiniGraphView>(R.id.graphGpu)
|
val graphGpu = findViewById<MiniGraphView>(R.id.graphGpu)
|
||||||
|
|
@ -254,6 +269,12 @@ class ChatActivity : AppCompatActivity() {
|
||||||
override fun onItemSelected(parent: AdapterView<*>?, view: android.view.View?, pos: Int, id: Long) {
|
override fun onItemSelected(parent: AdapterView<*>?, view: android.view.View?, pos: Int, id: Long) {
|
||||||
val voicePath = "${com.kazeia.KazeiaApplication.MODELS_DIR}/../voix/${voiceFiles[pos]}"
|
val voicePath = "${com.kazeia.KazeiaApplication.MODELS_DIR}/../voix/${voiceFiles[pos]}"
|
||||||
kazeiaService?.setVoice(voicePath)
|
kazeiaService?.setVoice(voicePath)
|
||||||
|
// Push the matching color to the service so the orb
|
||||||
|
// view picks it up; the view tweens from the previous
|
||||||
|
// color so voice changes don't snap visually.
|
||||||
|
val color = voiceColors[pos.coerceIn(voiceColors.indices)]
|
||||||
|
kazeiaService?.setVoiceColor(color)
|
||||||
|
binding.audioViz.setVoiceColor(color)
|
||||||
appendLog("Voix: ${voiceNames[pos]}")
|
appendLog("Voix: ${voiceNames[pos]}")
|
||||||
}
|
}
|
||||||
override fun onNothingSelected(parent: AdapterView<*>?) {}
|
override fun onNothingSelected(parent: AdapterView<*>?) {}
|
||||||
|
|
@ -326,6 +347,43 @@ class ChatActivity : AppCompatActivity() {
|
||||||
setDebugPanelVisible(debug)
|
setDebugPanelVisible(debug)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
launch {
|
||||||
|
// Drive the orb visualizer from the service-side signal.
|
||||||
|
// Service decides whether the app is idle, tracking the
|
||||||
|
// mic, or rendering a TTS segment; the view just renders
|
||||||
|
// it. StartSpeaking is edge-triggered on the envelope
|
||||||
|
// identity so re-emitting the same signal won't restart
|
||||||
|
// the animation timer.
|
||||||
|
var lastSpeakingEnv: FloatArray? = null
|
||||||
|
service.visualizerSignal.collect { sig ->
|
||||||
|
when (sig) {
|
||||||
|
is com.kazeia.service.KazeiaService.VisualizerSignal.Idle -> {
|
||||||
|
binding.audioViz.setIdle()
|
||||||
|
lastSpeakingEnv = null
|
||||||
|
}
|
||||||
|
is com.kazeia.service.KazeiaService.VisualizerSignal.Listening -> {
|
||||||
|
binding.audioViz.setListening(sig.micRms)
|
||||||
|
lastSpeakingEnv = null
|
||||||
|
}
|
||||||
|
is com.kazeia.service.KazeiaService.VisualizerSignal.Speaking -> {
|
||||||
|
if (sig.rmsEnvelope !== lastSpeakingEnv) {
|
||||||
|
binding.audioViz.startSpeaking(
|
||||||
|
sig.rmsEnvelope, sig.spectrogram, sig.durationMs
|
||||||
|
)
|
||||||
|
lastSpeakingEnv = sig.rmsEnvelope
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
launch {
|
||||||
|
// Keep the view's voice color synchronised with the
|
||||||
|
// service — covers the initial state when the view
|
||||||
|
// attaches before the spinner's first callback fires.
|
||||||
|
service.voiceColor.collect { color ->
|
||||||
|
binding.audioViz.setVoiceColor(color)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,17 +18,12 @@ class ResourceMonitor(private val context: Context) {
|
||||||
private var prevIdle = 0L
|
private var prevIdle = 0L
|
||||||
private var prevGpuBusy = 0L
|
private var prevGpuBusy = 0L
|
||||||
private var prevGpuTotal = 0L
|
private var prevGpuTotal = 0L
|
||||||
private var hasRoot = false
|
|
||||||
|
|
||||||
init {
|
// No-root deployment (2026-04-14): the previous `su -c id` probe used to
|
||||||
// Test root access once
|
// enable GPU/NPU sysfs reads via root, but it also triggered a Magisk
|
||||||
hasRoot = try {
|
// prompt on every ChatActivity launch. The whole pipeline now runs in
|
||||||
val p = Runtime.getRuntime().exec(arrayOf("su", "-c", "id"))
|
// the app process so root is never needed — GPU/NPU usage is reported
|
||||||
val result = p.inputStream.bufferedReader().readText()
|
// as -1 (UI shows "—") and the dashboard shows CPU + RAM only.
|
||||||
p.waitFor()
|
|
||||||
result.contains("uid=0")
|
|
||||||
} catch (_: Exception) { false }
|
|
||||||
}
|
|
||||||
|
|
||||||
fun snapshot(): ResourceSnapshot {
|
fun snapshot(): ResourceSnapshot {
|
||||||
return ResourceSnapshot(
|
return ResourceSnapshot(
|
||||||
|
|
@ -67,7 +62,9 @@ class ResourceMonitor(private val context: Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readGpu(): Float {
|
private fun readGpu(): Float {
|
||||||
// Try direct read first (works on some devices)
|
// Non-root path: some devices expose /sys/class/kgsl/kgsl-3d0/gpubusy
|
||||||
|
// as world-readable. If it's locked down (most SELinux configs do),
|
||||||
|
// just return -1 — no root fallback, no Magisk prompt.
|
||||||
try {
|
try {
|
||||||
val content = File("/sys/class/kgsl/kgsl-3d0/gpubusy").readText().trim()
|
val content = File("/sys/class/kgsl/kgsl-3d0/gpubusy").readText().trim()
|
||||||
val parts = content.split("\\s+".toRegex())
|
val parts = content.split("\\s+".toRegex())
|
||||||
|
|
@ -81,38 +78,14 @@ class ResourceMonitor(private val context: Context) {
|
||||||
if (dt > 0) return (db * 100f / dt).coerceIn(0f, 100f)
|
if (dt > 0) return (db * 100f / dt).coerceIn(0f, 100f)
|
||||||
}
|
}
|
||||||
} catch (_: Exception) {}
|
} catch (_: Exception) {}
|
||||||
|
|
||||||
// Try with root
|
|
||||||
if (hasRoot) {
|
|
||||||
try {
|
|
||||||
val content = execRoot("cat /sys/class/kgsl/kgsl-3d0/gpu_busy_percentage").trim()
|
|
||||||
val pct = content.replace("%", "").trim().toFloatOrNull()
|
|
||||||
if (pct != null) return pct.coerceIn(0f, 100f)
|
|
||||||
} catch (_: Exception) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1f
|
return -1f
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readNpu(): Float {
|
private fun readNpu(): Float {
|
||||||
// NPU doesn't have a standard busy metric
|
// NPU usage reporting required root sysfs reads (cdsp_rm/cpu_vote,
|
||||||
// Use CDSP (compute DSP) load as proxy if available
|
// /proc/fastrpc) that always triggered a Magisk prompt. Removed with
|
||||||
if (hasRoot) {
|
// the no-root migration — no equivalent public API exists, so the
|
||||||
try {
|
// UI just shows "—" for NPU load.
|
||||||
// Check if CDSP is active by reading vote count
|
|
||||||
val vote = execRoot("cat /sys/bus/platform/devices/soc:qcom,msm-cdsp-rm/cdsp_rm/cpu_vote 2>/dev/null").trim()
|
|
||||||
if (vote.isNotEmpty()) {
|
|
||||||
val v = vote.toIntOrNull() ?: 0
|
|
||||||
return if (v > 0) 100f else 0f
|
|
||||||
}
|
|
||||||
} catch (_: Exception) {}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Alternative: check fastrpc activity
|
|
||||||
val stat = execRoot("cat /proc/fastrpc 2>/dev/null || echo none").trim()
|
|
||||||
if (stat != "none" && stat.isNotEmpty()) return 50f
|
|
||||||
} catch (_: Exception) {}
|
|
||||||
}
|
|
||||||
return -1f
|
return -1f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -134,12 +107,4 @@ class ResourceMonitor(private val context: Context) {
|
||||||
} catch (_: Exception) { return 0 }
|
} catch (_: Exception) { return 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun execRoot(cmd: String): String {
|
|
||||||
return try {
|
|
||||||
val p = Runtime.getRuntime().exec(arrayOf("su", "-c", cmd))
|
|
||||||
val result = p.inputStream.bufferedReader().readText()
|
|
||||||
p.waitFor()
|
|
||||||
result
|
|
||||||
} catch (_: Exception) { "" }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -100,6 +100,23 @@
|
||||||
|
|
||||||
</LinearLayout>
|
</LinearLayout>
|
||||||
|
|
||||||
|
<!-- Central orb visualizer: Kazeia's visual "face". Takes the
|
||||||
|
top half of the chat area so it reads as the primary UI
|
||||||
|
element; the message list sits below it and shows the
|
||||||
|
word-by-word reveal of the current reply. Color is driven
|
||||||
|
by the selected voice (Damien=lavender, Elodie=rose, …). -->
|
||||||
|
<com.kazeia.ui.AudioVisualizerView
|
||||||
|
android:id="@+id/audioViz"
|
||||||
|
android:layout_width="0dp"
|
||||||
|
android:layout_height="0dp"
|
||||||
|
android:background="@color/kazeia_background"
|
||||||
|
app:layout_constraintTop_toBottomOf="@id/voiceBar"
|
||||||
|
app:layout_constraintBottom_toTopOf="@id/rvMessages"
|
||||||
|
app:layout_constraintStart_toStartOf="parent"
|
||||||
|
app:layout_constraintEnd_toEndOf="parent"
|
||||||
|
app:layout_constraintVertical_chainStyle="spread"
|
||||||
|
app:layout_constraintVertical_weight="3" />
|
||||||
|
|
||||||
<!-- Chat messages -->
|
<!-- Chat messages -->
|
||||||
<androidx.recyclerview.widget.RecyclerView
|
<androidx.recyclerview.widget.RecyclerView
|
||||||
android:id="@+id/rvMessages"
|
android:id="@+id/rvMessages"
|
||||||
|
|
@ -107,10 +124,11 @@
|
||||||
android:layout_height="0dp"
|
android:layout_height="0dp"
|
||||||
android:clipToPadding="false"
|
android:clipToPadding="false"
|
||||||
android:padding="8dp"
|
android:padding="8dp"
|
||||||
app:layout_constraintTop_toBottomOf="@id/voiceBar"
|
app:layout_constraintTop_toBottomOf="@id/audioViz"
|
||||||
app:layout_constraintBottom_toTopOf="@id/inputBar"
|
app:layout_constraintBottom_toTopOf="@id/inputBar"
|
||||||
app:layout_constraintStart_toStartOf="parent"
|
app:layout_constraintStart_toStartOf="parent"
|
||||||
app:layout_constraintEnd_toEndOf="parent" />
|
app:layout_constraintEnd_toEndOf="parent"
|
||||||
|
app:layout_constraintVertical_weight="2" />
|
||||||
|
|
||||||
<!-- Input bar -->
|
<!-- Input bar -->
|
||||||
<LinearLayout
|
<LinearLayout
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Kazeia Android — Problème d'élimination de root pour le LLM
|
# Kazeia Android — Élimination du root pour le LLM (résolu)
|
||||||
|
|
||||||
**Date :** 2026-04-14
|
**Date :** 2026-04-14
|
||||||
**Device :** OnePlus Pad 3 (OPD2415, Snapdragon 8 Elite, SoC `sun`), Android 16 (OxygenOS), Magisk root
|
**Device :** OnePlus Pad 3 (OPD2415, Snapdragon 8 Elite, SoC `sun`), Android 16 (OxygenOS), Magisk root
|
||||||
|
|
@ -6,6 +6,13 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
> **🟢 Statut : RÉSOLU.** Pipeline complet STT + LLM + TTS tourne in-process sans
|
||||||
|
> aucun appel à `su`. Voir la section **Résolution** en bas du document pour le
|
||||||
|
> détail du fix. Le reste du document décrit l'investigation initiale et garde
|
||||||
|
> sa valeur historique.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 1. Contexte général
|
## 1. Contexte général
|
||||||
|
|
||||||
L'app Kazeia (Android / Kotlin + Jetpack Compose) orchestre un pipeline **STT → LLM → TTS** entièrement on-device sur le Hexagon HTP (V79) du Snapdragon 8 Elite.
|
L'app Kazeia (Android / Kotlin + Jetpack Compose) orchestre un pipeline **STT → LLM → TTS** entièrement on-device sur le Hexagon HTP (V79) du Snapdragon 8 Elite.
|
||||||
|
|
@ -224,3 +231,132 @@ Je cherche soit :
|
||||||
- Soit **la confirmation** que l'approche actuelle (root + Magisk remember) est le meilleur compromis accessible, avec éventuellement des suggestions pour minimiser les prompts
|
- Soit **la confirmation** que l'approche actuelle (root + Magisk remember) est le meilleur compromis accessible, avec éventuellement des suggestions pour minimiser les prompts
|
||||||
|
|
||||||
Merci.
|
Merci.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Résolution (post-mortem)
|
||||||
|
|
||||||
|
Une seconde opinion technique a identifié la **vraie cause racine** que
|
||||||
|
l'investigation locale avait mal diagnostiquée.
|
||||||
|
|
||||||
|
### 10.1 Vraie cause
|
||||||
|
|
||||||
|
Les processus Android forkés par Zygote (l'app elle-même, ses Services
|
||||||
|
`android:process=":xxx"`, etc.) héritent des **GIDs supplémentaires**
|
||||||
|
configurés à l'init pour `untrusted_app`. Ces GIDs incluent l'autorisation
|
||||||
|
d'accéder à `/dev/cdsprpc-smd` et à d'autres canaux fastrpc.
|
||||||
|
|
||||||
|
Quand `Runtime.exec("su"…)` ou `ProcessBuilder` font un `fork()` + `exec()`
|
||||||
|
classique, le `exec()` ne préserve pas tous les credentials utilisés par le
|
||||||
|
driver fastrpc Qualcomm pour authentifier le client. Le driver retourne
|
||||||
|
**error 4000 "Failed to load skel"** car il refuse de créer une session DSP
|
||||||
|
pour ce process.
|
||||||
|
|
||||||
|
C'est pour ça que :
|
||||||
|
- ORT-QNN (Whisper) marchait in-process : chargé via `System.loadLibrary` dans
|
||||||
|
l'app, qui est Zygote-forked → credentials valides.
|
||||||
|
- `su -c qnn_llama_runner` marchait : root bypasse les checks fastrpc.
|
||||||
|
- `ProcessBuilder` du même runner échouait : ni Zygote-forked, ni root.
|
||||||
|
|
||||||
|
Le "conflit de version QNN v2.31 vs v2.37" que j'avais soupçonné n'était
|
||||||
|
**pas le vrai problème**. Les libs étaient déjà unifiées en v2.42 dans jniLibs.
|
||||||
|
|
||||||
|
### 10.2 La solution : `LlmModule` JNI in-process
|
||||||
|
|
||||||
|
ExecuTorch fournit `org.pytorch.executorch.extension.llm.LlmModule`, un
|
||||||
|
wrapper JNI autour du même C++ `example::Runner` que le binaire
|
||||||
|
`qnn_llama_runner`. En l'invoquant depuis l'app (process Zygote-forked), le
|
||||||
|
DSP fastrpc accepte la session — pas de root nécessaire.
|
||||||
|
|
||||||
|
### 10.3 Étapes réelles du fix
|
||||||
|
|
||||||
|
1. **Build ExecuTorch Android** avec `EXECUTORCH_BUILD_LLAMA_JNI=ON`,
|
||||||
|
`EXECUTORCH_BUILD_QNN=ON`, `QNN_SDK_ROOT=/opt/Kazeia/qnn_sdk_242/qairt/2.42.0.251225` →
|
||||||
|
produit `libexecutorch_jni.so` 192 MB qui inclut le runner LLM + le backend QNN.
|
||||||
|
2. **Patches sources** dans `/opt/Kazeia/executorch-patches/llm_in_process_jni.patch` :
|
||||||
|
- `backends/qualcomm/CMakeLists.txt` : gate `PyQnnManagerAdaptor` sur `NOT ANDROID`
|
||||||
|
(le guard original sur `CMAKE_SYSTEM_PROCESSOR MATCHES x86_64` se déclenche
|
||||||
|
dans des sous-scopes du cross-compile Android).
|
||||||
|
- `extension/android/jni/jni_layer_llama.cpp`, branche `MODEL_TYPE_QNN_LLAMA` :
|
||||||
|
- `decoder_model = "qwen3"` (au lieu de `"llama3"` hardcodé)
|
||||||
|
- `temperature = 0.0f`, `eval_mode = 0` (kKVCached), `shared_buffer = true`
|
||||||
|
- **Crucial** : choisir `Runner<uint8_t>` ou `Runner<uint16_t>` selon
|
||||||
|
`module->get("get_kv_io_bit_width")` (mirror du `qnn_llama_runner.cpp main()`).
|
||||||
|
Hardcoder la mauvaise largeur produit du gibberish déterministe
|
||||||
|
comme `blocked罩ug darkestSOLEQuotes作者本人 humanity` — la KV cache
|
||||||
|
est lue/écrite à la mauvaise largeur de byte.
|
||||||
|
3. **Bundling jniLibs** :
|
||||||
|
- `libexecutorch.so` / `libexecutorch_jni.so` (build du 13 avril avec LlmModule)
|
||||||
|
- `libqnn_executorch_backend.so` (assorti)
|
||||||
|
- `libQnnHtp.so`, `libQnnHtpPrepare.so`, `libQnnHtpV79Stub.so`, `libQnnSystem.so`,
|
||||||
|
`libQnnHtpV79Skel.so` (tous v2.42 depuis `/opt/Kazeia/qnn_sdk_242/`)
|
||||||
|
4. **JAR avec `LlmModule.class`** : compilation manuelle via `javac` (le build
|
||||||
|
gradle de l'AAR demandait android-34 platform non installée).
|
||||||
|
5. **Réécriture `ExecuTorchLlmEngine.kt`** :
|
||||||
|
- Constructeur : `LlmModule(MODEL_TYPE_QNN_LLAMA=4, ptePath, tokPath, 0.7f)` puis `.load()`
|
||||||
|
- `generate(prompt, seqLen, callback, echo=false)` — sinon le callback échoue à
|
||||||
|
stripper les tokens du prompt
|
||||||
|
- Template ChatML Qwen3 buildé en Kotlin, mirror exact de
|
||||||
|
`qnn_llama_runner.cpp::get_formatted_prompt()` pour `kQwen3` (user-first puis
|
||||||
|
system optionnel puis `<|im_start|>assistant`)
|
||||||
|
- Filtre inline `<think>…</think>` dans le callback avec lookahead pour les tags
|
||||||
|
fragmentés sur plusieurs pieces
|
||||||
|
|
||||||
|
### 10.4 Métriques validées
|
||||||
|
|
||||||
|
| Métrique | Valeur |
|
||||||
|
|---|---|
|
||||||
|
| LlmModule.load() | 4.2 s (one-time à l'init de l'app) |
|
||||||
|
| LLM gen | ~17 tok/s (kv-only) |
|
||||||
|
| LLM TTFT | ~4 s pour 77 tokens prompt (prefill séquentiel kKVCached) |
|
||||||
|
| TTS Talker(PTE) | 37 ms/step (vs 45-65 avant) |
|
||||||
|
| TTS CP(PTE) | 73 ms/step |
|
||||||
|
| Pipeline e2e | "Bonjour, comment vas-tu ?" → audio en ~7 s |
|
||||||
|
| Magisk prompts | **0** |
|
||||||
|
|
||||||
|
### 10.5 Optimisations restantes (non bloquantes)
|
||||||
|
|
||||||
|
- **TTFT** : ré-exporter le `.pte` en `--model_mode hybrid` pour avoir un
|
||||||
|
`prefill_forward` parallèle → TTFT passerait de ~4 s à <1 s. Pas nécessaire
|
||||||
|
pour le use case conversationnel actuel.
|
||||||
|
- **Cosmétique** : le statusbar de l'app affiche encore "Hexagon NPU" pour le
|
||||||
|
TTS alors que c'est désormais le chemin .pte (label hérité du temps où c'était
|
||||||
|
ggml-hexagon).
|
||||||
|
|
||||||
|
### 10.6 Mémoire projet
|
||||||
|
|
||||||
|
État complet documenté dans
|
||||||
|
`/home/alf/.claude/projects/-opt-Kazeia/memory/project_llm_npu_plan.md`.
|
||||||
|
Backup git : branche `backup/pre-no-root-migration` + commit `6e6a2d9`.
|
||||||
|
Backup disk : `/home/alf/kazeia_backup_20260414/`.
|
||||||
|
|
||||||
|
### 10.7 Commits clés
|
||||||
|
|
||||||
|
- `f32b5dd` (LLM no-root: validate end-to-end pipeline, fix kv_io_bit_width detection)
|
||||||
|
- `b57719f` (LLM: filter <think> tokens out of the streaming TTS path)
|
||||||
|
|
||||||
|
### 10.8 Comparaison de performances avant/après
|
||||||
|
|
||||||
|
Mesurée le 2026-04-14 sur le même `.pte` Qwen3-4B avec le même runner C++ —
|
||||||
|
seule la voie d'invocation change (subprocess `su -c` vs `LlmModule` JNI
|
||||||
|
in-process).
|
||||||
|
|
||||||
|
| Métrique | Avant (su-c subprocess) | Après (in-process LlmModule) | Delta |
|
||||||
|
|---|---|---|---|
|
||||||
|
| LLM gen rate | 18.3 tok/s | 17.2 tok/s | -6 % (bruit) |
|
||||||
|
| LLM prefill speed | 52 ms / prompt-token | 52 ms / prompt-token | identique |
|
||||||
|
| LLM TTFT (prompt 35 tok) | 1.8 s | 1.8 s | identique |
|
||||||
|
| LLM TTFT (prompt 80 tok, system+ChatML) | ~4.1 s | 4.2 s | identique |
|
||||||
|
| TTS Talker(.pte) | 45-65 ms / step | 37 ms / step | +25-40 % (contexte QNN partagé) |
|
||||||
|
| TTS CP(.pte) | 65-157 ms / step | 73 ms / step | +10-50 % |
|
||||||
|
| TTS load au boot | 26.7 s | 4.3 s | **6× plus rapide** (plus de subprocess Hexagon 12 s) |
|
||||||
|
| `LlmModule.load()` au boot | n/a (subprocess à la demande) | 3.1 s (one-time) | overhead init |
|
||||||
|
| App RSS | ~2 GB app + 1.76 GB subprocess séparé | ~3.7 GB process unique | mêmes ressources globales |
|
||||||
|
| Erreurs DSP 6031/6033 en concurrence | régulières | disparues | architectural |
|
||||||
|
| Prompts Magisk | 5 / tour | **0** | UX net |
|
||||||
|
| Taille APK | ~100 MB | ~100 MB (libexecutorch_jni.so 192 MB → 8.5 MB après strip à l'install) | négligeable |
|
||||||
|
|
||||||
|
**Conclusion** : pas de régression LLM (perf identique, le runner C++ est le même).
|
||||||
|
Gain net sur la TTS (Talker 25-40 % plus rapide grâce au contexte QNN partagé,
|
||||||
|
load 6× plus rapide). Architecture plus propre : un seul process, un seul runtime
|
||||||
|
QNN, plus de contention DSP, plus de prompts root.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,233 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate per-voice <name>_voice_prefix.bin (9 × 1024 fp32) and
|
||||||
|
<name>_voice_suffix.bin (2 × 1024 fp32) for Kazeia's on-device TTS
|
||||||
|
engine (Qwen3-TTS 0.6B-Base voice-clone mode).
|
||||||
|
|
||||||
|
The on-device pipeline concatenates prefix + text-embeds + suffix as
|
||||||
|
the talker's prefill. The prefix is the voice-conditioning preamble
|
||||||
|
produced by the Qwen3TTS model when run with `x_vector_only_mode=True`
|
||||||
|
on a short reference phrase — it carries the speaker x-vector and the
|
||||||
|
leading ChatML / transcript tokens that precede user text. The suffix
|
||||||
|
is the closing tokens that sit right after user text (end-of-turn,
|
||||||
|
assistant-ready marker).
|
||||||
|
|
||||||
|
Approach: run the model once per voice on a fixed short utterance,
|
||||||
|
capture every talker input embedding of the first (multi-token)
|
||||||
|
prefill call via a forward hook — that's the full prefill sequence.
|
||||||
|
The reference Damien files contain exactly 9 pre-text embeds + 2
|
||||||
|
post-text embeds, which corresponds to:
|
||||||
|
|
||||||
|
[prefix: 9 vectors] [text embeds: N vectors] [suffix: 2 vectors]
|
||||||
|
|
||||||
|
The split is purely positional: the text tokens are not located with
|
||||||
|
the tokenizer. The first 9 vectors of the captured prefill are written
|
||||||
|
as the prefix and the last 2 as the suffix, so the length of the text
|
||||||
|
does not matter as long as the chat template is unchanged. This matches the Damien files
|
||||||
|
bit-identically (verified during the first run: /tmp/check_damien_*).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
export_voice_prefix_suffix.py VOICE.wav [VOICE.wav ...]
|
||||||
|
--out-dir /path/to/output (default /tmp/voice_prefixes)
|
||||||
|
--text "Bonjour." (reference utterance; short is ok)
|
||||||
|
|
||||||
|
The output file names are `<name>_voice_prefix.bin`
|
||||||
|
and `<name>_voice_suffix.bin`, where `<name>` is the lowercased WAV stem up to its first underscore. Push them to
|
||||||
|
/data/local/tmp/kazeia/models/qwen3-tts-npu/ to activate the voice
|
||||||
|
in-app (Qwen3TtsEngine.setVoice reads them from there).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
warnings.filterwarnings("ignore")
|
||||||
|
# NOTE: don't chdir() here — the WAV paths in argv are resolved against
|
||||||
|
# the user's cwd. Qwen3TTS creates /tmp scratch files internally already.
|
||||||
|
|
||||||
|
MODEL_PATH = (
|
||||||
|
"/home/alf/.cache/huggingface/hub/"
|
||||||
|
"models--Qwen--Qwen3-TTS-12Hz-0.6B-Base/snapshots/"
|
||||||
|
"5d83992436eae1d760afd27aff78a71d676296fc"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prefix + suffix sizes taken from the reference damien_voice_prefix.bin /
|
||||||
|
# damien_voice_suffix.bin shipped on the tablet. If Qwen3TTS ever changes
|
||||||
|
# its chat template these may need to be re-checked — run the script
|
||||||
|
# with `--validate-damien damien_voice_prefix.bin` to diff against a
|
||||||
|
# known-good capture.
|
||||||
|
N_PREFIX = 9
|
||||||
|
N_SUFFIX = 2
|
||||||
|
TALKER_DIM = 1024
|
||||||
|
|
||||||
|
|
||||||
|
def load_model():
    """Load the Qwen3-TTS checkpoint found at ``MODEL_PATH`` and return it.

    The heavy imports are done here rather than at module level, so they
    only run when a model is actually requested. The checkpoint is read
    from local files only and placed on the CPU.
    """
    import torch  # imported before qwen_tts, exactly as the original does
    from qwen_tts import Qwen3TTSModel

    print(f"Loading Qwen3-TTS model from {MODEL_PATH}...", flush=True)
    return Qwen3TTSModel.from_pretrained(
        MODEL_PATH, local_files_only=True, device_map="cpu"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class _PrefillCapturedSentinel(Exception):
|
||||||
|
"""Raised after the first prefill so we can abort generate_voice_clone
|
||||||
|
without waiting for the (very slow on CPU) full TTS decode."""
|
||||||
|
|
||||||
|
|
||||||
|
def capture_prefill(tts, wav_path: str, text: str):
    """Return the talker's prefill input embeddings for one reference voice.

    ``tts.model.talker.model.forward`` is temporarily replaced by a hook.
    The first call that carries a 3-D ``inputs_embeds`` has every position
    of batch row 0 copied out as a float32 NumPy vector, after which the
    hook raises ``_PrefillCapturedSentinel`` so ``generate_voice_clone``
    stops before any audio is decoded. Calls without a 3-D
    ``inputs_embeds`` are passed through to the original forward.

    Args:
        tts: loaded model object exposing ``model.talker.model.forward``
            and ``generate_voice_clone``.
        wav_path: reference audio path, passed as ``ref_audio``.
        text: reference utterance to synthesize.

    Returns:
        A list with one float32 vector per prefill position.

    Raises:
        RuntimeError: if the hook never saw a 3-D ``inputs_embeds``.
    """
    import numpy as np

    talker = tts.model.talker
    saved_forward = talker.model.forward
    vectors = []

    def _capturing_forward(input_ids=None, inputs_embeds=None, **kwargs):
        # Anything that is not a batched embedding tensor goes straight
        # to the real forward.
        if inputs_embeds is None or inputs_embeds.dim() != 3:
            return saved_forward(
                input_ids=input_ids, inputs_embeds=inputs_embeds, **kwargs
            )
        for pos in range(inputs_embeds.shape[1]):
            row = inputs_embeds[0, pos, :]
            vectors.append(row.detach().cpu().numpy().astype(np.float32))
        raise _PrefillCapturedSentinel()

    talker.model.forward = _capturing_forward
    try:
        tts.generate_voice_clone(
            text=text,
            ref_audio=wav_path,
            language="french",
            x_vector_only_mode=True,
            non_streaming_mode=True,
        )
    except _PrefillCapturedSentinel:
        # Expected: the hook aborts generation right after the prefill.
        pass
    finally:
        # Always put the real forward back, even on unexpected errors.
        talker.model.forward = saved_forward

    if len(vectors) == 0:
        raise RuntimeError("No prefill captured — hook wasn't triggered.")
    return vectors
|
||||||
|
|
||||||
|
|
||||||
|
def write_bin(path: Path, vectors):
    """Write ``vectors`` to ``path`` in the voice prefix/suffix binary layout.

    Layout (little-endian): two int32 values ``(count, dim)`` followed by
    ``count * dim`` float32 values, one vector after another.

    Every vector is validated and the whole payload is packed in memory
    BEFORE the file is opened. A bad vector therefore cannot leave a
    truncated file behind; ``process_voice`` skips voices whose output
    files already exist, so a partial file would otherwise be mistaken
    for a finished export on the next run.

    Args:
        path: destination file; overwritten if it exists.
        vectors: sequence of float sequences, each of length ``TALKER_DIM``.
            An empty sequence produces a header-only file.

    Raises:
        RuntimeError: if any vector's length differs from ``TALKER_DIM``.
    """
    count = len(vectors)
    for vec in vectors:
        dim = len(vec)
        if dim != TALKER_DIM:
            raise RuntimeError(f"Expected dim {TALKER_DIM}, got {dim}")

    row_format = f"<{TALKER_DIM}f"
    chunks = [struct.pack("<ii", count, TALKER_DIM)]
    chunks.extend(struct.pack(row_format, *vec) for vec in vectors)
    payload = b"".join(chunks)

    with open(path, "wb") as f:
        f.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def process_voice(tts, wav_path: Path, out_dir: Path, text: str):
    """Export the prefix and suffix files for a single reference WAV.

    The voice name is the lowercased file stem up to its first underscore
    (e.g. "damien_15s_24k" → "damien"). If both output files are already
    present in ``out_dir`` the voice is skipped. Otherwise the prefill is
    captured, its first ``N_PREFIX`` vectors are written as the prefix and
    its last ``N_SUFFIX`` vectors as the suffix.

    Raises:
        RuntimeError: if the captured prefill has fewer than
            ``N_PREFIX + N_SUFFIX + 1`` vectors.
    """
    name = wav_path.stem.lower().split("_")[0]  # "damien_15s_24k" → "damien"
    prefix_path = out_dir / f"{name}_voice_prefix.bin"
    suffix_path = out_dir / f"{name}_voice_suffix.bin"

    if all(target.exists() for target in (prefix_path, suffix_path)):
        print(f" [skip] {name}: prefix/suffix already exist")
        return

    print(f" Capturing prefill for {name} ({wav_path.name})...", flush=True)
    prefill = capture_prefill(tts, str(wav_path), text)
    if not len(prefill) >= N_PREFIX + N_SUFFIX + 1:
        raise RuntimeError(
            f"Prefill too short for {name}: {len(prefill)} < {N_PREFIX + N_SUFFIX + 1}"
        )

    # Slice both ends first, then write prefix before suffix.
    outputs = (
        (prefix_path, prefill[:N_PREFIX]),
        (suffix_path, prefill[-N_SUFFIX:]),
    )
    for target, vecs in outputs:
        write_bin(target, vecs)

    print(
        f" Wrote {prefix_path.name} ({N_PREFIX}×{TALKER_DIM}) "
        f"and {suffix_path.name} ({N_SUFFIX}×{TALKER_DIM})",
        flush=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def validate_against_damien(tts, wav_path: Path, reference_prefix: Path, text: str):
    """Regenerate a voice prefix and diff it against a reference ``.bin``.

    The prefill is captured for ``wav_path``, its first ``N_PREFIX``
    vectors are taken as the candidate prefix, and the max and mean
    absolute differences against the reference file are printed. Only the
    prefix is compared; the suffix is not checked here.

    The reference header, payload length and shape are validated before
    the subtraction. Without that, a reference with a single row would
    broadcast silently against the candidate and report a meaningless
    diff, and a truncated file would surface as an opaque reshape error.

    Args:
        tts: loaded model object, forwarded to ``capture_prefill``.
        wav_path: reference audio for the voice being validated.
        reference_prefix: known-good prefix file in the layout of int32
            ``(count, dim)`` followed by ``count * dim`` float32 values.
        text: reference utterance used for the prefill.

    Raises:
        RuntimeError: if the prefill is shorter than ``N_PREFIX``, or the
            reference file is truncated, has a non-positive header, or
            does not have the same shape as the candidate prefix.
    """
    import numpy as np

    prefill = capture_prefill(tts, str(wav_path), text)
    if len(prefill) < N_PREFIX:
        raise RuntimeError(
            f"Prefill too short to validate: {len(prefill)} < {N_PREFIX}"
        )
    candidate = np.array(prefill[:N_PREFIX], dtype=np.float32)

    with open(reference_prefix, "rb") as f:
        header = f.read(8)
        if len(header) != 8:
            raise RuntimeError(
                f"Reference file {reference_prefix} is too short for a header"
            )
        n, d = struct.unpack("<ii", header)
        if n <= 0 or d <= 0:
            raise RuntimeError(
                f"Reference file {reference_prefix} has invalid header ({n}, {d})"
            )
        expected_bytes = n * d * 4  # float32 payload
        payload = f.read(expected_bytes)

    if len(payload) != expected_bytes:
        raise RuntimeError(
            f"Reference file {reference_prefix} is truncated: "
            f"{len(payload)} of {expected_bytes} payload bytes"
        )
    ref = np.frombuffer(payload, dtype=np.float32).reshape(n, d)

    if ref.shape != candidate.shape:
        raise RuntimeError(
            f"Shape mismatch: candidate {candidate.shape} vs reference {ref.shape}"
        )

    diff = np.abs(candidate - ref)
    print(
        f"Damien prefix validation: max|diff|={diff.max():.3e} "
        f"mean|diff|={diff.mean():.3e} (expect ~0 if script is correct)"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: export prefix/suffix files for each WAV.

    Parses the arguments, creates the output directory, loads the model
    once, optionally validates against a reference Damien prefix, then
    processes every WAV in turn. A missing or failing WAV does not stop
    the remaining ones. Such inputs are now counted, and the process exits
    with status 1 after the summary if there were any; previously the
    script exited 0 even when every voice had failed.
    """
    p = argparse.ArgumentParser()
    p.add_argument("wavs", nargs="+", help="Voice WAV files")
    p.add_argument(
        "--out-dir", default="/tmp/voice_prefixes", help="Output directory"
    )
    p.add_argument(
        "--text", default="Bonjour.", help="Reference utterance for prefill"
    )
    p.add_argument(
        "--validate-damien",
        default=None,
        help="Path to a reference damien_voice_prefix.bin for sanity-check",
    )
    args = p.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    tts = load_model()

    if args.validate_damien:
        # Validation needs the Damien WAV itself: first input whose stem
        # contains "damien".
        damien_wav = next(
            (Path(w) for w in args.wavs if "damien" in Path(w).stem.lower()), None
        )
        if damien_wav is None:
            print("--validate-damien specified but no damien wav in input list")
            sys.exit(1)
        validate_against_damien(tts, damien_wav, Path(args.validate_damien), args.text)

    unsuccessful = []  # inputs that were missing or raised during export
    for wav in args.wavs:
        wp = Path(wav)
        if not wp.exists():
            print(f" [miss] {wp}")
            unsuccessful.append(wp)
            continue
        try:
            process_voice(tts, wp, out_dir, args.text)
        except Exception as e:
            # Best effort: report and carry on with the remaining voices.
            print(f" [fail] {wp.name}: {e}")
            unsuccessful.append(wp)

    print(f"\nDone. Files written under {out_dir}")
    print(
        "Push to the tablet with, e.g.:\n"
        f" adb push {out_dir}/*_voice_prefix.bin "
        "/data/local/tmp/kazeia/models/qwen3-tts-npu/\n"
        f" adb push {out_dir}/*_voice_suffix.bin "
        "/data/local/tmp/kazeia/models/qwen3-tts-npu/"
    )

    if unsuccessful:
        # Surface partial failure to calling scripts through the exit status.
        print(
            f"{len(unsuccessful)} of {len(args.wavs)} input(s) were missing or failed",
            file=sys.stderr,
        )
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue