Compare commits
No commits in common. "main" and "backup/pre-no-root-migration" have entirely different histories.
main
...
backup/pre
|
|
@ -1,72 +0,0 @@
|
||||||
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
|
|
||||||
index e93731e..4951e1d 100644
|
|
||||||
--- a/backends/qualcomm/CMakeLists.txt
|
|
||||||
+++ b/backends/qualcomm/CMakeLists.txt
|
|
||||||
@@ -308,8 +308,8 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
-# QNN pybind
|
|
||||||
-if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
|
|
||||||
+# QNN pybind — host Python bindings, not for Android cross-compile
|
|
||||||
+if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64" AND NOT ANDROID)
|
|
||||||
add_subdirectory(
|
|
||||||
${EXECUTORCH_SOURCE_DIR}/third-party/pybind11
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/pybind11
|
|
||||||
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
|
|
||||||
index 45f2414..ae3d79f 100644
|
|
||||||
--- a/extension/android/jni/jni_layer_llama.cpp
|
|
||||||
+++ b/extension/android/jni/jni_layer_llama.cpp
|
|
||||||
@@ -171,14 +171,44 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
|
|
||||||
model_path->toStdString().c_str(),
|
|
||||||
data_files_vector,
|
|
||||||
executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors);
|
|
||||||
- std::string decoder_model = "llama3"; // use llama3 for now
|
|
||||||
- runner_ = std::make_unique<example::Runner<uint16_t>>( // QNN runner
|
|
||||||
- std::move(module),
|
|
||||||
- decoder_model.c_str(),
|
|
||||||
- model_path->toStdString().c_str(),
|
|
||||||
- tokenizer_path->toStdString().c_str(),
|
|
||||||
- "",
|
|
||||||
- "");
|
|
||||||
+ std::string decoder_model = "qwen3"; // Kazeia: our .pte was exported with --decoder_model qwen3-4b
|
|
||||||
+
|
|
||||||
+ // Mirror qnn_llama_runner.cpp main(): pick the Runner<T> template based
|
|
||||||
+ // on the model's get_kv_io_bit_width metadata. The 16-bit KV I/O models
|
|
||||||
+ // were introduced after the 8-bit ones, and using the wrong T treats
|
|
||||||
+ // KV-cache bytes as the wrong width → garbage logits → gibberish output.
|
|
||||||
+ example::KvBitWidth kv_bitwidth = example::KvBitWidth::kWidth8;
|
|
||||||
+ if (module->method_names()->count("get_kv_io_bit_width") > 0) {
|
|
||||||
+ kv_bitwidth = static_cast<example::KvBitWidth>(
|
|
||||||
+ module->get("get_kv_io_bit_width").get().toScalar().to<int64_t>());
|
|
||||||
+ }
|
|
||||||
+ // Auto-detect eval_mode: kv-only (0) if the .pte only carries
|
|
||||||
+ // kv_forward, hybrid (1) if it also has prefill_forward (which lets the
|
|
||||||
+ // runner batch the prompt prefill — TTFT drops from ~52 ms/token to
|
|
||||||
+ // sub-ms after the one-shot prefill graph). Same JNI binary works with
|
|
||||||
+ // both export modes, no code change needed when the .pte is upgraded.
|
|
||||||
+ int eval_mode = 0;
|
|
||||||
+ if (module->method_names()->count("prefill_forward") > 0) {
|
|
||||||
+ eval_mode = 1; // EvalMode::kHybrid
|
|
||||||
+ }
|
|
||||||
+ auto make_runner = [&](auto sample) -> std::unique_ptr<llm::IRunner> {
|
|
||||||
+ using T = decltype(sample);
|
|
||||||
+ return std::make_unique<example::Runner<T>>(
|
|
||||||
+ std::move(module),
|
|
||||||
+ decoder_model.c_str(),
|
|
||||||
+ model_path->toStdString().c_str(),
|
|
||||||
+ tokenizer_path->toStdString().c_str(),
|
|
||||||
+ /* performance_output_path */ "",
|
|
||||||
+ /* dump_logits_path */ "",
|
|
||||||
+ /* temperature */ 0.0f, // greedy
|
|
||||||
+ eval_mode,
|
|
||||||
+ /* shared_buffer */ true);
|
|
||||||
+ };
|
|
||||||
+ if (kv_bitwidth == example::KvBitWidth::kWidth16) {
|
|
||||||
+ runner_ = make_runner(uint16_t{0});
|
|
||||||
+ } else {
|
|
||||||
+ runner_ = make_runner(uint8_t{0});
|
|
||||||
+ }
|
|
||||||
model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
|
|
||||||
#endif
|
|
||||||
#if defined(EXECUTORCH_BUILD_MEDIATEK)
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
diff --git a/examples/qualcomm/oss_scripts/llama/__init__.py b/examples/qualcomm/oss_scripts/llama/__init__.py
|
diff --git a/examples/qualcomm/oss_scripts/llama/__init__.py b/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
index 963db6e..9ccfdd0 100644
|
index 963db6e..953dc4c 100644
|
||||||
--- a/examples/qualcomm/oss_scripts/llama/__init__.py
|
--- a/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
+++ b/examples/qualcomm/oss_scripts/llama/__init__.py
|
+++ b/examples/qualcomm/oss_scripts/llama/__init__.py
|
||||||
@@ -25,9 +25,14 @@ from executorch.examples.models.granite import (
|
@@ -25,9 +25,14 @@ from executorch.examples.models.granite import (
|
||||||
|
|
@ -20,7 +20,7 @@ index 963db6e..9ccfdd0 100644
|
||||||
from executorch.examples.models.qwen2_5 import (
|
from executorch.examples.models.qwen2_5 import (
|
||||||
convert_weights as convert_qwen2_5_weights,
|
convert_weights as convert_qwen2_5_weights,
|
||||||
)
|
)
|
||||||
@@ -479,6 +484,37 @@ class Qwen3_1_7B(LLMModelConfig):
|
@@ -479,6 +484,34 @@ class Qwen3_1_7B(LLMModelConfig):
|
||||||
quant_recipe = Qwen3_1_7BQuantRecipe
|
quant_recipe = Qwen3_1_7BQuantRecipe
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -40,13 +40,10 @@ index 963db6e..9ccfdd0 100644
|
||||||
+ convert_weights = convert_qwen3_weights
|
+ convert_weights = convert_qwen3_weights
|
||||||
+ transform_weight = False
|
+ transform_weight = False
|
||||||
+ instruct_model = True
|
+ instruct_model = True
|
||||||
+ # num_sharding=1 for hybrid mode: sharding=2 produces a multi-context
|
+ # Bumped to 2 to halve peak host RAM during QNN compile (4B at sharding=1
|
||||||
+ # .pte (2 graphs × 2 shards = 4 contexts) that the LlmModule load path
|
+ # OOMed on a 62 GB box, peak anon-rss 46 GB). At sharding=2 each shard
|
||||||
+ # can't restore (error 5010 "Context group 1 does not exist"). With
|
+ # compile fits comfortably; runner stitches them at load time.
|
||||||
+ # sharding=1 the hybrid export needs ~46 GB RAM peak — the 192 GB swap
|
+ num_sharding = 2
|
||||||
+ # on /swapfile handles this; compile takes ~80 min wall but completes
|
|
||||||
+ # cleanly. Single-context .pte loads fine through the JNI runner.
|
|
||||||
+ num_sharding = 1
|
|
||||||
+ masked_softmax = True
|
+ masked_softmax = True
|
||||||
+ seq_mse_candidates = 0
|
+ seq_mse_candidates = 0
|
||||||
+ r1 = False
|
+ r1 = False
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" />
|
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
|
||||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
||||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
||||||
|
|
@ -51,7 +50,7 @@
|
||||||
|
|
||||||
<service
|
<service
|
||||||
android:name=".service.KazeiaService"
|
android:name=".service.KazeiaService"
|
||||||
android:foregroundServiceType="microphone|mediaPlayback|specialUse"
|
android:foregroundServiceType="microphone|specialUse"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<property
|
<property
|
||||||
android:name="android.app.PROPERTY_SPECIAL_USE_FGS_SUBTYPE"
|
android:name="android.app.PROPERTY_SPECIAL_USE_FGS_SUBTYPE"
|
||||||
|
|
|
||||||
|
|
@ -1,49 +1,43 @@
|
||||||
package com.kazeia.llm
|
package com.kazeia.llm
|
||||||
|
|
||||||
import android.content.Context
|
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import com.kazeia.core.*
|
import com.kazeia.core.*
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.withContext
|
import kotlinx.coroutines.withContext
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import org.pytorch.executorch.extension.llm.LlmCallback
|
|
||||||
import org.pytorch.executorch.extension.llm.LlmModule
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* LLM Engine using ExecuTorch LlmModule in-process — **no root required**.
|
* LLM Engine using ExecuTorch + QNN backend via subprocess.
|
||||||
*
|
* Calls qnn_llama_runner binary with root access (Magisk su).
|
||||||
* Runs Qwen3-4B via `org.pytorch.executorch.extension.llm.LlmModule`, which
|
|
||||||
* wraps the same C++ TextLlmRunner as the standalone qnn_llama_runner binary
|
|
||||||
* but inside the app's own process. The QNN HTP backend works because the
|
|
||||||
* DSP fastrpc service accepts the Zygote-forked app process (unlike
|
|
||||||
* ProcessBuilder-spawned subprocesses which lose supplementary GIDs on exec
|
|
||||||
* and get rejected by the fastrpc credential checks).
|
|
||||||
*
|
|
||||||
* Model + tokenizer live in /data/local/tmp/kazeia-et/ (readable by the app
|
|
||||||
* on this device's permissive SELinux policy). libexecutorch.so + QNN libs
|
|
||||||
* are bundled in jniLibs.
|
|
||||||
*
|
*
|
||||||
* Current tablet config: Qwen3-4B KV-mode, ~18-22 tok/s on Hexagon V79
|
* Current tablet config: Qwen3-4B KV-mode, ~18-22 tok/s on Hexagon V79
|
||||||
* (Snapdragon 8 Elite), TTFT 0.9 s, RSS 1.76 GB.
|
* (Snapdragon 8 Elite), TTFT 0.9 s, RSS 1.76 GB.
|
||||||
|
*
|
||||||
|
* Why root: the runner binary plus its QNN v2.42 .so deps live in
|
||||||
|
* /data/local/tmp/kazeia-et/ (shell_data_file SELinux context). Untrusted
|
||||||
|
* apps can't exec binaries from there. The Hexagon DSP fastrpc service also
|
||||||
|
* refuses to load the v2.42 Skel from the app's own files dir — only from
|
||||||
|
* nativeLibraryDir — but that dir already holds the TTS stack's v2.31 Skel
|
||||||
|
* (same filename, different version, can't coexist). Rebuilding everything
|
||||||
|
* against one QNN version would eliminate the conflict, but would require
|
||||||
|
* re-exporting the TTS .pte with the new runtime (tooling currently broken
|
||||||
|
* on the flatc schema/dataclass mismatch in the qnn_venv).
|
||||||
*/
|
*/
|
||||||
class ExecuTorchLlmEngine(
|
class ExecuTorchLlmEngine(
|
||||||
private val context: Context,
|
|
||||||
private val onLog: ((String) -> Unit)? = null
|
private val onLog: ((String) -> Unit)? = null
|
||||||
) : LlmEngine {
|
) : LlmEngine {
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
private const val TAG = "ExecuTorchLLM"
|
private const val TAG = "ExecuTorchLLM"
|
||||||
// /no_think disables Qwen3's chain-of-thought block. Compact wording
|
private const val RUNNER_DIR = "/data/local/tmp/kazeia-et"
|
||||||
// keeps prefill cost low: this prompt is ~25 tokens vs ~55 in the
|
// /no_think disables Qwen3's chain-of-thought block so the full token
|
||||||
// earlier verbose version → saves ~1.5 s of TTFT in kv-only mode.
|
// budget goes to the actual answer (without it, 120-200 tokens get
|
||||||
private const val SYSTEM_PROMPT = "Tu es Kazeia, à l'écoute en français. Réponds en 1-2 phrases courtes, sans raisonnement. /no_think"
|
// consumed by <think>…</think> leaving nothing to speak).
|
||||||
|
// Short-response directive keeps TTS latency manageable — each sentence
|
||||||
private const val MODEL_DIR = "/data/local/tmp/kazeia-et"
|
// costs ~3-5 s on the .pte path, so 1-2 sentences is the sweet spot.
|
||||||
private const val MODEL_PATH = "$MODEL_DIR/hybrid_llama_qnn.pte"
|
private const val SYSTEM_PROMPT = "Tu es Kazeia, un compagnon bienveillant d'écoute émotionnelle. Réponds toujours en français, en 1 ou 2 phrases courtes (40 mots maximum). Pas de raisonnement, donne directement la réponse. /no_think"
|
||||||
private const val TOKENIZER_PATH = "$MODEL_DIR/tokenizer.json"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private var llmModule: LlmModule? = null
|
|
||||||
private var modelName = ""
|
private var modelName = ""
|
||||||
private var loaded = false
|
private var loaded = false
|
||||||
|
|
||||||
|
|
@ -54,152 +48,77 @@ class ExecuTorchLlmEngine(
|
||||||
|
|
||||||
override suspend fun load(modelPath: String, config: LlmConfig) {
|
override suspend fun load(modelPath: String, config: LlmConfig) {
|
||||||
withContext(Dispatchers.IO) {
|
withContext(Dispatchers.IO) {
|
||||||
if (!File(MODEL_PATH).exists()) {
|
val check = execRoot("ls $RUNNER_DIR/qnn_llama_runner $RUNNER_DIR/hybrid_llama_qnn.pte $RUNNER_DIR/tokenizer.json 2>&1")
|
||||||
nlog("ERROR: model not found at $MODEL_PATH")
|
if (check.contains("No such file")) {
|
||||||
return@withContext
|
nlog("ERROR: runner or model not found in $RUNNER_DIR")
|
||||||
}
|
|
||||||
if (!File(TOKENIZER_PATH).exists()) {
|
|
||||||
nlog("ERROR: tokenizer not found at $TOKENIZER_PATH")
|
|
||||||
return@withContext
|
return@withContext
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
deployRunnerScript()
|
||||||
val t0 = System.currentTimeMillis()
|
|
||||||
// MODEL_TYPE_QNN_LLAMA=4 selects the Qualcomm runner path in
|
|
||||||
// jni_layer_llama.cpp, which uses example::Runner (same code
|
|
||||||
// as the qnn_llama_runner binary) instead of the generic
|
|
||||||
// TextLLMRunner. Our .pte was exported with
|
|
||||||
// --decoder_model qwen3-4b which requires this path.
|
|
||||||
val MODEL_TYPE_QNN_LLAMA = 4
|
|
||||||
llmModule = LlmModule(MODEL_TYPE_QNN_LLAMA, MODEL_PATH, TOKENIZER_PATH, 0.7f)
|
|
||||||
nlog("LlmModule instantiated in ${System.currentTimeMillis() - t0}ms")
|
|
||||||
|
|
||||||
// Load the PTE into QNN HTP (calls the native load()).
|
writeFileRoot("$RUNNER_DIR/outputs/prompt.b64",
|
||||||
val loadResult = llmModule!!.load()
|
android.util.Base64.encodeToString("Bonjour".toByteArray(), android.util.Base64.NO_WRAP))
|
||||||
if (loadResult != 0) {
|
if (SYSTEM_PROMPT.isNotEmpty()) {
|
||||||
nlog("ERROR: LlmModule.load() returned $loadResult")
|
writeFileRoot("$RUNNER_DIR/outputs/system.b64",
|
||||||
llmModule = null
|
android.util.Base64.encodeToString(SYSTEM_PROMPT.toByteArray(), android.util.Base64.NO_WRAP))
|
||||||
return@withContext
|
} else {
|
||||||
}
|
execRoot("rm -f $RUNNER_DIR/outputs/system.b64")
|
||||||
nlog("LlmModule loaded in ${System.currentTimeMillis() - t0}ms total")
|
}
|
||||||
|
val test = execRoot("sh $RUNNER_DIR/run_llm.sh 0.0 80 2>&1")
|
||||||
|
|
||||||
|
if (test.contains("Generated Tokens") || test.contains("Rate:")) {
|
||||||
loaded = true
|
loaded = true
|
||||||
modelName = "Qwen3-4B LlmModule"
|
val rateMatch = Regex("Generated \\d+ tokens:.*Rate:\\s+([\\d.]+)").find(test)
|
||||||
|
val rate = rateMatch?.groupValues?.get(1) ?: "?"
|
||||||
|
modelName = "Qwen3 (${rate} tok/s NPU)"
|
||||||
nlog("Ready: $modelName")
|
nlog("Ready: $modelName")
|
||||||
} catch (e: Throwable) {
|
} else {
|
||||||
nlog("ERROR: LlmModule init failed: ${e.javaClass.simpleName}: ${e.message}")
|
nlog("ERROR: test failed: ${test.takeLast(200)}")
|
||||||
llmModule = null
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun isLoaded(): Boolean = loaded && llmModule != null
|
override fun isLoaded(): Boolean = loaded
|
||||||
|
|
||||||
override suspend fun generate(
|
override suspend fun generate(
|
||||||
prompt: String,
|
prompt: String,
|
||||||
params: SamplingParams,
|
params: SamplingParams,
|
||||||
onToken: ((String) -> Boolean)?
|
onToken: ((String) -> Boolean)?
|
||||||
): GenerationResult = withContext(Dispatchers.IO) {
|
): GenerationResult = withContext(Dispatchers.IO) {
|
||||||
val mod = llmModule ?: throw IllegalStateException("Model not loaded")
|
if (!loaded) throw IllegalStateException("Model not loaded")
|
||||||
|
|
||||||
val startTime = System.currentTimeMillis()
|
val startTime = System.currentTimeMillis()
|
||||||
val fullPrompt = buildChatTemplate(prompt)
|
|
||||||
|
writeFileRoot("$RUNNER_DIR/outputs/prompt.b64",
|
||||||
|
android.util.Base64.encodeToString(prompt.toByteArray(), android.util.Base64.NO_WRAP))
|
||||||
|
if (SYSTEM_PROMPT.isNotEmpty()) {
|
||||||
|
writeFileRoot("$RUNNER_DIR/outputs/system.b64",
|
||||||
|
android.util.Base64.encodeToString(SYSTEM_PROMPT.toByteArray(), android.util.Base64.NO_WRAP))
|
||||||
|
} else {
|
||||||
|
execRoot("rm -f $RUNNER_DIR/outputs/system.b64")
|
||||||
|
}
|
||||||
|
|
||||||
nlog("Prompt: '${prompt.take(80)}'")
|
nlog("Prompt: '${prompt.take(80)}'")
|
||||||
|
|
||||||
val responseBuilder = StringBuilder()
|
|
||||||
var firstTokenMs = -1L
|
|
||||||
// Track whether we're inside a <think>…</think> block so the upstream
|
|
||||||
// SentenceStreamer / TTS doesn't get fed reasoning tokens. Even with
|
|
||||||
// /no_think in the system prompt Qwen3 still emits empty <think></think>
|
|
||||||
// wrappers for ~3 tokens before the real answer.
|
|
||||||
var inThink = false
|
|
||||||
val tokenScan = StringBuilder() // small lookahead to spot tag boundaries
|
|
||||||
|
|
||||||
// Singleton special tokens that should never reach the TTS streamer
|
|
||||||
// (they leak when the model wraps its reply or signals end-of-turn).
|
|
||||||
val stripTokens = listOf("<|im_start|>", "<|im_end|>", "<|endoftext|>")
|
|
||||||
val maxTagLen = listOf("<think>", "</think>", "<|im_start|>", "<|im_end|>", "<|endoftext|>")
|
|
||||||
.maxOf { it.length }
|
|
||||||
|
|
||||||
val cb = object : LlmCallback {
|
|
||||||
override fun onResult(result: String) {
|
|
||||||
if (firstTokenMs < 0) firstTokenMs = System.currentTimeMillis() - startTime
|
|
||||||
responseBuilder.append(result)
|
|
||||||
|
|
||||||
// Forward to caller only outside <think> blocks, and strip
|
|
||||||
// singleton special tokens. We accumulate a tiny lookahead buffer
|
|
||||||
// so tag tokens that arrive split ("<thi", "nk>") still match.
|
|
||||||
tokenScan.append(result)
|
|
||||||
while (true) {
|
|
||||||
if (!inThink) {
|
|
||||||
val open = tokenScan.indexOf("<think>")
|
|
||||||
if (open < 0) {
|
|
||||||
// No <think> open pending — strip any singleton tokens
|
|
||||||
// that fully landed in the buffer, then flush prose
|
|
||||||
// up to a safe point preserving lookahead.
|
|
||||||
for (tok in stripTokens) {
|
|
||||||
var idx = tokenScan.indexOf(tok)
|
|
||||||
while (idx >= 0) {
|
|
||||||
tokenScan.delete(idx, idx + tok.length)
|
|
||||||
idx = tokenScan.indexOf(tok)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
val safe = tokenScan.length - maxTagLen
|
|
||||||
if (safe > 0) {
|
|
||||||
onToken?.invoke(tokenScan.substring(0, safe))
|
|
||||||
tokenScan.delete(0, safe)
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// Flush the prose before the <think> tag, then enter think mode.
|
|
||||||
if (open > 0) onToken?.invoke(tokenScan.substring(0, open))
|
|
||||||
tokenScan.delete(0, open + "<think>".length)
|
|
||||||
inThink = true
|
|
||||||
} else {
|
|
||||||
val close = tokenScan.indexOf("</think>")
|
|
||||||
if (close < 0) {
|
|
||||||
// Drop all buffered chars except a small tail in case
|
|
||||||
// the closing tag is split across tokens.
|
|
||||||
val keep = "</think>".length - 1
|
|
||||||
if (tokenScan.length > keep) tokenScan.delete(0, tokenScan.length - keep)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
tokenScan.delete(0, close + "</think>".length)
|
|
||||||
inThink = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
override fun onStats(stats: String) {
|
|
||||||
nlog("stats: ${stats.take(200)}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val seqLen = minOf(params.maxNewTokens, 512)
|
val seqLen = minOf(params.maxNewTokens, 512)
|
||||||
val rc = try {
|
val output = execRoot("sh $RUNNER_DIR/run_llm.sh ${params.temperature} $seqLen 2>&1")
|
||||||
// echo=false so onResult() only receives the generated completion,
|
|
||||||
// not the prompt tokens echoed back — otherwise the sentence
|
|
||||||
// streamer would feed '<|im_start|>user …' to the TTS.
|
|
||||||
mod.generate(fullPrompt, seqLen, cb, /* echo */ false)
|
|
||||||
} catch (e: Throwable) {
|
|
||||||
nlog("generate() threw: ${e.message}")
|
|
||||||
-1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Drain any leftover prose buffered during <think>-suppression so the
|
val tokenCount = Regex("Generated Tokens:\\s+(\\d+)").find(output)
|
||||||
// last sentence reaches the TTS even if it ran past the closing tag.
|
?.groupValues?.get(1)?.toIntOrNull() ?: 0
|
||||||
if (!inThink && tokenScan.isNotEmpty()) {
|
val rate = Regex("Generated \\d+ tokens:.*Rate:\\s+([\\d.]+)").find(output)
|
||||||
onToken?.invoke(tokenScan.toString())
|
?.groupValues?.get(1)?.toFloatOrNull() ?: 0f
|
||||||
tokenScan.clear()
|
val ttft = Regex("Time to first generated token:\\s+([\\d.]+)").find(output)
|
||||||
}
|
?.groupValues?.get(1)?.toFloatOrNull() ?: 0f
|
||||||
|
|
||||||
|
val responseRaw = execRoot("cat $RUNNER_DIR/outputs/response.txt 2>/dev/null")
|
||||||
|
nlog("RAW: ${responseRaw.take(300)}")
|
||||||
|
val responseText = extractResponse(responseRaw)
|
||||||
|
|
||||||
val elapsed = System.currentTimeMillis() - startTime
|
val elapsed = System.currentTimeMillis() - startTime
|
||||||
val rawText = responseBuilder.toString()
|
nlog("Response: '$responseText'")
|
||||||
val responseText = cleanResponse(rawText)
|
nlog("Stats: ${tokenCount}tok ${rate}tok/s TTFT=${ttft}s ${elapsed}ms")
|
||||||
val tokenCount = rawText.length / 4 // rough estimate without a tokenizer
|
|
||||||
val rate = if (elapsed > 0) (tokenCount * 1000f) / elapsed else 0f
|
|
||||||
|
|
||||||
nlog("Response: '${responseText.take(80)}'")
|
onToken?.invoke(responseText)
|
||||||
nlog("Stats: rc=$rc ~${tokenCount}tok ~${"%.1f".format(rate)}tok/s TTFT=${firstTokenMs}ms total=${elapsed}ms")
|
|
||||||
|
|
||||||
GenerationResult(
|
GenerationResult(
|
||||||
text = responseText,
|
text = responseText,
|
||||||
|
|
@ -209,32 +128,20 @@ class ExecuTorchLlmEngine(
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private fun extractResponse(raw: String): String {
|
||||||
* Qwen3 chat template matching qnn_llama_runner.cpp's get_formatted_prompt()
|
|
||||||
* for DecoderModelVersion::kQwen3. Note the user-first-then-system ordering
|
|
||||||
* (quirky but required — the runner binary produces the same layout and our
|
|
||||||
* .pte was trained with it). Terminates with `<|im_start|>assistant` with
|
|
||||||
* no trailing newline, matching the binary exactly.
|
|
||||||
*/
|
|
||||||
private fun buildChatTemplate(userInput: String): String {
|
|
||||||
val sb = StringBuilder()
|
|
||||||
sb.append("<|im_start|>user\n").append(userInput).append("<|im_end|>\n")
|
|
||||||
if (SYSTEM_PROMPT.isNotEmpty()) {
|
|
||||||
sb.append("<|im_start|>system\n").append(SYSTEM_PROMPT).append("<|im_end|>\n")
|
|
||||||
}
|
|
||||||
sb.append("<|im_start|>assistant")
|
|
||||||
return sb.toString()
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Strip <think>…</think>, special tokens, and leading/trailing whitespace. */
|
|
||||||
private fun cleanResponse(raw: String): String {
|
|
||||||
var text = raw
|
var text = raw
|
||||||
val thinkEnd = text.indexOf("</think>")
|
val thinkEnd = text.indexOf("</think>")
|
||||||
if (thinkEnd >= 0) {
|
if (thinkEnd >= 0) {
|
||||||
text = text.substring(thinkEnd + "</think>".length)
|
text = text.substring(thinkEnd + "</think>".length)
|
||||||
} else if (text.indexOf("<think>") >= 0) {
|
} else {
|
||||||
nlog("WARN: <think> block never closed")
|
val thinkStart = text.indexOf("<think>")
|
||||||
return ""
|
val assistantTag = text.indexOf("assistant")
|
||||||
|
if (thinkStart >= 0) {
|
||||||
|
nlog("WARN: <think> block never closed, no response generated")
|
||||||
|
return ""
|
||||||
|
} else if (assistantTag >= 0) {
|
||||||
|
text = text.substring(assistantTag + "assistant".length)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return text
|
return text
|
||||||
.replace("<|im_start|>", "")
|
.replace("<|im_start|>", "")
|
||||||
|
|
@ -245,9 +152,82 @@ class ExecuTorchLlmEngine(
|
||||||
.trim()
|
.trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun deployRunnerScript() {
|
||||||
|
val script = """
|
||||||
|
#!/bin/sh
|
||||||
|
cd $RUNNER_DIR
|
||||||
|
export LD_LIBRARY_PATH=$RUNNER_DIR
|
||||||
|
export ADSP_LIBRARY_PATH=$RUNNER_DIR
|
||||||
|
|
||||||
|
TEMP=${'$'}1
|
||||||
|
SEQ_LEN=${'$'}2
|
||||||
|
|
||||||
|
PROMPT=${'$'}(base64 -d $RUNNER_DIR/outputs/prompt.b64)
|
||||||
|
|
||||||
|
rm -f $RUNNER_DIR/outputs/response.txt
|
||||||
|
|
||||||
|
SYSTEM_ARGS=""
|
||||||
|
if [ -s $RUNNER_DIR/outputs/system.b64 ]; then
|
||||||
|
SYSTEM=${'$'}(base64 -d $RUNNER_DIR/outputs/system.b64)
|
||||||
|
SYSTEM_ARGS="--system_prompt"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "${'$'}SYSTEM_ARGS" ]; then
|
||||||
|
exec ./qnn_llama_runner \
|
||||||
|
--model_path hybrid_llama_qnn.pte \
|
||||||
|
--tokenizer_path tokenizer.json \
|
||||||
|
--decoder_model_version qwen3 \
|
||||||
|
--output_path $RUNNER_DIR/outputs/response.txt \
|
||||||
|
--performance_output_path $RUNNER_DIR/outputs/perf.txt \
|
||||||
|
--shared_buffer \
|
||||||
|
--system_prompt "${'$'}SYSTEM" \
|
||||||
|
--prompt "${'$'}PROMPT" \
|
||||||
|
--temperature ${'$'}TEMP \
|
||||||
|
--seq_len ${'$'}SEQ_LEN \
|
||||||
|
--eval_mode 0
|
||||||
|
else
|
||||||
|
exec ./qnn_llama_runner \
|
||||||
|
--model_path hybrid_llama_qnn.pte \
|
||||||
|
--tokenizer_path tokenizer.json \
|
||||||
|
--decoder_model_version qwen3 \
|
||||||
|
--output_path $RUNNER_DIR/outputs/response.txt \
|
||||||
|
--performance_output_path $RUNNER_DIR/outputs/perf.txt \
|
||||||
|
--shared_buffer \
|
||||||
|
--prompt "${'$'}PROMPT" \
|
||||||
|
--temperature ${'$'}TEMP \
|
||||||
|
--seq_len ${'$'}SEQ_LEN \
|
||||||
|
--eval_mode 0
|
||||||
|
fi
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
writeFileRoot("$RUNNER_DIR/run_llm.sh", script)
|
||||||
|
execRoot("chmod 755 $RUNNER_DIR/run_llm.sh")
|
||||||
|
}
|
||||||
|
|
||||||
override fun release() {
|
override fun release() {
|
||||||
try { llmModule?.resetNative() } catch (_: Throwable) {}
|
|
||||||
llmModule = null
|
|
||||||
loaded = false
|
loaded = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun writeFileRoot(path: String, content: String) {
|
||||||
|
try {
|
||||||
|
val process = Runtime.getRuntime().exec(arrayOf("su", "-c", "cat > $path"))
|
||||||
|
process.outputStream.bufferedWriter().use { it.write(content) }
|
||||||
|
process.waitFor()
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "writeFileRoot failed: ${e.message}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun execRoot(cmd: String): String {
|
||||||
|
return try {
|
||||||
|
val process = Runtime.getRuntime().exec(arrayOf("su", "-c", cmd))
|
||||||
|
val result = process.inputStream.bufferedReader().readText()
|
||||||
|
val error = process.errorStream.bufferedReader().readText()
|
||||||
|
process.waitFor()
|
||||||
|
if (error.isNotEmpty() && result.isEmpty()) error else result
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "execRoot failed: ${e.message}")
|
||||||
|
""
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -142,36 +142,14 @@ class KazeiaPipeline {
|
||||||
* the echo-mode playback through the same path — otherwise each TTS
|
* the echo-mode playback through the same path — otherwise each TTS
|
||||||
* site reimplemented the "streaming-or-fallback" dispatch.
|
* site reimplemented the "streaming-or-fallback" dispatch.
|
||||||
*/
|
*/
|
||||||
suspend fun speakText(
|
suspend fun speakText(text: String) {
|
||||||
text: String,
|
|
||||||
// Fires the instant each synthesized sentence starts playing
|
|
||||||
// through the speaker, with the sentence text, audio duration,
|
|
||||||
// and a per-ENVELOPE_WINDOW_MS RMS envelope. Used by
|
|
||||||
// processLlmResponse to defer the KAZEIA chat bubble appearance
|
|
||||||
// until sound is audible, pace word-by-word reveal inside the
|
|
||||||
// bubble, and drive the AudioVisualizerView orb.
|
|
||||||
onSegmentPlaying: ((
|
|
||||||
sentence: String,
|
|
||||||
durationMs: Long,
|
|
||||||
rmsEnvelope: FloatArray,
|
|
||||||
spectrogram: Array<FloatArray>
|
|
||||||
) -> Unit)? = null
|
|
||||||
) {
|
|
||||||
val ttsEngine = tts ?: return
|
val ttsEngine = tts ?: return
|
||||||
_pipelineState.value = PipelineState.Speaking
|
_pipelineState.value = PipelineState.Speaking
|
||||||
try {
|
try {
|
||||||
val qwen = ttsEngine as? com.kazeia.tts.Qwen3TtsEngine
|
val qwen = ttsEngine as? com.kazeia.tts.Qwen3TtsEngine
|
||||||
if (qwen != null) {
|
if (qwen != null) {
|
||||||
qwen.onSegmentPlaying = onSegmentPlaying
|
|
||||||
qwen.startStreamingSession()
|
qwen.startStreamingSession()
|
||||||
val streamer = com.kazeia.tts.SentenceStreamer { raw ->
|
val streamer = com.kazeia.tts.SentenceStreamer { s -> qwen.enqueueSentence(s) }
|
||||||
// Strip emoji / non-speakable pictographs before TTS
|
|
||||||
// so a standalone "😊" doesn't become its own noisy
|
|
||||||
// segment. The chat bubble keeps the original text —
|
|
||||||
// only the audio path sees the cleaned version.
|
|
||||||
val spoken = stripNonSpeakable(raw).trim()
|
|
||||||
if (spoken.isNotEmpty()) qwen.enqueueSentence(spoken)
|
|
||||||
}
|
|
||||||
streamer.append(text)
|
streamer.append(text)
|
||||||
streamer.flush()
|
streamer.flush()
|
||||||
qwen.endStreamingSession()
|
qwen.endStreamingSession()
|
||||||
|
|
@ -190,41 +168,6 @@ class KazeiaPipeline {
|
||||||
_messages.value = _messages.value + msg
|
_messages.value = _messages.value + msg
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Drop emoji + dingbat + pictographic characters so the TTS engine
|
|
||||||
* doesn't try to synthesize them. Covers the main Unicode emoji
|
|
||||||
* blocks (Miscellaneous Symbols, Dingbats, Emoticons, Transport,
|
|
||||||
* Supplemental Symbols and Pictographs, etc.) plus variation
|
|
||||||
* selectors and zero-width joiners that tag emoji sequences.
|
|
||||||
* Keeps everything in the Basic Latin / Latin-1 / Latin Extended
|
|
||||||
* ranges + common French punctuation untouched.
|
|
||||||
*/
|
|
||||||
private fun stripNonSpeakable(text: String): String {
|
|
||||||
val sb = StringBuilder(text.length)
|
|
||||||
var i = 0
|
|
||||||
while (i < text.length) {
|
|
||||||
val cp = text.codePointAt(i)
|
|
||||||
val skip = when {
|
|
||||||
cp in 0x2600..0x27BF -> true // misc symbols + dingbats
|
|
||||||
cp in 0x1F300..0x1F5FF -> true // pictographs
|
|
||||||
cp in 0x1F600..0x1F64F -> true // emoticons
|
|
||||||
cp in 0x1F680..0x1F6FF -> true // transport
|
|
||||||
cp in 0x1F700..0x1F77F -> true // alchemical
|
|
||||||
cp in 0x1F780..0x1F7FF -> true // geometric extended
|
|
||||||
cp in 0x1F800..0x1F8FF -> true // supplemental arrows-c
|
|
||||||
cp in 0x1F900..0x1F9FF -> true // supplemental pictographs
|
|
||||||
cp in 0x1FA00..0x1FAFF -> true // symbols & pictographs extended-A
|
|
||||||
cp == 0x200D -> true // zero-width joiner
|
|
||||||
cp in 0xFE00..0xFE0F -> true // variation selectors
|
|
||||||
cp in 0x1F1E6..0x1F1FF -> true // regional indicators (flags)
|
|
||||||
else -> false
|
|
||||||
}
|
|
||||||
if (!skip) sb.appendCodePoint(cp)
|
|
||||||
i += Character.charCount(cp)
|
|
||||||
}
|
|
||||||
return sb.toString()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun log(msg: String) {
|
fun log(msg: String) {
|
||||||
Log.i(TAG, msg)
|
Log.i(TAG, msg)
|
||||||
val time = java.text.SimpleDateFormat("HH:mm:ss.SSS", java.util.Locale.FRANCE)
|
val time = java.text.SimpleDateFormat("HH:mm:ss.SSS", java.util.Locale.FRANCE)
|
||||||
|
|
|
||||||
|
|
@ -83,34 +83,6 @@ class KazeiaService : Service() {
|
||||||
private val _isListening = MutableStateFlow(false)
|
private val _isListening = MutableStateFlow(false)
|
||||||
val isListening: StateFlow<Boolean> = _isListening
|
val isListening: StateFlow<Boolean> = _isListening
|
||||||
|
|
||||||
// Drives the AudioVisualizerView orb. Pushed from the VAD loop
|
|
||||||
// during mic capture (mic RMS, normalized) and from the TTS engine's
|
|
||||||
// onSegmentPlaying callback (TTS RMS envelope per-segment). The view
|
|
||||||
// reads this via collectLatest in ChatActivity; the signals carry
|
|
||||||
// their own state so the visualizer knows whether it's idle, tracking
|
|
||||||
// the mic, or rendering a TTS segment.
|
|
||||||
/**
 * State published to the visualizer through [visualizerSignal]. Exactly one
 * variant is current at any time.
 */
sealed class VisualizerSignal {

    /** Nothing to render: neither mic capture nor TTS playback is driving the orb. */
    object Idle : VisualizerSignal()

    /** Mic capture is active; [micRms] is the normalized level pushed by the VAD loop. */
    data class Listening(val micRms: Float) : VisualizerSignal()

    /**
     * A TTS segment is playing. [rmsEnvelope] and [spectrogram] describe the
     * segment's audio and [durationMs] is its length in milliseconds.
     *
     * Note: the array-typed properties make the generated `equals`/`hashCode`
     * compare those fields by reference, not by content.
     */
    data class Speaking(
        val rmsEnvelope: FloatArray,
        val spectrogram: Array<FloatArray>,
        val durationMs: Long
    ) : VisualizerSignal()
}
|
|
||||||
private val _visualizerSignal = MutableStateFlow<VisualizerSignal>(VisualizerSignal.Idle)
|
|
||||||
val visualizerSignal: StateFlow<VisualizerSignal> = _visualizerSignal
|
|
||||||
|
|
||||||
// Kazeia's orb color is bound to the selected voice so the user
|
|
||||||
// visually associates a palette with the speaker they picked. UI
|
|
||||||
// sets this whenever the voice spinner changes; the orb view
|
|
||||||
// listens via the StateFlow and tweens the current → target color.
|
|
||||||
private val _voiceColor = MutableStateFlow(0xFFBCA4E8.toInt()) // lavender = Damien default
|
|
||||||
val voiceColor: StateFlow<Int> = _voiceColor
|
|
||||||
|
|
||||||
/** Called by the UI whenever the voice selector changes. */
|
|
||||||
fun setVoiceColor(color: Int) {
    // Publish the new color; collectors of voiceColor receive it via the StateFlow.
    _voiceColor.value = color
}
|
|
||||||
|
|
||||||
private val _debugMode = MutableStateFlow(false)
|
private val _debugMode = MutableStateFlow(false)
|
||||||
val debugMode: StateFlow<Boolean> = _debugMode
|
val debugMode: StateFlow<Boolean> = _debugMode
|
||||||
|
|
||||||
|
|
@ -202,12 +174,6 @@ class KazeiaService : Service() {
|
||||||
if (!::llm.isInitialized || !llm.isLoaded()) {
|
if (!::llm.isInitialized || !llm.isLoaded()) {
|
||||||
log("Stream LLM: LLM not ready"); return@launch
|
log("Stream LLM: LLM not ready"); return@launch
|
||||||
}
|
}
|
||||||
// Set pipeline state to Speaking so the continuous-
|
|
||||||
// listening mic loop (line ~824) drops frames during
|
|
||||||
// TTS playback. Without this, the mic picks up the
|
|
||||||
// tablet speaker and feeds our own TTS back into STT,
|
|
||||||
// creating an infinite loop.
|
|
||||||
_pipelineState.value = PipelineState.Speaking
|
|
||||||
qwenTts.startStreamingSession()
|
qwenTts.startStreamingSession()
|
||||||
val tStart = System.currentTimeMillis()
|
val tStart = System.currentTimeMillis()
|
||||||
var firstSentenceLogged = false
|
var firstSentenceLogged = false
|
||||||
|
|
@ -233,9 +199,6 @@ class KazeiaService : Service() {
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
log("Stream LLM error: ${e.message}")
|
log("Stream LLM error: ${e.message}")
|
||||||
e.printStackTrace()
|
e.printStackTrace()
|
||||||
} finally {
|
|
||||||
// Back to Idle so the next mic frame is accepted.
|
|
||||||
_pipelineState.value = PipelineState.Idle
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -451,18 +414,10 @@ class KazeiaService : Service() {
|
||||||
this, Manifest.permission.RECORD_AUDIO
|
this, Manifest.permission.RECORD_AUDIO
|
||||||
) == PackageManager.PERMISSION_GRANTED
|
) == PackageManager.PERMISSION_GRANTED
|
||||||
|
|
||||||
// FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK is required so ColorOS (and
|
|
||||||
// stock Android 14+ policies) don't mute the TTS AudioTrack with
|
|
||||||
// "clientVolume" at ~600 ms after play(). Without it the FGS was
|
|
||||||
// classified as mic-only or special-use and background-audio
|
|
||||||
// hardening silenced it. Combine with MICROPHONE so mic input keeps
|
|
||||||
// working during STT.
|
|
||||||
val fgsType = if (hasMicPermission) {
|
val fgsType = if (hasMicPermission) {
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE or
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK
|
|
||||||
} else {
|
} else {
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PLAYBACK or
|
ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
|
||||||
ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
|
||||||
|
|
@ -495,7 +450,7 @@ class KazeiaService : Service() {
|
||||||
// TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS
|
// TTS: try Qwen3-TTS (NPU Hexagon), fallback to Android TTS
|
||||||
_loadingState.value = LoadingState(15, "TTS Qwen3…")
|
_loadingState.value = LoadingState(15, "TTS Qwen3…")
|
||||||
try {
|
try {
|
||||||
val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir, this@KazeiaService) { msg -> log("[TTS] $msg") }
|
val qwenTts = com.kazeia.tts.Qwen3TtsEngine(nativeLibDir) { msg -> log("[TTS] $msg") }
|
||||||
qwenTts.load("$modelsDir/qwen3-tts-npu")
|
qwenTts.load("$modelsDir/qwen3-tts-npu")
|
||||||
if (qwenTts.isLoaded()) {
|
if (qwenTts.isLoaded()) {
|
||||||
tts = qwenTts
|
tts = qwenTts
|
||||||
|
|
@ -563,7 +518,7 @@ class KazeiaService : Service() {
|
||||||
|
|
||||||
// LLM: Qwen3-4B on Hexagon V79 via qnn_llama_runner.
|
// LLM: Qwen3-4B on Hexagon V79 via qnn_llama_runner.
|
||||||
_loadingState.value = LoadingState(50, "LLM Qwen3-4B NPU…")
|
_loadingState.value = LoadingState(50, "LLM Qwen3-4B NPU…")
|
||||||
llm = ExecuTorchLlmEngine(this@KazeiaService) { msg -> log(msg) }
|
llm = ExecuTorchLlmEngine { msg -> log(msg) }
|
||||||
try {
|
try {
|
||||||
llm.load("${KazeiaApplication.MODELS_DIR}/qwen3-4b", com.kazeia.core.LlmConfig())
|
llm.load("${KazeiaApplication.MODELS_DIR}/qwen3-4b", com.kazeia.core.LlmConfig())
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
|
|
@ -628,16 +583,6 @@ class KazeiaService : Service() {
|
||||||
if (chatterbox != null) {
|
if (chatterbox != null) {
|
||||||
chatterbox.setVoice(voicePath)
|
chatterbox.setVoice(voicePath)
|
||||||
log("Voice set to: $voicePath")
|
log("Voice set to: $voicePath")
|
||||||
return
|
|
||||||
}
|
|
||||||
val qwen = tts as? com.kazeia.tts.Qwen3TtsEngine
|
|
||||||
if (qwen != null) {
|
|
||||||
// Hot-swap prefix/suffix embeddings — no model reload. Takes
|
|
||||||
// effect from the NEXT synthesized segment (current in-flight
|
|
||||||
// one, if any, finishes with the old voice since the arrays
|
|
||||||
// are already in its closure).
|
|
||||||
qwen.setVoice(voicePath)
|
|
||||||
log("Voice set to: $voicePath")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -890,14 +835,6 @@ class KazeiaService : Service() {
|
||||||
for (s in frame) sumSq += s.toLong() * s.toLong()
|
for (s in frame) sumSq += s.toLong() * s.toLong()
|
||||||
val rms = Math.sqrt(sumSq.toDouble() / frameSize).toInt()
|
val rms = Math.sqrt(sumSq.toDouble() / frameSize).toInt()
|
||||||
|
|
||||||
// Drive the visualizer orb. Normalize with the same
|
|
||||||
// sqrt squashing used for TTS so loud peaks don't
|
|
||||||
// saturate and quiet speech is still visible. The
|
|
||||||
// visualizer stays in Listening mode; it will swap
|
|
||||||
// to Speaking or Idle when pipelineState moves on.
|
|
||||||
val rmsNorm = kotlin.math.sqrt((rms / 6000f).coerceIn(0f, 1f))
|
|
||||||
_visualizerSignal.value = VisualizerSignal.Listening(rmsNorm)
|
|
||||||
|
|
||||||
// Log RMS every second for calibration
|
// Log RMS every second for calibration
|
||||||
if (frameCount % 10 == 0) {
|
if (frameCount % 10 == 0) {
|
||||||
Log.d(TAG, "VAD RMS=$rms (threshold=$silenceThreshold)")
|
Log.d(TAG, "VAD RMS=$rms (threshold=$silenceThreshold)")
|
||||||
|
|
@ -1247,100 +1184,13 @@ class KazeiaService : Service() {
|
||||||
log("LLM stats: ${result.tokenCount} tokens in ${result.timeMs}ms (${result.tokensPerSecond} tok/s)")
|
log("LLM stats: ${result.tokenCount} tokens in ${result.timeMs}ms (${result.tokensPerSecond} tok/s)")
|
||||||
|
|
||||||
if (responseText.isNotEmpty()) {
|
if (responseText.isNotEmpty()) {
|
||||||
// Mark the pipeline as Speaking for the duration of TTS so
|
addMessage(ChatMessage(role = ChatMessage.Role.KAZEIA, text = responseText))
|
||||||
// the continuous-listening mic loop drops frames and we
|
pipeline.speakText(responseText)
|
||||||
// don't feed our own speaker output back into STT.
|
|
||||||
_pipelineState.value = PipelineState.Speaking
|
|
||||||
// Create a KAZEIA bubble up-front. Until the first TTS
|
|
||||||
// segment actually starts playing the bubble shows an
|
|
||||||
// animated "." → ".." → "..." typing indicator so the
|
|
||||||
// user knows Kazeia is thinking/synthesising; once the
|
|
||||||
// first segment plays the dots are cleared and the
|
|
||||||
// per-sentence word reveal takes over.
|
|
||||||
val bubble = ChatMessage(role = ChatMessage.Role.KAZEIA, text = ".")
|
|
||||||
addMessage(bubble)
|
|
||||||
val revealScope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.Default)
|
|
||||||
var revealedSoFar = ""
|
|
||||||
val revealJobs = mutableListOf<kotlinx.coroutines.Job>()
|
|
||||||
val firstSegmentSeen = java.util.concurrent.atomic.AtomicBoolean(false)
|
|
||||||
val typingJob = revealScope.launch {
|
|
||||||
var tick = 0
|
|
||||||
while (!firstSegmentSeen.get()) {
|
|
||||||
val dots = ".".repeat(1 + (tick % 3)) // . → .. → ...
|
|
||||||
updateMessageText(bubble.id, dots)
|
|
||||||
tick++
|
|
||||||
kotlinx.coroutines.delay(400)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
pipeline.speakText(responseText) { sentence, durationMs, envelope, spectrogram ->
|
|
||||||
// First segment: stop the typing indicator and
|
|
||||||
// reset the bubble to empty so the word reveal
|
|
||||||
// doesn't collide with the dots.
|
|
||||||
if (firstSegmentSeen.compareAndSet(false, true)) {
|
|
||||||
try { typingJob.cancel() } catch (_: Exception) {}
|
|
||||||
updateMessageText(bubble.id, "")
|
|
||||||
}
|
|
||||||
// Push the envelope + spectrogram to the
|
|
||||||
// visualizer at the same moment the MediaPlayer
|
|
||||||
// starts playing so the orb reacts to this
|
|
||||||
// segment's actual energy and the in-sphere
|
|
||||||
// spectrum bars match the audio content.
|
|
||||||
_visualizerSignal.value =
|
|
||||||
VisualizerSignal.Speaking(envelope, spectrogram, durationMs)
|
|
||||||
// Start a coroutine that appends one word at a time
|
|
||||||
// over the segment's audio duration. Words are
|
|
||||||
// separated on whitespace; punctuation rides with
|
|
||||||
// the trailing word. The prefix (= text already
|
|
||||||
// revealed from previous sentences) carries over so
|
|
||||||
// earlier sentences stay on screen.
|
|
||||||
val prefix = revealedSoFar
|
|
||||||
val words = sentence.split(Regex("\\s+")).filter { it.isNotBlank() }
|
|
||||||
revealedSoFar =
|
|
||||||
if (prefix.isEmpty()) sentence
|
|
||||||
else "$prefix $sentence"
|
|
||||||
if (words.isEmpty()) return@speakText
|
|
||||||
val perWordMs = (durationMs / words.size).coerceAtLeast(40L)
|
|
||||||
val job = revealScope.launch {
|
|
||||||
val sb = StringBuilder(prefix)
|
|
||||||
if (prefix.isNotEmpty()) sb.append(' ')
|
|
||||||
// Immediately reveal the first word so there's
|
|
||||||
// no visible gap between audio start and text.
|
|
||||||
sb.append(words[0])
|
|
||||||
updateMessageText(bubble.id, sb.toString())
|
|
||||||
for (i in 1 until words.size) {
|
|
||||||
kotlinx.coroutines.delay(perWordMs)
|
|
||||||
sb.append(' ').append(words[i])
|
|
||||||
updateMessageText(bubble.id, sb.toString())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
revealJobs.add(job)
|
|
||||||
}
|
|
||||||
// After all segments finished playing, ensure the full
|
|
||||||
// text is visible even if a reveal job was racing.
|
|
||||||
revealJobs.forEach { try { it.join() } catch (_: Exception) {} }
|
|
||||||
updateMessageText(bubble.id, responseText)
|
|
||||||
} finally {
|
|
||||||
// Defensive: cancel the typing dots in case no
|
|
||||||
// segment ever fired (e.g. the response was entirely
|
|
||||||
// emojis and got stripped empty).
|
|
||||||
firstSegmentSeen.set(true)
|
|
||||||
try { typingJob.cancel() } catch (_: Exception) {}
|
|
||||||
_pipelineState.value = if (_isListening.value)
|
|
||||||
PipelineState.Listening else PipelineState.Idle
|
|
||||||
// If we're going back to mic listening, the VAD loop
|
|
||||||
// will keep pushing Listening signals; otherwise drop
|
|
||||||
// to Idle so the orb settles back to its breathing
|
|
||||||
// baseline.
|
|
||||||
if (!_isListening.value) {
|
|
||||||
_visualizerSignal.value = VisualizerSignal.Idle
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
_pipelineState.value = if (_isListening.value)
|
|
||||||
PipelineState.Listening else PipelineState.Idle
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_pipelineState.value = if (_isListening.value)
|
||||||
|
PipelineState.Listening else PipelineState.Idle
|
||||||
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
_aiWorkload.value = _aiWorkload.value.copy(llmActive = false)
|
_aiWorkload.value = _aiWorkload.value.copy(llmActive = false)
|
||||||
log("ERROR: LLM generation error: ${e.message}")
|
log("ERROR: LLM generation error: ${e.message}")
|
||||||
|
|
@ -1357,19 +1207,6 @@ class KazeiaService : Service() {
|
||||||
_messages.value = _messages.value + message
|
_messages.value = _messages.value + message
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Replace the text of an existing message (identified by id) in the
|
|
||||||
* message list. Used by the progressive-reveal flow to grow a
|
|
||||||
* KAZEIA message word-by-word as TTS audio plays. */
|
|
||||||
private fun updateMessageText(id: Long, newText: String) {
    // This is called concurrently from the typing-indicator and word-reveal
    // coroutines, so a plain "read value, modify, write value" can overwrite
    // a change another writer made between the read and the write. Retry
    // with compareAndSet until the swap is applied to the snapshot we read.
    while (true) {
        val snapshot = _messages.value
        // Search from the end: the message being revealed is normally the newest.
        val idx = snapshot.indexOfLast { it.id == id }
        if (idx < 0) return // no message with this id: nothing to update
        val updated = snapshot.toMutableList().also { list ->
            list[idx] = snapshot[idx].copy(text = newText)
        }
        if (_messages.compareAndSet(snapshot, updated)) return
    }
}
|
|
||||||
|
|
||||||
private fun createNotification(): Notification {
|
private fun createNotification(): Notification {
|
||||||
val intent = Intent(this, ChatActivity::class.java)
|
val intent = Intent(this, ChatActivity::class.java)
|
||||||
val pendingIntent = PendingIntent.getActivity(
|
val pendingIntent = PendingIntent.getActivity(
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ import kotlin.coroutines.resume
|
||||||
*/
|
*/
|
||||||
class Qwen3TtsEngine(
|
class Qwen3TtsEngine(
|
||||||
private val nativeLibDir: String,
|
private val nativeLibDir: String,
|
||||||
private val context: android.content.Context? = null,
|
|
||||||
private val onLog: ((String) -> Unit)? = null
|
private val onLog: ((String) -> Unit)? = null
|
||||||
) : TtsEngine {
|
) : TtsEngine {
|
||||||
|
|
||||||
|
|
@ -89,38 +88,6 @@ class Qwen3TtsEngine(
|
||||||
private const val TOKEN_USER = 872
|
private const val TOKEN_USER = 872
|
||||||
private const val TOKEN_ASSISTANT = 1042
|
private const val TOKEN_ASSISTANT = 1042
|
||||||
private const val TOKEN_NEWLINE = 198
|
private const val TOKEN_NEWLINE = 198
|
||||||
|
|
||||||
// Streaming decode: when true, BigVGAN dispatches a chunk's audio as
|
|
||||||
// soon as SEQ_LEN codes are ready from the talker/CP loop rather than
|
|
||||||
// waiting for all tokens. For long segments this overlaps the final
|
|
||||||
// BigVGAN passes with ongoing talker/CP work on Hexagon, cutting the
|
|
||||||
// first-audio latency by ~4 s. Short segments (<SEQ_LEN codes) fall
|
|
||||||
// back to the single-chunk path with zero difference. Flag exists so
|
|
||||||
// the sequential path can be re-enabled for A/B comparison.
|
|
||||||
private const val USE_STREAMING_DECODE = true
|
|
||||||
|
|
||||||
// ColorOS Audio Hardening silently mutes AudioTrack in background/FGS
|
|
||||||
// context (confirmed via `event:muted updated source:clientVolume`
|
|
||||||
// logs, same behaviour across USAGE_MEDIA, USAGE_ASSISTANT, and
|
|
||||||
// USAGE_VOICE_COMMUNICATION). When this flag is true, each
|
|
||||||
// generated segment is written as a WAV to app-owned shared
|
|
||||||
// storage and played via MediaPlayer instead. Slightly slower
|
|
||||||
// (WAV write + MediaPlayer prepare add ~150 ms per segment) but
|
|
||||||
// it's the only reliable path to audible output on this device.
|
|
||||||
private const val USE_MEDIAPLAYER_FALLBACK = true
|
|
||||||
|
|
||||||
// Window size for the TTS→visualizer RMS sidecar. 50 ms at 24 kHz
|
|
||||||
// = 1200 samples/window — small enough for a 60 fps visualizer to
|
|
||||||
// track formants, large enough to run at negligible CPU cost.
|
|
||||||
const val ENVELOPE_WINDOW_MS = 50
|
|
||||||
// FFT size for the spectrum-in-sphere sidecar. 1024 samples at
|
|
||||||
// 24 kHz = 43 ms — slightly narrower than the hop so each frame
|
|
||||||
// gives a clean snapshot centered on its hop boundary.
|
|
||||||
private const val FFT_SIZE = 1024
|
|
||||||
// Number of log-spaced bands 120 Hz–4 kHz rendered as vertical
|
|
||||||
// bars inside the sphere during Speaking. 12 feels like a real
|
|
||||||
// spectrometer without cluttering at smaller sphere sizes.
|
|
||||||
const val SPECTRUM_BANDS = 12
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private var ortEnv: OrtEnvironment? = null
|
private var ortEnv: OrtEnvironment? = null
|
||||||
|
|
@ -276,12 +243,7 @@ class Qwen3TtsEngine(
|
||||||
return session
|
return session
|
||||||
}
|
}
|
||||||
|
|
||||||
// Speech decoder V2 on CPU. Two paths tried, both worse than CPU:
|
// Speech decoder V2 on CPU (HTP tested: BigVGAN convolutions too slow to compile)
|
||||||
// - HTP: BigVGAN convolutions too slow to compile (timeout)
|
|
||||||
// - GPU Adreno via QNN GPU EP: model loads but per-phrase
|
|
||||||
// inference is ~3.5 s vs ~2 s on CPU (GPU/CPU memory transfer
|
|
||||||
// overhead dominates for this conv-heavy model)
|
|
||||||
// CPU 8-thread stays the practical optimum.
|
|
||||||
val v2Path = "$path/v2_pre_conv"
|
val v2Path = "$path/v2_pre_conv"
|
||||||
if (File("$v2Path/model.onnx").exists()) {
|
if (File("$v2Path/model.onnx").exists()) {
|
||||||
nlog("Loading V2 speech decoder (CPU ONNX)...")
|
nlog("Loading V2 speech decoder (CPU ONNX)...")
|
||||||
|
|
@ -608,53 +570,8 @@ class Qwen3TtsEngine(
|
||||||
|
|
||||||
override fun isLoaded(): Boolean = loaded
|
override fun isLoaded(): Boolean = loaded
|
||||||
|
|
||||||
/**
|
|
||||||
* Hot-swap the speaker prefix/suffix embeddings used for voice
|
|
||||||
* conditioning. [voicePath] is a WAV path like
|
|
||||||
* `/…/voix/elodie.wav` — we derive the voice id from its basename
|
|
||||||
* and look for matching `<id>_voice_prefix.bin` + `<id>_voice_suffix.bin`
|
|
||||||
* in the model dir. If both files exist they replace the current
|
|
||||||
* [damienVoicePrefix] / [damienVoiceSuffix] arrays so the next
|
|
||||||
* segment generated uses the new voice. If either file is missing
|
|
||||||
* we log a warning and keep the current voice — per-voice
|
|
||||||
* prefix/suffix files are offline-generated via
|
|
||||||
* scripts/prepare_tts_native.py; run once per voice WAV and
|
|
||||||
* `adb push` into the model dir to enable.
|
|
||||||
*
|
|
||||||
* Thread-safety: the arrays are read by the synth worker on
|
|
||||||
* Dispatchers.IO; replacing a reference via a volatile var is
|
|
||||||
* atomic on the JVM so a mid-segment replacement just takes
|
|
||||||
* effect on the next segment boundary.
|
|
||||||
*/
|
|
||||||
fun setVoice(voicePath: String) {
    val modelDir = "/data/local/tmp/kazeia/models/qwen3-tts-npu"
    // The voice id is the WAV basename, lowercased (".../Elodie.wav" -> "elodie").
    val id = java.io.File(voicePath).nameWithoutExtension.lowercase()
    val prefixFile = java.io.File("$modelDir/${id}_voice_prefix.bin")
    val suffixFile = java.io.File("$modelDir/${id}_voice_suffix.bin")
    if (!prefixFile.exists() || !suffixFile.exists()) {
        nlog("Voice '$id' not available (missing ${prefixFile.name} or ${suffixFile.name}); keeping current voice. " +
            "Run scripts/prepare_tts_native.py with this WAV to generate the files.")
        return
    }
    try {
        // Parse BOTH files before touching the live arrays, so a bad suffix
        // file can never leave the new prefix paired with the old suffix.
        val newPrefix = readVoiceEmbedFile(prefixFile, "prefix")
        val newSuffix = readVoiceEmbedFile(suffixFile, "suffix")

        damienVoicePrefix = newPrefix
        damienVoiceSuffix = newSuffix
        nlog("Voice switched to '$id' (${newPrefix.size} prefix + ${newSuffix.size} suffix embeds)")
    } catch (e: Exception) {
        // Any read/format problem keeps the current voice; only log it.
        nlog("Voice swap failed for '$id': ${e.message}")
    }
}

/**
 * Reads one voice-embedding file: a little-endian header of two Int32 values
 * (row count, row dimension) followed by count x dimension Float32 values.
 *
 * @param file the `.bin` file to read.
 * @param label "prefix" or "suffix"; only used in error messages.
 * @return one [FloatArray] of length [TALKER_DIM] per row.
 * @throws IllegalStateException if the header is missing, the dimension is
 *   not [TALKER_DIM], the count is negative, or the payload is truncated.
 */
private fun readVoiceEmbedFile(file: java.io.File, label: String): Array<FloatArray> {
    val buf = java.nio.ByteBuffer.wrap(file.readBytes()).order(java.nio.ByteOrder.LITTLE_ENDIAN)
    if (buf.remaining() < 8) {
        throw IllegalStateException("$label file ${file.name} too short for header (${buf.remaining()} bytes)")
    }
    val count = buf.int
    val dim = buf.int
    if (dim != TALKER_DIM) throw IllegalStateException("$label dim $dim != $TALKER_DIM")
    if (count < 0) throw IllegalStateException("$label count $count is negative")
    // Validate the payload size up front: otherwise a truncated file fails
    // with a BufferUnderflowException whose message is null.
    val neededBytes = count.toLong() * TALKER_DIM * 4L
    if (buf.remaining() < neededBytes) {
        throw IllegalStateException(
            "$label file ${file.name} truncated: need $neededBytes payload bytes, have ${buf.remaining()}"
        )
    }
    // The float view starts at the current position and inherits the byte order.
    val floats = buf.asFloatBuffer()
    return Array(count) { FloatArray(TALKER_DIM).also { row -> floats.get(row) } }
}
|
||||||
|
|
||||||
override suspend fun synthesize(text: String, language: String): TtsResult {
|
override suspend fun synthesize(text: String, language: String): TtsResult {
|
||||||
|
|
@ -2752,11 +2669,7 @@ class Qwen3TtsEngine(
|
||||||
|
|
||||||
/** PTE pipeline from pre-computed embeddings (prefill + trailing). */
|
/** PTE pipeline from pre-computed embeddings (prefill + trailing). */
|
||||||
private fun runInterleavedPteFromEmbeds(
|
private fun runInterleavedPteFromEmbeds(
|
||||||
prefillEmbeds: List<FloatArray>, trailingEmbeds: List<FloatArray>, maxGenTokens: Int,
|
prefillEmbeds: List<FloatArray>, trailingEmbeds: List<FloatArray>, maxGenTokens: Int
|
||||||
// Invoked synchronously after each generated step with (stepIdx, 16-codebook codes).
|
|
||||||
// Streaming callers use it to dispatch SEQ_LEN-sized chunks to the BigVGAN pipeline
|
|
||||||
// as soon as they are ready. null preserves the original batch behaviour.
|
|
||||||
onCodeStep: ((step: Int, codes: IntArray) -> Unit)? = null
|
|
||||||
): Array<IntArray> {
|
): Array<IntArray> {
|
||||||
val talkerMod = talkerPteModule ?: return emptyArray()
|
val talkerMod = talkerPteModule ?: return emptyArray()
|
||||||
val cpMod = cpPteModule ?: return emptyArray()
|
val cpMod = cpPteModule ?: return emptyArray()
|
||||||
|
|
@ -2834,7 +2747,6 @@ class Qwen3TtsEngine(
|
||||||
totalCpMs += System.currentTimeMillis() - tCp0
|
totalCpMs += System.currentTimeMillis() - tCp0
|
||||||
for (cb in 1 until NUM_CODEBOOKS) codes[cb] = cpCodes[cb - 1]
|
for (cb in 1 until NUM_CODEBOOKS) codes[cb] = cpCodes[cb - 1]
|
||||||
allCodes.add(codes); generatedCb0.add(currentCb0)
|
allCodes.add(codes); generatedCb0.add(currentCb0)
|
||||||
onCodeStep?.invoke(genStep, codes)
|
|
||||||
|
|
||||||
if (genStep < 3) nlog("Step ${genStep+1}: cb0=$currentCb0 cb1=${codes[1]}")
|
if (genStep < 3) nlog("Step ${genStep+1}: cb0=$currentCb0 cb1=${codes[1]}")
|
||||||
|
|
||||||
|
|
@ -3404,18 +3316,6 @@ class Qwen3TtsEngine(
|
||||||
private var sessionTrack: AudioTrack? = null
|
private var sessionTrack: AudioTrack? = null
|
||||||
private var sessionChannel: kotlinx.coroutines.channels.Channel<String>? = null
|
private var sessionChannel: kotlinx.coroutines.channels.Channel<String>? = null
|
||||||
private var sessionJob: kotlinx.coroutines.Job? = null
|
private var sessionJob: kotlinx.coroutines.Job? = null
|
||||||
private var sessionKeepAliveJob: kotlinx.coroutines.Job? = null
|
|
||||||
private var sessionFocusRequest: android.media.AudioFocusRequest? = null
|
|
||||||
// Total PCM frames queued to sessionTrack across all segments in this session.
|
|
||||||
// endStreamingSession() polls track.playbackHeadPosition until it reaches this
|
|
||||||
// count before calling stop(), so the tail sentence isn't clipped.
|
|
||||||
// Uses AtomicLong because both the session worker and the keep-alive watchdog
|
|
||||||
// call writeAndCount concurrently.
|
|
||||||
private val sessionFramesWritten = java.util.concurrent.atomic.AtomicLong(0)
|
|
||||||
// True while a real-audio generate call is in progress. The keep-alive
|
|
||||||
// watchdog skips silence injection while this is set, so silence never
|
|
||||||
// interleaves with speech inside a segment.
|
|
||||||
private val sessionGenActive = java.util.concurrent.atomic.AtomicBoolean(false)
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open a streaming TTS session backed by a persistent AudioTrack. After
|
* Open a streaming TTS session backed by a persistent AudioTrack. After
|
||||||
|
|
@ -3424,403 +3324,13 @@ class Qwen3TtsEngine(
|
||||||
* track as soon as it's decoded. Call endStreamingSession() to flush
|
* track as soon as it's decoded. Call endStreamingSession() to flush
|
||||||
* the queue and release the track.
|
* the queue and release the track.
|
||||||
*/
|
*/
|
||||||
// MediaPlayer-based fallback session state. If ColorOS mutes our
|
|
||||||
// AudioTrack (as observed repeatedly — `event:muted updated source:
|
|
||||||
// clientVolume` right after play()), we instead render each segment
|
|
||||||
// as a WAV file on shared storage and play it back via MediaPlayer,
|
|
||||||
// which uses a completely different internal audio pipeline that
|
|
||||||
// doesn't get silenced by the background playback policy.
|
|
||||||
private var sessionMpQueue: kotlinx.coroutines.channels.Channel<String>? = null
|
|
||||||
private var sessionMpJob: kotlinx.coroutines.Job? = null
|
|
||||||
private val sessionMpSegIdx = java.util.concurrent.atomic.AtomicInteger(0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fires the moment a synthesized segment starts playing through the
|
|
||||||
* speaker. Carries the sentence text, audio duration, per-window RMS
|
|
||||||
* envelope (for orb amplitude) and per-window log-spaced band
|
|
||||||
* spectrogram (for the spectrum-in-sphere visualizer). All three
|
|
||||||
* share the same time axis — one entry per [ENVELOPE_WINDOW_MS].
|
|
||||||
*/
|
|
||||||
var onSegmentPlaying: ((
|
|
||||||
sentence: String,
|
|
||||||
durationMs: Long,
|
|
||||||
rmsEnvelope: FloatArray,
|
|
||||||
spectrogram: Array<FloatArray>
|
|
||||||
) -> Unit)? = null
|
|
||||||
|
|
||||||
private fun startStreamingSessionMp() {
    // A non-null queue means a MediaPlayer session is already open.
    if (sessionMpQueue != null) return
    sessionMpSegIdx.set(0)

    val ioScope = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO)

    // Unbounded inbox of sentences waiting to be synthesized.
    val pendingSentences = kotlinx.coroutines.channels.Channel<String>(
        capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
    )
    // Buffered hand-off (2 slots) from synthesis to playback. send() suspends
    // once two finished segments are waiting, which bounds how far synthesis
    // can run ahead of playback. Each item carries the WAV path together with
    // the sentence, duration, envelope and spectrogram of that segment.
    val readySegments = kotlinx.coroutines.channels.Channel<SegmentReady>(capacity = 2)

    // Synthesis worker: sentence -> PCM -> WAV file + visualizer sidecar data.
    val synthWorker = ioScope.launch {
        for (sentence in pendingSentences) {
            try {
                val segIdx = sessionMpSegIdx.getAndIncrement()
                val synthStartedAt = System.currentTimeMillis()
                val pcm = generateSegmentAudioVC(sentence, segIdx)
                // No audio produced for this sentence: move on to the next one.
                if (pcm.isEmpty()) continue
                val outputDir = context?.cacheDir?.absolutePath ?: "/data/local/tmp/kazeia"
                val wavPath = "$outputDir/tts_seg_${segIdx}.wav"
                saveWav(wavPath, pcm)
                val durationMs = pcm.size * 1000L / SR
                val rmsEnvelope = computeRmsEnvelope(pcm)
                val bandFrames = computeSpectrogram(pcm)
                nlog("MP seg $segIdx synthesized (${System.currentTimeMillis() - synthStartedAt}ms, ${durationMs}ms audio, ${rmsEnvelope.size} env × ${SPECTRUM_BANDS} bands), queued for playback")
                readySegments.send(
                    SegmentReady(segIdx, wavPath, sentence, durationMs, rmsEnvelope, bandFrames)
                )
            } catch (e: Exception) {
                nlog("MP synth error: ${e.message}")
            }
        }
        // Sentence queue closed and drained: closing the hand-off channel lets
        // the playback worker finish after the last segment.
        readySegments.close()
    }

    // Playback worker: plays the prepared segments through chained MediaPlayers.
    val playbackWorker = ioScope.launch { playChainedMediaPlayers(readySegments) }

    // Completes only after both workers have finished.
    val sessionDone = ioScope.launch {
        synthWorker.join()
        playbackWorker.join()
    }

    sessionMpQueue = pendingSentences
    sessionMpJob = sessionDone
    nlog("streaming session opened (MediaPlayer fallback, chained)")
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Drive the WAV playback pipeline with two MediaPlayer instances
|
|
||||||
* chained via setNextMediaPlayer() so each segment flows into the
|
|
||||||
* next without re-arming the audio output (which caused audible
|
|
||||||
* "pops" between segments when one player stopped and another
|
|
||||||
* started). Consumes [SegmentReady] items from [wavChan] and
|
|
||||||
* deletes each file after it finishes playing. Suspends until the
|
|
||||||
* channel closes AND the final segment finishes.
|
|
||||||
*/
|
|
||||||
private suspend fun playChainedMediaPlayers(
|
|
||||||
wavChan: kotlinx.coroutines.channels.ReceiveChannel<SegmentReady>
|
|
||||||
) {
|
|
||||||
val attrs = android.media.AudioAttributes.Builder()
|
|
||||||
.setUsage(android.media.AudioAttributes.USAGE_MEDIA)
|
|
||||||
.setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
|
|
||||||
.build()
|
|
||||||
|
|
||||||
// Prepare a MediaPlayer, suspending the current coroutine until it is ready.
|
|
||||||
// Throws if setDataSource() fails (caller handles cleanup); a prepare error is only logged.
|
|
||||||
suspend fun prepareMp(path: String, segIdx: Int): android.media.MediaPlayer {
|
|
||||||
val mp = android.media.MediaPlayer()
|
|
||||||
mp.setAudioAttributes(attrs)
|
|
||||||
mp.setDataSource(path)
|
|
||||||
kotlinx.coroutines.suspendCancellableCoroutine<Unit> { cont ->
|
|
||||||
mp.setOnPreparedListener { if (cont.isActive) cont.resume(Unit) {} }
|
|
||||||
mp.setOnErrorListener { _, what, extra ->
|
|
||||||
nlog("MP seg $segIdx prepare error: what=$what extra=$extra")
|
|
||||||
if (cont.isActive) cont.resume(Unit) {}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
cont.invokeOnCancellation { try { mp.release() } catch (_: Exception) {} }
|
|
||||||
mp.prepareAsync()
|
|
||||||
}
|
|
||||||
return mp
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per-player book-keeping. `done` completes the moment the
|
|
||||||
// MediaPlayer's OnCompletionListener fires, so the loop can
|
|
||||||
// tell *before* calling setNextMediaPlayer whether the chain
|
|
||||||
// will actually trigger (setNextMediaPlayer on a player already
|
|
||||||
// in the Completed state is a silent no-op — that was the root
|
|
||||||
// cause of missing audio on seg 1 when synthesis ran longer
|
|
||||||
// than seg 0's playback).
|
|
||||||
class Live(
|
|
||||||
val mp: android.media.MediaPlayer,
|
|
||||||
val info: SegmentReady,
|
|
||||||
val done: kotlinx.coroutines.CompletableDeferred<Unit>
|
|
||||||
)
|
|
||||||
|
|
||||||
fun arm(info: SegmentReady, mp: android.media.MediaPlayer): Live {
|
|
||||||
val done = kotlinx.coroutines.CompletableDeferred<Unit>()
|
|
||||||
mp.setOnCompletionListener {
|
|
||||||
try { it.release() } catch (_: Exception) {}
|
|
||||||
if (!done.isCompleted) done.complete(Unit)
|
|
||||||
}
|
|
||||||
mp.setOnErrorListener { _, what, extra ->
|
|
||||||
nlog("MP seg ${info.segIdx} play error: what=$what extra=$extra")
|
|
||||||
if (!done.isCompleted) done.complete(Unit)
|
|
||||||
true
|
|
||||||
}
|
|
||||||
return Live(mp, info, done)
|
|
||||||
}
|
|
||||||
|
|
||||||
var current: Live? = null
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Bootstrap with the first segment.
|
|
||||||
val first = wavChan.receiveCatching().getOrNull() ?: return
|
|
||||||
val firstMp = prepareMp(first.wavPath, first.segIdx)
|
|
||||||
firstMp.start()
|
|
||||||
current = arm(first, firstMp)
|
|
||||||
try { onSegmentPlaying?.invoke(first.sentence, first.durationMs, first.rmsEnvelope, first.spectrogram) } catch (_: Exception) {}
|
|
||||||
nlog("MP seg ${first.segIdx} started (${first.durationMs}ms)")
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
val upcoming = wavChan.receiveCatching().getOrNull() ?: break
|
|
||||||
val nextMp = prepareMp(upcoming.wavPath, upcoming.segIdx)
|
|
||||||
|
|
||||||
// Try to chain so Android auto-starts next when current
|
|
||||||
// finishes — gives zero-gap playback without re-arming
|
|
||||||
// the DAC. Skipped if current has already completed
|
|
||||||
// (setNext on Completed is a no-op); we fall back to an
|
|
||||||
// explicit start() below in that case.
|
|
||||||
var chained = false
|
|
||||||
try {
|
|
||||||
if (!current!!.done.isCompleted) {
|
|
||||||
current!!.mp.setNextMediaPlayer(nextMp)
|
|
||||||
chained = true
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
nlog("MP seg ${upcoming.segIdx} setNext failed: ${e.message}")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for current playback to finish before rotating.
|
|
||||||
current!!.done.await()
|
|
||||||
try { java.io.File(current!!.info.wavPath).delete() } catch (_: Exception) {}
|
|
||||||
|
|
||||||
// If we never chained (or the chain raced with the
|
|
||||||
// current's completion), start next manually. Safe to
|
|
||||||
// start() again even if Android already auto-started.
|
|
||||||
val autoStarted = try { chained && (nextMp.isPlaying || nextMp.currentPosition > 0) } catch (_: Exception) { false }
|
|
||||||
if (!autoStarted) {
|
|
||||||
try { nextMp.start() } catch (e: Exception) {
|
|
||||||
nlog("MP seg ${upcoming.segIdx} manual start failed: ${e.message}")
|
|
||||||
}
|
|
||||||
nlog("MP seg ${upcoming.segIdx} started manually (chain missed)")
|
|
||||||
} else {
|
|
||||||
nlog("MP seg ${upcoming.segIdx} auto-chained")
|
|
||||||
}
|
|
||||||
|
|
||||||
current = arm(upcoming, nextMp)
|
|
||||||
try { onSegmentPlaying?.invoke(upcoming.sentence, upcoming.durationMs, upcoming.rmsEnvelope, upcoming.spectrogram) } catch (_: Exception) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Drain: wait for the last player to finish.
|
|
||||||
current?.done?.await()
|
|
||||||
current?.let { try { java.io.File(it.info.wavPath).delete() } catch (_: Exception) {} }
|
|
||||||
} catch (e: Exception) {
|
|
||||||
nlog("MP playback chain error: ${e.message}")
|
|
||||||
} finally {
|
|
||||||
try { current?.mp?.release() } catch (_: Exception) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * One synthesized segment, passed from the synth worker to the playback
 * worker so the UI can be told what is being spoken when playback starts.
 *
 * @property segIdx index of the segment, used in log messages.
 * @property wavPath path of the WAV file to play; the playback worker deletes it afterwards.
 * @property sentence text of the segment, forwarded to the UI callback.
 * @property durationMs duration of the segment in milliseconds, forwarded to the UI callback.
 * @property rmsEnvelope per-ENVELOPE_WINDOW_MS RMS values normalized to [0, 1]; lets the
 *   orb visualizer react to the audio without reading PCM back from MediaPlayer.
 * @property spectrogram per-window band magnitudes forwarded to the UI callback
 *   (presumably the output of computeSpectrogram — confirm at the producer).
 */
private data class SegmentReady(
    val segIdx: Int,
    val wavPath: String,
    val sentence: String,
    val durationMs: Long,
    val rmsEnvelope: FloatArray,
    val spectrogram: Array<FloatArray>
)
|
|
||||||
|
|
||||||
/**
 * Builds a loudness envelope for a mono 16-bit PCM buffer sampled at [SR]:
 * one value per ENVELOPE_WINDOW_MS window, each in [0, 1].
 *
 * Single pass over the samples; intended to be called once per segment
 * right after synthesis.
 *
 * @param audio PCM samples; an empty buffer yields an empty envelope.
 * @return one normalized RMS value per window (the last window may be shorter).
 */
private fun computeRmsEnvelope(audio: ShortArray): FloatArray {
    if (audio.isEmpty()) return FloatArray(0)

    val samplesPerWindow = SR * ENVELOPE_WINDOW_MS / 1000
    // Ceiling division so a trailing partial window still gets a value.
    val windowCount = (audio.size + samplesPerWindow - 1) / samplesPerWindow

    return FloatArray(windowCount) { w ->
        val from = w * samplesPerWindow
        val until = minOf(from + samplesPerWindow, audio.size)

        var energy = 0.0
        var i = from
        while (i < until) {
            val v = audio[i].toDouble()
            energy += v * v
            i++
        }
        val rms = kotlin.math.sqrt(energy / (until - from))

        // 32767 is full scale. The outer sqrt compresses the upper range so
        // quiet speech still produces visible motion without loud peaks
        // saturating.
        kotlin.math.sqrt((rms / 32767.0).coerceIn(0.0, 1.0)).toFloat()
    }
}
|
|
||||||
|
|
||||||
/**
 * Builds the band spectrogram used by the spectrum-in-sphere visualizer.
 *
 * The time axis matches the RMS envelope: one frame per ENVELOPE_WINDOW_MS
 * hop. Each frame is an FFT_SIZE-sample Hann-windowed FFT centered on the
 * hop midpoint (per the original note, 1024 samples, about 43 ms at 24 kHz),
 * reduced to [SPECTRUM_BANDS] log-spaced bands between 120 Hz and 4 kHz —
 * the vocal formant range, skipping sub-100 Hz and fricative >4 kHz content.
 *
 * @param audio mono 16-bit PCM; an empty buffer yields an empty array.
 * @return `frames x SPECTRUM_BANDS` values, each clamped to [0, 1].
 */
private fun computeSpectrogram(audio: ShortArray): Array<FloatArray> {
    if (audio.isEmpty()) return emptyArray()

    val n = FFT_SIZE
    val hop = SR * ENVELOPE_WINDOW_MS / 1000
    val frameCount = (audio.size + hop - 1) / hop

    // Band boundaries expressed as FFT bin indices, log-spaced in frequency.
    val hzPerBin = SR.toDouble() / n
    val lowHz = 120.0
    val highHz = 4000.0
    val edges = IntArray(SPECTRUM_BANDS + 1) { e ->
        val hz = lowHz * Math.pow(highHz / lowHz, e.toDouble() / SPECTRUM_BANDS)
        (hz / hzPerBin).toInt().coerceIn(1, n / 2 - 1)
    }

    // Hann window: less spectral leakage, cleaner bars.
    val window = FloatArray(n) { i ->
        (0.5 - 0.5 * Math.cos(2.0 * Math.PI * i / (n - 1))).toFloat()
    }

    // Scratch buffers reused for every frame.
    val real = FloatArray(n)
    val imag = FloatArray(n)
    val logNorm = Math.log10(7.0)

    return Array(frameCount) { frame ->
        // First sample of a window centered on this hop's midpoint; samples
        // that fall outside the buffer are treated as silence.
        val first = frame * hop + hop / 2 - n / 2
        for (i in 0 until n) {
            val src = first + i
            val s = if (src >= 0 && src < audio.size) audio[src].toFloat() / 32768f else 0f
            real[i] = s * window[i]
            imag[i] = 0f
        }
        fftInPlace(real, imag)

        FloatArray(SPECTRUM_BANDS) { band ->
            val lo = edges[band]
            // Guarantee at least one bin per band even when edges collide.
            val hi = maxOf(edges[band + 1], lo + 1)
            var power = 0.0
            for (bin in lo until hi) {
                val r = real[bin].toDouble()
                val q = imag[bin].toDouble()
                power += r * r + q * q
            }
            val mag = Math.sqrt(power / (hi - lo))
            // Log-compress and normalize; per the original note the constants
            // map typical speech energy to roughly [0.2, 0.95].
            (Math.log10(1.0 + mag * 80) / logNorm).toFloat().coerceIn(0f, 1f)
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * In-place radix-2 Cooley–Tukey FFT (forward transform, unnormalized).
 *
 * @param re real parts; overwritten with the real parts of the spectrum.
 * @param im imaginary parts; overwritten with the imaginary parts of the spectrum.
 * @throws IllegalArgumentException if the arrays differ in length or the
 *   length is not a power of two.
 */
private fun fftInPlace(re: FloatArray, im: FloatArray) {
    val n = re.size
    require(im.size == n) { "re and im must have the same length" }
    require(n and (n - 1) == 0) { "FFT size must be a power of 2, got $n" }

    // Bit-reversal permutation.
    var j = 0
    for (i in 1 until n) {
        var bit = n shr 1
        while (j and bit != 0) { j = j xor bit; bit = bit shr 1 }
        j = j or bit
        if (i < j) {
            val tr = re[i]; re[i] = re[j]; re[j] = tr
            val ti = im[i]; im[i] = im[j]; im[j] = ti
        }
    }

    // Butterflies. At a stage of width `size`, the twiddle for the p-th
    // butterfly of a group is exp(-2*pi*i * p / size). The angle is derived
    // from `size` alone; mixing in the global stride n/size would give wrong
    // twiddles at every stage except the first and the last.
    var size = 2
    while (size <= n) {
        val half = size / 2
        val anglePerStep = -2.0 * Math.PI / size
        var m = 0
        while (m < n) {
            for (p in 0 until half) {
                // Evaluate in double precision, then narrow once.
                val angle = anglePerStep * p
                val c = Math.cos(angle).toFloat()
                val s = Math.sin(angle).toFloat()
                val a = m + p
                val b = a + half
                val tRe = re[b] * c - im[b] * s
                val tIm = re[b] * s + im[b] * c
                re[b] = re[a] - tRe
                im[b] = im[a] - tIm
                re[a] = re[a] + tRe
                im[a] = im[a] + tIm
            }
            m += size
        }
        size *= 2
    }
}
|
|
||||||
|
|
||||||
// Closes the MediaPlayer-fallback streaming session: closes the sentence
// queue, waits for the session job to finish, then clears the session state
// and the segment callback. Does nothing when no session queue exists.
private suspend fun endStreamingSessionMp() {
    val queue = sessionMpQueue
    if (queue == null) return

    // Closing the queue tells the worker no more sentences are coming.
    queue.close()
    try {
        sessionMpJob?.join()
    } catch (_: Exception) {
    }

    sessionMpQueue = null
    sessionMpJob = null
    onSegmentPlaying = null
    nlog("streaming session closed (MediaPlayer fallback)")
}
|
|
||||||
|
|
||||||
/**
 * Plays the WAV file at [path] with a dedicated Android MediaPlayer and
 * suspends the calling coroutine until playback ends, fails, or setup throws.
 *
 * Per the original author's note, MediaPlayer goes through a different audio
 * pipeline than AudioTrack and so avoids ColorOS's AudioTrack
 * hardening/muting behaviour.
 *
 * @param path WAV file to play.
 * @param segIdx segment index, used only in log messages.
 */
private suspend fun playWavBlocking(path: String, segIdx: Int) {
    val startedAt = System.currentTimeMillis()
    suspendCancellableCoroutine<Unit> { cont ->
        val player = android.media.MediaPlayer()
        try {
            val speechAttrs = android.media.AudioAttributes.Builder()
                .setUsage(android.media.AudioAttributes.USAGE_MEDIA)
                .setContentType(android.media.AudioAttributes.CONTENT_TYPE_SPEECH)
                .build()
            player.setAudioAttributes(speechAttrs)
            player.setDataSource(path)

            // Start as soon as the asynchronous prepare completes.
            player.setOnPreparedListener { ready ->
                nlog("MP seg $segIdx prepared, starting (prep ${System.currentTimeMillis() - startedAt}ms)")
                ready.start()
            }
            // Normal end of playback: release, then wake the caller.
            player.setOnCompletionListener { finished ->
                nlog("MP seg $segIdx done (${System.currentTimeMillis() - startedAt}ms total)")
                try { finished.release() } catch (_: Exception) {}
                if (cont.isActive) cont.resume(Unit) {}
            }
            // Playback errors also wake the caller; they are logged, not thrown.
            player.setOnErrorListener { failed, what, extra ->
                nlog("MP seg $segIdx error: what=$what extra=$extra")
                try { failed.release() } catch (_: Exception) {}
                if (cont.isActive) cont.resume(Unit) {}
                true
            }

            player.prepareAsync()
            cont.invokeOnCancellation {
                try { player.release() } catch (_: Exception) {}
            }
        } catch (e: Exception) {
            nlog("MP seg $segIdx setup failed: ${e.message}")
            try { player.release() } catch (_: Exception) {}
            if (cont.isActive) cont.resume(Unit) {}
        }
    }
}
|
|
||||||
|
|
||||||
fun startStreamingSession() {
|
fun startStreamingSession() {
|
||||||
if (USE_MEDIAPLAYER_FALLBACK) { startStreamingSessionMp(); return }
|
|
||||||
if (sessionTrack != null) return // already open
|
if (sessionTrack != null) return // already open
|
||||||
// USAGE_VOICE_COMMUNICATION routes to STREAM_VOICE_CALL, which
|
|
||||||
// ColorOS's "Audio Hardening" policy does NOT silently mute (the
|
|
||||||
// policy targets STREAM_MUSIC to preserve battery on inactive media
|
|
||||||
// apps; STREAM_VOICE_CALL is reserved for VoIP and always plays).
|
|
||||||
// Previous attempts with USAGE_MEDIA and USAGE_ASSISTANT both got
|
|
||||||
// `event:muted updated source:clientVolume` ~0.6–1 s after play()
|
|
||||||
// even with audio focus + mediaPlayback FGS, so moving off of
|
|
||||||
// STREAM_MUSIC is the only route that unblocks audible playback.
|
|
||||||
val attrs = AudioAttributes.Builder()
|
|
||||||
.setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION)
|
|
||||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
|
||||||
.build()
|
|
||||||
val track = AudioTrack.Builder()
|
val track = AudioTrack.Builder()
|
||||||
.setAudioAttributes(attrs)
|
.setAudioAttributes(AudioAttributes.Builder()
|
||||||
|
.setUsage(AudioAttributes.USAGE_MEDIA)
|
||||||
|
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||||
|
.build())
|
||||||
.setAudioFormat(AudioFormat.Builder()
|
.setAudioFormat(AudioFormat.Builder()
|
||||||
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
||||||
.setSampleRate(SR)
|
.setSampleRate(SR)
|
||||||
|
|
@ -3830,77 +3340,7 @@ class Qwen3TtsEngine(
|
||||||
// paces writes when full.
|
// paces writes when full.
|
||||||
.setTransferMode(AudioTrack.MODE_STREAM)
|
.setTransferMode(AudioTrack.MODE_STREAM)
|
||||||
.build()
|
.build()
|
||||||
// Request audio focus for the duration of the session. Without this
|
|
||||||
// ColorOS's Audio Hardening treats the track as background noise
|
|
||||||
// and mutes it, regardless of FGS status. We don't care about
|
|
||||||
// focus loss callbacks — if another app grabs focus mid-sentence
|
|
||||||
// that's fine, the track just gets ducked.
|
|
||||||
val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
|
|
||||||
val focusReq = android.media.AudioFocusRequest.Builder(android.media.AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
|
|
||||||
.setAudioAttributes(attrs)
|
|
||||||
.setOnAudioFocusChangeListener { _ -> }
|
|
||||||
.build()
|
|
||||||
val focusRes = am?.requestAudioFocus(focusReq)
|
|
||||||
nlog("audio focus request: $focusRes (1=granted, 0=failed, 2=delayed)")
|
|
||||||
sessionFocusRequest = focusReq
|
|
||||||
// ColorOS mutes AudioTrack clientVolume ~1s after creation (seen in
|
|
||||||
// dumpsys audio as `event:muted updated source:clientVolume`). Force
|
|
||||||
// track volume back to 1.0 repeatedly to override. This is also
|
|
||||||
// done in the keep-alive watchdog loop below for ongoing override.
|
|
||||||
try { track.setVolume(1.0f) } catch (_: Exception) {}
|
|
||||||
track.play()
|
track.play()
|
||||||
sessionFramesWritten.set(0)
|
|
||||||
sessionGenActive.set(false)
|
|
||||||
// writeAndCount is the single path through which PCM reaches the
|
|
||||||
// AudioTrack for this session, so sessionFramesWritten always stays
|
|
||||||
// in sync with what's been queued to playback hardware. AudioTrack.write
|
|
||||||
// is thread-safe, so this can be called concurrently from the session
|
|
||||||
// worker (real audio) and the keep-alive watchdog (silence padding).
|
|
||||||
val writeAndCount: (ShortArray) -> Unit = { pcm ->
|
|
||||||
if (pcm.isNotEmpty()) {
|
|
||||||
val n = track.write(pcm, 0, pcm.size)
|
|
||||||
if (n > 0) sessionFramesWritten.addAndGet(n.toLong())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Bootstrap silence: queue 500 ms immediately after play() so
|
|
||||||
// AudioFlinger has samples to mix from the very first cycle.
|
|
||||||
// Without this, there's a ~100 ms window between play() and the
|
|
||||||
// first watchdog tick where the track has no data and AudioFlinger
|
|
||||||
// flags it for removal. Once that happens, playbackHead sticks at
|
|
||||||
// 0 and subsequent writes go to a dead track.
|
|
||||||
val bootstrapSilence = ShortArray(SR / 2) // 500 ms
|
|
||||||
writeAndCount(bootstrapSilence)
|
|
||||||
// Keep-alive watchdog. AudioFlinger on OnePlus/ColorOS kills a track
|
|
||||||
// that underruns for ~1 s (confirmed via `prepareTracks_l BUFFER
|
|
||||||
// TIMEOUT: remove track … due to underrun on thread 29`). Our
|
|
||||||
// per-segment synthesis takes 3–5 s, which always exceeds that
|
|
||||||
// window between writes, so the track was getting silenced after
|
|
||||||
// the first ~1 s of audio played. The watchdog pads with 200 ms of
|
|
||||||
// silence any time the buffered-ahead audio drops below 400 ms,
|
|
||||||
// regardless of segment state — silence only advances playback head
|
|
||||||
// in the gaps between real audio and is never inserted inside a
|
|
||||||
// contiguous burst of real writes (those bring buffered above 400 ms
|
|
||||||
// and keep the watchdog quiet).
|
|
||||||
val keepAliveBuffer = ShortArray(SR / 5) // 200 ms of silence
|
|
||||||
val keepAliveJob = kotlinx.coroutines.CoroutineScope(
|
|
||||||
kotlinx.coroutines.Dispatchers.IO
|
|
||||||
).launch {
|
|
||||||
var tick = 0
|
|
||||||
while (kotlinx.coroutines.currentCoroutineContext()[kotlinx.coroutines.Job]?.isActive != false) {
|
|
||||||
kotlinx.coroutines.delay(100)
|
|
||||||
val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
|
|
||||||
val written = sessionFramesWritten.get() and 0xFFFFFFFFL
|
|
||||||
val buffered = written - head
|
|
||||||
val needsPad = buffered < SR * 2 / 5 // < 400 ms
|
|
||||||
if ((tick and 0x1F) == 0) {
|
|
||||||
nlog("keepAlive tick=$tick head=$head written=$written buffered=$buffered pad=$needsPad state=${track.playState}")
|
|
||||||
}
|
|
||||||
tick++
|
|
||||||
// Override any clientVolume mute that ColorOS keeps applying.
|
|
||||||
try { track.setVolume(1.0f) } catch (_: Exception) {}
|
|
||||||
if (needsPad) writeAndCount(keepAliveBuffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
val chan = kotlinx.coroutines.channels.Channel<String>(
|
val chan = kotlinx.coroutines.channels.Channel<String>(
|
||||||
capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
|
capacity = kotlinx.coroutines.channels.Channel.UNLIMITED
|
||||||
)
|
)
|
||||||
|
|
@ -3910,26 +3350,15 @@ class Qwen3TtsEngine(
|
||||||
var segIdx = 0
|
var segIdx = 0
|
||||||
for (sentence in chan) {
|
for (sentence in chan) {
|
||||||
try {
|
try {
|
||||||
sessionGenActive.set(true)
|
val audio = generateSegmentAudioVC(sentence, segIdx)
|
||||||
if (USE_STREAMING_DECODE && talkerPteModule != null && cpPteModule != null) {
|
if (audio.isNotEmpty()) track.write(audio, 0, audio.size)
|
||||||
// CP↔BigVGAN overlap path: audio chunks flow to the
|
|
||||||
// shared AudioTrack as soon as BigVGAN finishes each
|
|
||||||
// SEQ_LEN window, instead of after the whole segment.
|
|
||||||
generateSegmentAudioVCStreaming(sentence, segIdx, writeAndCount)
|
|
||||||
} else {
|
|
||||||
val audio = generateSegmentAudioVC(sentence, segIdx)
|
|
||||||
writeAndCount(audio)
|
|
||||||
}
|
|
||||||
segIdx++
|
segIdx++
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
nlog("session seg $segIdx error: ${e.message}")
|
nlog("session seg $segIdx error: ${e.message}")
|
||||||
} finally {
|
|
||||||
sessionGenActive.set(false)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sessionTrack = track; sessionChannel = chan; sessionJob = job
|
sessionTrack = track; sessionChannel = chan; sessionJob = job
|
||||||
sessionKeepAliveJob = keepAliveJob
|
|
||||||
nlog("streaming session opened")
|
nlog("streaming session opened")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3939,12 +3368,6 @@ class Qwen3TtsEngine(
|
||||||
* immediately. Sentences play in the order they were enqueued.
|
* immediately. Sentences play in the order they were enqueued.
|
||||||
*/
|
*/
|
||||||
fun enqueueSentence(sentence: String) {
|
fun enqueueSentence(sentence: String) {
|
||||||
if (USE_MEDIAPLAYER_FALLBACK) {
|
|
||||||
val chan = sessionMpQueue ?: run { nlog("enqueueSentence: no MP session"); return }
|
|
||||||
val r = chan.trySend(sentence)
|
|
||||||
if (r.isFailure) nlog("enqueueSentence: MP channel full / closed")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return }
|
val chan = sessionChannel ?: run { nlog("enqueueSentence: no session open"); return }
|
||||||
val r = chan.trySend(sentence)
|
val r = chan.trySend(sentence)
|
||||||
if (r.isFailure) nlog("enqueueSentence: channel full / closed")
|
if (r.isFailure) nlog("enqueueSentence: channel full / closed")
|
||||||
|
|
@ -3956,46 +3379,17 @@ class Qwen3TtsEngine(
|
||||||
* drains), then release the shared track. Safe to call more than once.
|
* drains), then release the shared track. Safe to call more than once.
|
||||||
*/
|
*/
|
||||||
suspend fun endStreamingSession() {
|
suspend fun endStreamingSession() {
|
||||||
if (USE_MEDIAPLAYER_FALLBACK) { endStreamingSessionMp(); return }
|
|
||||||
val chan = sessionChannel ?: return
|
val chan = sessionChannel ?: return
|
||||||
chan.close()
|
chan.close()
|
||||||
try { sessionJob?.join() } catch (_: Exception) {}
|
try { sessionJob?.join() } catch (_: Exception) {}
|
||||||
// Stop the keep-alive watchdog BEFORE draining so it doesn't pad more
|
|
||||||
// silence onto the tail while we're waiting for the existing buffer
|
|
||||||
// to play out.
|
|
||||||
try { sessionKeepAliveJob?.cancel() } catch (_: Exception) {}
|
|
||||||
try { sessionKeepAliveJob?.join() } catch (_: Exception) {}
|
|
||||||
try {
|
try {
|
||||||
sessionTrack?.let { track ->
|
sessionTrack?.let {
|
||||||
// AudioTrack.stop() in MODE_STREAM DISCARDS unplayed buffered
|
// Block until written samples have been consumed by the
|
||||||
// samples — it doesn't block for drain. Poll getPlaybackHead
|
// hardware so users aren't cut off mid-syllable.
|
||||||
// Position() until it reaches what we wrote, then stop. The
|
it.stop(); it.release()
|
||||||
// head is a 32-bit wrap-around counter, so compare modulo.
|
|
||||||
// Cap the drain wait so a stalled track can't block us forever.
|
|
||||||
val targetFrames = sessionFramesWritten.get()
|
|
||||||
val startMs = System.currentTimeMillis()
|
|
||||||
val maxDrainMs = (targetFrames * 1000L / SR) + 500L // audio dur + 500ms slack
|
|
||||||
while (true) {
|
|
||||||
val head = track.playbackHeadPosition.toLong() and 0xFFFFFFFFL
|
|
||||||
val reached = head >= (targetFrames and 0xFFFFFFFFL)
|
|
||||||
val state = track.playState
|
|
||||||
if (reached || state != AudioTrack.PLAYSTATE_PLAYING) break
|
|
||||||
if (System.currentTimeMillis() - startMs > maxDrainMs) {
|
|
||||||
nlog("endStreamingSession: drain timeout at head=$head/$targetFrames")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
kotlinx.coroutines.delay(20)
|
|
||||||
}
|
|
||||||
track.stop(); track.release()
|
|
||||||
}
|
}
|
||||||
} catch (_: Exception) {}
|
} catch (_: Exception) {}
|
||||||
// Release audio focus after the track is fully drained and stopped.
|
sessionTrack = null; sessionChannel = null; sessionJob = null
|
||||||
try {
|
|
||||||
val am = context?.getSystemService(android.content.Context.AUDIO_SERVICE) as? android.media.AudioManager
|
|
||||||
sessionFocusRequest?.let { am?.abandonAudioFocusRequest(it) }
|
|
||||||
} catch (_: Exception) {}
|
|
||||||
sessionFocusRequest = null
|
|
||||||
sessionTrack = null; sessionChannel = null; sessionJob = null; sessionKeepAliveJob = null
|
|
||||||
nlog("streaming session closed")
|
nlog("streaming session closed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4052,177 +3446,6 @@ class Qwen3TtsEngine(
|
||||||
return fadeOut(decodeChunked(codebooks, n), 40)
|
return fadeOut(decodeChunked(codebooks, n), 40)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------- Streaming decode (CP ↔ BigVGAN overlap) ----------
|
|
||||||
|
|
||||||
/**
 * Message sent from the talker/CP producer to the BigVGAN consumer.
 *
 * @property codebooks the codes for one chunk, one [IntArray] per codebook.
 * @property realTokens how many leading positions hold real (non-padding) tokens.
 */
private class ChunkMsg(
    val codebooks: Array<IntArray>,
    val realTokens: Int
)
|
|
||||||
|
|
||||||
/**
 * Incremental counterpart of decodeChunked. Chunks are appended to an
 * internal buffer and crossfaded over the overlap region; per the original
 * note this mirrors decodeChunked so the assembled audio is identical.
 *
 * The difference is when audio leaves: once part of the buffer is "stable"
 * (everything before the last CHUNK_OVERLAP * SAMPLES_PER_TOKEN samples,
 * which a later chunk may still rewrite), it is handed to [onAudio]
 * immediately. [flushFinal] emits what is left, with the trailing fade-out.
 */
private inner class StreamingCrossfader(private val onAudio: (ShortArray) -> Unit) {
    // Samples kept back because the next chunk's crossfade may rewrite them.
    private val holdBack = CHUNK_OVERLAP * SAMPLES_PER_TOKEN
    private var assembled = ShortArray(0)
    private var emitted = 0
    private var awaitingFirst = true

    /**
     * Adds one decoded chunk. Only the first `realTokens * SAMPLES_PER_TOKEN`
     * samples of [chunkAudio] are used; the rest is treated as padding.
     */
    fun feedChunk(chunkAudio: ShortArray, realTokens: Int) {
        val usable = minOf(realTokens * SAMPLES_PER_TOKEN, chunkAudio.size)
        val chunk = if (usable < chunkAudio.size) chunkAudio.copyOf(usable) else chunkAudio

        if (awaitingFirst) {
            assembled = chunk.copyOf()
            awaitingFirst = false
        } else {
            // Linear crossfade of the buffer's tail with the chunk's head.
            val fade = minOf(holdBack, assembled.size, chunk.size)
            val base = assembled.size - fade
            for (i in 0 until fade) {
                val alpha = i.toFloat() / fade
                val blended = ((1f - alpha) * assembled[base + i] + alpha * chunk[i]).toInt()
                    .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort()
                assembled[base + i] = blended
            }
            // Whatever lies beyond the faded head is appended as-is.
            if (fade < chunk.size) {
                assembled = assembled + chunk.copyOfRange(fade, chunk.size)
            }
        }

        // Emit everything that a future crossfade can no longer touch.
        val stableEnd = maxOf(assembled.size - holdBack, emitted)
        if (stableEnd > emitted) {
            onAudio(assembled.copyOfRange(emitted, stableEnd))
            emitted = stableEnd
        }
    }

    /** Emits the samples still held back, applying the trailing fade-out. */
    fun flushFinal() {
        if (emitted >= assembled.size) return
        val tail = assembled.copyOfRange(emitted, assembled.size)
        onAudio(fadeOut(tail, 40))
        emitted = assembled.size
    }
}
|
|
||||||
|
|
||||||
/**
 * Streaming variant of generateSegmentAudioVC. While the talker/CP loop
 * produces codes step by step, a background coroutine decodes each window of
 * SEQ_LEN codes with BigVGAN as soon as it is available and pushes the
 * resulting PCM to [onAudio] through a [StreamingCrossfader].
 *
 * Per the original author's measurements, for a 75-token segment this
 * overlaps the last BigVGAN pass with the final ~20 talker/CP steps and cuts
 * first-audio latency by ~4 s compared with the sequential path. Segments
 * shorter than SEQ_LEN codes produce a single chunk at the end of generation.
 *
 * Failure handling:
 *  - the chunk channel is always closed and the consumer always joined, even
 *    when the producer throws or the caller is cancelled;
 *  - after a consumer failure the consumer keeps draining (and discarding)
 *    chunks so the producer can never block on a full channel;
 *  - coroutine cancellation is rethrown instead of being logged and ignored.
 *
 * @param segText text of the segment to synthesize.
 * @param segIdx segment index, used only in log messages.
 * @param onAudio receives PCM slices in playback order; called from the
 *   consumer coroutine (Dispatchers.IO).
 */
private suspend fun generateSegmentAudioVCStreaming(
    segText: String, segIdx: Int, onAudio: (ShortArray) -> Unit
) {
    if (bpeTokenizer == null || textEmbedsFullBuf == null || damienVoicePrefix == null || damienVoiceSuffix == null) {
        nlog("generateSegmentAudioVCStreaming: Stage 2 assets missing"); return
    }
    if (talkerPteModule == null || cpPteModule == null) {
        nlog("generateSegmentAudioVCStreaming: PTE talker/CP not loaded"); return
    }
    val prefix = damienVoicePrefix!!
    val suffix = damienVoiceSuffix!!
    val codecPadEmb = codecEmb(CODEC_PAD)
    val ids = bpeTokenizer!!.encode(segText)
    nlog("session seg $segIdx (stream) '${segText.take(60)}' → ${ids.size} tokens")

    // Prefill = voice prefix + one embedding per text token + voice suffix.
    val prefill = ArrayList<FloatArray>(prefix.size + ids.size + suffix.size)
    for (e in prefix) prefill.add(e)
    for (id in ids) prefill.add(sumEmb(textEmbFromFull(id), codecPadEmb))
    for (e in suffix) prefill.add(e)

    // Generation budget: 2.4 steps per text token, +50 % and 10 steps of
    // slack, capped by the context window.
    val expectedSteps = (ids.size * 24) / 10
    val maxGen = minOf(expectedSteps * 3 / 2 + 10, MAX_CONTEXT - 15)

    val tStart = System.currentTimeMillis()
    var firstAudioLogged = false
    val bvChan = kotlinx.coroutines.channels.Channel<ChunkMsg>(capacity = 4)
    val cfader = StreamingCrossfader { pcm ->
        if (!firstAudioLogged) {
            nlog("streaming seg $segIdx first audio at ${System.currentTimeMillis() - tStart}ms (${pcm.size} samples)")
            firstAudioLogged = true
        }
        onAudio(pcm)
    }

    // Consumer: decode chunks in arrival order and feed the crossfader.
    val consumerJob = kotlinx.coroutines.CoroutineScope(kotlinx.coroutines.Dispatchers.IO).launch {
        var failed = false
        for (msg in bvChan) {
            // After a failure keep receiving so the producer's send() never
            // blocks on a full channel; the chunks are simply dropped.
            if (failed) continue
            try {
                val quant = vqDecode(msg.codebooks)
                val audio = runSpeechDecoderV2(quant)
                cfader.feedChunk(audio, msg.realTokens)
            } catch (e: Exception) {
                failed = true
                nlog("streaming seg $segIdx consumer error: ${e.message}")
            }
        }
        if (!failed) {
            try {
                cfader.flushFinal()
            } catch (e: Exception) {
                nlog("streaming seg $segIdx consumer error: ${e.message}")
            }
        }
    }

    // Producer state: every generated step's codes, and the index of the
    // first step of the next chunk to dispatch.
    val collected = mutableListOf<IntArray>()
    var nextChunkStart = 0

    // Build a SEQ_LEN-wide codebook matrix from `real` steps starting at
    // `start`; positions past the real tokens and out-of-range codes are 0.
    fun buildChunkCb(start: Int, real: Int): Array<IntArray> = Array(NUM_CODEBOOKS) { cb ->
        IntArray(SEQ_LEN) { t ->
            val src = start + t
            if (src < start + real) {
                val v = collected[src][cb]
                if (v in 0 until CODEBOOK_SIZE) v else 0
            } else 0
        }
    }

    try {
        try {
            runInterleavedPteFromEmbeds(prefill, emptyList(), maxGen) { _, codes ->
                collected.add(codes)
                // Dispatch every full window as soon as it exists. Windows
                // advance by EFFECTIVE_CHUNK so consecutive chunks overlap.
                while (collected.size >= nextChunkStart + SEQ_LEN) {
                    val cb = buildChunkCb(nextChunkStart, SEQ_LEN)
                    // NOTE(review): runBlocking is kept because this callback
                    // is presumably not a suspend lambda — confirm.
                    kotlinx.coroutines.runBlocking { bvChan.send(ChunkMsg(cb, SEQ_LEN)) }
                    nextChunkStart += EFFECTIVE_CHUNK
                }
            }
        } catch (e: kotlinx.coroutines.CancellationException) {
            throw e
        } catch (e: Exception) {
            nlog("streaming seg $segIdx producer error: ${e.message}")
        }

        // Trailing chunk: the tokens after the last full window. This covers
        // both a partial tail and the short-segment case (nextChunkStart = 0).
        val total = collected.size
        if (total > nextChunkStart) {
            val trailing = total - nextChunkStart
            val cb = buildChunkCb(nextChunkStart, trailing)
            // Already in a suspend function: suspend instead of blocking.
            bvChan.send(ChunkMsg(cb, trailing))
        }
    } finally {
        // Closing the channel is the end-of-stream signal; without it the
        // consumer would wait forever.
        bvChan.close()
        consumerJob.join()
    }
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run the Hexagon talker + CP generation loop with a fully pre-built
|
* Run the Hexagon talker + CP generation loop with a fully pre-built
|
||||||
* prefill (voice prefix + all text tokens). Same decode recipe as
|
* prefill (voice prefix + all text tokens). Same decode recipe as
|
||||||
|
|
|
||||||
|
|
@ -1,548 +0,0 @@
|
||||||
package com.kazeia.ui
|
|
||||||
|
|
||||||
import android.content.Context
|
|
||||||
import android.graphics.Canvas
|
|
||||||
import android.graphics.Color
|
|
||||||
import android.graphics.Paint
|
|
||||||
import android.graphics.Path
|
|
||||||
import android.graphics.RadialGradient
|
|
||||||
import android.graphics.Shader
|
|
||||||
import android.util.AttributeSet
|
|
||||||
import android.view.Choreographer
|
|
||||||
import android.view.View
|
|
||||||
import kotlin.math.PI
|
|
||||||
import kotlin.math.cos
|
|
||||||
import kotlin.math.max
|
|
||||||
import kotlin.math.min
|
|
||||||
import kotlin.math.sin
|
|
||||||
import kotlin.math.sqrt
|
|
||||||
|
|
||||||
/**
 * Large, central orb visualizer with three states:
 *
 *  - **Idle**: a round orb that "breathes" — its scale oscillates between
 *    0.88 and 1.0 over a 5 s cycle, with a halo scaled in phase.
 *
 *  - **Listening**: the orb outline is deformed by a sum of [BLOB_MODES]
 *    sine modes whose amplitude grows with the live mic RMS; a shimmer arc
 *    rotates around it and ripples are emitted while the RMS is above a
 *    small floor.
 *
 *  - **Speaking**: the orb outline itself acts as the spectrum display.
 *    Each of the [SPECTRUM_BANDS] spectrogram bands drives one sine mode of
 *    the perimeter, the overall size and halo follow the RMS envelope, and
 *    envelope peaks release ripples. Nothing is drawn as bars.
 *
 * Halo, accent and core-edge colors are all derived from a single core
 * color set through [setVoiceColor]; the displayed color eases toward the
 * target a little on every frame.
 *
 * All animation time is measured relative to the moment the view was last
 * attached, which keeps the Float math precise (see [animSeconds]).
 */
class AudioVisualizerView @JvmOverloads constructor(
    context: Context,
    attrs: AttributeSet? = null,
    defStyleAttr: Int = 0
) : View(context, attrs, defStyleAttr), Choreographer.FrameCallback {

    companion object {
        /**
         * Number of bands read from every spectrogram frame. Per the original
         * note this is expected to match Qwen3TtsEngine.SPECTRUM_BANDS (not
         * verifiable from this file); [startSpeaking] rejects frames with
         * fewer bands.
         */
        private const val SPECTRUM_BANDS = 12

        /** Number of sine modes summed to deform the listening-state outline. */
        private const val BLOB_MODES = 8

        /** Length of one idle breathing cycle. */
        private const val BREATH_PERIOD_MS = 5000L

        /** Delay between idle frames (40 ms, i.e. roughly 25 fps). */
        private const val IDLE_FRAME_DELAY_MS = 40L
    }

    // ---------- State ----------
    private sealed class State {
        object Idle : State()
        data class Listening(var micRms: Float, var phaseSeed: Float) : State()
        data class Speaking(
            val envelope: FloatArray,
            val spectrogram: Array<FloatArray>,
            val durationMs: Long,
            val startedAtMs: Long
        ) : State()
    }

    @Volatile private var state: State = State.Idle

    // ---------- Palette (derived from the core color) ----------
    private var targetCore = 0xFFBCA4E8.toInt() // default: lavender
    private var currentCore = targetCore
    private var currentHalo = deriveHalo(currentCore)
    private var currentAccent = deriveAccent(currentCore)

    /**
     * Sets the color the palette eases toward. The alpha channel of [color]
     * is ignored (forced opaque).
     */
    fun setVoiceColor(color: Int) {
        targetCore = color or 0xFF000000.toInt()
        scheduleFrame()
    }

    // ---------- Animation state ----------
    /** Wall-clock ms captured on attach; origin for every time-based phase. */
    private var animOriginMs = 0L
    private var smoothedAmp = 0f // 0..1 orb-size pulsation (all states)
    private val smoothedBars = FloatArray(SPECTRUM_BANDS)
    private var listeningRingPhase = 0f // fraction of a full turn, kept in [0, 1)
    private val ripples = ArrayList<Ripple>()
    /** Last envelope index examined for a ripple-triggering peak (-1 = none). */
    private var lastEnvIdx = -1
    private var frameScheduled = false

    // ---------- Paints / paths (reused across frames) ----------
    private val corePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply { style = Paint.Style.FILL }
    private val haloPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply { style = Paint.Style.FILL }
    private val highlightPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply { style = Paint.Style.FILL }
    private val ringPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
        style = Paint.Style.STROKE
    }
    private val ripplePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
        style = Paint.Style.STROKE
        strokeWidth = 3f
    }
    private val blobOutlinePaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
        style = Paint.Style.STROKE
    }
    private val blobPath = Path()
    private val spherePath = Path()

    init {
        setLayerType(LAYER_TYPE_HARDWARE, null)
    }

    // ==================== Public API ====================

    /** Switches to the idle (breathing) state. */
    fun setIdle() {
        if (state !is State.Idle) {
            state = State.Idle
            lastEnvIdx = -1
        }
        scheduleFrame()
    }

    /**
     * Switches to (or updates) the listening state.
     *
     * @param micRms current microphone level; clamped to 0..1.
     */
    fun setListening(micRms: Float) {
        val clamped = micRms.coerceIn(0f, 1f)
        val s = state
        if (s is State.Listening) {
            // Already listening: update the level in place so the phase seed
            // (and therefore the blob shape) stays continuous.
            s.micRms = clamped
        } else {
            state = State.Listening(clamped, (System.nanoTime() and 0xFFFF) / 65535f)
        }
        scheduleFrame()
    }

    /**
     * Starts the speaking animation from now.
     *
     * Falls back to [setIdle] when the input cannot be rendered: an empty
     * envelope or spectrogram, a non-positive duration, or any spectrogram
     * frame with fewer than [SPECTRUM_BANDS] values (drawing indexes every
     * frame up to that band count).
     *
     * @param envelope    amplitude envelope spread evenly over [durationMs].
     * @param spectrogram per-frame band energies spread evenly over [durationMs].
     * @param durationMs  playback duration the two arrays are mapped onto.
     */
    fun startSpeaking(
        envelope: FloatArray,
        spectrogram: Array<FloatArray>,
        durationMs: Long
    ) {
        val unusable = envelope.isEmpty() || spectrogram.isEmpty() || durationMs <= 0 ||
            spectrogram.any { it.size < SPECTRUM_BANDS }
        if (unusable) {
            setIdle()
            return
        }
        state = State.Speaking(envelope, spectrogram, durationMs, System.currentTimeMillis())
        lastEnvIdx = -1
        // Reset band energies so the deformation grows from zero rather than
        // starting from whatever the previous utterance left behind.
        smoothedBars.fill(0f)
        scheduleFrame()
    }

    // ==================== Lifecycle / scheduling ====================

    override fun onAttachedToWindow() {
        super.onAttachedToWindow()
        animOriginMs = System.currentTimeMillis()
        scheduleFrame()
    }

    override fun onDetachedFromWindow() {
        super.onDetachedFromWindow()
        Choreographer.getInstance().removeFrameCallback(this)
        // The pending callback was just cancelled; clear the flag so that
        // scheduleFrame() can post again after a re-attach.
        frameScheduled = false
    }

    /** Posts one frame callback unless one is pending or the view is detached. */
    private fun scheduleFrame() {
        if (!frameScheduled && isAttachedToWindow) {
            frameScheduled = true
            Choreographer.getInstance().postFrameCallback(this)
        }
    }

    /**
     * Per-frame tick: eases the palette, re-posts the next callback at a
     * rate that depends on the state, and invalidates the view.
     */
    override fun doFrame(frameTimeNanos: Long) {
        frameScheduled = false
        // Ease the palette toward the target (voice change tween).
        currentCore = lerpColor(currentCore, targetCore, 0.12f)
        currentHalo = deriveHalo(currentCore)
        currentAccent = deriveAccent(currentCore)

        val choreographer = Choreographer.getInstance()
        when (val s = state) {
            is State.Idle -> {
                // Throttled: the slow breathing cycle does not need every vsync.
                choreographer.postFrameCallbackDelayed(this, IDLE_FRAME_DELAY_MS)
            }
            is State.Listening -> {
                // Wrap to [0, 1) so the Float never grows large enough to
                // swallow the small increment.
                listeningRingPhase = (listeningRingPhase + 0.015f) % 1f
                choreographer.postFrameCallback(this)
            }
            is State.Speaking -> {
                val elapsed = System.currentTimeMillis() - s.startedAtMs
                if (elapsed >= s.durationMs + 300) {
                    // Utterance finished (plus a 300 ms tail): fall back to idle.
                    state = State.Idle
                    lastEnvIdx = -1
                    choreographer.postFrameCallbackDelayed(this, IDLE_FRAME_DELAY_MS)
                } else {
                    choreographer.postFrameCallback(this)
                }
            }
        }
        frameScheduled = true
        invalidate()
    }

    // ==================== Drawing ====================

    override fun onDraw(canvas: Canvas) {
        super.onDraw(canvas)
        val w = width.toFloat()
        val h = height.toFloat()
        if (w <= 0f || h <= 0f) return
        val cx = w / 2f
        val cy = h / 2f
        // Base radius is 39% of the shorter axis, i.e. a 78% diameter.
        val maxR = min(w, h) * 0.39f
        val now = System.currentTimeMillis()

        when (val s = state) {
            is State.Idle -> drawIdle(canvas, cx, cy, maxR, now)
            is State.Listening -> drawListening(canvas, cx, cy, maxR, now, s)
            is State.Speaking -> drawSpeaking(canvas, cx, cy, maxR, now, s)
        }
    }

    /**
     * Seconds elapsed since the view was attached, as a Float.
     *
     * The subtraction is done in Long first: converting a raw epoch
     * timestamp (~1.7e9 s) to Float would quantise it to 128 s steps and
     * freeze every phase that depends on it.
     */
    private fun animSeconds(now: Long): Float = (now - animOriginMs) / 1000f

    /** Points [highlightPaint] at a white-to-transparent radial gradient. */
    private fun prepareHighlight(focusX: Float, focusY: Float, gradientR: Float, alpha: Int): Paint {
        highlightPaint.shader = RadialGradient(
            focusX, focusY, gradientR,
            Color.argb(alpha, 255, 255, 255),
            Color.argb(0, 255, 255, 255),
            Shader.TileMode.CLAMP
        )
        return highlightPaint
    }

    // ---------- Idle ----------
    private fun drawIdle(canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long) {
        // Position inside the breathing cycle; the double modulo keeps it
        // non-negative even if the wall clock stepped backwards.
        val cycleMs = (((now - animOriginMs) % BREATH_PERIOD_MS) + BREATH_PERIOD_MS) % BREATH_PERIOD_MS
        val t = cycleMs / BREATH_PERIOD_MS.toFloat()
        val breath = 0.5f - 0.5f * cos((t * 2.0 * PI).toFloat()) // 0..1
        val scale = 0.88f + 0.12f * breath
        val radius = maxR * scale
        smoothedAmp += ((breath * 0.5f) - smoothedAmp) * 0.1f

        // Halo scaled in phase with the orb.
        drawHalo(canvas, cx, cy, maxR * 1.15f * scale, alphaBase = 60, alphaGain = 70)

        // Core — plain circle, no deformation.
        drawCore(canvas, cx, cy, radius)

        // Static off-centre highlight.
        val hl = prepareHighlight(cx - radius * 0.25f, cy - radius * 0.25f, radius * 0.9f, 60)
        canvas.drawCircle(cx, cy, radius, hl)
    }

    // ---------- Listening ----------
    private fun drawListening(
        canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, s: State.Listening
    ) {
        // Base size grows slightly with the mic level.
        val baseScale = 0.93f + 0.08f * s.micRms
        val radius = maxR * baseScale
        smoothedAmp += (s.micRms - smoothedAmp) * 0.25f

        // Halo whose opacity follows the mic level.
        drawHalo(
            canvas, cx, cy, maxR * 1.22f * baseScale,
            alphaBase = 90, alphaGain = (130 * s.micRms).toInt().coerceIn(0, 160)
        )

        // Deformed outline (blob): sine modes over the circle.
        buildBlobPath(blobPath, cx, cy, radius, s.micRms, s.phaseSeed, now)

        // Gradient fill, clipped to the blob outline.
        corePaint.shader = RadialGradient(
            cx - radius * 0.15f, cy - radius * 0.25f, radius * 1.1f,
            currentCore, deriveCoreEdge(currentCore),
            Shader.TileMode.CLAMP
        )
        canvas.save()
        canvas.clipPath(blobPath)
        canvas.drawCircle(cx, cy, radius * 1.3f, corePaint)
        canvas.restore()

        // Blob outline, thicker as the level rises.
        blobOutlinePaint.strokeWidth = 2f + 2f * s.micRms
        blobOutlinePaint.color = withAlpha(currentAccent, 180)
        canvas.drawPath(blobPath, blobOutlinePaint)

        // Rotating shimmer arc.
        drawListeningRing(canvas, cx, cy, radius * 1.08f, s.micRms)

        // Ripple emission: a ripple is added on every drawn frame that
        // falls in the first 90 ms of each 270 ms window while the level
        // is above the floor.
        val rmsMicroFloor = 0.12f
        if (s.micRms > rmsMicroFloor && ((now / 90) % 3 == 0L)) {
            ripples.add(Ripple(bornAtMs = now, peak = s.micRms))
        }
        drawRipples(canvas, cx, cy, maxR, now, listeningMode = true)
    }

    /**
     * Draws the rotating shimmer arc plus a dimmer, shorter trailing arc.
     * Nothing is drawn below an RMS of 0.04; width, sweep and alpha all
     * grow with [rms].
     */
    private fun drawListeningRing(
        canvas: Canvas, cx: Float, cy: Float, radius: Float, rms: Float
    ) {
        if (rms < 0.04f) return
        ringPaint.strokeWidth = 2.5f + 3f * rms
        val sweep = 60f + 80f * rms
        val start = (listeningRingPhase * 360f) % 360f
        val left = cx - radius
        val top = cy - radius
        val right = cx + radius
        val bottom = cy + radius

        ringPaint.color = withAlpha(currentAccent, (140 + 110 * rms).toInt().coerceIn(0, 250))
        canvas.drawArc(left, top, right, bottom, start, sweep, false, ringPaint)

        // Tail: second arc, half as long, starting 8 degrees after the first.
        ringPaint.color = withAlpha(currentAccent, (60 + 60 * rms).toInt().coerceIn(0, 160))
        canvas.drawArc(left, top, right, bottom, start + sweep + 8f, sweep * 0.5f, false, ringPaint)
    }

    // ---------- Speaking ----------
    private fun drawSpeaking(
        canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, s: State.Speaking
    ) {
        // Envelope -> overall size pulsation + halo intensity. Indices are
        // clamped, so drawing past the nominal duration holds the last sample.
        val elapsed = now - s.startedAtMs
        val lastEnv = s.envelope.size - 1
        val envIdxF = elapsed.toFloat() * s.envelope.size / s.durationMs
        val envIdx = envIdxF.toInt().coerceIn(0, lastEnv)
        val envFrac = (envIdxF - envIdx).coerceIn(0f, 1f)
        val env = lerp(s.envelope[envIdx], s.envelope[min(envIdx + 1, lastEnv)], envFrac)
        smoothedAmp += (env - smoothedAmp) * 0.30f

        // Per-band smoothed energies; they drive the outline modes built in
        // buildSpeakingBlobPath. Every row holds at least SPECTRUM_BANDS
        // values (checked in startSpeaking).
        val lastFrame = s.spectrogram.size - 1
        val timeIdxF = elapsed.toFloat() * s.spectrogram.size / s.durationMs
        val timeIdx = timeIdxF.toInt().coerceIn(0, lastFrame)
        val timeFrac = (timeIdxF - timeIdx).coerceIn(0f, 1f)
        val frameA = s.spectrogram[timeIdx]
        val frameB = s.spectrogram[min(timeIdx + 1, lastFrame)]
        for (b in 0 until SPECTRUM_BANDS) {
            val target = lerp(frameA[b], frameB[b], timeFrac)
            smoothedBars[b] += (target - smoothedBars[b]) * 0.35f
        }

        val scale = 0.92f + 0.14f * smoothedAmp
        val radius = maxR * scale

        // Halo follows the smoothed amplitude.
        drawHalo(
            canvas, cx, cy, maxR * 1.30f * scale,
            alphaBase = 90, alphaGain = (160 * smoothedAmp).toInt().coerceIn(0, 220)
        )

        // Emit at most one ripple per envelope sample, and only when the
        // current value is above 0.45 and not below either neighbour sample.
        if (envIdx != lastEnvIdx && env > 0.45f) {
            val prev = if (envIdx > 0) s.envelope[envIdx - 1] else 0f
            val next = if (envIdx < lastEnv) s.envelope[envIdx + 1] else 0f
            if (env >= prev && env >= next) {
                ripples.add(Ripple(bornAtMs = now, peak = env))
            }
            lastEnvIdx = envIdx
        }
        drawRipples(canvas, cx, cy, maxR, now, listeningMode = false)

        // The outline itself carries the spectrum: one sine mode per band.
        buildSpeakingBlobPath(spherePath, cx, cy, radius, now)

        // Fill the deformed shape with the core gradient.
        corePaint.shader = RadialGradient(
            cx - radius * 0.25f, cy - radius * 0.30f, radius * 1.25f,
            currentCore, deriveCoreEdge(currentCore),
            Shader.TileMode.CLAMP
        )
        canvas.drawPath(spherePath, corePaint)

        // Top-left highlight, clipped to the deformed shape.
        canvas.save()
        canvas.clipPath(spherePath)
        val hl = prepareHighlight(cx - radius * 0.28f, cy - radius * 0.30f, radius * 0.9f, 75)
        canvas.drawCircle(cx, cy, radius * 1.2f, hl)
        canvas.restore()

        // Outline on top; thickness follows the smoothed amplitude.
        blobOutlinePaint.strokeWidth = 2.5f + 3.5f * smoothedAmp
        blobOutlinePaint.color = withAlpha(currentAccent, 230)
        canvas.drawPath(spherePath, blobOutlinePaint)
    }

    /**
     * Builds the speaking-state perimeter into [path]: a circle of [radius]
     * displaced radially by a sum of sine modes, one per spectrogram band.
     *
     * Band `b` drives mode `b + 2`, so modes 0 and 1 are never used. Each
     * mode's phase advances at `0.45 + 0.22 * b` radians per second, i.e.
     * faster for higher bands. The displacement of a band is
     * `0.22 * radius * energy * envWeight`, where `envWeight` rises from
     * 0.5 to 1 with the smoothed amplitude. The outline is sampled at 96
     * points.
     */
    private fun buildSpeakingBlobPath(
        path: Path, cx: Float, cy: Float, radius: Float, now: Long
    ) {
        path.rewind()
        val steps = 96
        val tSec = animSeconds(now)
        // Max radial displacement contributed by one band at full energy.
        val modeGain = radius * 0.22f
        // Quieter passages deform less.
        val envWeight = (0.5f + 0.5f * smoothedAmp).coerceIn(0f, 1f)

        for (i in 0..steps) {
            // i == steps wraps to angle 0 so the last point meets the first.
            val theta = (i % steps).toFloat() / steps * 2f * PI.toFloat()
            var d = 0f
            for (b in 0 until SPECTRUM_BANDS) {
                val mode = b + 2
                val phase = tSec * (0.45f + 0.22f * b)
                d += modeGain * smoothedBars[b] * envWeight *
                    sin((mode * theta + phase).toDouble()).toFloat()
            }
            val r = radius + d
            val x = cx + r * cos(theta.toDouble()).toFloat()
            val y = cy + r * sin(theta.toDouble()).toFloat()
            if (i == 0) path.moveTo(x, y) else path.lineTo(x, y)
        }
        path.close()
    }

    // ---------- Helpers: halo / core / ripples / blob ----------

    /**
     * Draws a radial halo of radius [r] fading from `alphaBase + alphaGain`
     * (clamped to 0..255) at the centre to fully transparent at the edge.
     */
    private fun drawHalo(
        canvas: Canvas, cx: Float, cy: Float, r: Float,
        alphaBase: Int, alphaGain: Int
    ) {
        val a = (alphaBase + alphaGain).coerceIn(0, 255)
        haloPaint.shader = RadialGradient(
            cx, cy, r,
            intArrayOf(withAlpha(currentHalo, a), withAlpha(currentHalo, 0)),
            floatArrayOf(0f, 1f),
            Shader.TileMode.CLAMP
        )
        canvas.drawCircle(cx, cy, r, haloPaint)
    }

    /** Draws the undeformed core circle with an off-centre radial gradient. */
    private fun drawCore(canvas: Canvas, cx: Float, cy: Float, radius: Float) {
        corePaint.shader = RadialGradient(
            cx - radius * 0.2f, cy - radius * 0.3f, radius * 1.15f,
            currentCore, deriveCoreEdge(currentCore),
            Shader.TileMode.CLAMP
        )
        canvas.drawCircle(cx, cy, radius, corePaint)
    }

    /**
     * Draws every live ripple and drops the expired ones. A ripple expands
     * from 0.58 to 1.20 times [maxR] while fading and thinning over its
     * lifetime (700 ms in listening mode, 900 ms otherwise).
     */
    private fun drawRipples(
        canvas: Canvas, cx: Float, cy: Float, maxR: Float, now: Long, listeningMode: Boolean
    ) {
        if (ripples.isEmpty()) return
        val lifetimeMs = if (listeningMode) 700f else 900f
        val iter = ripples.iterator()
        while (iter.hasNext()) {
            val ripple = iter.next()
            val age = (now - ripple.bornAtMs) / lifetimeMs
            if (age >= 1f) {
                iter.remove()
                continue
            }
            val radius = maxR * (0.58f + 0.62f * age)
            val alpha = ((1f - age) * 150f * ripple.peak).toInt().coerceIn(0, 200)
            ripplePaint.color = withAlpha(currentAccent, alpha)
            ripplePaint.strokeWidth = max(1.2f, (1f - age) * 4f)
            canvas.drawCircle(cx, cy, radius, ripplePaint)
        }
    }

    /**
     * Builds the listening-state blob into [path]: a circle of [radius]
     * displaced by [BLOB_MODES] sine modes. Mode `m` has amplitude
     * `amp / m` with `amp = radius * (0.02 + 0.08 * rms)`, and a phase of
     * `phaseSeed * 6.28 + t * (0.3 + 0.05 * m)` so each mode drifts at its
     * own rate. The outline is sampled at 72 points.
     */
    private fun buildBlobPath(
        path: Path, cx: Float, cy: Float, radius: Float,
        rms: Float, phaseSeed: Float, now: Long
    ) {
        path.rewind()
        val steps = 72
        val tSec = animSeconds(now)
        val amp = radius * (0.02f + 0.08f * rms)
        for (i in 0..steps) {
            // i == steps wraps to angle 0 so the last point meets the first.
            val theta = (i % steps).toFloat() / steps * 2f * PI.toFloat()
            var d = 0f
            for (m in 1..BLOB_MODES) {
                val phase = phaseSeed * 6.28f + tSec * (0.3f + 0.05f * m)
                d += (amp / m) * sin((m * theta + phase).toDouble()).toFloat()
            }
            val r = radius + d
            val x = cx + r * cos(theta.toDouble()).toFloat()
            val y = cy + r * sin(theta.toDouble()).toFloat()
            if (i == 0) path.moveTo(x, y) else path.lineTo(x, y)
        }
        path.close()
    }

    // ---------- Color helpers ----------
    private fun deriveHalo(core: Int): Int = darken(core, 0.18f)
    private fun deriveAccent(core: Int): Int = brighten(core, 0.12f)
    private fun deriveCoreEdge(core: Int): Int = darken(core, 0.12f)

    /** Moves each RGB channel [frac] of the way toward 255; alpha is kept. */
    private fun brighten(c: Int, frac: Float): Int {
        val r = (Color.red(c) + (255 - Color.red(c)) * frac).toInt().coerceIn(0, 255)
        val g = (Color.green(c) + (255 - Color.green(c)) * frac).toInt().coerceIn(0, 255)
        val b = (Color.blue(c) + (255 - Color.blue(c)) * frac).toInt().coerceIn(0, 255)
        return Color.argb(Color.alpha(c), r, g, b)
    }

    /** Scales each RGB channel by `1 - frac`; alpha is kept. */
    private fun darken(c: Int, frac: Float): Int {
        val r = (Color.red(c) * (1 - frac)).toInt().coerceIn(0, 255)
        val g = (Color.green(c) * (1 - frac)).toInt().coerceIn(0, 255)
        val b = (Color.blue(c) * (1 - frac)).toInt().coerceIn(0, 255)
        return Color.argb(Color.alpha(c), r, g, b)
    }

    /** Returns [c] with its alpha replaced by [alpha] (clamped to 0..255). */
    private fun withAlpha(c: Int, alpha: Int): Int {
        return Color.argb(alpha.coerceIn(0, 255), Color.red(c), Color.green(c), Color.blue(c))
    }

    private fun lerp(a: Float, b: Float, t: Float): Float = a + (b - a) * t

    /**
     * Eases an opaque color from [from] toward [to] by fraction [t] per
     * channel. The result always has alpha 255.
     *
     * Channels are rounded rather than truncated, and whenever rounding
     * would leave a channel unchanged although it has not reached its
     * target (and `t > 0`), it is nudged one level toward the target.
     * Repeated calls therefore always converge on [to] instead of stalling
     * a few levels short.
     */
    private fun lerpColor(from: Int, to: Int, t: Float): Int {
        fun ease(a: Int, b: Int): Int {
            val blended = (lerp(a.toFloat(), b.toFloat(), t) + 0.5f).toInt()
            val stalled = blended == a && a != b && t > 0f
            val moved = if (stalled) a + (if (b > a) 1 else -1) else blended
            return moved.coerceIn(0, 255)
        }
        return Color.argb(
            255,
            ease(Color.red(from), Color.red(to)),
            ease(Color.green(from), Color.green(to)),
            ease(Color.blue(from), Color.blue(to))
        )
    }

    /** One expanding ring: birth time (wall-clock ms) and the level that spawned it. */
    private class Ripple(val bornAtMs: Long, val peak: Float)
}
|
|
||||||
|
|
@ -187,21 +187,6 @@ class ChatActivity : AppCompatActivity() {
|
||||||
"Amir", "Didier", "Sid", "Zelda"
|
"Amir", "Didier", "Sid", "Zelda"
|
||||||
)
|
)
|
||||||
|
|
||||||
/** One color per speaker — derived palette (core + halo + bars) is
|
|
||||||
* generated inside AudioVisualizerView. Chosen to be calm,
|
|
||||||
* perceptually distinct, and consistent in saturation so switching
|
|
||||||
* voices changes *hue* rather than *mood*. */
|
|
||||||
private val voiceColors = listOf(
|
|
||||||
0xFFBCA4E8.toInt(), // Damien — lavender
|
|
||||||
0xFFE8A4CC.toInt(), // Elodie — rose
|
|
||||||
0xFF82D5D0.toInt(), // Jerome — aqua
|
|
||||||
0xFFE8BFA4.toInt(), // Richard — amber sand
|
|
||||||
0xFF95D5A6.toInt(), // Amir — emerald
|
|
||||||
0xFF8FA2D4.toInt(), // Didier — indigo
|
|
||||||
0xFFE8B89A.toInt(), // Sid — peach
|
|
||||||
0xFFA4BEE8.toInt() // Zelda — periwinkle
|
|
||||||
)
|
|
||||||
|
|
||||||
private fun setupResourceMonitoring() {
|
private fun setupResourceMonitoring() {
|
||||||
val graphCpu = findViewById<MiniGraphView>(R.id.graphCpu)
|
val graphCpu = findViewById<MiniGraphView>(R.id.graphCpu)
|
||||||
val graphGpu = findViewById<MiniGraphView>(R.id.graphGpu)
|
val graphGpu = findViewById<MiniGraphView>(R.id.graphGpu)
|
||||||
|
|
@ -269,12 +254,6 @@ class ChatActivity : AppCompatActivity() {
|
||||||
override fun onItemSelected(parent: AdapterView<*>?, view: android.view.View?, pos: Int, id: Long) {
|
override fun onItemSelected(parent: AdapterView<*>?, view: android.view.View?, pos: Int, id: Long) {
|
||||||
val voicePath = "${com.kazeia.KazeiaApplication.MODELS_DIR}/../voix/${voiceFiles[pos]}"
|
val voicePath = "${com.kazeia.KazeiaApplication.MODELS_DIR}/../voix/${voiceFiles[pos]}"
|
||||||
kazeiaService?.setVoice(voicePath)
|
kazeiaService?.setVoice(voicePath)
|
||||||
// Push the matching color to the service so the orb
|
|
||||||
// view picks it up; the view tweens from the previous
|
|
||||||
// color so voice changes don't snap visually.
|
|
||||||
val color = voiceColors[pos.coerceIn(voiceColors.indices)]
|
|
||||||
kazeiaService?.setVoiceColor(color)
|
|
||||||
binding.audioViz.setVoiceColor(color)
|
|
||||||
appendLog("Voix: ${voiceNames[pos]}")
|
appendLog("Voix: ${voiceNames[pos]}")
|
||||||
}
|
}
|
||||||
override fun onNothingSelected(parent: AdapterView<*>?) {}
|
override fun onNothingSelected(parent: AdapterView<*>?) {}
|
||||||
|
|
@ -347,43 +326,6 @@ class ChatActivity : AppCompatActivity() {
|
||||||
setDebugPanelVisible(debug)
|
setDebugPanelVisible(debug)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
launch {
|
|
||||||
// Drive the orb visualizer from the service-side signal.
|
|
||||||
// Service decides whether the app is idle, tracking the
|
|
||||||
// mic, or rendering a TTS segment; the view just renders
|
|
||||||
// it. StartSpeaking is edge-triggered on the envelope
|
|
||||||
// identity so re-emitting the same signal won't restart
|
|
||||||
// the animation timer.
|
|
||||||
var lastSpeakingEnv: FloatArray? = null
|
|
||||||
service.visualizerSignal.collect { sig ->
|
|
||||||
when (sig) {
|
|
||||||
is com.kazeia.service.KazeiaService.VisualizerSignal.Idle -> {
|
|
||||||
binding.audioViz.setIdle()
|
|
||||||
lastSpeakingEnv = null
|
|
||||||
}
|
|
||||||
is com.kazeia.service.KazeiaService.VisualizerSignal.Listening -> {
|
|
||||||
binding.audioViz.setListening(sig.micRms)
|
|
||||||
lastSpeakingEnv = null
|
|
||||||
}
|
|
||||||
is com.kazeia.service.KazeiaService.VisualizerSignal.Speaking -> {
|
|
||||||
if (sig.rmsEnvelope !== lastSpeakingEnv) {
|
|
||||||
binding.audioViz.startSpeaking(
|
|
||||||
sig.rmsEnvelope, sig.spectrogram, sig.durationMs
|
|
||||||
)
|
|
||||||
lastSpeakingEnv = sig.rmsEnvelope
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
launch {
|
|
||||||
// Keep the view's voice color synchronised with the
|
|
||||||
// service — covers the initial state when the view
|
|
||||||
// attaches before the spinner's first callback fires.
|
|
||||||
service.voiceColor.collect { color ->
|
|
||||||
binding.audioViz.setVoiceColor(color)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,12 +18,17 @@ class ResourceMonitor(private val context: Context) {
|
||||||
private var prevIdle = 0L
|
private var prevIdle = 0L
|
||||||
private var prevGpuBusy = 0L
|
private var prevGpuBusy = 0L
|
||||||
private var prevGpuTotal = 0L
|
private var prevGpuTotal = 0L
|
||||||
|
private var hasRoot = false
|
||||||
|
|
||||||
// No-root deployment (2026-04-14): the previous `su -c id` probe used to
|
init {
|
||||||
// enable GPU/NPU sysfs reads via root, but it also triggered a Magisk
|
// Test root access once
|
||||||
// prompt on every ChatActivity launch. The whole pipeline now runs in
|
hasRoot = try {
|
||||||
// the app process so root is never needed — GPU/NPU usage is reported
|
val p = Runtime.getRuntime().exec(arrayOf("su", "-c", "id"))
|
||||||
// as -1 (UI shows "—") and the dashboard shows CPU + RAM only.
|
val result = p.inputStream.bufferedReader().readText()
|
||||||
|
p.waitFor()
|
||||||
|
result.contains("uid=0")
|
||||||
|
} catch (_: Exception) { false }
|
||||||
|
}
|
||||||
|
|
||||||
fun snapshot(): ResourceSnapshot {
|
fun snapshot(): ResourceSnapshot {
|
||||||
return ResourceSnapshot(
|
return ResourceSnapshot(
|
||||||
|
|
@ -62,9 +67,7 @@ class ResourceMonitor(private val context: Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readGpu(): Float {
|
private fun readGpu(): Float {
|
||||||
// Non-root path: some devices expose /sys/class/kgsl/kgsl-3d0/gpubusy
|
// Try direct read first (works on some devices)
|
||||||
// as world-readable. If it's locked down (most SELinux configs do),
|
|
||||||
// just return -1 — no root fallback, no Magisk prompt.
|
|
||||||
try {
|
try {
|
||||||
val content = File("/sys/class/kgsl/kgsl-3d0/gpubusy").readText().trim()
|
val content = File("/sys/class/kgsl/kgsl-3d0/gpubusy").readText().trim()
|
||||||
val parts = content.split("\\s+".toRegex())
|
val parts = content.split("\\s+".toRegex())
|
||||||
|
|
@ -78,14 +81,38 @@ class ResourceMonitor(private val context: Context) {
|
||||||
if (dt > 0) return (db * 100f / dt).coerceIn(0f, 100f)
|
if (dt > 0) return (db * 100f / dt).coerceIn(0f, 100f)
|
||||||
}
|
}
|
||||||
} catch (_: Exception) {}
|
} catch (_: Exception) {}
|
||||||
|
|
||||||
|
// Try with root
|
||||||
|
if (hasRoot) {
|
||||||
|
try {
|
||||||
|
val content = execRoot("cat /sys/class/kgsl/kgsl-3d0/gpu_busy_percentage").trim()
|
||||||
|
val pct = content.replace("%", "").trim().toFloatOrNull()
|
||||||
|
if (pct != null) return pct.coerceIn(0f, 100f)
|
||||||
|
} catch (_: Exception) {}
|
||||||
|
}
|
||||||
|
|
||||||
return -1f
|
return -1f
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readNpu(): Float {
|
private fun readNpu(): Float {
|
||||||
// NPU usage reporting required root sysfs reads (cdsp_rm/cpu_vote,
|
// NPU doesn't have a standard busy metric
|
||||||
// /proc/fastrpc) that always triggered a Magisk prompt. Removed with
|
// Use CDSP (compute DSP) load as proxy if available
|
||||||
// the no-root migration — no equivalent public API exists, so the
|
if (hasRoot) {
|
||||||
// UI just shows "—" for NPU load.
|
try {
|
||||||
|
// Check if CDSP is active by reading vote count
|
||||||
|
val vote = execRoot("cat /sys/bus/platform/devices/soc:qcom,msm-cdsp-rm/cdsp_rm/cpu_vote 2>/dev/null").trim()
|
||||||
|
if (vote.isNotEmpty()) {
|
||||||
|
val v = vote.toIntOrNull() ?: 0
|
||||||
|
return if (v > 0) 100f else 0f
|
||||||
|
}
|
||||||
|
} catch (_: Exception) {}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Alternative: check fastrpc activity
|
||||||
|
val stat = execRoot("cat /proc/fastrpc 2>/dev/null || echo none").trim()
|
||||||
|
if (stat != "none" && stat.isNotEmpty()) return 50f
|
||||||
|
} catch (_: Exception) {}
|
||||||
|
}
|
||||||
return -1f
|
return -1f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,4 +134,12 @@ class ResourceMonitor(private val context: Context) {
|
||||||
} catch (_: Exception) { return 0 }
|
} catch (_: Exception) { return 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun execRoot(cmd: String): String {
|
||||||
|
return try {
|
||||||
|
val p = Runtime.getRuntime().exec(arrayOf("su", "-c", cmd))
|
||||||
|
val result = p.inputStream.bufferedReader().readText()
|
||||||
|
p.waitFor()
|
||||||
|
result
|
||||||
|
} catch (_: Exception) { "" }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -100,23 +100,6 @@
|
||||||
|
|
||||||
</LinearLayout>
|
</LinearLayout>
|
||||||
|
|
||||||
<!-- Central orb visualizer: Kazeia's visual "face". Takes the
|
|
||||||
top half of the chat area so it reads as the primary UI
|
|
||||||
element; the message list sits below it and shows the
|
|
||||||
word-by-word reveal of the current reply. Color is driven
|
|
||||||
by the selected voice (Damien=lavender, Elodie=rose, …). -->
|
|
||||||
<com.kazeia.ui.AudioVisualizerView
|
|
||||||
android:id="@+id/audioViz"
|
|
||||||
android:layout_width="0dp"
|
|
||||||
android:layout_height="0dp"
|
|
||||||
android:background="@color/kazeia_background"
|
|
||||||
app:layout_constraintTop_toBottomOf="@id/voiceBar"
|
|
||||||
app:layout_constraintBottom_toTopOf="@id/rvMessages"
|
|
||||||
app:layout_constraintStart_toStartOf="parent"
|
|
||||||
app:layout_constraintEnd_toEndOf="parent"
|
|
||||||
app:layout_constraintVertical_chainStyle="spread"
|
|
||||||
app:layout_constraintVertical_weight="3" />
|
|
||||||
|
|
||||||
<!-- Chat messages -->
|
<!-- Chat messages -->
|
||||||
<androidx.recyclerview.widget.RecyclerView
|
<androidx.recyclerview.widget.RecyclerView
|
||||||
android:id="@+id/rvMessages"
|
android:id="@+id/rvMessages"
|
||||||
|
|
@ -124,11 +107,10 @@
|
||||||
android:layout_height="0dp"
|
android:layout_height="0dp"
|
||||||
android:clipToPadding="false"
|
android:clipToPadding="false"
|
||||||
android:padding="8dp"
|
android:padding="8dp"
|
||||||
app:layout_constraintTop_toBottomOf="@id/audioViz"
|
app:layout_constraintTop_toBottomOf="@id/voiceBar"
|
||||||
app:layout_constraintBottom_toTopOf="@id/inputBar"
|
app:layout_constraintBottom_toTopOf="@id/inputBar"
|
||||||
app:layout_constraintStart_toStartOf="parent"
|
app:layout_constraintStart_toStartOf="parent"
|
||||||
app:layout_constraintEnd_toEndOf="parent"
|
app:layout_constraintEnd_toEndOf="parent" />
|
||||||
app:layout_constraintVertical_weight="2" />
|
|
||||||
|
|
||||||
<!-- Input bar -->
|
<!-- Input bar -->
|
||||||
<LinearLayout
|
<LinearLayout
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Kazeia Android — Élimination du root pour le LLM (résolu)
|
# Kazeia Android — Problème d'élimination de root pour le LLM
|
||||||
|
|
||||||
**Date :** 2026-04-14
|
**Date :** 2026-04-14
|
||||||
**Device :** OnePlus Pad 3 (OPD2415, Snapdragon 8 Elite, SoC `sun`), Android 16 (OxygenOS), Magisk root
|
**Device :** OnePlus Pad 3 (OPD2415, Snapdragon 8 Elite, SoC `sun`), Android 16 (OxygenOS), Magisk root
|
||||||
|
|
@ -6,13 +6,6 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
> **🟢 Statut : RÉSOLU.** Pipeline complet STT + LLM + TTS tourne in-process sans
|
|
||||||
> aucun appel à `su`. Voir la section **Résolution** en bas du document pour le
|
|
||||||
> détail du fix. Le reste du document décrit l'investigation initiale et garde
|
|
||||||
> sa valeur historique.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Contexte général
|
## 1. Contexte général
|
||||||
|
|
||||||
L'app Kazeia (Android / Kotlin + Jetpack Compose) orchestre un pipeline **STT → LLM → TTS** entièrement on-device sur le Hexagon HTP (V79) du Snapdragon 8 Elite.
|
L'app Kazeia (Android / Kotlin + Jetpack Compose) orchestre un pipeline **STT → LLM → TTS** entièrement on-device sur le Hexagon HTP (V79) du Snapdragon 8 Elite.
|
||||||
|
|
@ -231,132 +224,3 @@ Je cherche soit :
|
||||||
- Soit **la confirmation** que l'approche actuelle (root + Magisk remember) est le meilleur compromis accessible, avec éventuellement des suggestions pour minimiser les prompts
|
- Soit **la confirmation** que l'approche actuelle (root + Magisk remember) est le meilleur compromis accessible, avec éventuellement des suggestions pour minimiser les prompts
|
||||||
|
|
||||||
Merci.
|
Merci.
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 10. Résolution (post-mortem)
|
|
||||||
|
|
||||||
Une seconde opinion technique a identifié la **vraie cause racine** que
|
|
||||||
l'investigation locale avait mal diagnostiquée.
|
|
||||||
|
|
||||||
### 10.1 Vraie cause
|
|
||||||
|
|
||||||
Les processus Android forkés par Zygote (l'app elle-même, ses Services
|
|
||||||
`android:process=":xxx"`, etc.) héritent des **GIDs supplémentaires**
|
|
||||||
configurés à l'init pour `untrusted_app`. Ces GIDs incluent l'autorisation
|
|
||||||
d'accès à `/dev/cdsprpc-smd` et à d'autres canaux fastrpc.
|
|
||||||
|
|
||||||
Quand `Runtime.exec("su"…)` ou `ProcessBuilder` font un `fork()` + `exec()`
|
|
||||||
classique, le `exec()` ne préserve pas tous les credentials utilisés par le
|
|
||||||
driver fastrpc Qualcomm pour authentifier le client. Le driver retourne
|
|
||||||
**error 4000 "Failed to load skel"** car il refuse de créer une session DSP
|
|
||||||
pour ce process.
|
|
||||||
|
|
||||||
C'est pour ça que :
|
|
||||||
- ORT-QNN (Whisper) marchait in-process : chargé via `System.loadLibrary` dans
|
|
||||||
l'app, qui est Zygote-forked → credentials valides.
|
|
||||||
- `su -c qnn_llama_runner` marchait : root bypasse les checks fastrpc.
|
|
||||||
- `ProcessBuilder` du même runner échouait : ni Zygote-forked, ni root.
|
|
||||||
|
|
||||||
Le "conflit de version QNN v2.31 vs v2.37" que j'avais soupçonné n'était
|
|
||||||
**pas le vrai problème**. Les libs étaient déjà unifiées en v2.42 dans jniLibs.
|
|
||||||
|
|
||||||
### 10.2 La solution : `LlmModule` JNI in-process
|
|
||||||
|
|
||||||
ExecuTorch fournit `org.pytorch.executorch.extension.llm.LlmModule`, un
|
|
||||||
wrapper JNI autour du même C++ `example::Runner` que le binaire
|
|
||||||
`qnn_llama_runner`. En l'invoquant depuis l'app (process Zygote-forked), le
|
|
||||||
DSP fastrpc accepte la session — pas de root nécessaire.
|
|
||||||
|
|
||||||
### 10.3 Étapes réelles du fix
|
|
||||||
|
|
||||||
1. **Build ExecuTorch Android** avec `EXECUTORCH_BUILD_LLAMA_JNI=ON`,
|
|
||||||
`EXECUTORCH_BUILD_QNN=ON`, `QNN_SDK_ROOT=/opt/Kazeia/qnn_sdk_242/qairt/2.42.0.251225` →
|
|
||||||
produit `libexecutorch_jni.so` 192 MB qui inclut le runner LLM + le backend QNN.
|
|
||||||
2. **Patches sources** dans `/opt/Kazeia/executorch-patches/llm_in_process_jni.patch` :
|
|
||||||
- `backends/qualcomm/CMakeLists.txt` : gate `PyQnnManagerAdaptor` sur `NOT ANDROID`
|
|
||||||
(le guard original sur `CMAKE_SYSTEM_PROCESSOR MATCHES x86_64` se déclenche
|
|
||||||
dans des sous-scopes du cross-compile Android).
|
|
||||||
- `extension/android/jni/jni_layer_llama.cpp`, branche `MODEL_TYPE_QNN_LLAMA` :
|
|
||||||
- `decoder_model = "qwen3"` (au lieu de `"llama3"` hardcodé)
|
|
||||||
- `temperature = 0.0f`, `eval_mode = 0` (kKVCached), `shared_buffer = true`
|
|
||||||
- **Crucial** : choisir `Runner<uint8_t>` ou `Runner<uint16_t>` selon
|
|
||||||
`module->get("get_kv_io_bit_width")` (mirror du `qnn_llama_runner.cpp main()`).
|
|
||||||
Hardcoder la mauvaise largeur produit du gibberish déterministe
|
|
||||||
comme `blocked罩ug darkestSOLEQuotes作者本人 humanity` — la KV cache
|
|
||||||
est lue/écrite à la mauvaise largeur de byte.
|
|
||||||
3. **Bundling jniLibs** :
|
|
||||||
   - `libexecutorch.so` / `libexecutorch_jni.so` (build du 13 avril avec LlmModule)
|
|
||||||
- `libqnn_executorch_backend.so` (assorti)
|
|
||||||
- `libQnnHtp.so`, `libQnnHtpPrepare.so`, `libQnnHtpV79Stub.so`, `libQnnSystem.so`,
|
|
||||||
`libQnnHtpV79Skel.so` (tous v2.42 depuis `/opt/Kazeia/qnn_sdk_242/`)
|
|
||||||
4. **JAR avec `LlmModule.class`** : compilation manuelle via `javac` (le build
|
|
||||||
gradle de l'AAR demandait android-34 platform non installée).
|
|
||||||
5. **Réécriture `ExecuTorchLlmEngine.kt`** :
|
|
||||||
- Constructeur : `LlmModule(MODEL_TYPE_QNN_LLAMA=4, ptePath, tokPath, 0.7f)` puis `.load()`
|
|
||||||
- `generate(prompt, seqLen, callback, echo=false)` — sinon le callback échoue à
|
|
||||||
stripper les tokens du prompt
|
|
||||||
- Template ChatML Qwen3 buildé en Kotlin, mirror exact de
|
|
||||||
`qnn_llama_runner.cpp::get_formatted_prompt()` pour `kQwen3` (user-first puis
|
|
||||||
system optionnel puis `<|im_start|>assistant`)
|
|
||||||
- Filtre inline `<think>…</think>` dans le callback avec lookahead pour les tags
|
|
||||||
fragmentés sur plusieurs pieces
|
|
||||||
|
|
||||||
### 10.4 Métriques validées
|
|
||||||
|
|
||||||
| Métrique | Valeur |
|
|
||||||
|---|---|
|
|
||||||
| LlmModule.load() | 4.2 s (one-time à l'init de l'app) |
|
|
||||||
| LLM gen | ~17 tok/s (kv-only) |
|
|
||||||
| LLM TTFT | ~4 s pour 77 tokens prompt (prefill séquentiel kKVCached) |
|
|
||||||
| TTS Talker(PTE) | 37 ms/step (vs 45-65 avant) |
|
|
||||||
| TTS CP(PTE) | 73 ms/step |
|
|
||||||
| Pipeline e2e | "Bonjour, comment vas-tu ?" → audio en ~7 s |
|
|
||||||
| Magisk prompts | **0** |
|
|
||||||
|
|
||||||
### 10.5 Optimisations restantes (non bloquantes)
|
|
||||||
|
|
||||||
- **TTFT** : ré-exporter le `.pte` en `--model_mode hybrid` pour avoir un
|
|
||||||
`prefill_forward` parallèle → TTFT passerait de ~4 s à <1 s. Pas nécessaire
|
|
||||||
pour le use case conversationnel actuel.
|
|
||||||
- **Cosmétique** : le statusbar de l'app affiche encore "Hexagon NPU" pour le
|
|
||||||
TTS alors que c'est désormais le chemin .pte (label hérité du temps où c'était
|
|
||||||
ggml-hexagon).
|
|
||||||
|
|
||||||
### 10.6 Mémoire projet
|
|
||||||
|
|
||||||
État complet documenté dans
|
|
||||||
`/home/alf/.claude/projects/-opt-Kazeia/memory/project_llm_npu_plan.md`.
|
|
||||||
Backup git : branche `backup/pre-no-root-migration` + commit `6e6a2d9`.
|
|
||||||
Backup disk : `/home/alf/kazeia_backup_20260414/`.
|
|
||||||
|
|
||||||
### 10.7 Commits clés
|
|
||||||
|
|
||||||
- `f32b5dd` (LLM no-root: validate end-to-end pipeline, fix kv_io_bit_width detection)
|
|
||||||
- `b57719f` (LLM: filter <think> tokens out of the streaming TTS path)
|
|
||||||
|
|
||||||
### 10.8 Comparaison de performances avant/après
|
|
||||||
|
|
||||||
Mesurée le 2026-04-14 sur le même `.pte` Qwen3-4B avec le même runner C++ —
|
|
||||||
seule la voie d'invocation change (subprocess `su -c` vs `LlmModule` JNI
|
|
||||||
in-process).
|
|
||||||
|
|
||||||
| Métrique | Avant (su-c subprocess) | Après (in-process LlmModule) | Delta |
|
|
||||||
|---|---|---|---|
|
|
||||||
| LLM gen rate | 18.3 tok/s | 17.2 tok/s | -6 % (bruit) |
|
|
||||||
| LLM prefill speed | 52 ms / prompt-token | 52 ms / prompt-token | identique |
|
|
||||||
| LLM TTFT (prompt 35 tok) | 1.8 s | 1.8 s | identique |
|
|
||||||
| LLM TTFT (prompt 80 tok, system+ChatML) | ~4.1 s | 4.2 s | identique |
|
|
||||||
| TTS Talker(.pte) | 45-65 ms / step | 37 ms / step | +25-40 % (contexte QNN partagé) |
|
|
||||||
| TTS CP(.pte) | 65-157 ms / step | 73 ms / step | +10-50 % |
|
|
||||||
| TTS load au boot | 26.7 s | 4.3 s | **6× plus rapide** (le subprocess Hexagon de 12 s n'existe plus) |
|
|
||||||
| `LlmModule.load()` au boot | n/a (subprocess à la demande) | 3.1 s (one-time) | overhead init |
|
|
||||||
| App RSS | ~2 GB app + 1.76 GB subprocess séparé | ~3.7 GB process unique | mêmes ressources globales |
|
|
||||||
| Erreurs DSP 6031/6033 en concurrence | régulières | disparues | architectural |
|
|
||||||
| Prompts Magisk | 5 / tour | **0** | UX net |
|
|
||||||
| Taille APK | ~100 MB | ~100 MB (libexecutorch_jni.so 192 MB → 8.5 MB après strip à l'install) | négligeable |
|
|
||||||
|
|
||||||
**Conclusion** : pas de régression LLM (perf identique, le runner C++ est le même).
|
|
||||||
Gain net sur la TTS (Talker 25-40 % plus rapide grâce au contexte QNN partagé,
|
|
||||||
load 6× plus rapide). Architecture plus propre : un seul process, un seul runtime
|
|
||||||
QNN, plus aucune contention DSP, plus aucun prompt root.
|
|
||||||
|
|
|
||||||
|
|
@ -1,233 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Generate per-voice <name>_voice_prefix.bin (9 × 1024 fp32) and
|
|
||||||
<name>_voice_suffix.bin (2 × 1024 fp32) for Kazeia's on-device TTS
|
|
||||||
engine (Qwen3-TTS 0.6B-Base voice-clone mode).
|
|
||||||
|
|
||||||
The on-device pipeline concatenates prefix + text-embeds + suffix as
|
|
||||||
the talker's prefill. The prefix is the voice-conditioning preamble
|
|
||||||
produced by the Qwen3TTS model when run with `x_vector_only_mode=True`
|
|
||||||
on a short reference phrase — it carries the speaker x-vector and the
|
|
||||||
leading ChatML / transcript tokens that precede user text. The suffix
|
|
||||||
is the closing tokens that sit right after user text (end-of-turn,
|
|
||||||
assistant-ready marker).
|
|
||||||
|
|
||||||
Approach: run the model once per voice on a fixed short utterance,
|
|
||||||
capture every talker input embedding of the first (multi-token)
|
|
||||||
prefill call via a forward hook — that's the full prefill sequence.
|
|
||||||
The reference Damien files contain exactly 9 pre-text embeds + 2
|
|
||||||
post-text embeds, which corresponds to:
|
|
||||||
|
|
||||||
[prefix: 9 vectors] [text embeds: N vectors] [suffix: 2 vectors]
|
|
||||||
|
|
||||||
The script does not tokenize the utterance: it keeps the first
|
|
||||||
N_PREFIX (9) and last N_SUFFIX (2) vectors of the captured prefill
|
|
||||||
and drops the text embeds in between. The counts are fixed, so
|
|
||||||
re-check them if the chat template changes. The split matched the
|
|
||||||
Damien files bit-identically (verified during the first run: /tmp/check_damien_*).
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
export_voice_prefix_suffix.py VOICE.wav [VOICE.wav ...]
|
|
||||||
--out-dir /path/to/output (default /tmp/voice_prefixes)
|
|
||||||
--text "Bonjour." (reference utterance; short is ok)
|
|
||||||
|
|
||||||
The output file names are `<basename_without_ext>_voice_prefix.bin`
|
|
||||||
and `<basename_without_ext>_voice_suffix.bin`. Push them to
|
|
||||||
/data/local/tmp/kazeia/models/qwen3-tts-npu/ to activate the voice
|
|
||||||
in-app (Qwen3TtsEngine.setVoice reads them from there).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import struct
|
|
||||||
import sys
|
|
||||||
import warnings
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
warnings.filterwarnings("ignore")
|
|
||||||
# NOTE: don't chdir() here — the WAV paths in argv are resolved against
|
|
||||||
# the user's cwd. Qwen3TTS creates /tmp scratch files internally already.
|
|
||||||
|
|
||||||
MODEL_PATH = (
|
|
||||||
"/home/alf/.cache/huggingface/hub/"
|
|
||||||
"models--Qwen--Qwen3-TTS-12Hz-0.6B-Base/snapshots/"
|
|
||||||
"5d83992436eae1d760afd27aff78a71d676296fc"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Prefix + suffix sizes taken from the reference damien_voice_prefix.bin /
|
|
||||||
# damien_voice_suffix.bin shipped on the tablet. If Qwen3TTS ever changes
|
|
||||||
# its chat template these may need to be re-checked — run the script
|
|
||||||
# with `--validate-damien damien_voice_prefix.bin` to diff against a
|
|
||||||
# known-good capture.
|
|
||||||
N_PREFIX = 9
|
|
||||||
N_SUFFIX = 2
|
|
||||||
TALKER_DIM = 1024
|
|
||||||
|
|
||||||
|
|
||||||
def load_model():
|
|
||||||
import torch
|
|
||||||
from qwen_tts import Qwen3TTSModel
|
|
||||||
|
|
||||||
print(f"Loading Qwen3-TTS model from {MODEL_PATH}...", flush=True)
|
|
||||||
tts = Qwen3TTSModel.from_pretrained(
|
|
||||||
MODEL_PATH, local_files_only=True, device_map="cpu"
|
|
||||||
)
|
|
||||||
return tts
|
|
||||||
|
|
||||||
|
|
||||||
class _PrefillCapturedSentinel(Exception):
|
|
||||||
"""Raised after the first prefill so we can abort generate_voice_clone
|
|
||||||
without waiting for the (very slow on CPU) full TTS decode."""
|
|
||||||
|
|
||||||
|
|
||||||
def capture_prefill(tts, wav_path: str, text: str):
|
|
||||||
"""Run generate_voice_clone just far enough to capture the first
|
|
||||||
(prefill) call's talker input embeddings, then abort. Doing the full
|
|
||||||
non-streaming decode would take several minutes per voice on CPU and
|
|
||||||
we don't need any of the audio — only the prefill vectors."""
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
captured = []
|
|
||||||
talker = tts.model.talker
|
|
||||||
original_forward = talker.model.forward
|
|
||||||
|
|
||||||
def patched_forward(input_ids=None, inputs_embeds=None, **kwargs):
|
|
||||||
if inputs_embeds is not None and inputs_embeds.dim() == 3:
|
|
||||||
t = inputs_embeds.shape[1]
|
|
||||||
for i in range(t):
|
|
||||||
captured.append(
|
|
||||||
inputs_embeds[0, i, :].detach().cpu().numpy().astype(np.float32)
|
|
||||||
)
|
|
||||||
raise _PrefillCapturedSentinel()
|
|
||||||
return original_forward(
|
|
||||||
input_ids=input_ids, inputs_embeds=inputs_embeds, **kwargs
|
|
||||||
)
|
|
||||||
|
|
||||||
talker.model.forward = patched_forward
|
|
||||||
try:
|
|
||||||
try:
|
|
||||||
tts.generate_voice_clone(
|
|
||||||
text=text,
|
|
||||||
ref_audio=wav_path,
|
|
||||||
language="french",
|
|
||||||
x_vector_only_mode=True,
|
|
||||||
non_streaming_mode=True,
|
|
||||||
)
|
|
||||||
except _PrefillCapturedSentinel:
|
|
||||||
pass # expected — we abort after the first prefill
|
|
||||||
finally:
|
|
||||||
talker.model.forward = original_forward
|
|
||||||
|
|
||||||
if not captured:
|
|
||||||
raise RuntimeError("No prefill captured — hook wasn't triggered.")
|
|
||||||
return captured
|
|
||||||
|
|
||||||
|
|
||||||
def write_bin(path: Path, vectors):
|
|
||||||
n = len(vectors)
|
|
||||||
dim = len(vectors[0]) if n else TALKER_DIM
|
|
||||||
if dim != TALKER_DIM:
|
|
||||||
raise RuntimeError(f"Expected dim {TALKER_DIM}, got {dim}")
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(struct.pack("<ii", n, dim))
|
|
||||||
for v in vectors:
|
|
||||||
f.write(struct.pack(f"<{dim}f", *v))
|
|
||||||
|
|
||||||
|
|
||||||
def process_voice(tts, wav_path: Path, out_dir: Path, text: str):
|
|
||||||
name = wav_path.stem.lower().split("_")[0] # "damien_15s_24k" → "damien"
|
|
||||||
prefix_path = out_dir / f"{name}_voice_prefix.bin"
|
|
||||||
suffix_path = out_dir / f"{name}_voice_suffix.bin"
|
|
||||||
if prefix_path.exists() and suffix_path.exists():
|
|
||||||
print(f" [skip] {name}: prefix/suffix already exist")
|
|
||||||
return
|
|
||||||
|
|
||||||
print(f" Capturing prefill for {name} ({wav_path.name})...", flush=True)
|
|
||||||
prefill = capture_prefill(tts, str(wav_path), text)
|
|
||||||
if len(prefill) < N_PREFIX + N_SUFFIX + 1:
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Prefill too short for {name}: {len(prefill)} < {N_PREFIX + N_SUFFIX + 1}"
|
|
||||||
)
|
|
||||||
prefix_vecs = prefill[:N_PREFIX]
|
|
||||||
suffix_vecs = prefill[-N_SUFFIX:]
|
|
||||||
write_bin(prefix_path, prefix_vecs)
|
|
||||||
write_bin(suffix_path, suffix_vecs)
|
|
||||||
print(
|
|
||||||
f" Wrote {prefix_path.name} ({N_PREFIX}×{TALKER_DIM}) "
|
|
||||||
f"and {suffix_path.name} ({N_SUFFIX}×{TALKER_DIM})",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def validate_against_damien(tts, wav_path: Path, reference_prefix: Path, text: str):
|
|
||||||
"""Regenerate Damien's prefix/suffix from damien.wav and diff against
|
|
||||||
the reference files shipped on the tablet. Confirms this script's
|
|
||||||
slicing reproduces the original format."""
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
prefill = capture_prefill(tts, str(wav_path), text)
|
|
||||||
candidate = np.array(prefill[:N_PREFIX], dtype=np.float32)
|
|
||||||
|
|
||||||
with open(reference_prefix, "rb") as f:
|
|
||||||
n, d = struct.unpack("<ii", f.read(8))
|
|
||||||
ref = np.frombuffer(f.read(n * d * 4), dtype=np.float32).reshape(n, d)
|
|
||||||
|
|
||||||
diff = np.abs(candidate - ref)
|
|
||||||
print(
|
|
||||||
f"Damien prefix validation: max|diff|={diff.max():.3e} "
|
|
||||||
f"mean|diff|={diff.mean():.3e} (expect ~0 if script is correct)"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
p = argparse.ArgumentParser()
|
|
||||||
p.add_argument("wavs", nargs="+", help="Voice WAV files")
|
|
||||||
p.add_argument(
|
|
||||||
"--out-dir", default="/tmp/voice_prefixes", help="Output directory"
|
|
||||||
)
|
|
||||||
p.add_argument(
|
|
||||||
"--text", default="Bonjour.", help="Reference utterance for prefill"
|
|
||||||
)
|
|
||||||
p.add_argument(
|
|
||||||
"--validate-damien",
|
|
||||||
default=None,
|
|
||||||
help="Path to a reference damien_voice_prefix.bin for sanity-check",
|
|
||||||
)
|
|
||||||
args = p.parse_args()
|
|
||||||
|
|
||||||
out_dir = Path(args.out_dir)
|
|
||||||
out_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
tts = load_model()
|
|
||||||
|
|
||||||
if args.validate_damien:
|
|
||||||
damien_wav = next(
|
|
||||||
(Path(w) for w in args.wavs if "damien" in Path(w).stem.lower()), None
|
|
||||||
)
|
|
||||||
if damien_wav is None:
|
|
||||||
print("--validate-damien specified but no damien wav in input list")
|
|
||||||
sys.exit(1)
|
|
||||||
validate_against_damien(tts, damien_wav, Path(args.validate_damien), args.text)
|
|
||||||
|
|
||||||
for wav in args.wavs:
|
|
||||||
wp = Path(wav)
|
|
||||||
if not wp.exists():
|
|
||||||
print(f" [miss] {wp}")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
process_voice(tts, wp, out_dir, args.text)
|
|
||||||
except Exception as e:
|
|
||||||
print(f" [fail] {wp.name}: {e}")
|
|
||||||
|
|
||||||
print(f"\nDone. Files written under {out_dir}")
|
|
||||||
print(
|
|
||||||
"Push to the tablet with, e.g.:\n"
|
|
||||||
f" adb push {out_dir}/*_voice_prefix.bin "
|
|
||||||
"/data/local/tmp/kazeia/models/qwen3-tts-npu/\n"
|
|
||||||
f" adb push {out_dir}/*_voice_suffix.bin "
|
|
||||||
"/data/local/tmp/kazeia/models/qwen3-tts-npu/"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
Loading…
Reference in New Issue