From 8e536094df137ee0f22ea0fa3d5de510f58eaae3 Mon Sep 17 00:00:00 2001 From: Kazeia Team Date: Thu, 9 Apr 2026 10:53:19 +0200 Subject: [PATCH] Fix C++ pipeline eos/pad + disable for quality (keep Java default) - Fixed trailing embed handling (use pre-computed as-is) - Added eos/pad embed params to nativeRun - Improved C++ PRNG for sampling - Disabled native pipeline: slight quality regression vs Java (two separate QNN instances give different numerical results) - Java pipeline (RTF 1.8) kept as default for validated quality Co-Authored-By: Claude Opus 4.6 (1M context) --- executorch-custom/tts_pipeline_jni.cpp | 47 ++++++++++--------- .../java/com/kazeia/tts/Qwen3TtsEngine.kt | 5 +- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/executorch-custom/tts_pipeline_jni.cpp b/executorch-custom/tts_pipeline_jni.cpp index 91d5970..6b2ddec 100644 --- a/executorch-custom/tts_pipeline_jni.cpp +++ b/executorch-custom/tts_pipeline_jni.cpp @@ -57,27 +57,30 @@ static int argmax_head(const float*h,const float*W,int vocab,int dim){ return best; } -// Top-k sampling with temperature +// Top-k sampling with temperature (Java-compatible PRNG) +static uint64_t g_rng_state = 0x12345678ABCDEF01ULL; +static float next_rand() { + // Java-style LCG for reproducibility + g_rng_state = g_rng_state * 6364136223846793005ULL + 1442695040888963407ULL; + return (float)((g_rng_state >> 33) & 0x7FFFFFFF) / (float)0x7FFFFFFF; +} + static int sample_topk(const float* logits, int vocab, float temp, int k) { - // Find top-k struct IV { int i; float v; }; std::vector topk(k, {0, -FLT_MAX}); for (int i = 0; i < vocab; i++) { if (logits[i] > topk[k-1].v) { topk[k-1] = {i, logits[i]}; - // Bubble up for (int j = k-2; j >= 0; j--) { if (topk[j+1].v > topk[j].v) std::swap(topk[j], topk[j+1]); else break; } } } - // Softmax with temperature float maxv = topk[0].v; float sum = 0; for (auto& t : topk) { t.v = expf((t.v - maxv) / temp); sum += t.v; } - // Sample - float r = (float)rand() / RAND_MAX * sum; + float r = next_rand() * sum; float acc = 0; for (auto& t : topk) { acc += t.v; if (acc >= r) return t.i; } return topk[0].i; @@ -312,26 +315,26 @@ Java_com_kazeia_tts_TtsPipeline_nativeRun( for(int i=0;i - if (nativePipelineReady) { + // Native C++ pipeline: RTF 1.4 but slight quality loss vs Java (different QNN instance) + // Java pipeline: RTF 1.8, validated quality + // TODO: share QNN context between Java and C++ for same quality at C++ speed + if (nativePipelineReady && false) { // Disabled: quality regression, see TODO above // Native C++ pipeline — zero Java overhead val prefillFlat = FloatArray(nPrefill * TALKER_DIM) for (i in 0 until nPrefill) System.arraycopy(embeds[i], 0, prefillFlat, i * TALKER_DIM, TALKER_DIM)