// Top-k sampling with temperature, driven by a small deterministic PRNG.

// State of the 64-bit linear congruential generator behind next_rand().
// The seed is a fixed constant, so every process start yields the same
// sequence of draws. This is a plain global with no synchronization:
// concurrent callers of next_rand()/sample_topk() would race on it.
static uint64_t g_rng_state = 0x12345678ABCDEF01ULL;

// Advances the generator one step and returns a value in [0, 1].
//
// The multiplier/increment pair is the one Knuth gives for MMIX. It is NOT
// the 48-bit generator used by java.util.Random, so this only reproduces a
// Java/Kotlin-side sequence if that side implements the very same recurrence
// by hand -- NOTE(review): confirm against the Kotlin pipeline.
//
// The upper bound is inclusive: the 31 retained bits are rounded to a 24-bit
// float mantissa, so the largest values round up to 2^31 and the quotient can
// be exactly 1.0f. The arithmetic is kept as-is so the stream stays stable.
static float next_rand() {
    g_rng_state = g_rng_state * 6364136223846793005ULL + 1442695040888963407ULL;
    // Use the high bits; the low bits of a power-of-two-modulus LCG are weak.
    const uint64_t bits = (g_rng_state >> 33) & 0x7FFFFFFF;
    return static_cast<float>(bits) / static_cast<float>(0x7FFFFFFF);
}

// Draws a token index from the k largest logits after a temperature softmax.
//
//   logits : array of `vocab` scores
//   vocab  : number of entries in `logits`
//   temp   : softmax temperature; values <= 0 (or NaN) select the argmax
//   k      : number of candidates to keep; clamped to [1, vocab]
//
// Returns the chosen index, or 0 when there is nothing to sample from
// (null `logits` or vocab <= 0). Consumes one next_rand() draw per call on
// the sampling path.
static int sample_topk(const float* logits, int vocab, float temp, int k) {
    if (logits == nullptr || vocab <= 0) return 0;
    // k <= 0 would index topk[-1]; k > vocab would leave phantom candidates
    // pointing at index 0 that can receive non-zero weight for large temp.
    if (k < 1) k = 1;
    if (k > vocab) k = vocab;

    struct IV { int i; float v; };
    // Candidates kept sorted by descending score; the last slot is the
    // current cut-off.
    std::vector<IV> topk(k, IV{0, -FLT_MAX});
    for (int i = 0; i < vocab; i++) {
        if (logits[i] > topk[k - 1].v) {
            topk[k - 1] = IV{i, logits[i]};
            // Bubble the new entry up to its sorted position. The strict
            // comparison keeps earlier indices first among equal scores.
            for (int j = k - 2; j >= 0; j--) {
                if (topk[j + 1].v > topk[j].v) std::swap(topk[j], topk[j + 1]);
                else break;
            }
        }
    }

    // A non-positive temperature makes (v - maxv) / temp divide by zero or
    // invert the distribution; treat it as greedy decoding instead.
    if (!(temp > 0.0f)) return topk[0].i;

    // Softmax numerators, shifted by the maximum for numerical stability.
    const float maxv = topk[0].v;
    float sum = 0;
    for (auto& t : topk) {
        t.v = expf((t.v - maxv) / temp);
        sum += t.v;
    }

    // Inverse-CDF sampling over the unnormalized weights.
    const float r = next_rand() * sum;
    float acc = 0;
    for (const auto& t : topk) {
        acc += t.v;
        if (acc >= r) return t.i;
    }
    return topk[0].i;
}