Fix: use pre-computed embeds as-is (no double codec_sum)

Pre-computed embeds from Python already contain codec_sum+text,
so using them as-is works correctly. Once they are exhausted, fall
back to our own codec_sum + pad.

Long text: 191 tokens, 15.28s audio, RTF 1.27

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Kazeia Team 2026-04-09 14:10:23 +02:00
parent f6df1738c5
commit 24157c0a68
1 changed file with 9 additions and 13 deletions

View File

@@ -839,23 +839,19 @@ ExecuTorchJni::runTtsPipelineImpl(
for(int i=0;i<NUM_CB;i++) allCodes.push_back(codes[i]);
cb0Hist.push_back(curCb0);
// Next embed: codec_sum + (trailing text / eos / pad)
// Next embed: pre-computed from Python (complete: codec_sum+text)
// After exhausted: codec_sum(our codes) + pad
float nextEmb[DIM]={};
// Always add codec embeddings from our codes
if(trIdx<nTrailing){
memcpy(nextEmb,trailing.data()+trIdx*DIM,DIM*4);
trIdx++;
} else {
const float*e0=codecEmb.data()+std::min(std::max(codes[0],0),VOCAB-1)*DIM;
for(int k=0;k<DIM;k++) nextEmb[k]+=e0[k];
for(int cb=0;cb<15;cb++){
const float*ec=cpEmbs.data()+((long)cb*CB_SIZE+std::min(std::max(codes[cb+1],0),CB_SIZE-1))*DIM;
for(int k=0;k<DIM;k++) nextEmb[k]+=ec[k];
}
// Add trailing text embed, then eos once, then pad
if(trIdx<nTrailing){
const float*te=trailing.data()+trIdx*DIM;
for(int k=0;k<DIM;k++) nextEmb[k]+=te[k];
trIdx++;
} else if(trIdx==nTrailing){
for(int k=0;k<DIM;k++) nextEmb[k]+=eosEmb[k]; trIdx++;
} else {
for(int k=0;k<DIM;k++) nextEmb[k]+=padEmb[k];
}