Fix: use pre-computed embeds as-is (no double codec_sum)
The pre-computed embeds from Python already contain codec_sum + text, so using them as-is works correctly. Once the pre-computed embeds are exhausted, fall back to our own codec_sum + pad. Long text: 191 tokens, 15.28 s audio, RTF 1.27. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f6df1738c5
commit
24157c0a68
|
|
@ -839,23 +839,19 @@ ExecuTorchJni::runTtsPipelineImpl(
|
|||
for(int i=0;i<NUM_CB;i++) allCodes.push_back(codes[i]);
|
||||
cb0Hist.push_back(curCb0);
|
||||
|
||||
// Next embed: codec_sum + (trailing text / eos / pad)
|
||||
// Next embed: pre-computed from Python (complete: codec_sum+text)
|
||||
// After exhausted: codec_sum(our codes) + pad
|
||||
float nextEmb[DIM]={};
|
||||
// Always add codec embeddings from our codes
|
||||
const float*e0=codecEmb.data()+std::min(std::max(codes[0],0),VOCAB-1)*DIM;
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=e0[k];
|
||||
for(int cb=0;cb<15;cb++){
|
||||
const float*ec=cpEmbs.data()+((long)cb*CB_SIZE+std::min(std::max(codes[cb+1],0),CB_SIZE-1))*DIM;
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=ec[k];
|
||||
}
|
||||
// Add trailing text embed, then eos once, then pad
|
||||
if(trIdx<nTrailing){
|
||||
const float*te=trailing.data()+trIdx*DIM;
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=te[k];
|
||||
memcpy(nextEmb,trailing.data()+trIdx*DIM,DIM*4);
|
||||
trIdx++;
|
||||
} else if(trIdx==nTrailing){
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=eosEmb[k]; trIdx++;
|
||||
} else {
|
||||
const float*e0=codecEmb.data()+std::min(std::max(codes[0],0),VOCAB-1)*DIM;
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=e0[k];
|
||||
for(int cb=0;cb<15;cb++){
|
||||
const float*ec=cpEmbs.data()+((long)cb*CB_SIZE+std::min(std::max(codes[cb+1],0),CB_SIZE-1))*DIM;
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=ec[k];
|
||||
}
|
||||
for(int k=0;k<DIM;k++) nextEmb[k]+=padEmb[k];
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue