Fix KV buffer + revert HTP decoder (BigVGAN too complex for HTP)
- Restored the intermediate KV buffer for the talker (copying KV directly from output to input caused trembling audio, because the output buffer is overwritten during execute())
- BigVGAN HTP compilation takes >5 min, so it is not viable
- RTF 1.35 with clean audio quality

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
985fd9cff9
commit
4dcc4bb8b3
|
|
@ -712,7 +712,7 @@ ExecuTorchJni::runTtsPipelineImpl(
|
|||
auto eosEmb=copyArr(jEos),padEmb=copyArr(jPad);
|
||||
|
||||
int tkvElem=T_KV*T_KV_LEN*T_HD;
|
||||
// tK/tV not needed — KV copied directly from output to input each step
|
||||
std::vector<float> tKV(T_L*2*tkvElem,0); // intermediate KV buffer (avoids output overwrite race)
|
||||
float mask[T_KV_LEN]; for(int i=0;i<T_KV_LEN;i++) mask[i]=-1e9f;
|
||||
float hidden[DIM]={},logits[VOCAB]={};
|
||||
std::vector<int> allCodes,cb0Hist;
|
||||
|
|
@ -749,20 +749,19 @@ ExecuTorchJni::runTtsPipelineImpl(
|
|||
memcpy(tInMask, mask, T_KV_LEN*4);
|
||||
memcpy(tInCos, tCos.data()+pi*T_HD, T_HD*4);
|
||||
memcpy(tInSin, tSin.data()+pi*T_HD, T_HD*4);
|
||||
// KV: copy directly from PREVIOUS output to input (skip intermediate buffer)
|
||||
if(pos > 0) {
|
||||
for(int i=0;i<T_L;i++){
|
||||
memcpy(tInKV[i*2], tMethod->get_output(2+i*2).toTensor().const_data_ptr<float>(), tkvElem*4);
|
||||
memcpy(tInKV[i*2+1], tMethod->get_output(3+i*2).toTensor().const_data_ptr<float>(), tkvElem*4);
|
||||
}
|
||||
for(int i=0;i<T_L;i++){
|
||||
memcpy(tInKV[i*2], tKV.data()+(i*2)*tkvElem, tkvElem*4);
|
||||
memcpy(tInKV[i*2+1], tKV.data()+(i*2+1)*tkvElem, tkvElem*4);
|
||||
}
|
||||
// (pos==0: first call, input KV is already zeros from prepare_input_tensors)
|
||||
|
||||
auto status = tMethod->execute();
|
||||
if(status!=Error::Ok){ET_LOG(Error,"Talker exec fail: %d",(int)status);return;}
|
||||
memcpy(hidden, tMethod->get_output(0).toTensor().const_data_ptr<float>(), DIM*4);
|
||||
memcpy(logits, tMethod->get_output(1).toTensor().const_data_ptr<float>(), VOCAB*4);
|
||||
// KV NOT copied to tK/tV — read from output directly next step
|
||||
for(int i=0;i<T_L;i++){
|
||||
memcpy(tKV.data()+(i*2)*tkvElem, tMethod->get_output(2+i*2).toTensor().const_data_ptr<float>(), tkvElem*4);
|
||||
memcpy(tKV.data()+(i*2+1)*tkvElem, tMethod->get_output(3+i*2).toTensor().const_data_ptr<float>(), tkvElem*4);
|
||||
}
|
||||
pos++;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ class Qwen3TtsEngine(
|
|||
return session
|
||||
}
|
||||
|
||||
// Speech decoder V2: CPU ONNX (GPU tested: no gain, +300ms overhead)
|
||||
// Speech decoder V2 on CPU (HTP tested: BigVGAN convolutions too slow to compile)
|
||||
val v2Path = "$path/v2_pre_conv"
|
||||
if (File("$v2Path/model.onnx").exists()) {
|
||||
nlog("Loading V2 speech decoder (CPU ONNX)...")
|
||||
|
|
|
|||
Loading…
Reference in New Issue