diff --git a/executorch-custom/jni_layer_tts.cpp b/executorch-custom/jni_layer_tts.cpp index f74e88f..a52db0e 100644 --- a/executorch-custom/jni_layer_tts.cpp +++ b/executorch-custom/jni_layer_tts.cpp @@ -692,7 +692,7 @@ ExecuTorchJni::runTtsPipelineImpl( jint maxTokens) { static const int DIM=1024,VOCAB=3072,CB_SIZE=2048,NUM_CB=16; - static const int T_L=28,T_KV=8,T_HD=128,T_KV_LEN=64; + static const int T_L=28,T_KV=8,T_HD=128,T_KV_LEN=100; static const int C_L=5,C_KV=8,C_HD=128,C_KV_LEN=16; static const int CODEC_EOS=2150; @@ -839,13 +839,13 @@ ExecuTorchJni::runTtsPipelineImpl( for(int i=0;i= prefill+maxGen. KV=64 caused quality loss (role tokens evicted) state = torch.load("/opt/Kazeia/models_qnn/qwen3-tts-export/qwen3_tts_talker.pth", map_location="cpu", weights_only=False) diff --git a/scripts/prepare_tts_segments.py b/scripts/prepare_tts_segments.py index bf36f8a..6125d22 100644 --- a/scripts/prepare_tts_segments.py +++ b/scripts/prepare_tts_segments.py @@ -25,40 +25,29 @@ MODEL = "/home/alf/.cache/huggingface/hub/models--Qwen--Qwen3-TTS-12Hz-0.6B-Base import torch, numpy as np from qwen_tts import Qwen3TTSModel -def split_sentences(text, max_tokens=60): - """Split text at sentence boundaries, keeping segments short.""" - # Split at . ! ? ; and keep the punctuation - parts = re.split(r'(?<=[.!?;])\s+', text.strip()) +def split_sentences(text, max_chars=120): + """Split text into SHORT segments (~40-50 tokens max). Each sentence separate.""" + # Split at every sentence boundary + parts = re.split(r'(?<=[.!?;:])\s+', text.strip()) - segments = [] - current = "" - for part in parts: - if current and len(current) + len(part) > 200: # rough char limit - segments.append(current.strip()) - current = part - else: - current = (current + " " + part).strip() if current else part - if current.strip(): - segments.append(current.strip()) - - # If any segment is still too long, split at commas + # Further split long sentences at commas final = [] - for seg in segments: - if len(seg) > 250: - parts = re.split(r'(?<=,)\s+', seg) - sub = "" - for p in parts: - if sub and len(sub) + len(p) > 200: - final.append(sub.strip()) - sub = p + for part in parts: + if len(part) > max_chars: + subs = re.split(r'(?<=,)\s+', part) + current = "" + for s in subs: + if current and len(current) + len(s) > max_chars: + final.append(current.strip()) + current = s else: - sub = (sub + " " + p).strip() if sub else p - if sub.strip(): - final.append(sub.strip()) + current = (current + " " + s).strip() if current else s + if current.strip(): + final.append(current.strip()) else: - final.append(seg) + final.append(part) - return final if final else [text] + return [s for s in final if s.strip()] if final else [text] print(f"Text: '{TEXT[:80]}{'...' if len(TEXT)>80 else ''}'") segments = split_sentences(TEXT)