mtmd: fix miscounting n_tokens (#24656)

This commit is contained in:
Xuan-Son Nguyen 2026-06-15 18:07:14 +02:00 committed by GitHub
parent 38d546330a
commit e36a602ba3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -96,16 +96,15 @@ struct mtmd_image_tokens {
// [BOI] [row0 tokens + newline] ... [row(ny-1) tokens + newline] [EOI]
return (nx + 1) * ny + 2;
}
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
if (batch_f32.entries.size() == 1 || n_temporal_merge == 1) {
return nx * ny;
}
uint32_t nz = batch_f32.entries.size();
// TODO: simplify this by repeating the last frame until it fits the temporal merge
if (nz % n_temporal_merge != 0) {
nz = nz / n_temporal_merge + 1;
} else {
nz = nz / n_temporal_merge;
if (n_temporal_merge > 1) {
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
// TODO: simplify this by repeating the last frame until it fits the temporal merge
if (nz % n_temporal_merge != 0) {
nz = nz / n_temporal_merge + 1;
} else {
nz = nz / n_temporal_merge;
}
}
return nx * ny * nz;
}