spec : discard last drafted token with low prob (#1820)

* spec : discard last drafted token with low prob

* Apply suggestion from @ikawrakow

Co-authored-by: Kawrakow <iwankawrakow@gmail.com>

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Kawrakow <iwankawrakow@gmail.com>
This commit is contained in:
firecoperana 2026-05-19 00:35:35 -05:00 committed by GitHub
parent f43a9f1cf6
commit 104846ddee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 5 deletions

View File

@ -471,16 +471,20 @@ struct common_speculative_state_draft : public common_speculative_state {
common_sampler_accept(smpl, nullptr, id, true);
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < params.p_min) {
if (i == 0) {
result.push_back(id);
}
break;
}
result.push_back(id);
if (params.n_max <= (int) result.size()) {
break;
}
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < params.p_min) {
break;
}
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);

View File

@ -5032,7 +5032,7 @@ void server_context::update_slots() {
// start populating the batch for this iteration
common_batch_clear(batch);
// frist, add sampled tokens from any ongoing sequences
// first, add sampled tokens from any ongoing sequences
add_sampled_tokens(); // Prepare batch for inference
// process in chunks of params.n_batch