* Bug fixes

* More
This commit is contained in:
Kawrakow 2026-06-10 07:45:49 +02:00 committed by GitHub
parent 2768b62515
commit 366e478cb6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 8 additions and 7 deletions

View File

@@ -5145,7 +5145,7 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
yaml_dump_string_multiline(stream, "in_prefix", params.input_prefix.c_str());
fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? "true" : "false");
-yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str());
+yaml_dump_string_multiline(stream, "in_suffix", params.input_suffix.c_str());
fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
fprintf(stream, "keep: %d # default: 0\n", params.n_keep);

View File

@@ -64,10 +64,10 @@ static bool common_speculative_are_compatible(
const llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt);
const llama_vocab * vocab_dft = llama_model_get_vocab(model_dft);
-const bool vocab_type_tgt = llama_vocab_type(vocab_tgt);
+const auto vocab_type_tgt = llama_vocab_type(vocab_tgt);
LOG_DBG("%s: vocab_type tgt: %d\n", __func__, vocab_type_tgt);
-const bool vocab_type_dft = llama_vocab_type(vocab_dft);
+const auto vocab_type_dft = llama_vocab_type(vocab_dft);
LOG_DBG("%s: vocab_type dft: %d\n", __func__, vocab_type_dft);
if (vocab_type_tgt != vocab_type_dft) {

View File

@@ -6426,7 +6426,7 @@ void vec_dot_q3_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t b
//
void quantize_row_q2_k_r4_ref(const float * x, block_q2_k_r4 * y, int64_t k) {
-quantize_q3_k_r4(x, (void *)y, 4, k/4, nullptr, nullptr);
+quantize_q2_k_r4(x, (void *)y, 4, k/4, nullptr, nullptr);
}
void quantize_row_q2_k_r4(const float * x, void * y, int64_t k) {

View File

@@ -1416,7 +1416,7 @@ void llama_grammar_sample_impl(const struct llama_grammar * grammar, const struc
for (const auto & reject : rejects) {
candidates->data[reject.index].logit = -INFINITY;
}
-if (!smpl) {
+if (smpl) {
smpl->t_sample_us += ggml_time_us() - t_start_sample_us;
}
}

View File

@@ -277,6 +277,7 @@ void llama_sample_tail_free_impl(struct llama_sampling * smpl, llama_token_data_
}
}
+if (min_keep < 1) min_keep = 1;
float cum_sum = 0.0f;
size_t last_idx = candidates->size;
for (size_t i = 0; i < second_derivatives.size(); ++i) {
@@ -337,7 +338,7 @@ void llama_sample_typical_impl(struct llama_sampling * smpl, llama_token_data_ar
cum_sum += candidates->data[idx].p;
// Check if the running sum is greater than typical or if we have kept at least min_keep tokens
-if (cum_sum > p && i >= min_keep - 1) {
+if (cum_sum > p && i + 1 >= min_keep) {
last_idx = i + 1;
break;
}

View File

@@ -3616,7 +3616,7 @@ static bool llm_load_tensors(
LLAMA_LOG_ERROR("Not enough memory in device %d to offload the output layer\n", id);
throw std::runtime_error("Unable to auto-fit model");
}
-device_mem[id] -= layer_sizes[id];
+device_mem[id] -= layer_sizes[n_layer];
if (!tensor_split) {
float sum = 0;
for (int id = 0; id < int(model.splits.size()); ++id) {