Minor logging cleanup

This commit is contained in:
Kawrakow 2026-05-23 16:05:54 +00:00
parent 40d8cb196a
commit c7211cc500
2 changed files with 20 additions and 20 deletions

View File

@ -938,15 +938,15 @@ void llm_load_hparams(
int n_nead_kv = hparams.n_gqa();
if (n_nead_kv%4 != 0 || hparams.n_embd_head_k(0) != expected_head_size_k || hparams.n_embd_head_v(0) != expected_head_size_v ||
hparams.n_rot != 64) {
printf("==========================================================================\n");
printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
printf("Consider making your own ik_llama.cpp compatible model or\n");
printf("ask the model provider to make one for you,\n\n");
printf("Sorry, uknown model => cannot fix it => bailing out\n");
printf("==========================================================================\n");
LLAMA_LOG_ERROR("==========================================================================\n");
LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
LLAMA_LOG_ERROR("Consider making your own ik_llama.cpp compatible model or\n");
LLAMA_LOG_ERROR("ask the model provider to make one for you,\n\n");
LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
LLAMA_LOG_ERROR("==========================================================================\n");
GGML_ABORT("Fatal error");
}
printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
hparams.n_embd_head_k_full = 192;
hparams.n_embd_head_v_full = 128;
@ -976,7 +976,7 @@ void llm_load_hparams(
// GLM-4.7-Flash has 47 layers (or 48, if an MTP layer is included in the GGUF).
hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
printf("================= Missing experts gating function -> set to %s\n",
LLAMA_LOG_INFO("================= Missing experts gating function -> set to %s\n",
llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
}
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
@ -1390,13 +1390,13 @@ void llm_load_hparams(
int n_nead_kv = hparams.n_gqa();
if (n_nead_kv%4 != 0 || hparams.n_embd_head_k_full != 576 || hparams.n_embd_head_v_full != 512 ||
hparams.n_rot != 64) {
printf("==========================================================================\n");
printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
printf("Sorry, uknown model => cannot fix it => bailing out\n");
printf("==========================================================================\n");
LLAMA_LOG_ERROR("==========================================================================\n");
LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
LLAMA_LOG_ERROR("==========================================================================\n");
GGML_ABORT("Fatal error");
}
printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
hparams.n_embd_head_k_full = 192;
hparams.n_embd_head_v_full = 128;

View File

@ -1112,7 +1112,7 @@ static bool llama_kv_cache_init(
if (split_cache || replicate_mla) {
LLAMA_LOG_INFO("%s: KV cache size per device%s:\n", __func__,
replicate_mla ? " (MLA replicated)" : "");
for (int i = 0; i < int(mem_split.size()); ++i) printf(" Device %d: %g MiB\n", i, mem_split[i]/1024./1024.);
for (int i = 0; i < int(mem_split.size()); ++i) LLAMA_LOG_INFO(" Device %d: %g MiB\n", i, mem_split[i]/1024./1024.);
}
#if 0
@ -2415,7 +2415,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
split.ggml.splits = split.tensor_splits.data();
computed->extra = (void *)&split.ggml;
printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
ggml_type_name(source->type), n_device);
} else {
@ -2432,7 +2432,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
iqk_modify_tensor(computed.get());
}
printf("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
ggml_type_name(source->type), ggml_backend_buffer_name(computed->buffer));
}
@ -2662,7 +2662,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
model.tensors_by_name.push_back(std::make_pair(name, l.computed_wk_b_pp.get()));
l.wk_b_pp = l.computed_wk_b_pp.get();
printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
name.c_str(),
(int)l.computed_wk_b_pp->ne[0],
(int)l.computed_wk_b_pp->ne[1],
@ -2801,7 +2801,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
l.wkv_b = l.computed_wkv_b.get();
model.tensors_by_name.push_back(std::make_pair(name, l.wkv_b));
printf("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
LLAMA_LOG_INFO("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
ggml_type_name(wkv_b->type), ggml_backend_buffer_name(l.computed_wkv_b->buffer));
ggml_graph_clear(graph);
@ -2928,7 +2928,7 @@ static void llm_apply_khad_pretransform(llama_model & model) {
static void llm_scale_gate_inp_s(llama_model & model, bool uses_mmap) {
auto & hparams = model.hparams;
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
LLAMA_LOG_INFO("%s: n_embd = %d\n", __func__, hparams.n_embd);
std::vector<float> values(hparams.n_embd);
float scale = 1.0f/sqrtf((float)hparams.n_embd);
int n_host = 0;
@ -10755,7 +10755,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
// Forwards an op-offload setting for the given context to
// ggml_backend_sched_set_op_offload and logs what was requested.
//
// lctx      - context whose scheduler is updated; the call is a no-op when
//             lctx is null or lctx->sched is null.
// op        - operation index; a value outside [0, GGML_OP_COUNT) is reported
//             in the log message as "all ops". The value is cast to ggml_op and
//             passed on unchanged in either case.
// on_or_off - the setting handed to the scheduler (logged as 0/1).
//
// NOTE(review): this view is a diff hunk, so the printf line and the
// LLAMA_LOG_INFO line below are presumably the old and new form of the same
// message; only the LLAMA_LOG_INFO line is expected to remain — confirm
// against the actual file.
void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off) {
// Nothing to configure without a context and its scheduler.
if (!lctx || !lctx->sched) return;
// Human-readable label for the log only; does not affect what is forwarded.
const char * op_name = op < 0 || op >= int(GGML_OP_COUNT) ? "all ops" : ggml_op_name(ggml_op(op));
printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
LLAMA_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
ggml_backend_sched_set_op_offload(lctx->sched, ggml_op(op), on_or_off);
}