Minor logging cleanup

2026-06-28 04:30:15 -05:00 · 2026-05-23 16:05:54 +00:00 · 2026-05-23 16:05:54 +00:00 · c7211cc500
commit c7211cc500
parent 40d8cb196a
2 changed files with 20 additions and 20 deletions
--- a/src/llama-hparams.cpp
+++ b/src/llama-hparams.cpp
@@ -938,15 +938,15 @@ void llm_load_hparams(
                    int n_nead_kv = hparams.n_gqa();
                    if (n_nead_kv%4 != 0 || hparams.n_embd_head_k(0) != expected_head_size_k || hparams.n_embd_head_v(0) != expected_head_size_v ||
                        hparams.n_rot != 64) {
-                        printf("==========================================================================\n");
-                        printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
-                        printf("Consider making your own ik_llama.cpp compatible model or\n");
-                        printf("ask the model provider to make one for you,\n\n");
-                        printf("Sorry, uknown model => cannot fix it => bailing out\n");
-                        printf("==========================================================================\n");
+                        LLAMA_LOG_ERROR("==========================================================================\n");
+                        LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
+                        LLAMA_LOG_ERROR("Consider making your own ik_llama.cpp compatible model or\n");
+                        LLAMA_LOG_ERROR("ask the model provider to make one for you,\n\n");
+                        LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
+                        LLAMA_LOG_ERROR("==========================================================================\n");
                        GGML_ABORT("Fatal error");
                    }
-                    printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
+                    LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
                    for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
                    hparams.n_embd_head_k_full = 192;
                    hparams.n_embd_head_v_full = 128;
@@ -976,7 +976,7 @@ void llm_load_hparams(
                    // GLM-4.7-Flash has 47 layers (or 48, if an MTP layer is included in the GGUF).
                    hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
                        LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
-                    printf("================= Missing experts gating function -> set to %s\n",
+                    LLAMA_LOG_INFO("================= Missing experts gating function -> set to %s\n",
                            llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
                }
                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
@@ -1390,13 +1390,13 @@ void llm_load_hparams(
                    int n_nead_kv = hparams.n_gqa();
                    if (n_nead_kv%4 != 0 || hparams.n_embd_head_k_full != 576 || hparams.n_embd_head_v_full != 512 ||
                        hparams.n_rot != 64) {
-                        printf("==========================================================================\n");
-                        printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
-                        printf("Sorry, uknown model => cannot fix it => bailing out\n");
-                        printf("==========================================================================\n");
+                        LLAMA_LOG_ERROR("==========================================================================\n");
+                        LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
+                        LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
+                        LLAMA_LOG_ERROR("==========================================================================\n");
                        GGML_ABORT("Fatal error");
                    }
-                    printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
+                    LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
                    for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
                    hparams.n_embd_head_k_full = 192;
                    hparams.n_embd_head_v_full = 128;
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1112,7 +1112,7 @@ static bool llama_kv_cache_init(
    if (split_cache || replicate_mla) {
        LLAMA_LOG_INFO("%s: KV cache size per device%s:\n", __func__,
                       replicate_mla ? " (MLA replicated)" : "");
-        for (int i = 0; i < int(mem_split.size()); ++i) printf("    Device %d:  %g MiB\n", i, mem_split[i]/1024./1024.);
+        for (int i = 0; i < int(mem_split.size()); ++i) LLAMA_LOG_INFO("    Device %d:  %g MiB\n", i, mem_split[i]/1024./1024.);
    }

 #if 0
@@ -2415,7 +2415,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
                    split.ggml.splits    = split.tensor_splits.data();
                    computed->extra = (void *)&split.ggml;

-                    printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
+                    LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
                            tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
                            ggml_type_name(source->type), n_device);
                } else {
@@ -2432,7 +2432,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
                        iqk_modify_tensor(computed.get());
                    }

-                    printf("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
+                    LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
                            tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
                            ggml_type_name(source->type), ggml_backend_buffer_name(computed->buffer));
                }
@@ -2662,7 +2662,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
                model.tensors_by_name.push_back(std::make_pair(name, l.computed_wk_b_pp.get()));
                l.wk_b_pp = l.computed_wk_b_pp.get();

-                printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
+                LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
                        name.c_str(),
                        (int)l.computed_wk_b_pp->ne[0],
                        (int)l.computed_wk_b_pp->ne[1],
@@ -2801,7 +2801,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
        l.wkv_b = l.computed_wkv_b.get();
        model.tensors_by_name.push_back(std::make_pair(name, l.wkv_b));

-        printf("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
+        LLAMA_LOG_INFO("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
                    ggml_type_name(wkv_b->type), ggml_backend_buffer_name(l.computed_wkv_b->buffer));

        ggml_graph_clear(graph);
@@ -2928,7 +2928,7 @@ static void llm_apply_khad_pretransform(llama_model & model) {

 static void llm_scale_gate_inp_s(llama_model & model, bool uses_mmap) {
    auto & hparams = model.hparams;
-    printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
+    LLAMA_LOG_INFO("%s: n_embd = %d\n", __func__, hparams.n_embd);
    std::vector<float> values(hparams.n_embd);
    float scale = 1.0f/sqrtf((float)hparams.n_embd);
    int n_host = 0;
@@ -10755,7 +10755,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off) {
    if (!lctx || !lctx->sched) return;
    const char * op_name = op < 0 || op >= int(GGML_OP_COUNT) ? "all ops" : ggml_op_name(ggml_op(op));
-    printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
+    LLAMA_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
    ggml_backend_sched_set_op_offload(lctx->sched, ggml_op(op), on_or_off);
 }