From 9799b79ca12c3a714a57cba8e3cc76f9950bd57a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 27 Jun 2026 16:41:22 +0300 Subject: [PATCH] cont : common --- common/common.cpp | 94 +++++++++++++++++++++---------------------- src/llama-context.cpp | 2 +- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 74e7363e87..3512c4f866 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -225,7 +225,7 @@ bool set_process_priority(enum ggml_sched_priority prio) { } if (!SetPriorityClass(GetCurrentProcess(), p)) { - LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); + CMN_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); return false; } @@ -251,7 +251,7 @@ bool set_process_priority(enum ggml_sched_priority prio) { } if (setpriority(PRIO_PROCESS, 0, p) != 0) { - LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); + CMN_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); return false; } return true; @@ -284,14 +284,14 @@ void postprocess_cpu_params(common_cpu_params & cpuparams, const common_cpu_para if (n_set && n_set < cpuparams.n_threads) { // Not enough set bits, may experience performance issues. - LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); + CMN_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); } } bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) { size_t dash_loc = range.find('-'); if (dash_loc == std::string::npos) { - LOG_ERR("Format of CPU range is invalid! Expected []-[].\n"); + CMN_ERR("%s", "Format of CPU range is invalid! 
Expected []-[].\n"); return false; } @@ -303,7 +303,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE } else { start_i = std::stoull(range.substr(0, dash_loc)); if (start_i >= GGML_MAX_N_THREADS) { - LOG_ERR("Start index out of bounds!\n"); + CMN_ERR("%s", "Start index out of bounds!\n"); return false; } } @@ -313,7 +313,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE } else { end_i = std::stoull(range.substr(dash_loc + 1)); if (end_i >= GGML_MAX_N_THREADS) { - LOG_ERR("End index out of bounds!\n"); + CMN_ERR("%s", "End index out of bounds!\n"); return false; } } @@ -333,7 +333,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD } size_t num_digits = mask.length() - start_i; - if (num_digits > 128) num_digits = 128; + num_digits = std::min<size_t>(num_digits, 128); size_t end_i = num_digits + start_i; @@ -348,7 +348,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD } else if (c >= 'A' && c <= 'F') { id -= 'A' - 10; } else { - LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); + CMN_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); return false; } @@ -379,21 +379,21 @@ void common_params_print_info(const common_params & params, bool print_devices) #else const char * build_type = " (debug)"; #endif - LOG_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); + CMN_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); - LOG_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold()); + CMN_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold()); // device enumeration creates a primary context on CUDA backends, skip it when the 
caller does not own any device if (print_devices) { - LOG_TRC("device_info:\n"); + CMN_TRC("%s", "device_info:\n"); for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { auto * dev = ggml_backend_dev_get(i); size_t free, total; ggml_backend_dev_memory(dev, &free, &total); - LOG_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + CMN_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); } } - LOG_TRC("%s\n", common_params_get_system_info(params).c_str()); + CMN_TRC("%s\n", common_params_get_system_info(params).c_str()); } std::string common_params_get_system_info(const common_params & params) { @@ -660,7 +660,7 @@ void string_process_escapes(std::string & input) { bool string_parse_kv_override(const char * data, std::vector & overrides) { const char * sep = strchr(data, '='); if (sep == nullptr || sep - data >= 128) { - LOG_ERR("%s: malformed KV override '%s'\n", __func__, data); + CMN_ERR("%s: malformed KV override '%s'\n", __func__, data); return false; } llama_model_kv_override kvo; @@ -683,20 +683,20 @@ bool string_parse_kv_override(const char * data, std::vector 127) { - LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); + CMN_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); return false; } strncpy(kvo.val_str, sep, 127); kvo.val_str[127] = '\0'; } else { - LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data); + CMN_ERR("%s: invalid type for KV override '%s'\n", __func__, data); return false; } overrides.emplace_back(std::move(kvo)); @@ -1199,8 +1199,8 @@ common_init_result::common_init_result(common_params & params, bool model_only) auto cparams = common_context_params_to_llama(params); if (params.fit_params) { - LOG_TRC("%s: fitting params to device memory ...\n", 
__func__); - LOG_TRC("%s: (for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n", __func__); + CMN_TRC("%s", "fitting params to device memory ...\n"); + CMN_TRC("%s", "(for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n"); common_fit_params(params.model.path.c_str(), &mparams, &cparams, params.tensor_split, params.tensor_buft_overrides.data(), @@ -1227,7 +1227,7 @@ common_init_result::common_init_result(common_params & params, bool model_only) llama_adapter_lora_ptr lora; lora.reset(llama_adapter_lora_init(model, la.path.c_str())); if (lora == nullptr) { - LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str()); + CMN_ERR("failed to load lora adapter '%s'\n", la.path.c_str()); pimpl->model.reset(model); return; } @@ -1246,14 +1246,14 @@ common_init_result::common_init_result(common_params & params, bool model_only) common_init_sampler_from_model(model, params.sampling); if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__); + CMN_WRN("%s", "vocab does not have an EOS token, ignoring --ignore-eos\n"); params.sampling.ignore_eos = false; } // initialize once for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { if (llama_vocab_is_eog(vocab, i)) { - LOG_TRC("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(vocab, i).c_str(), -INFINITY); + CMN_TRC("added %s logit bias = %f\n", common_token_to_piece(vocab, i).c_str(), -INFINITY); params.sampling.logit_bias_eog.push_back({i, -INFINITY}); } } @@ -1291,7 +1291,7 @@ common_init_result::common_init_result(common_params & params, bool model_only) llama_context * lctx = llama_init_from_model(model, cparams); if (lctx == NULL) { - LOG_ERR("%s: failed to create context with model '%s'\n", __func__, 
params.model.path.c_str()); + CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); return; } @@ -1328,7 +1328,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode llama_model * model = res->model(); if (model == NULL) { - LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str()); + CMN_ERR("failed to load model '%s'\n", params.model.path.c_str()); return res; } @@ -1338,14 +1338,14 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode llama_context * lctx = res->context(); if (lctx == NULL) { - LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str()); + CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); return res; } const llama_vocab * vocab = llama_model_get_vocab(model); if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) { - LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__); + CMN_WRN("%s", "KV cache shifting is not supported for this context, disabling KV cache shifting\n"); params.ctx_shift = false; } @@ -1374,7 +1374,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode bool ok = true; if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__); + CMN_WRN("%s", "vocab does not have a BOS token, reranking will not work\n"); ok = false; } @@ -1383,10 +1383,10 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode bool has_rerank_prompt = llama_model_chat_template(model, "rerank") != NULL; if (!has_eos && !has_sep && !has_rerank_prompt) { - LOG_WRN("%s: warning: vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n", __func__); + CMN_WRN("%s", "vocab does not have an EOS token, SEP token, or rerank prompt. 
Reranking will not work\n"); ok = false; } else if (!has_eos) { - LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__); + CMN_WRN("%s", "vocab does not have an EOS token, using SEP token as fallback\n"); } if (!ok) { @@ -1399,7 +1399,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode } if (params.warmup) { - LOG_TRC("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); + CMN_TRC("%s", "warming up the model with an empty run - please wait ... (--no-warmup to disable)\n"); std::vector tmp; llama_token bos = llama_vocab_bos(vocab); @@ -1473,20 +1473,20 @@ common_context_seq_rm_type common_context_can_seq_rm(llama_context * ctx) { int ret = llama_decode(ctx, llama_batch_get_one(tmp.data(), tmp.size())); if (ret != 0) { - LOG_ERR("%s: llama_decode() failed: %d\n", __func__, ret); + CMN_ERR("llama_decode() failed: %d\n", ret); res = COMMON_CONTEXT_SEQ_RM_TYPE_NO; goto done; } if (llama_n_rs_seq(ctx) > 0) { - LOG_INF("%s: the context supports bounded partial sequence removal\n", __func__); + CMN_TRC("%s", "the context supports bounded partial sequence removal\n"); res = COMMON_CONTEXT_SEQ_RM_TYPE_RS; goto done; } // try to remove the last tokens if (!llama_memory_seq_rm(mem, 0, 1, -1)) { - LOG_TRC("%s: the context does not support partial sequence removal\n", __func__); + CMN_TRC("%s", "the context does not support partial sequence removal\n"); res = COMMON_CONTEXT_SEQ_RM_TYPE_FULL; goto done; } @@ -1803,13 +1803,13 @@ static common_control_vector_data common_control_vector_load_one(const common_co }; struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params); if (!ctx_gguf) { - LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str()); + CMN_ERR("failed to load control vector file from %s\n", load_info.fname.c_str()); return result; } int32_t n_tensors = 
gguf_get_n_tensors(ctx_gguf); if (n_tensors == 0) { - LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str()); + CMN_WRN("no direction tensors found in %s\n", load_info.fname.c_str()); } for (int i = 0; i < n_tensors; i++) { @@ -1827,23 +1827,23 @@ static common_control_vector_data common_control_vector_load_one(const common_co } } if (layer_idx < 0) { - LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); + CMN_ERR("invalid/unparsable direction tensor layer index in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } else if (layer_idx == 0) { - LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); + CMN_ERR("invalid (zero) direction tensor layer index in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str()); if (tensor->type != GGML_TYPE_F32) { - LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str()); + CMN_ERR("invalid (non-F32) direction tensor type in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } if (ggml_n_dims(tensor) != 1) { - LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str()); + CMN_ERR("invalid (non-1D) direction tensor shape in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } @@ -1851,7 +1851,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co if (result.n_embd == -1) { result.n_embd = ggml_nelements(tensor); } else if (ggml_nelements(tensor) != result.n_embd) { - LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str()); + CMN_ERR("direction tensor in %s does not match previous dimensions\n", load_info.fname.c_str()); result.n_embd = -1; break; } @@ -1868,7 +1868,7 @@ static common_control_vector_data common_control_vector_load_one(const 
common_co } if (result.n_embd == -1) { - LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str()); + CMN_WRN("skipping %s due to invalid direction tensors\n", load_info.fname.c_str()); result.data.clear(); } @@ -1889,7 +1889,7 @@ common_control_vector_data common_control_vector_load(const std::vector(all_tokens.data() + offset), n_tokens_before_last))) { - LOG_ERR("%s : failed to eval\n", __func__); + CMN_ERR("%s", "failed to eval\n"); return false; } n_past += n_tokens_before_last; llama_state_save_file(ctx, state_path.data(), all_tokens.data(), all_tokens.size()); - LOG_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size()); + CMN_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size()); llama_token last_token = all_tokens.back(); llama_batch batch = llama_batch_get_one(&last_token, 1); @@ -2030,13 +2030,13 @@ bool common_prompt_batch_decode( batch.pos = &pos; if (llama_decode(ctx, batch)) { - LOG_ERR("%s : failed to eval last token\n", __func__); + CMN_ERR("%s", "failed to eval last token\n"); return false; } n_past++; } else { if (llama_decode(ctx, llama_batch_get_one(const_cast(all_tokens.data() + offset), n_new))) { - LOG_ERR("%s : failed to eval\n", __func__); + CMN_ERR("%s", "failed to eval\n"); return false; } n_past += n_new; diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 220240ea95..9f8a8fdb86 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -256,7 +256,7 @@ llama_context::llama_context( LLAMA_LOG_INFO("%s: n_outputs_max = %u\n", __func__, cparams.n_outputs_max); if (cparams.n_ctx_seq < hparams.n_ctx_train) { - LLAMA_LOG_WARN("%s: n_ctx_seq (%u) < n_ctx_train (%u) -- the full capacity of the model will not be utilized\n", + LLAMA_LOG_INFO("%s: n_ctx_seq (%u) < n_ctx_train (%u) -- the full capacity of the model will not be utilized\n", __func__, cparams.n_ctx_seq, hparams.n_ctx_train); }