app : add the llama download subcommand (#24982)

* app : add the download command (with llama-download)

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Remove llama-download tool for now

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2026-06-25 13:36:36 +02:00 committed by GitHub
parent f728adab68
commit 683b04cc4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 107 additions and 19 deletions

View File

@ -1,6 +1,6 @@
set(TARGET llama-app) set(TARGET llama-app)
add_executable(${TARGET} llama.cpp) add_executable(${TARGET} llama.cpp download.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
target_link_libraries(${TARGET} PRIVATE target_link_libraries(${TARGET} PRIVATE

70
app/download.cpp Normal file
View File

@ -0,0 +1,70 @@
#include "arg.h"
#include "common.h"
#include "download.h"
#include "log.h"

#include <cstdio>
#include <filesystem>
#include <system_error>
// Print example invocations of the download command to stdout.
//
// Passed to common_params_parse() as the usage callback. Only argv[0] is read;
// it is substituted as the program name in every example line.
static void print_usage(int /*argc*/, char ** argv) {
    const char * prog = argv[0];

    printf("\nexamples:\n");
    printf(" %s -hf ggml-org/gemma-3-4b-it-qat-GGUF\n", prog);
    printf(" %s -hf ggml-org/gemma-3-4b-it-qat-GGUF:Q4_K_M\n", prog);
    printf(" %s -hf ggml-org/models -hff model.gguf\n", prog);
    printf(" %s -mu https://example.com/model.gguf -m model.gguf\n", prog);
    printf("\n");
}
// Prototype ahead of the definition: the function has external linkage and no
// header included here declares it.
int llama_download(int argc, char ** argv);

// Entry point of the `download` command.
//
// Parses the command line, resolves the requested model through
// common_params_handle_models() and prints the resulting local path(s) on
// stdout, one per line:
//   - the models preset path alone, when one was resolved; otherwise
//   - the model path, followed by the mmproj path and the speculative draft
//     model path when those are set.
//
// Returns 0 on success and 1 on any failure. Errors are written to stderr.
int llama_download(int argc, char ** argv) {
    common_init();

    common_params params;
    // NOTE(review): presumably lowered so that log output does not mix with the printed paths
    params.verbosity = LOG_LEVEL_ERROR;

    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DOWNLOAD, print_usage)) {
        return 1;
    }

    // at least one way of locating the model must have been given
    const bool has_source =
        !params.model.hf_repo.empty()     ||
        !params.model.url.empty()         ||
        !params.model.path.empty()        ||
        !params.model.docker_repo.empty();
    if (!has_source) {
        fprintf(stderr, "error: no model source specified (use --hf-repo, --model-url, --model or --docker-repo)\n");
        return 1;
    }

    // the argument parser skips model handling for this example, so run it here
    try {
        common_params_handle_models(params, LLAMA_EXAMPLE_DOWNLOAD, {});
    } catch (const std::exception & e) {
        fprintf(stderr, "error: %s\n", e.what());
        return 1;
    }

    if (!params.models_preset.empty()) {
        // -hf pointed at a preset repo: print the preset path and stop
        printf("%s\n", params.models_preset.c_str());
        return 0;
    }

    if (params.model.path.empty()) {
        fprintf(stderr, "error: model download failed\n");
        return 1;
    }

    // Use the non-throwing overload: the throwing one raises filesystem_error
    // when the file status cannot be determined for a reason other than
    // "not found" (e.g. permission denied), and nothing here would catch it.
    std::error_code ec;
    const bool model_exists = std::filesystem::exists(params.model.path, ec);
    if (ec) {
        fprintf(stderr, "error: cannot access model file: %s: %s\n", params.model.path.c_str(), ec.message().c_str());
        return 1;
    }
    if (!model_exists) {
        fprintf(stderr, "error: model file does not exist: %s\n", params.model.path.c_str());
        return 1;
    }

    printf("%s\n", params.model.path.c_str());
    if (!params.mmproj.path.empty()) {
        printf("%s\n", params.mmproj.path.c_str());
    }
    if (!params.speculative.draft.mparams.path.empty()) {
        printf("%s\n", params.speculative.draft.mparams.path.c_str());
    }

    return 0;
}

View File

@ -19,6 +19,7 @@ int llama_batched_bench(int argc, char ** argv);
int llama_fit_params(int argc, char ** argv); int llama_fit_params(int argc, char ** argv);
int llama_quantize(int argc, char ** argv); int llama_quantize(int argc, char ** argv);
int llama_perplexity(int argc, char ** argv); int llama_perplexity(int argc, char ** argv);
int llama_download(int argc, char ** argv);
// Self-update is only supported for binaries built with llama-install.sh // Self-update is only supported for binaries built with llama-install.sh
static int llama_update(int argc, char ** argv) { static int llama_update(int argc, char ** argv) {
@ -61,6 +62,7 @@ static const command cmds[] = {
{"serve", "HTTP API server", {"server"}, false, llama_server }, {"serve", "HTTP API server", {"server"}, false, llama_server },
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli }, {"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
{"update", "Update llama to the latest release", {}, UPDATE_HIDDEN, llama_update }, {"update", "Update llama to the latest release", {}, UPDATE_HIDDEN, llama_update },
{"download", "Download a model", {"get"}, false, llama_download },
{"completion", "Text completion", {"complete"}, true, llama_completion }, {"completion", "Text completion", {"complete"}, true, llama_completion },
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench }, {"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench}, {"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},

View File

@ -594,6 +594,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
const bool skip_model_download = const bool skip_model_download =
// server will call common_params_handle_models() later, so we skip it here // server will call common_params_handle_models() later, so we skip it here
ctx_arg.ex == LLAMA_EXAMPLE_SERVER || ctx_arg.ex == LLAMA_EXAMPLE_SERVER ||
// download calls common_params_handle_models() itself and prints the paths
ctx_arg.ex == LLAMA_EXAMPLE_DOWNLOAD ||
// export_graph_ops loads only metadata // export_graph_ops loads only metadata
ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS; ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
@ -671,15 +673,19 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
common_options.push_back(&opt); common_options.push_back(&opt);
} }
} }
printf("----- common params -----\n\n"); bool first = true;
print_options(common_options); auto print_section = [&](const char * header, std::vector<common_arg *> & options) {
printf("\n\n----- sampling params -----\n\n"); if (options.empty()) {
print_options(sampling_options); return;
printf("\n\n----- speculative params -----\n\n"); }
print_options(spec_options); printf("%s----- %s -----\n\n", first ? "" : "\n\n", header);
// TODO: maybe convert enum llama_example to string first = false;
printf("\n\n----- example-specific params -----\n\n"); print_options(options);
print_options(specific_options); };
print_section("common params", common_options);
print_section("sampling params", sampling_options);
print_section("speculative params", spec_options);
print_section("example-specific params", specific_options);
} }
static void common_params_print_completion(common_params_context & ctx_arg) { static void common_params_print_completion(common_params_context & ctx_arg) {
@ -1079,7 +1085,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
* - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
*/ */
auto add_opt = [&](common_arg arg) { auto add_opt = [&](common_arg arg) {
if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) { // download only exposes the handful of args explicitly tagged for it
const bool inherit_common = ex != LLAMA_EXAMPLE_DOWNLOAD;
if ((arg.in_example(ex) || (inherit_common && arg.in_example(LLAMA_EXAMPLE_COMMON))) && !arg.is_exclude(ex)) {
ctx_arg.options.push_back(std::move(arg)); ctx_arg.options.push_back(std::move(arg));
} }
}; };
@ -1090,7 +1098,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params) { [](common_params & params) {
params.usage = true; params.usage = true;
} }
)); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}));
add_opt(common_arg( add_opt(common_arg(
{"--version"}, {"--version"},
"show version and build info", "show version and build info",
@ -2212,7 +2220,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, bool value) { [](common_params & params, bool value) {
params.no_mmproj = !value; params.no_mmproj = !value;
} }
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ_AUTO")); ).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MMPROJ_AUTO"));
add_opt(common_arg( add_opt(common_arg(
{"--mmproj-offload"}, {"--mmproj-offload"},
{"--no-mmproj-offload"}, {"--no-mmproj-offload"},
@ -2611,14 +2619,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.model.path = value; params.model.path = value;
} }
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MODEL"));
add_opt(common_arg( add_opt(common_arg(
{"-mu", "--model-url"}, "MODEL_URL", {"-mu", "--model-url"}, "MODEL_URL",
"model download url (default: unused)", "model download url (default: unused)",
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.model.url = value; params.model.url = value;
} }
).set_env("LLAMA_ARG_MODEL_URL")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MODEL_URL"));
add_opt(common_arg( add_opt(common_arg(
{ "-dr", "--docker-repo" }, "[<repo>/]<model>[:quant]", { "-dr", "--docker-repo" }, "[<repo>/]<model>[:quant]",
"Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.\n" "Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.\n"
@ -2627,7 +2635,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.model.docker_repo = value; params.model.docker_repo = value;
} }
).set_env("LLAMA_ARG_DOCKER_REPO")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_DOCKER_REPO"));
add_opt(common_arg( add_opt(common_arg(
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]", {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
@ -2637,14 +2645,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.model.hf_repo = value; params.model.hf_repo = value;
} }
).set_env("LLAMA_ARG_HF_REPO")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_HF_REPO"));
add_opt(common_arg( add_opt(common_arg(
{"-hff", "--hf-file"}, "FILE", {"-hff", "--hf-file"}, "FILE",
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)", "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.model.hf_file = value; params.model.hf_file = value;
} }
).set_env("LLAMA_ARG_HF_FILE")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_HF_FILE"));
add_opt(common_arg( add_opt(common_arg(
{"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]", {"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
"Hugging Face model repository for the vocoder model (default: unused)", "Hugging Face model repository for the vocoder model (default: unused)",
@ -2665,7 +2673,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.hf_token = value; params.hf_token = value;
} }
).set_env("HF_TOKEN")); ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("HF_TOKEN"));
add_opt(common_arg(
{"--mtp"},
"also download the multi-token prediction (MTP) head, if available (default: unused)",
[](common_params & params) {
params.speculative.types.push_back(COMMON_SPECULATIVE_TYPE_DRAFT_MTP);
}
).set_examples({LLAMA_EXAMPLE_DOWNLOAD}));
add_opt(common_arg( add_opt(common_arg(
{"--context-file"}, "FNAME", {"--context-file"}, "FNAME",
"file to load context from (use comma-separated values to specify multiple files)", "file to load context from (use comma-separated values to specify multiple files)",

View File

@ -96,6 +96,7 @@ enum llama_example {
LLAMA_EXAMPLE_FIT_PARAMS, LLAMA_EXAMPLE_FIT_PARAMS,
LLAMA_EXAMPLE_RESULTS, LLAMA_EXAMPLE_RESULTS,
LLAMA_EXAMPLE_EXPORT_GRAPH_OPS, LLAMA_EXAMPLE_EXPORT_GRAPH_OPS,
LLAMA_EXAMPLE_DOWNLOAD,
LLAMA_EXAMPLE_COUNT, LLAMA_EXAMPLE_COUNT,
}; };