mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
app : add the llama download subcommand (#24982)
* app : add the download command (with llama-download) Signed-off-by: Adrien Gallouët <angt@huggingface.co> * Remove llama-download tool for now Signed-off-by: Adrien Gallouët <angt@huggingface.co> --------- Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
f728adab68
commit
683b04cc4a
@ -1,6 +1,6 @@
|
|||||||
set(TARGET llama-app)
|
set(TARGET llama-app)
|
||||||
|
|
||||||
add_executable(${TARGET} llama.cpp)
|
add_executable(${TARGET} llama.cpp download.cpp)
|
||||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
|
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE
|
target_link_libraries(${TARGET} PRIVATE
|
||||||
|
|||||||
70
app/download.cpp
Normal file
70
app/download.cpp
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
#include "arg.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "download.h"
|
||||||
|
#include "log.h"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
// Writes a short list of example invocations for the download command to stdout.
// The argument count is ignored; argv[0] is substituted as the program name in
// each example line.
static void print_usage(int /*argc*/, char ** argv) {
    const char * prog = argv[0];

    printf("\nexamples:\n");
    printf(" %s -hf ggml-org/gemma-3-4b-it-qat-GGUF\n", prog);
    printf(" %s -hf ggml-org/gemma-3-4b-it-qat-GGUF:Q4_K_M\n", prog);
    printf(" %s -hf ggml-org/models -hff model.gguf\n", prog);
    printf(" %s -mu https://example.com/model.gguf -m model.gguf\n", prog);
    printf("\n");
}
|
||||||
|
|
||||||
|
int llama_download(int argc, char ** argv);
|
||||||
|
|
||||||
|
int llama_download(int argc, char ** argv) {
|
||||||
|
common_init();
|
||||||
|
|
||||||
|
common_params params;
|
||||||
|
params.verbosity = LOG_LEVEL_ERROR;
|
||||||
|
|
||||||
|
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DOWNLOAD, print_usage)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool has_source = !params.model.hf_repo.empty() || !params.model.url.empty() ||
|
||||||
|
!params.model.path.empty() || !params.model.docker_repo.empty();
|
||||||
|
if (!has_source) {
|
||||||
|
fprintf(stderr, "error: no model source specified (use --hf-repo, --model-url, --model or --docker-repo)\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
common_params_handle_models(params, LLAMA_EXAMPLE_DOWNLOAD, {});
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
fprintf(stderr, "error: %s\n", e.what());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!params.models_preset.empty()) {
|
||||||
|
// -hf pointed at a preset repo: print the preset path and stop
|
||||||
|
printf("%s\n", params.models_preset.c_str());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (params.model.path.empty()) {
|
||||||
|
fprintf(stderr, "error: model download failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!std::filesystem::exists(params.model.path)) {
|
||||||
|
fprintf(stderr, "error: model file does not exist: %s\n", params.model.path.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("%s\n", params.model.path.c_str());
|
||||||
|
if (!params.mmproj.path.empty()) {
|
||||||
|
printf("%s\n", params.mmproj.path.c_str());
|
||||||
|
}
|
||||||
|
if (!params.speculative.draft.mparams.path.empty()) {
|
||||||
|
printf("%s\n", params.speculative.draft.mparams.path.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@ -19,6 +19,7 @@ int llama_batched_bench(int argc, char ** argv);
|
|||||||
int llama_fit_params(int argc, char ** argv);
|
int llama_fit_params(int argc, char ** argv);
|
||||||
int llama_quantize(int argc, char ** argv);
|
int llama_quantize(int argc, char ** argv);
|
||||||
int llama_perplexity(int argc, char ** argv);
|
int llama_perplexity(int argc, char ** argv);
|
||||||
|
int llama_download(int argc, char ** argv);
|
||||||
|
|
||||||
// Self-update is only supported for binaries built with llama-install.sh
|
// Self-update is only supported for binaries built with llama-install.sh
|
||||||
static int llama_update(int argc, char ** argv) {
|
static int llama_update(int argc, char ** argv) {
|
||||||
@ -61,6 +62,7 @@ static const command cmds[] = {
|
|||||||
{"serve", "HTTP API server", {"server"}, false, llama_server },
|
{"serve", "HTTP API server", {"server"}, false, llama_server },
|
||||||
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
|
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
|
||||||
{"update", "Update llama to the latest release", {}, UPDATE_HIDDEN, llama_update },
|
{"update", "Update llama to the latest release", {}, UPDATE_HIDDEN, llama_update },
|
||||||
|
{"download", "Download a model", {"get"}, false, llama_download },
|
||||||
{"completion", "Text completion", {"complete"}, true, llama_completion },
|
{"completion", "Text completion", {"complete"}, true, llama_completion },
|
||||||
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
|
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
|
||||||
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},
|
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},
|
||||||
|
|||||||
@ -594,6 +594,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
|
|||||||
const bool skip_model_download =
|
const bool skip_model_download =
|
||||||
// server will call common_params_handle_models() later, so we skip it here
|
// server will call common_params_handle_models() later, so we skip it here
|
||||||
ctx_arg.ex == LLAMA_EXAMPLE_SERVER ||
|
ctx_arg.ex == LLAMA_EXAMPLE_SERVER ||
|
||||||
|
// download calls common_params_handle_models() itself and prints the paths
|
||||||
|
ctx_arg.ex == LLAMA_EXAMPLE_DOWNLOAD ||
|
||||||
// export_graph_ops loads only metadata
|
// export_graph_ops loads only metadata
|
||||||
ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
|
ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
|
||||||
|
|
||||||
@ -671,15 +673,19 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
|
|||||||
common_options.push_back(&opt);
|
common_options.push_back(&opt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("----- common params -----\n\n");
|
bool first = true;
|
||||||
print_options(common_options);
|
auto print_section = [&](const char * header, std::vector<common_arg *> & options) {
|
||||||
printf("\n\n----- sampling params -----\n\n");
|
if (options.empty()) {
|
||||||
print_options(sampling_options);
|
return;
|
||||||
printf("\n\n----- speculative params -----\n\n");
|
}
|
||||||
print_options(spec_options);
|
printf("%s----- %s -----\n\n", first ? "" : "\n\n", header);
|
||||||
// TODO: maybe convert enum llama_example to string
|
first = false;
|
||||||
printf("\n\n----- example-specific params -----\n\n");
|
print_options(options);
|
||||||
print_options(specific_options);
|
};
|
||||||
|
print_section("common params", common_options);
|
||||||
|
print_section("sampling params", sampling_options);
|
||||||
|
print_section("speculative params", spec_options);
|
||||||
|
print_section("example-specific params", specific_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void common_params_print_completion(common_params_context & ctx_arg) {
|
static void common_params_print_completion(common_params_context & ctx_arg) {
|
||||||
@ -1079,7 +1085,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
* - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
|
* - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
|
||||||
*/
|
*/
|
||||||
auto add_opt = [&](common_arg arg) {
|
auto add_opt = [&](common_arg arg) {
|
||||||
if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
|
// download only exposes the handful of args explicitly tagged for it
|
||||||
|
const bool inherit_common = ex != LLAMA_EXAMPLE_DOWNLOAD;
|
||||||
|
if ((arg.in_example(ex) || (inherit_common && arg.in_example(LLAMA_EXAMPLE_COMMON))) && !arg.is_exclude(ex)) {
|
||||||
ctx_arg.options.push_back(std::move(arg));
|
ctx_arg.options.push_back(std::move(arg));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -1090,7 +1098,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params) {
|
[](common_params & params) {
|
||||||
params.usage = true;
|
params.usage = true;
|
||||||
}
|
}
|
||||||
));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--version"},
|
{"--version"},
|
||||||
"show version and build info",
|
"show version and build info",
|
||||||
@ -2212,7 +2220,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params, bool value) {
|
[](common_params & params, bool value) {
|
||||||
params.no_mmproj = !value;
|
params.no_mmproj = !value;
|
||||||
}
|
}
|
||||||
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ_AUTO"));
|
).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MMPROJ_AUTO"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--mmproj-offload"},
|
{"--mmproj-offload"},
|
||||||
{"--no-mmproj-offload"},
|
{"--no-mmproj-offload"},
|
||||||
@ -2611,14 +2619,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.model.path = value;
|
params.model.path = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MODEL"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-mu", "--model-url"}, "MODEL_URL",
|
{"-mu", "--model-url"}, "MODEL_URL",
|
||||||
"model download url (default: unused)",
|
"model download url (default: unused)",
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.model.url = value;
|
params.model.url = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_MODEL_URL"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_MODEL_URL"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{ "-dr", "--docker-repo" }, "[<repo>/]<model>[:quant]",
|
{ "-dr", "--docker-repo" }, "[<repo>/]<model>[:quant]",
|
||||||
"Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.\n"
|
"Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.\n"
|
||||||
@ -2627,7 +2635,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.model.docker_repo = value;
|
params.model.docker_repo = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_DOCKER_REPO"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_DOCKER_REPO"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
|
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
|
||||||
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
|
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
|
||||||
@ -2637,14 +2645,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.model.hf_repo = value;
|
params.model.hf_repo = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_HF_REPO"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_HF_REPO"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-hff", "--hf-file"}, "FILE",
|
{"-hff", "--hf-file"}, "FILE",
|
||||||
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
|
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.model.hf_file = value;
|
params.model.hf_file = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_HF_FILE"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_HF_FILE"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
|
{"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
|
||||||
"Hugging Face model repository for the vocoder model (default: unused)",
|
"Hugging Face model repository for the vocoder model (default: unused)",
|
||||||
@ -2665,7 +2673,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
params.hf_token = value;
|
params.hf_token = value;
|
||||||
}
|
}
|
||||||
).set_env("HF_TOKEN"));
|
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("HF_TOKEN"));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{"--mtp"},
|
||||||
|
"also download the multi-token prediction (MTP) head, if available (default: unused)",
|
||||||
|
[](common_params & params) {
|
||||||
|
params.speculative.types.push_back(COMMON_SPECULATIVE_TYPE_DRAFT_MTP);
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_DOWNLOAD}));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--context-file"}, "FNAME",
|
{"--context-file"}, "FNAME",
|
||||||
"file to load context from (use comma-separated values to specify multiple files)",
|
"file to load context from (use comma-separated values to specify multiple files)",
|
||||||
|
|||||||
@ -96,6 +96,7 @@ enum llama_example {
|
|||||||
LLAMA_EXAMPLE_FIT_PARAMS,
|
LLAMA_EXAMPLE_FIT_PARAMS,
|
||||||
LLAMA_EXAMPLE_RESULTS,
|
LLAMA_EXAMPLE_RESULTS,
|
||||||
LLAMA_EXAMPLE_EXPORT_GRAPH_OPS,
|
LLAMA_EXAMPLE_EXPORT_GRAPH_OPS,
|
||||||
|
LLAMA_EXAMPLE_DOWNLOAD,
|
||||||
|
|
||||||
LLAMA_EXAMPLE_COUNT,
|
LLAMA_EXAMPLE_COUNT,
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user