mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
bench : add --offline (#24511)
* bench : add --offline

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Add default

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
ac79caa7ce
commit
e3a74b2990
@ -323,6 +323,7 @@ struct cmd_params {
|
||||
std::vector<std::string> hf_repo;
|
||||
std::vector<std::string> hf_file;
|
||||
std::string hf_token;
|
||||
bool offline;
|
||||
std::vector<int> n_prompt;
|
||||
std::vector<int> n_gen;
|
||||
std::vector<std::pair<int, int>> n_pg;
|
||||
@ -367,6 +368,7 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* hf_repo */ {},
|
||||
/* hf_file */ {},
|
||||
/* hf_token */ "",
|
||||
/* offline */ false,
|
||||
/* n_prompt */ { 512 },
|
||||
/* n_gen */ { 128 },
|
||||
/* n_pg */ {},
|
||||
@ -437,6 +439,8 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" (default: unused)\n");
|
||||
printf(" -hft, --hf-token <token> Hugging Face access token\n");
|
||||
printf(" (default: value from HF_TOKEN environment variable)\n");
|
||||
printf(" --offline Offline mode: forces use of cache, prevents network access\n");
|
||||
printf(" (default: disabled)\n");
|
||||
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
||||
@ -558,6 +562,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
break;
|
||||
}
|
||||
params.hf_token = argv[i];
|
||||
} else if (arg == "--offline") {
|
||||
params.offline = true;
|
||||
} else if (arg == "-p" || arg == "--n-prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@ -1040,6 +1046,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
|
||||
common_download_opts opts;
|
||||
opts.bearer_token = params.hf_token;
|
||||
opts.offline = params.offline;
|
||||
auto download_result = common_download_model(model, opts);
|
||||
if (download_result.model_path.empty()) {
|
||||
fprintf(stderr, "error: failed to download model from HuggingFace\n");
|
||||
|
||||
@ -40,6 +40,7 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
required=True)
|
||||
parser.add_argument("--hf-repo", type=str, help="Hugging Face model repository", required=True)
|
||||
parser.add_argument("--hf-file", type=str, help="Hugging Face model file", required=True)
|
||||
parser.add_argument("--offline", action="store_true", default=False, help="Offline mode: forces use of cache, prevents network access")
|
||||
parser.add_argument("-ngl", "--n-gpu-layers", type=int, help="layers to the GPU for computation", required=True)
|
||||
parser.add_argument("--ctx-size", type=int, help="Set the size of the prompt context", required=True)
|
||||
parser.add_argument("--parallel", type=int, help="Set the number of slots for process requests", required=True)
|
||||
@ -268,6 +269,8 @@ def start_server_background(args):
|
||||
]
|
||||
server_args.extend(['--hf-repo', args.hf_repo])
|
||||
server_args.extend(['--hf-file', args.hf_file])
|
||||
if args.offline:
|
||||
server_args.append('--offline')
|
||||
server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
|
||||
server_args.extend(['--ctx-size', args.ctx_size])
|
||||
server_args.extend(['--parallel', args.parallel])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user