From 1d7ab2b94711b609c5694717238783b9eb80fe51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Thu, 21 May 2026 09:29:44 +0200 Subject: [PATCH] app : add batched-bench, fit-params, quantize & perplexity (#23459) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * app : add batched-bench, fit-params, quantize & perplexity Signed-off-by: Adrien Gallouët * Add missing main.cpp Signed-off-by: Adrien Gallouët * Add EOL Signed-off-by: Adrien Gallouët --------- Signed-off-by: Adrien Gallouët --- app/CMakeLists.txt | 11 ++++++++++- app/llama.cpp | 22 ++++++++++++++++------ tools/batched-bench/CMakeLists.txt | 16 ++++++++++++++-- tools/batched-bench/batched-bench.cpp | 5 ++++- tools/batched-bench/main.cpp | 5 +++++ tools/fit-params/CMakeLists.txt | 16 ++++++++++++++-- tools/fit-params/fit-params.cpp | 5 ++++- tools/fit-params/main.cpp | 5 +++++ tools/perplexity/CMakeLists.txt | 16 ++++++++++++++-- tools/perplexity/main.cpp | 5 +++++ tools/perplexity/perplexity.cpp | 5 ++++- tools/quantize/CMakeLists.txt | 17 ++++++++++++++--- tools/quantize/main.cpp | 5 +++++ tools/quantize/quantize.cpp | 5 ++++- 14 files changed, 118 insertions(+), 20 deletions(-) create mode 100644 tools/batched-bench/main.cpp create mode 100644 tools/fit-params/main.cpp create mode 100644 tools/perplexity/main.cpp create mode 100644 tools/quantize/main.cpp diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 2dddff9d40..6c53ce0e4e 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -3,7 +3,16 @@ set(TARGET llama-app) add_executable(${TARGET} llama.cpp) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama) -target_link_libraries(${TARGET} PRIVATE llama-server-impl llama-cli-impl llama-completion-impl llama-bench-impl) +target_link_libraries(${TARGET} PRIVATE + llama-server-impl + llama-cli-impl + llama-completion-impl + llama-bench-impl + llama-batched-bench-impl + llama-fit-params-impl + llama-quantize-impl + llama-perplexity-impl +) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/app/llama.cpp b/app/llama.cpp index 55aa8ca5ee..e149975d28 100644 --- a/app/llama.cpp +++ b/app/llama.cpp @@ -4,12 +4,18 @@ #include #include +// visible int llama_server(int argc, char ** argv); int llama_cli(int argc, char ** argv); // hidden int llama_completion(int argc, char ** argv); int llama_bench(int argc, char ** argv); +int llama_batched_bench(int argc, char ** argv); +int llama_fit_params(int argc, char ** argv); +int llama_quantize(int argc, char ** argv); +int llama_perplexity(int argc, char ** argv); + static int help(int argc, char ** argv); static int version(int argc, char ** argv); @@ -22,12 +28,16 @@ struct command { }; static const command cmds[] = { - {"serve", "HTTP API server", {"server"}, false, llama_server }, - {"cli", "Command-line interactive interface", {"client"}, false, llama_cli }, - {"completion", "Text completion", {"complete"}, true, llama_completion }, - {"bench", "Benchmarking tool", {}, true, llama_bench }, - {"version", "Show version", {}, true, version }, - {"help", "Show available commands", {}, true, help }, + {"serve", "HTTP API server", {"server"}, false, llama_server }, + {"cli", "Command-line interactive interface", {"client"}, false, llama_cli }, + {"completion", "Text completion", {"complete"}, true, llama_completion }, + {"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench }, + {"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench}, + {"fit-params", "Compute parameters to fit a model in device memory", {}, true, llama_fit_params }, + {"quantize", "Quantize a model", {}, true, llama_quantize }, + {"perplexity", "Compute model perplexity and KL divergence", {}, true, llama_perplexity }, + {"version", "Show version", {}, true, version }, + {"help", "Show available commands", {}, true, help }, }; static int version(int argc, char ** argv) { diff --git a/tools/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt index f9ffd2d4ce..1769c2136b 100644 --- a/tools/batched-bench/CMakeLists.txt +++ b/tools/batched-bench/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-batched-bench-impl: batched-bench logic, reusable by app + +set(TARGET llama-batched-bench-impl) + +add_library(${TARGET} STATIC batched-bench.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-batched-bench executable + set(TARGET llama-batched-bench) -add_executable(${TARGET} batched-bench.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-batched-bench-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp index 3964ef2595..e2dcd0b2e7 100644 --- a/tools/batched-bench/batched-bench.cpp +++ b/tools/batched-bench/batched-bench.cpp @@ -15,7 +15,10 @@ static void print_usage(int, char ** argv) { LOG("\n"); } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_batched_bench(int argc, char ** argv); + +int llama_batched_bench(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); common_params params; diff --git a/tools/batched-bench/main.cpp b/tools/batched-bench/main.cpp new file mode 100644 index 0000000000..958cfc5b31 --- /dev/null +++ b/tools/batched-bench/main.cpp @@ -0,0 +1,5 @@ +int llama_batched_bench(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_batched_bench(argc, argv); +} diff --git a/tools/fit-params/CMakeLists.txt b/tools/fit-params/CMakeLists.txt index 25c4096633..207caf2ced 100644 --- a/tools/fit-params/CMakeLists.txt +++ b/tools/fit-params/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-fit-params-impl: fit-params logic, reusable by app + +set(TARGET llama-fit-params-impl) + +add_library(${TARGET} STATIC fit-params.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-fit-params executable + set(TARGET llama-fit-params) -add_executable(${TARGET} fit-params.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-fit-params-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/fit-params/fit-params.cpp b/tools/fit-params/fit-params.cpp index 20a5ff1ebd..5d897bc466 100644 --- a/tools/fit-params/fit-params.cpp +++ b/tools/fit-params/fit-params.cpp @@ -12,7 +12,10 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_fit_params(int argc, char ** argv); + +int llama_fit_params(int argc, char ** argv) { common_params params; common_init(); diff --git a/tools/fit-params/main.cpp b/tools/fit-params/main.cpp new file mode 100644 index 0000000000..b7271d4756 --- /dev/null +++ b/tools/fit-params/main.cpp @@ -0,0 +1,5 @@ +int llama_fit_params(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_fit_params(argc, argv); +} diff --git a/tools/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt index 0c194ee7f0..44061d0a55 100644 --- a/tools/perplexity/CMakeLists.txt +++ b/tools/perplexity/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-perplexity-impl: perplexity logic, reusable by app + +set(TARGET llama-perplexity-impl) + +add_library(${TARGET} STATIC perplexity.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-perplexity executable + set(TARGET llama-perplexity) -add_executable(${TARGET} perplexity.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-perplexity-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/perplexity/main.cpp b/tools/perplexity/main.cpp new file mode 100644 index 0000000000..13a9940e9e --- /dev/null +++ b/tools/perplexity/main.cpp @@ -0,0 +1,5 @@ +int llama_perplexity(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_perplexity(argc, argv); +} diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index 75defd7c87..f66576eb40 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -2005,7 +2005,10 @@ static void kl_divergence(llama_context * ctx, const common_params & params) { LOG("Same top p: %6.3lf ± %5.3lf %%\n", 100.0*same_top_p, 100.0*sqrt(same_top_p*(1.0 - same_top_p)/(kld.count - 1))); } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_perplexity(int argc, char ** argv); + +int llama_perplexity(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); common_params params; diff --git a/tools/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt index 965adc0059..e76f7d8110 100644 --- a/tools/quantize/CMakeLists.txt +++ b/tools/quantize/CMakeLists.txt @@ -1,7 +1,18 @@ +# llama-quantize-impl: quantize logic, reusable by app + +set(TARGET llama-quantize-impl) + +add_library(${TARGET} STATIC quantize.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-quantize executable + set(TARGET llama-quantize) -add_executable(${TARGET} quantize.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) -target_include_directories(${TARGET} PRIVATE ../../common) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-quantize-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/quantize/main.cpp b/tools/quantize/main.cpp new file mode 100644 index 0000000000..fc247190c8 --- /dev/null +++ b/tools/quantize/main.cpp @@ -0,0 +1,5 @@ +int llama_quantize(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_quantize(argc, argv); +} diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 3d33d47d98..7292bda6f4 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -490,7 +490,10 @@ static bool parse_layer_prune(const char * data, std::vector & prune_layers return true; } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_quantize(int argc, char ** argv); + +int llama_quantize(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); if (argc < 3) { usage(argv[0]);