mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
app : add batched-bench, fit-params, quantize & perplexity (#23459)
* app : add batched-bench, fit-params, quantize & perplexity Signed-off-by: Adrien Gallouët <angt@huggingface.co> * Add missing main.cpp Signed-off-by: Adrien Gallouët <angt@huggingface.co> * Add EOL Signed-off-by: Adrien Gallouët <angt@huggingface.co> --------- Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
12e5d99078
commit
1d7ab2b947
@ -3,7 +3,16 @@ set(TARGET llama-app)
|
|||||||
add_executable(${TARGET} llama.cpp)
|
add_executable(${TARGET} llama.cpp)
|
||||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
|
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE llama-server-impl llama-cli-impl llama-completion-impl llama-bench-impl)
|
target_link_libraries(${TARGET} PRIVATE
|
||||||
|
llama-server-impl
|
||||||
|
llama-cli-impl
|
||||||
|
llama-completion-impl
|
||||||
|
llama-bench-impl
|
||||||
|
llama-batched-bench-impl
|
||||||
|
llama-fit-params-impl
|
||||||
|
llama-quantize-impl
|
||||||
|
llama-perplexity-impl
|
||||||
|
)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
|
|
||||||
if(LLAMA_TOOLS_INSTALL)
|
if(LLAMA_TOOLS_INSTALL)
|
||||||
|
|||||||
@ -4,12 +4,18 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
// visible
|
||||||
int llama_server(int argc, char ** argv);
|
int llama_server(int argc, char ** argv);
|
||||||
int llama_cli(int argc, char ** argv);
|
int llama_cli(int argc, char ** argv);
|
||||||
|
|
||||||
// hidden
|
// hidden
|
||||||
int llama_completion(int argc, char ** argv);
|
int llama_completion(int argc, char ** argv);
|
||||||
int llama_bench(int argc, char ** argv);
|
int llama_bench(int argc, char ** argv);
|
||||||
|
int llama_batched_bench(int argc, char ** argv);
|
||||||
|
int llama_fit_params(int argc, char ** argv);
|
||||||
|
int llama_quantize(int argc, char ** argv);
|
||||||
|
int llama_perplexity(int argc, char ** argv);
|
||||||
|
|
||||||
static int help(int argc, char ** argv);
|
static int help(int argc, char ** argv);
|
||||||
static int version(int argc, char ** argv);
|
static int version(int argc, char ** argv);
|
||||||
|
|
||||||
@ -22,12 +28,16 @@ struct command {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const command cmds[] = {
|
static const command cmds[] = {
|
||||||
{"serve", "HTTP API server", {"server"}, false, llama_server },
|
{"serve", "HTTP API server", {"server"}, false, llama_server },
|
||||||
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
|
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
|
||||||
{"completion", "Text completion", {"complete"}, true, llama_completion },
|
{"completion", "Text completion", {"complete"}, true, llama_completion },
|
||||||
{"bench", "Benchmarking tool", {}, true, llama_bench },
|
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
|
||||||
{"version", "Show version", {}, true, version },
|
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},
|
||||||
{"help", "Show available commands", {}, true, help },
|
{"fit-params", "Compute parameters to fit a model in device memory", {}, true, llama_fit_params },
|
||||||
|
{"quantize", "Quantize a model", {}, true, llama_quantize },
|
||||||
|
{"perplexity", "Compute model perplexity and KL divergence", {}, true, llama_perplexity },
|
||||||
|
{"version", "Show version", {}, true, version },
|
||||||
|
{"help", "Show available commands", {}, true, help },
|
||||||
};
|
};
|
||||||
|
|
||||||
static int version(int argc, char ** argv) {
|
static int version(int argc, char ** argv) {
|
||||||
|
|||||||
@ -1,6 +1,18 @@
|
|||||||
|
# llama-batched-bench-impl: batched-bench logic, reusable by app
|
||||||
|
|
||||||
|
set(TARGET llama-batched-bench-impl)
|
||||||
|
|
||||||
|
add_library(${TARGET} STATIC batched-bench.cpp)
|
||||||
|
|
||||||
|
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
|
# llama-batched-bench executable
|
||||||
|
|
||||||
set(TARGET llama-batched-bench)
|
set(TARGET llama-batched-bench)
|
||||||
add_executable(${TARGET} batched-bench.cpp)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
add_executable(${TARGET} main.cpp)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE llama-batched-bench-impl)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
|
|
||||||
if(LLAMA_TOOLS_INSTALL)
|
if(LLAMA_TOOLS_INSTALL)
|
||||||
|
|||||||
@ -15,7 +15,10 @@ static void print_usage(int, char ** argv) {
|
|||||||
LOG("\n");
|
LOG("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
// satisfies -Wmissing-declarations
|
||||||
|
int llama_batched_bench(int argc, char ** argv);
|
||||||
|
|
||||||
|
int llama_batched_bench(int argc, char ** argv) {
|
||||||
std::setlocale(LC_NUMERIC, "C");
|
std::setlocale(LC_NUMERIC, "C");
|
||||||
|
|
||||||
common_params params;
|
common_params params;
|
||||||
|
|||||||
5
tools/batched-bench/main.cpp
Normal file
5
tools/batched-bench/main.cpp
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
int llama_batched_bench(int argc, char ** argv);
|
||||||
|
|
||||||
|
int main(int argc, char ** argv) {
|
||||||
|
return llama_batched_bench(argc, argv);
|
||||||
|
}
|
||||||
@ -1,6 +1,18 @@
|
|||||||
|
# llama-fit-params-impl: fit-params logic, reusable by app
|
||||||
|
|
||||||
|
set(TARGET llama-fit-params-impl)
|
||||||
|
|
||||||
|
add_library(${TARGET} STATIC fit-params.cpp)
|
||||||
|
|
||||||
|
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
|
# llama-fit-params executable
|
||||||
|
|
||||||
set(TARGET llama-fit-params)
|
set(TARGET llama-fit-params)
|
||||||
add_executable(${TARGET} fit-params.cpp)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
add_executable(${TARGET} main.cpp)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE llama-fit-params-impl)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
|
|
||||||
if(LLAMA_TOOLS_INSTALL)
|
if(LLAMA_TOOLS_INSTALL)
|
||||||
|
|||||||
@ -12,7 +12,10 @@
|
|||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
// satisfies -Wmissing-declarations
|
||||||
|
int llama_fit_params(int argc, char ** argv);
|
||||||
|
|
||||||
|
int llama_fit_params(int argc, char ** argv) {
|
||||||
common_params params;
|
common_params params;
|
||||||
|
|
||||||
common_init();
|
common_init();
|
||||||
|
|||||||
5
tools/fit-params/main.cpp
Normal file
5
tools/fit-params/main.cpp
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
int llama_fit_params(int argc, char ** argv);
|
||||||
|
|
||||||
|
int main(int argc, char ** argv) {
|
||||||
|
return llama_fit_params(argc, argv);
|
||||||
|
}
|
||||||
@ -1,6 +1,18 @@
|
|||||||
|
# llama-perplexity-impl: perplexity logic, reusable by app
|
||||||
|
|
||||||
|
set(TARGET llama-perplexity-impl)
|
||||||
|
|
||||||
|
add_library(${TARGET} STATIC perplexity.cpp)
|
||||||
|
|
||||||
|
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
|
# llama-perplexity executable
|
||||||
|
|
||||||
set(TARGET llama-perplexity)
|
set(TARGET llama-perplexity)
|
||||||
add_executable(${TARGET} perplexity.cpp)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
add_executable(${TARGET} main.cpp)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE llama-perplexity-impl)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
|
|
||||||
if(LLAMA_TOOLS_INSTALL)
|
if(LLAMA_TOOLS_INSTALL)
|
||||||
|
|||||||
5
tools/perplexity/main.cpp
Normal file
5
tools/perplexity/main.cpp
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
int llama_perplexity(int argc, char ** argv);
|
||||||
|
|
||||||
|
int main(int argc, char ** argv) {
|
||||||
|
return llama_perplexity(argc, argv);
|
||||||
|
}
|
||||||
@ -2005,7 +2005,10 @@ static void kl_divergence(llama_context * ctx, const common_params & params) {
|
|||||||
LOG("Same top p: %6.3lf ± %5.3lf %%\n", 100.0*same_top_p, 100.0*sqrt(same_top_p*(1.0 - same_top_p)/(kld.count - 1)));
|
LOG("Same top p: %6.3lf ± %5.3lf %%\n", 100.0*same_top_p, 100.0*sqrt(same_top_p*(1.0 - same_top_p)/(kld.count - 1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
// satisfies -Wmissing-declarations
|
||||||
|
int llama_perplexity(int argc, char ** argv);
|
||||||
|
|
||||||
|
int llama_perplexity(int argc, char ** argv) {
|
||||||
std::setlocale(LC_NUMERIC, "C");
|
std::setlocale(LC_NUMERIC, "C");
|
||||||
|
|
||||||
common_params params;
|
common_params params;
|
||||||
|
|||||||
@ -1,7 +1,18 @@
|
|||||||
|
# llama-quantize-impl: quantize logic, reusable by app
|
||||||
|
|
||||||
|
set(TARGET llama-quantize-impl)
|
||||||
|
|
||||||
|
add_library(${TARGET} STATIC quantize.cpp)
|
||||||
|
|
||||||
|
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
|
# llama-quantize executable
|
||||||
|
|
||||||
set(TARGET llama-quantize)
|
set(TARGET llama-quantize)
|
||||||
add_executable(${TARGET} quantize.cpp)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
|
add_executable(${TARGET} main.cpp)
|
||||||
target_include_directories(${TARGET} PRIVATE ../../common)
|
target_link_libraries(${TARGET} PRIVATE llama-quantize-impl)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
|
|
||||||
if(LLAMA_TOOLS_INSTALL)
|
if(LLAMA_TOOLS_INSTALL)
|
||||||
|
|||||||
5
tools/quantize/main.cpp
Normal file
5
tools/quantize/main.cpp
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
int llama_quantize(int argc, char ** argv);
|
||||||
|
|
||||||
|
int main(int argc, char ** argv) {
|
||||||
|
return llama_quantize(argc, argv);
|
||||||
|
}
|
||||||
@ -490,7 +490,10 @@ static bool parse_layer_prune(const char * data, std::vector<int> & prune_layers
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
// satisfies -Wmissing-declarations
|
||||||
|
int llama_quantize(int argc, char ** argv);
|
||||||
|
|
||||||
|
int llama_quantize(int argc, char ** argv) {
|
||||||
std::setlocale(LC_NUMERIC, "C");
|
std::setlocale(LC_NUMERIC, "C");
|
||||||
if (argc < 3) {
|
if (argc < 3) {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user