app : introduce the llama unified executable (#23296)

* app : introduce the llama unified executable

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Use serve for server

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Hide completion and bench, add help command

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Remove STATIC

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Use -impl targets instead of -lib

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Revert "Remove STATIC"

This reverts commit cc44caccb9902b34a3531633edac911e5b3d65cd.

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2026-05-20 13:22:22 +02:00 committed by GitHub
parent e6b4acfe86
commit 29f1482221
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 184 additions and 30 deletions

View File

@ -104,12 +104,13 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
# extra artifacts
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_APP "llama: build the unified binary" OFF)
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)
# Backward compat: when old var is set but new one isn't, forward the value
if(DEFINED LLAMA_BUILD_WEBUI)
@ -120,8 +121,9 @@ if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
endif()
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
# 3rd party libs
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
@ -226,6 +228,10 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
add_subdirectory(tools)
endif()
# Unified `llama` executable (opt-in: LLAMA_BUILD_APP defaults to OFF).
# app/CMakeLists.txt links the llama-*-impl libraries, so it is added after
# the tools subdirectory above.
if (LLAMA_BUILD_APP)
add_subdirectory(app)
endif()
# Automatically add all files from the 'licenses' directory
file(GLOB EXTRA_LICENSES "${CMAKE_SOURCE_DIR}/licenses/LICENSE-*")

11
app/CMakeLists.txt Normal file
View File

@ -0,0 +1,11 @@
# Unified `llama` executable: one binary that dispatches to the server, cli,
# completion and bench entry points (see llama.cpp in this directory).
#
# The CMake target is called llama-app while the produced file is named
# `llama` through OUTPUT_NAME; the tool libraries already link a target
# named `llama`, so that name cannot be reused for the executable target.
set(TARGET llama-app)

# Libraries that provide llama_server(), llama_cli(), llama_completion() and
# llama_bench(). They are created by the tools subdirectories.
set(llama_app_impl_libs
    llama-server-impl
    llama-cli-impl
    llama-completion-impl
    llama-bench-impl
)

# Fail at configure time when a required library target does not exist.
# Without this check CMake treats the missing name as a plain library name
# and the problem only shows up later as an obscure link error.
foreach(impl_lib IN LISTS llama_app_impl_libs)
    if(NOT TARGET ${impl_lib})
        message(FATAL_ERROR
            "LLAMA_BUILD_APP requires the '${impl_lib}' target, which has not been defined. "
            "Make sure the tools that provide it are enabled "
            "(e.g. LLAMA_BUILD_COMMON and LLAMA_BUILD_TOOLS).")
    endif()
endforeach()

add_executable(${TARGET} llama.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)
target_link_libraries(${TARGET} PRIVATE ${llama_app_impl_libs})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# Installed together with the other tools.
if(LLAMA_TOOLS_INSTALL)
    install(TARGETS ${TARGET} RUNTIME)
endif()

67
app/llama.cpp Normal file
View File

@ -0,0 +1,67 @@
#include <cstdio>
#include <string>
#include <vector>
// Entry points of the individual tools. They are only declared here; the
// definitions come from the *-impl libraries this executable links against.
int llama_server(int argc, char ** argv);
int llama_cli(int argc, char ** argv);
// hidden commands: help() omits them unless `all` is requested
int llama_completion(int argc, char ** argv);
int llama_bench(int argc, char ** argv);
// Declared ahead of the command table because the table stores a pointer to
// it, while its definition iterates over the table.
static int help(int argc, char ** argv);
// One entry of the command dispatch table used by main() and help().
// Instances are aggregate-initialized, so the field order is significant.
struct command {
// primary command name, compared for equality with the first argument
const char * name;
// one-line description printed by help()
const char * desc;
// alternative spellings that select the same command
std::vector<std::string> aliases;
// when true, help() lists the command only for `help all`
bool hidden;
// handler; main() calls it with (argc - 1, argv + 1) and returns its result
int (*func)(int, char **);
};
// Command table, searched in declaration order by main().
// Hidden commands can still be invoked by name; help() prints them only when
// called as `help all`.
static const command cmds[] = {
    {"serve",      "HTTP API server",                    {"server"},       false, llama_server     },
    {"cli",        "Command-line interactive interface", {"client"},       false, llama_cli        },
    {"completion", "Text completion",                    {"complete"},     true,  llama_completion },
    {"bench",      "Benchmarking tool",                  {},               true,  llama_bench      },
    // "--help" and "-h" are accepted as aliases so that the conventional
    // flags print the command list instead of "unknown command".
    {"help",       "Show available commands",            {"--help", "-h"}, true,  help             },
};
static int help(int argc, char ** argv) {
const bool show_all = argc >= 2 && std::string(argv[1]) == "all";
printf("Usage: llama <command> [options]\n\nAvailable commands:\n");
for (const auto & cmd : cmds) {
if (show_all || !cmd.hidden) {
printf(" %-15s %s\n", cmd.name, cmd.desc);
}
}
printf("\nRun 'llama <command> --help' for command-specific usage.\n");
return 0;
}
// Returns true when `arg` is exactly the command's primary name or one of
// its aliases.
static bool matches(const std::string & arg, const command & cmd) {
    bool found = (arg == cmd.name);
    for (auto it = cmd.aliases.begin(); !found && it != cmd.aliases.end(); ++it) {
        found = (arg == *it);
    }
    return found;
}
int main(int argc, char ** argv) {
const std::string arg = argc >= 2 ? argv[1] : "help";
for (const auto & cmd : cmds) {
if (matches(arg, cmd)) {
return cmd.func(argc - 1, argv + 1);
}
}
fprintf(stderr, "error: unknown command '%s'\n", arg.c_str());
return 1;
}

View File

@ -1,9 +1,19 @@
set(TARGET llama-cli)
add_executable(${TARGET} cli.cpp)
target_link_libraries(${TARGET} PRIVATE server-context PUBLIC llama-common ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
# llama-cli-impl: CLI logic, reusable by app
include_directories(../server)
set(TARGET llama-cli-impl)
add_library(${TARGET} STATIC cli.cpp)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../server)
target_link_libraries(${TARGET} PUBLIC server-context llama-common ${CMAKE_THREAD_LIBS_INIT})
# llama-cli executable
set(TARGET llama-cli)
add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-cli-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)
if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)

View File

@ -342,7 +342,10 @@ static std::vector<std::pair<std::string, size_t>> auto_completion_callback(std:
static constexpr size_t FILE_GLOB_MAX_RESULTS = 100;
int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_cli(int argc, char ** argv);
int llama_cli(int argc, char ** argv) {
common_params params;
params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs

5
tools/cli/main.cpp Normal file
View File

@ -0,0 +1,5 @@
// Defined in cli.cpp, which is compiled into the llama-cli-impl library.
int llama_cli(int argc, char ** argv);
// Entry point of the standalone llama-cli executable: forwards the command
// line unchanged and returns llama_cli()'s result as the exit status.
int main(int argc, char ** argv) {
return llama_cli(argc, argv);
}

View File

@ -1,6 +1,18 @@
# llama-completion-impl: completion logic, reusable by app
set(TARGET llama-completion-impl)
add_library(${TARGET} STATIC completion.cpp)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# llama-completion executable
set(TARGET llama-completion)
add_executable(${TARGET} completion.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-completion-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)
if(LLAMA_TOOLS_INSTALL)

View File

@ -84,7 +84,10 @@ static void sigint_handler(int signo) {
}
#endif
int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_completion(int argc, char ** argv);
int llama_completion(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");
common_params params;

View File

@ -0,0 +1,5 @@
// Defined in completion.cpp, which is compiled into the llama-completion-impl
// library.
int llama_completion(int argc, char ** argv);
// Entry point of the standalone llama-completion executable: forwards the
// command line unchanged and returns llama_completion()'s result as the exit
// status.
int main(int argc, char ** argv) {
return llama_completion(argc, argv);
}

View File

@ -1,6 +1,18 @@
# llama-bench-impl: benchmark logic, reusable by app
set(TARGET llama-bench-impl)
add_library(${TARGET} STATIC llama-bench.cpp)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# llama-bench executable
set(TARGET llama-bench)
add_executable(${TARGET} llama-bench.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-bench-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)
if(LLAMA_TOOLS_INSTALL)

View File

@ -2136,7 +2136,10 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
GGML_ABORT("fatal error");
}
int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_bench(int argc, char ** argv);
int llama_bench(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");
// try to set locale for unicode characters in markdown
std::setlocale(LC_CTYPE, ".UTF-8");

View File

@ -0,0 +1,5 @@
// Defined in llama-bench.cpp, which is compiled into the llama-bench-impl
// library.
int llama_bench(int argc, char ** argv);
// Entry point of the standalone llama-bench executable: forwards the command
// line unchanged and returns llama_bench()'s result as the exit status.
int main(int argc, char ** argv) {
return llama_bench(argc, argv);
}

View File

@ -27,12 +27,11 @@ target_include_directories(${TARGET} PRIVATE ../mtmd)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common mtmd ${CMAKE_THREAD_LIBS_INIT})
# llama-server-impl: server logic, reusable by app
# llama-server executable
set(TARGET llama-server-impl)
set(TARGET llama-server)
set(TARGET_SRCS
add_library(${TARGET} STATIC
server.cpp
server-http.cpp
server-http.h
@ -40,11 +39,16 @@ set(TARGET_SRCS
server-models.h
)
add_executable(${TARGET} ${TARGET_SRCS})
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(${TARGET} PRIVATE ../mtmd ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC server-context llama-ui cpp-httplib ${CMAKE_THREAD_LIBS_INIT})
# llama-server executable
set(TARGET llama-server)
add_executable(${TARGET} main.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_include_directories(${TARGET} PRIVATE ../mtmd)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PRIVATE server-context llama-ui PUBLIC llama-common cpp-httplib ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(${TARGET} PRIVATE llama-server-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

5
tools/server/main.cpp Normal file
View File

@ -0,0 +1,5 @@
// Provided by the llama-server-impl library that this executable links.
int llama_server(int argc, char ** argv);
// Entry point of the standalone llama-server executable: forwards the command
// line unchanged and returns llama_server()'s result as the exit status.
int main(int argc, char ** argv) {
return llama_server(argc, argv);
}

View File

@ -71,7 +71,10 @@ static server_http_context::handler_t ex_wrapper(server_http_context::handler_t
};
}
int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_server(int argc, char ** argv);
int llama_server(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");
// own arguments required by this example