mtmd, arg: fix utf8 handling on windows (#24779)

* mtmd, arg: fix utf8 handling on windows

* also fix ggml_fopen

* fix build fail

* also fix CLI
This commit is contained in:
Xuan-Son Nguyen 2026-06-19 22:28:38 +02:00 committed by GitHub
parent 175147e8f6
commit e475fa2b5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 106 additions and 13 deletions

View File

@ -17,6 +17,7 @@
# define NOMINMAX # define NOMINMAX
#endif #endif
#include <windows.h> #include <windows.h>
#include <shellapi.h>
#endif #endif
#define JSON_ASSERT GGML_ASSERT #define JSON_ASSERT GGML_ASSERT
@ -893,7 +894,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
return true; return true;
} }
#ifdef _WIN32
// Owns a UTF-8 copy of an argument vector.
//
// `buf` holds the argument strings; `ptrs` holds one `char *` per entry of
// `buf` (pointing at that string's storage) and is meant to be handed to code
// expecting a classic `char ** argv`.
//
// Because `ptrs` aliases the storage of `buf`, a member-wise copy would leave
// the copy's `ptrs` pointing into the *source* object's strings. Copying is
// therefore disabled. Moving is allowed: moving a std::vector transfers its
// element buffer, so the addresses stored in `ptrs` stay valid.
struct utf8_argv {
    std::vector<std::string> buf;  // owning storage for each argument
    std::vector<char *>      ptrs; // non-owning views into `buf`

    utf8_argv() = default;
    utf8_argv(const utf8_argv &) = delete;
    utf8_argv & operator=(const utf8_argv &) = delete;
    utf8_argv(utf8_argv &&) noexcept = default;
    utf8_argv & operator=(utf8_argv &&) noexcept = default;
};
// Rebuilds the process argument vector as UTF-8 strings (Windows only).
//
// The wide command line is taken from GetCommandLineW() and split with
// CommandLineToArgvW(); each argument is then converted to UTF-8.
//
// Returns:
//   - an empty utf8_argv (both vectors empty) if the command line could not
//     be split;
//   - otherwise one string per argument in `buf`, and in `ptrs` one pointer
//     per string followed by a terminating nullptr.
// An argument that cannot be converted (e.g. it contains invalid UTF-16) is
// stored as an empty string so that argument positions are preserved.
static utf8_argv make_utf8_argv() {
    utf8_argv out;

    int wargc = 0;
    LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
    if (!wargv) {
        return out;
    }

    out.buf.reserve(static_cast<size_t>(wargc));
    for (int i = 0; i < wargc; ++i) {
        std::string & arg = out.buf.emplace_back();

        // First pass: query the required size in bytes, including the NUL.
        const int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
        if (n <= 0) {
            continue; // keep the empty placeholder
        }

        // Second pass: convert with the same flags as the sizing call and
        // check the result instead of discarding it.
        arg.resize(static_cast<size_t>(n));
        const int written = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, arg.data(), n, nullptr, nullptr);
        if (written <= 0) {
            arg.clear();
            continue;
        }
        // The byte count includes the terminating NUL; std::string keeps its own.
        arg.resize(static_cast<size_t>(written - 1));
    }

    // CommandLineToArgvW allocates one block that must be released with LocalFree.
    LocalFree(wargv);

    // Build the pointer table only after `buf` is complete, so no later
    // reallocation of `buf` can invalidate the stored addresses.
    out.ptrs.reserve(out.buf.size() + 1);
    for (auto & arg : out.buf) {
        out.ptrs.push_back(arg.data());
    }
    out.ptrs.push_back(nullptr);
    return out;
}
#endif
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) { bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
#ifdef _WIN32
auto utf8 = make_utf8_argv();
if (!utf8.ptrs.empty()) {
argc = static_cast<int>(utf8.buf.size());
argv = utf8.ptrs.data();
}
#endif
auto ctx_arg = common_params_parser_init(params, ex, print_usage); auto ctx_arg = common_params_parser_init(params, ex, print_usage);
const common_params params_org = ctx_arg.params; // the example can modify the default params const common_params params_org = ctx_arg.params; // the example can modify the default params

View File

@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
return files; return files;
} }
// Opens `fname` for reading with the given open mode and returns the stream.
//
// On Windows, `fname` is interpreted as UTF-8 and converted to UTF-16 so that
// non-ASCII paths can be opened; elsewhere the name is passed through as-is.
//
// The returned stream is in a failed state (`!stream` is true) whenever the
// file could not be opened, including when the filename could not be
// converted on Windows. A default-constructed std::ifstream reports a good
// state, so it must not be used to signal failure to callers that test the
// stream with `if (!file)`.
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
#ifdef _WIN32
    std::ifstream file;

    // First call sizes the buffer (in wchar_t units, including the NUL).
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
    if (wlen <= 0) {
        file.setstate(std::ios_base::failbit);
        return file;
    }

    std::vector<wchar_t> wfname(static_cast<size_t>(wlen));
    if (MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen) <= 0) {
        file.setstate(std::ios_base::failbit);
        return file;
    }

    file.open(wfname.data(), mode);
    return file;
#else
    return std::ifstream(fname, mode);
#endif
}
// //
// TTY utils // TTY utils
// //

View File

@ -842,6 +842,9 @@ struct common_file_info {
}; };
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories); std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
// Open a file for reading and return the input stream by value.
// On Windows the filename is treated as UTF-8 and converted to UTF-16 before
// opening, so paths with non-ASCII characters work; on other platforms the
// name is used unchanged.
// The returned stream is not open if the file could not be opened.
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
// //
// TTY utils // TTY utils
// //

View File

@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
// convert fname (UTF-8) // convert fname (UTF-8)
wchar_t * wfname = ggml_mbstowcs(fname); wchar_t * wfname = ggml_mbstowcs(fname);
if (wfname) { if (wfname) {
// convert mode (ANSI) // convert mode (UTF-8)
wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t)); wchar_t * wmode = ggml_mbstowcs(mode);
wchar_t * wmode_p = wmode; if (wmode) {
do { // open file
*wmode_p++ = (wchar_t)*mode; file = _wfopen(wfname, wmode);
} while (*mode++); GGML_FREE(wmode);
}
// open file
file = _wfopen(wfname, wmode);
GGML_FREE(wfname); GGML_FREE(wfname);
GGML_FREE(wmode);
} }
return file; return file;

View File

@ -202,7 +202,7 @@ struct cli_context {
// TODO: support remote files in the future (http, https, etc) // TODO: support remote files in the future (http, https, etc)
std::string load_input_file(const std::string & fname, bool is_media) { std::string load_input_file(const std::string & fname, bool is_media) {
std::ifstream file(fname, std::ios::binary); std::ifstream file = fs_open_ifstream(fname, std::ios::binary);
if (!file) { if (!file) {
return ""; return "";
} }

View File

@ -13,6 +13,14 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <fstream>
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
// Internal header for clip.cpp // Internal header for clip.cpp
@ -661,6 +669,22 @@ struct clip_image_f32_batch {
// common utils // common utils
// //
// Opens `fname` for binary reading and returns the stream.
//
// On Windows, `fname` is interpreted as UTF-8 and converted to UTF-16 before
// opening so that non-ASCII paths work; elsewhere it is passed through as-is.
//
// Throws std::runtime_error if the filename cannot be converted (Windows
// only). A file that simply cannot be opened is NOT an exception: the
// returned stream is in a failed state and the caller is expected to test it.
static std::ifstream open_ifstream_binary(const std::string & fname) {
#ifdef _WIN32
    // First call sizes the buffer (in wchar_t units, including the NUL).
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
    if (wlen <= 0) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }

    std::vector<wchar_t> wfname(static_cast<size_t>(wlen));
    // Check the actual conversion as well instead of discarding its result.
    if (MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen) <= 0) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }
    return std::ifstream(wfname.data(), std::ios::binary);
#else
    return std::ifstream(fname, std::ios::binary);
#endif
}
static std::string string_format(const char * fmt, ...) { static std::string string_format(const char * fmt, ...) {
va_list ap; va_list ap;
va_list ap2; va_list ap2;

View File

@ -1752,7 +1752,7 @@ struct clip_model_loader {
std::map<std::string, size_t> tensor_offset; std::map<std::string, size_t> tensor_offset;
std::vector<ggml_tensor *> tensors_to_load; std::vector<ggml_tensor *> tensors_to_load;
auto fin = std::ifstream(fname, std::ios::binary); auto fin = open_ifstream_binary(fname);
if (!fin) { if (!fin) {
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str())); throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
} }

View File

@ -396,6 +396,9 @@ int main(int argc, char ** argv) {
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict; int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
console::init(params.simple_io, params.use_color);
atexit([]() { console::cleanup(); });
// Ctrl+C handling // Ctrl+C handling
{ {
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))

View File

@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx,
} }
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) { mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
std::vector<unsigned char> buf; #ifdef _WIN32
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
if (!wlen) {
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
return {nullptr, nullptr};
}
std::vector<wchar_t> wfname(wlen);
wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen);
if (!wlen) {
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
return {nullptr, nullptr};
}
FILE * f = _wfopen(wfname.data(), L"rb");
#else
FILE * f = fopen(fname, "rb"); FILE * f = fopen(fname, "rb");
#endif
if (!f) { if (!f) {
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno)); LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
return {nullptr, nullptr}; return {nullptr, nullptr};
} }
std::vector<unsigned char> buf;
fseek(f, 0, SEEK_END); fseek(f, 0, SEEK_END);
long file_size = ftell(f); long file_size = ftell(f);
fseek(f, 0, SEEK_SET); fseek(f, 0, SEEK_SET);