mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
mtmd, arg: fix utf8 handling on windows (#24779)
* mtmd, arg: fix utf8 handling on windows * also fix ggml_fopen * fix build fail * also fix CLI
This commit is contained in:
parent
175147e8f6
commit
e475fa2b5f
@ -17,6 +17,7 @@
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <shellapi.h>
|
||||
#endif
|
||||
|
||||
#define JSON_ASSERT GGML_ASSERT
|
||||
@ -893,7 +894,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
struct utf8_argv {
|
||||
std::vector<std::string> buf;
|
||||
std::vector<char*> ptrs;
|
||||
};
|
||||
|
||||
static utf8_argv make_utf8_argv() {
|
||||
utf8_argv out;
|
||||
int wargc = 0;
|
||||
LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
|
||||
if (!wargv) return out;
|
||||
|
||||
out.buf.reserve(wargc);
|
||||
for (int i = 0; i < wargc; ++i) {
|
||||
int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
|
||||
if (n <= 0) { out.buf.emplace_back(); continue; }
|
||||
auto& s = out.buf.emplace_back();
|
||||
s.resize(static_cast<size_t>(n - 1));
|
||||
(void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr);
|
||||
}
|
||||
LocalFree(wargv);
|
||||
|
||||
out.ptrs.reserve(out.buf.size() + 1);
|
||||
for (auto& s : out.buf) out.ptrs.push_back(s.data());
|
||||
out.ptrs.push_back(nullptr);
|
||||
return out;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
||||
#ifdef _WIN32
|
||||
auto utf8 = make_utf8_argv();
|
||||
if (!utf8.ptrs.empty()) {
|
||||
argc = static_cast<int>(utf8.buf.size());
|
||||
argv = utf8.ptrs.data();
|
||||
}
|
||||
#endif
|
||||
|
||||
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
|
||||
const common_params params_org = ctx_arg.params; // the example can modify the default params
|
||||
|
||||
|
||||
@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
|
||||
return files;
|
||||
}
|
||||
|
||||
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
|
||||
#ifdef _WIN32
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
|
||||
if (!wlen) { return std::ifstream(); }
|
||||
std::vector<wchar_t> wfname(wlen);
|
||||
(void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen);
|
||||
return std::ifstream(wfname.data(), mode);
|
||||
#else
|
||||
return std::ifstream(fname, mode);
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
// TTY utils
|
||||
//
|
||||
|
||||
@ -842,6 +842,9 @@ struct common_file_info {
|
||||
};
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||
|
||||
// fs open, also handle UTF8 on Windows
|
||||
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
|
||||
|
||||
//
|
||||
// TTY utils
|
||||
//
|
||||
|
||||
@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
|
||||
// convert fname (UTF-8)
|
||||
wchar_t * wfname = ggml_mbstowcs(fname);
|
||||
if (wfname) {
|
||||
// convert mode (ANSI)
|
||||
wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
|
||||
wchar_t * wmode_p = wmode;
|
||||
do {
|
||||
*wmode_p++ = (wchar_t)*mode;
|
||||
} while (*mode++);
|
||||
|
||||
// open file
|
||||
file = _wfopen(wfname, wmode);
|
||||
// convert mode (UTF-8)
|
||||
wchar_t * wmode = ggml_mbstowcs(mode);
|
||||
if (wmode) {
|
||||
// open file
|
||||
file = _wfopen(wfname, wmode);
|
||||
GGML_FREE(wmode);
|
||||
}
|
||||
|
||||
GGML_FREE(wfname);
|
||||
GGML_FREE(wmode);
|
||||
}
|
||||
|
||||
return file;
|
||||
|
||||
@ -202,7 +202,7 @@ struct cli_context {
|
||||
|
||||
// TODO: support remote files in the future (http, https, etc)
|
||||
std::string load_input_file(const std::string & fname, bool is_media) {
|
||||
std::ifstream file(fname, std::ios::binary);
|
||||
std::ifstream file = fs_open_ifstream(fname, std::ios::binary);
|
||||
if (!file) {
|
||||
return "";
|
||||
}
|
||||
|
||||
@ -13,6 +13,14 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Internal header for clip.cpp
|
||||
|
||||
@ -661,6 +669,22 @@ struct clip_image_f32_batch {
|
||||
// common utils
|
||||
//
|
||||
|
||||
#ifdef _WIN32
|
||||
static std::ifstream open_ifstream_binary(const std::string & fname) {
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
|
||||
if (!wlen) {
|
||||
throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
|
||||
}
|
||||
std::vector<wchar_t> wfname(wlen);
|
||||
(void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen);
|
||||
return std::ifstream(wfname.data(), std::ios::binary);
|
||||
}
|
||||
#else
|
||||
static std::ifstream open_ifstream_binary(const std::string & fname) {
|
||||
return std::ifstream(fname, std::ios::binary);
|
||||
}
|
||||
#endif
|
||||
|
||||
static std::string string_format(const char * fmt, ...) {
|
||||
va_list ap;
|
||||
va_list ap2;
|
||||
|
||||
@ -1752,7 +1752,7 @@ struct clip_model_loader {
|
||||
std::map<std::string, size_t> tensor_offset;
|
||||
std::vector<ggml_tensor *> tensors_to_load;
|
||||
|
||||
auto fin = std::ifstream(fname, std::ios::binary);
|
||||
auto fin = open_ifstream_binary(fname);
|
||||
if (!fin) {
|
||||
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
|
||||
}
|
||||
|
||||
@ -396,6 +396,9 @@ int main(int argc, char ** argv) {
|
||||
|
||||
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
|
||||
|
||||
console::init(params.simple_io, params.use_color);
|
||||
atexit([]() { console::cleanup(); });
|
||||
|
||||
// Ctrl+C handling
|
||||
{
|
||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
|
||||
@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx,
|
||||
}
|
||||
|
||||
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
|
||||
std::vector<unsigned char> buf;
|
||||
#ifdef _WIN32
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
|
||||
if (!wlen) {
|
||||
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
std::vector<wchar_t> wfname(wlen);
|
||||
wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen);
|
||||
if (!wlen) {
|
||||
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
FILE * f = _wfopen(wfname.data(), L"rb");
|
||||
#else
|
||||
FILE * f = fopen(fname, "rb");
|
||||
#endif
|
||||
if (!f) {
|
||||
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
|
||||
std::vector<unsigned char> buf;
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
long file_size = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user