mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
mtmd, arg: fix utf8 handling on windows (#24779)
* mtmd, arg: fix utf8 handling on windows
* also fix ggml_fopen
* fix build fail
* also fix CLI
This commit is contained in:
parent
175147e8f6
commit
e475fa2b5f
@ -17,6 +17,7 @@
|
|||||||
# define NOMINMAX
|
# define NOMINMAX
|
||||||
#endif
|
#endif
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
#include <shellapi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define JSON_ASSERT GGML_ASSERT
|
#define JSON_ASSERT GGML_ASSERT
|
||||||
@ -893,7 +894,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// Owns a UTF-8 copy of a command line in argv form.
//   buf  - the argument strings
//   ptrs - char* views into the strings of `buf`, suitable for use as `char ** argv`
//
// The type is move-only: `ptrs` stores raw pointers into `buf`, so a copy would
// carry pointers that still refer to the source object's strings. Moving a
// std::vector hands over its element storage, so the pointers stay valid.
struct utf8_argv {
    std::vector<std::string> buf;
    std::vector<char*> ptrs;

    utf8_argv() = default;

    utf8_argv(const utf8_argv &) = delete;
    utf8_argv & operator=(const utf8_argv &) = delete;

    utf8_argv(utf8_argv &&) = default;
    utf8_argv & operator=(utf8_argv &&) = default;
};
|
||||||
|
|
||||||
|
// Rebuild the process command line as UTF-8 arguments.
//
// The arguments are read from GetCommandLineW() and split with CommandLineToArgvW(),
// then each one is converted to UTF-8 with WideCharToMultiByte(CP_UTF8).
// An argument that cannot be converted is stored as an empty string so that the
// remaining arguments keep their positions.
//
// Returns:
//   - an object with empty `buf` and `ptrs` if the command line could not be split
//   - otherwise `buf` holds one string per argument and `ptrs` holds one pointer
//     per string followed by a terminating nullptr
static utf8_argv make_utf8_argv() {
    utf8_argv out;

    int wargc = 0;
    LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
    if (!wargv) {
        return out;
    }

    // free the CommandLineToArgvW block on every exit path, including when a
    // vector/string allocation below throws
    struct wargv_guard {
        LPWSTR * p;
        ~wargv_guard() { LocalFree(p); }
    } guard{wargv};

    if (wargc > 0) {
        out.buf.reserve(static_cast<size_t>(wargc));
    }

    for (int i = 0; i < wargc; ++i) {
        auto & s = out.buf.emplace_back();

        // size query; the result counts the terminating NUL because the input length is -1
        const int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
        if (n <= 0) {
            continue; // keep the empty placeholder
        }

        s.resize(static_cast<size_t>(n - 1));

        // the call writes n bytes: n - 1 characters plus a NUL that lands on the
        // string's own terminator
        const int written = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, s.data(), n, nullptr, nullptr);
        if (written <= 0) {
            s.clear(); // do not hand out a partially written argument
        }
    }

    out.ptrs.reserve(out.buf.size() + 1);
    for (auto & s : out.buf) {
        out.ptrs.push_back(s.data());
    }
    out.ptrs.push_back(nullptr);

    return out;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
auto utf8 = make_utf8_argv();
|
||||||
|
if (!utf8.ptrs.empty()) {
|
||||||
|
argc = static_cast<int>(utf8.buf.size());
|
||||||
|
argv = utf8.ptrs.data();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
|
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
|
||||||
const common_params params_org = ctx_arg.params; // the example can modify the default params
|
const common_params params_org = ctx_arg.params; // the example can modify the default params
|
||||||
|
|
||||||
|
|||||||
@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
|
|||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Open `fname` for reading with the given open mode.
//
// On Windows `fname` is converted from UTF-8 to UTF-16 before opening, so that
// paths with non-ASCII characters can be opened. On other platforms the name is
// passed to std::ifstream unchanged.
//
// The result is always returned by value; failure is reported through the stream
// state. If the UTF-8 -> UTF-16 conversion fails, the returned stream is not open
// and has failbit set, so a caller's `if (!stream)` check catches it the same way
// it catches a failed open. (A default-constructed std::ifstream would test as
// "good" and slip past that check.)
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
#ifdef _WIN32
    // length query; includes the terminating NUL because the input length is -1
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, nullptr, 0);

    std::vector<wchar_t> wfname(wlen > 0 ? static_cast<size_t>(wlen) : 0);

    if (wlen <= 0 || MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen) <= 0) {
        std::ifstream failed;
        failed.setstate(std::ios_base::failbit);
        return failed;
    }

    return std::ifstream(wfname.data(), mode);
#else
    return std::ifstream(fname, mode);
#endif
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// TTY utils
|
// TTY utils
|
||||||
//
|
//
|
||||||
|
|||||||
@ -842,6 +842,9 @@ struct common_file_info {
|
|||||||
};
|
};
|
||||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||||
|
|
||||||
|
// fs open, also handle UTF8 on Windows
|
||||||
|
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
|
||||||
|
|
||||||
//
|
//
|
||||||
// TTY utils
|
// TTY utils
|
||||||
//
|
//
|
||||||
|
|||||||
@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
|
|||||||
// convert fname (UTF-8)
|
// convert fname (UTF-8)
|
||||||
wchar_t * wfname = ggml_mbstowcs(fname);
|
wchar_t * wfname = ggml_mbstowcs(fname);
|
||||||
if (wfname) {
|
if (wfname) {
|
||||||
// convert mode (ANSI)
|
// convert mode (UTF-8)
|
||||||
wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
|
wchar_t * wmode = ggml_mbstowcs(mode);
|
||||||
wchar_t * wmode_p = wmode;
|
if (wmode) {
|
||||||
do {
|
// open file
|
||||||
*wmode_p++ = (wchar_t)*mode;
|
file = _wfopen(wfname, wmode);
|
||||||
} while (*mode++);
|
GGML_FREE(wmode);
|
||||||
|
}
|
||||||
// open file
|
|
||||||
file = _wfopen(wfname, wmode);
|
|
||||||
|
|
||||||
GGML_FREE(wfname);
|
GGML_FREE(wfname);
|
||||||
GGML_FREE(wmode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return file;
|
return file;
|
||||||
|
|||||||
@ -202,7 +202,7 @@ struct cli_context {
|
|||||||
|
|
||||||
// TODO: support remote files in the future (http, https, etc)
|
// TODO: support remote files in the future (http, https, etc)
|
||||||
std::string load_input_file(const std::string & fname, bool is_media) {
|
std::string load_input_file(const std::string & fname, bool is_media) {
|
||||||
std::ifstream file(fname, std::ios::binary);
|
std::ifstream file = fs_open_ifstream(fname, std::ios::binary);
|
||||||
if (!file) {
|
if (!file) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,6 +13,14 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#ifndef NOMINMAX
|
||||||
|
#define NOMINMAX
|
||||||
|
#endif
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
// Internal header for clip.cpp
|
// Internal header for clip.cpp
|
||||||
|
|
||||||
@ -661,6 +669,22 @@ struct clip_image_f32_batch {
|
|||||||
// common utils
|
// common utils
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// Open `fname` for binary reading.
//
// On Windows the name is converted from UTF-8 to UTF-16 before opening, so that
// paths with non-ASCII characters can be opened. On other platforms the name is
// passed to std::ifstream unchanged.
//
// Throws std::runtime_error if the UTF-8 -> UTF-16 conversion fails (Windows only).
// A failure to open the file itself is not thrown; it shows in the returned
// stream's state.
static std::ifstream open_ifstream_binary(const std::string & fname) {
#ifdef _WIN32
    // length query; includes the terminating NUL because the input length is -1
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, nullptr, 0);
    if (wlen <= 0) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }

    std::vector<wchar_t> wfname(static_cast<size_t>(wlen));
    if (MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen) <= 0) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }

    return std::ifstream(wfname.data(), std::ios::binary);
#else
    return std::ifstream(fname, std::ios::binary);
#endif
}
|
||||||
|
|
||||||
static std::string string_format(const char * fmt, ...) {
|
static std::string string_format(const char * fmt, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_list ap2;
|
va_list ap2;
|
||||||
|
|||||||
@ -1752,7 +1752,7 @@ struct clip_model_loader {
|
|||||||
std::map<std::string, size_t> tensor_offset;
|
std::map<std::string, size_t> tensor_offset;
|
||||||
std::vector<ggml_tensor *> tensors_to_load;
|
std::vector<ggml_tensor *> tensors_to_load;
|
||||||
|
|
||||||
auto fin = std::ifstream(fname, std::ios::binary);
|
auto fin = open_ifstream_binary(fname);
|
||||||
if (!fin) {
|
if (!fin) {
|
||||||
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
|
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -396,6 +396,9 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
|
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
|
||||||
|
|
||||||
|
console::init(params.simple_io, params.use_color);
|
||||||
|
atexit([]() { console::cleanup(); });
|
||||||
|
|
||||||
// Ctrl+C handling
|
// Ctrl+C handling
|
||||||
{
|
{
|
||||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||||
|
|||||||
@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
|
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
|
||||||
std::vector<unsigned char> buf;
|
#ifdef _WIN32
|
||||||
|
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
|
||||||
|
if (!wlen) {
|
||||||
|
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||||
|
return {nullptr, nullptr};
|
||||||
|
}
|
||||||
|
std::vector<wchar_t> wfname(wlen);
|
||||||
|
wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen);
|
||||||
|
if (!wlen) {
|
||||||
|
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||||
|
return {nullptr, nullptr};
|
||||||
|
}
|
||||||
|
FILE * f = _wfopen(wfname.data(), L"rb");
|
||||||
|
#else
|
||||||
FILE * f = fopen(fname, "rb");
|
FILE * f = fopen(fname, "rb");
|
||||||
|
#endif
|
||||||
if (!f) {
|
if (!f) {
|
||||||
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
|
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
|
||||||
return {nullptr, nullptr};
|
return {nullptr, nullptr};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<unsigned char> buf;
|
||||||
|
|
||||||
fseek(f, 0, SEEK_END);
|
fseek(f, 0, SEEK_END);
|
||||||
long file_size = ftell(f);
|
long file_size = ftell(f);
|
||||||
fseek(f, 0, SEEK_SET);
|
fseek(f, 0, SEEK_SET);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user