mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
allow user to use THP for host allocations with GGML_CUDA_HOST_MALLOC_THP (#2010)
* allow user to use THP for host allocations with GGML_CUDA_HOST_MALLOC_THP
* Remove useless symbol check
This commit is contained in:
parent
2d3ecd5e19
commit
7ccf1d2095
@ -1424,17 +1424,16 @@ static void * ggml_cuda_host_malloc(size_t size) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Whether to request the kernel to attempt to defragment memory to back the region with 2M hugepages.
|
// Whether to request the kernel to attempt to defragment memory to back the region with 2M hugepages.
|
||||||
// Otherwise dependent on kernel settings:
|
// Otherwise dependent on kernel settings:
|
||||||
// * enabled="always": Hand over whatever 2M pages it has on hand and the rest will be 4k
|
// * enabled="always": Hand over whatever 2M pages it has on hand and the rest will be 4k
|
||||||
// * enabled="madvise": 4k pages
|
// * enabled="madvise": 4k pages
|
||||||
// * enabled="never": 4k pages
|
// * enabled="never": 4k pages
|
||||||
// Potluck on performance. If there's not much defragmentation to do, then you win. Otherwise come back in an hour.
|
// Potluck on performance. If there's not much defragmentation to do, then you win. Otherwise come back in an hour.
|
||||||
#if 0
|
// Defaults to disabled unless GGML_CUDA_HOST_MALLOC_THP is set.
|
||||||
#ifdef MADV_HUGEPAGE
|
if (getenv("GGML_CUDA_HOST_MALLOC_THP") != nullptr) {
|
||||||
madvise(ptr, size, MADV_HUGEPAGE);
|
madvise(ptr, size, MADV_HUGEPAGE);
|
||||||
#endif
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// prefault the whole region. If the kernel knows how to do this then let it do so.
|
// prefault the whole region. If the kernel knows how to do this then let it do so.
|
||||||
// Might be worth spawning threads to speed up this process on huge allocations.
|
// Might be worth spawning threads to speed up this process on huge allocations.
|
||||||
@ -1442,8 +1441,7 @@ static void * ggml_cuda_host_malloc(size_t size) {
|
|||||||
#ifdef MADV_POPULATE_WRITE
|
#ifdef MADV_POPULATE_WRITE
|
||||||
needs_manual_prefault = madvise(ptr, size, MADV_POPULATE_WRITE);
|
needs_manual_prefault = madvise(ptr, size, MADV_POPULATE_WRITE);
|
||||||
#endif
|
#endif
|
||||||
if (needs_manual_prefault)
|
if (needs_manual_prefault) {
|
||||||
{
|
|
||||||
char * p = (char *) ptr;
|
char * p = (char *) ptr;
|
||||||
for (size_t off = 0; off < size; off += 4096) {
|
for (size_t off = 0; off < size; off += 4096) {
|
||||||
p[off] = 0;
|
p[off] = 0;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user