diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index cbc7618d..51693a09 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -1424,17 +1424,16 @@ static void * ggml_cuda_host_malloc(size_t size) {
         return nullptr;
     }
 
-// Whether to request the kernel to attempt to defragment memory to back the region with 2M hugepages.
-// Otherwise dependent on kernel settings:
-//   * enabled="always":  Hand over whatever 2M pages it has on hand and the rest will be 4k 
-//   * enabled="madvise": 4k pages
-//   * enabled="never":   4k pages
-// Potluck on performance. If there's not much defragmentation to do, then you win. Otherwise come back in an hour.
-#if 0
-#ifdef MADV_HUGEPAGE
-    madvise(ptr, size, MADV_HUGEPAGE);
-#endif
-#endif
+    // Optionally ask the kernel to back this region with 2M transparent hugepages (THP), defragmenting memory if needed.
+    // Without this request, the page size depends on the kernel's THP "enabled" setting:
+    //   * enabled="always":  hands over whatever 2M pages it has on hand; the rest will be 4k
+    //   * enabled="madvise": 4k pages
+    //   * enabled="never":   4k pages
+    // Performance is a gamble: with little defragmentation to do it is a win, otherwise the allocation can stall for a very long time.
+    // Disabled by default; enabled only when the GGML_CUDA_HOST_MALLOC_THP environment variable is set (to any value, even empty).
+    if (getenv("GGML_CUDA_HOST_MALLOC_THP") != nullptr) {
+        madvise(ptr, size, MADV_HUGEPAGE);
+    }
 
     // prefault the whole region. If the kernel knows how to do this then let it do so.
     // Might be worth spawning threads to speed up this process on huge allocations.
@@ -1442,8 +1441,7 @@ static void * ggml_cuda_host_malloc(size_t size) {
 #ifdef MADV_POPULATE_WRITE
     needs_manual_prefault = madvise(ptr, size, MADV_POPULATE_WRITE);
 #endif
-    if (needs_manual_prefault)
-    {
+    if (needs_manual_prefault) {
         char * p = (char *) ptr;
         for (size_t off = 0; off < size; off += 4096) {
             p[off] = 0;