From 12d2a7c57384997217ca7df25d7c46f00297c41e Mon Sep 17 00:00:00 2001
From: Jared Delony
Date: Fri, 8 May 2026 11:03:41 -0500
Subject: [PATCH] Added gemma4 to spark

Add spark/gemma-4-26B-A4B-mxfp4.sh, which creates a llama-server
container for the Gemma 4 26B-A4B MXFP4 GGUF, and
create_new_vllm_image.sh, which builds a dated vLLM image and records
its tag in vllm_env.sh (now git-ignored). Rename
Qwen3.6=27B-Uncensored.sh to Qwen3.6-27B-Uncensored.sh.
---
 .gitignore                                    |  1 +
 ...Uncensored.sh => Qwen3.6-27B-Uncensored.sh |  0
 create_new_vllm_image.sh                      | 29 ++++++++++++++++++
 spark/gemma-4-26B-A4B-mxfp4.sh                | 37 +++++++++++++++++++++++
 4 files changed, 67 insertions(+)
 create mode 100644 .gitignore
 rename Qwen3.6=27B-Uncensored.sh => Qwen3.6-27B-Uncensored.sh (100%)
 create mode 100755 create_new_vllm_image.sh
 create mode 100755 spark/gemma-4-26B-A4B-mxfp4.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7e47654
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+vllm_env.sh
diff --git a/Qwen3.6=27B-Uncensored.sh b/Qwen3.6-27B-Uncensored.sh
similarity index 100%
rename from Qwen3.6=27B-Uncensored.sh
rename to Qwen3.6-27B-Uncensored.sh
diff --git a/create_new_vllm_image.sh b/create_new_vllm_image.sh
new file mode 100755
--- /dev/null
+++ b/create_new_vllm_image.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Build a dated vLLM image that runs as an unprivileged "vllm" user
+# (uid/gid 1000) and record its tag in vllm_env.sh for other scripts.
+# Usage: ./create_new_vllm_image.sh   (writes vllm_env.sh to the current dir)
+set -euo pipefail
+
+export BASE_IMAGE="vllm/vllm-openai:v0.20.1-cu129"
+# Assign before exporting so a failing date(1) is not masked by export.
+DATE=$(date +%Y-%m-%d)
+export DATE
+export IMG_PRE="vllm-dw"
+export IMG_NAME="$IMG_PRE:$DATE"
+
+# Feed the Dockerfile on stdin: nothing is copied from a build context, and
+# this avoids overwriting/deleting a Dockerfile that already exists here.
+docker build -t "$IMG_NAME" - << EOF
+FROM $BASE_IMAGE
+RUN groupadd -g 1000 vllm && useradd -u 1000 -g 1000 -m -s /bin/bash vllm
+WORKDIR /home/vllm
+USER vllm
+EOF
+
+# Only reached when the build succeeded (set -e), so vllm_env.sh never
+# points at an image that does not exist.
+cat > vllm_env.sh << EOF
+#!/bin/bash
+#updated from create_new_vllm_image.sh
+export VLLM_IMAGE="$IMG_NAME"
+EOF
diff --git a/spark/gemma-4-26B-A4B-mxfp4.sh b/spark/gemma-4-26B-A4B-mxfp4.sh
new file mode 100755
--- /dev/null
+++ b/spark/gemma-4-26B-A4B-mxfp4.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Create (but do not start) a llama-server container serving the Gemma 4
+# 26B-A4B MXFP4 GGUF with its multimodal projector on port 10987.
+# Requires ../env.sh (relative to this script) to define IK_IMAGE.
+set -euo pipefail
+
+MODEL_NAME="Gemma-4-26B-A4B-it-mxfp4"
+CONT_NAME="llama-$MODEL_NAME"
+PORT=10987
+MODEL_FOLDER="/home/jared/.cache/huggingface/hub/models--unsloth--gemma-4-26B-A4B-it-GGUF"
+MODEL="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/gemma-4-26B-A4B-it-MXFP4_MOE.gguf"
+MODEL_MM="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/mmproj-F16.gguf"
+
+#grab container image name; resolve env.sh from the script location so the
+#script works from any working directory
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=/dev/null
+source "$SCRIPT_DIR/../env.sh"
+: "${IK_IMAGE:?IK_IMAGE must be set by env.sh}"
+echo "$IK_IMAGE"
+
+docker container create --name "$CONT_NAME" --network llms \
+    -p "$PORT:$PORT" -v "$MODEL_FOLDER:/model" \
+    --user "$(id -u):$(id -g)" --gpus=all --restart on-failure:3 \
+    --health-cmd "curl -f http://$CONT_NAME:$PORT/v1/models || exit 1" \
+    --health-interval 5s \
+    --health-timeout 5s \
+    --health-start-period 20s \
+    "$IK_IMAGE" \
+    /llama.cpp/build/bin/llama-server \
+    --model "/model/$MODEL" \
+    --mmproj "/model/$MODEL_MM" \
+    --alias "$MODEL_NAME" \
+    -ctk q8_0 -ctv q8_0 \
+    --reasoning-budget 0 \
+    --host "$CONT_NAME" \
+    --port "$PORT" --no-mmap -dio