Added gemma4 to spark
This commit is contained in:
parent
cb2346e432
commit
12d2a7c573
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
vllm_env.sh
|
||||
23
create_new_vllm_image.sh
Executable file
23
create_new_vllm_image.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
#
# Builds a dated local image on top of the upstream vLLM image, adding an
# unprivileged "vllm" user (uid/gid 1000) as the default user, then records
# the resulting tag in ./vllm_env.sh so other scripts can source it.
#
# Usage: ./create_new_vllm_image.sh
# Output: ./vllm_env.sh (exports VLLM_IMAGE), written to the current directory.

# Abort on the first failure so vllm_env.sh is never updated to point at an
# image that failed to build.
set -euo pipefail

export BASE_IMAGE="vllm/vllm-openai:v0.20.1-cu129"
# Assign and export separately so a failing `date` is not masked by `export`.
DATE="$(date +%Y-%m-%d)"
export DATE
export IMG_PRE="vllm-dw"
export IMG_NAME="$IMG_PRE:$DATE"

# The Dockerfile is fed on stdin ("-"): it copies nothing from the build
# context, so no context directory is needed, and no temporary ./Dockerfile
# is created (which previously clobbered and then deleted any existing one).
# The heredoc delimiter is unquoted on purpose so $BASE_IMAGE is expanded.
docker build -t "$IMG_NAME" - << EOF
FROM $BASE_IMAGE
RUN groupadd -g 1000 vllm && useradd -u 1000 -g 1000 -m -s /bin/bash vllm
WORKDIR /home/vllm
USER vllm
EOF

# Updated env for other scripts. Only reached when the build succeeded.
cat > vllm_env.sh << EOF
#!/bin/bash
#updated from create_new_vllm_image.sh
export VLLM_IMAGE="$IMG_NAME"
EOF
|
||||
24
spark/gemma-4-26B-A4B-mxfp4.sh
Executable file
24
spark/gemma-4-26B-A4B-mxfp4.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
#
# Creates (does not start) a Docker container that serves the Gemma 4 26B-A4B
# MXFP4 GGUF model with llama-server on port 10987 of the "llms" network.
#
# Requires: ../env.sh (relative to this script) exporting IK_IMAGE, the
# container image that provides /llama.cpp/build/bin/llama-server.

set -uo pipefail

MODEL_NAME="Gemma-4-26B-A4B-it-mxfp4"
CONT_NAME="llama-$MODEL_NAME"
PORT=10987
MODEL_FOLDER="/home/jared/.cache/huggingface/hub/models--unsloth--gemma-4-26B-A4B-it-GGUF"
MODEL="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/gemma-4-26B-A4B-it-MXFP4_MOE.gguf"
MODEL_MM="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/mmproj-F16.gguf"

# Grab the container image name. env.sh is resolved relative to this script
# rather than the caller's working directory, so the script works from anywhere.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1
# shellcheck source=/dev/null
# nounset is relaxed while sourcing, since env.sh is not visible from here and
# may legitimately reference unset variables.
set +u
source "$script_dir/../env.sh" || {
  printf 'error: cannot source %s\n' "$script_dir/../env.sh" >&2
  exit 1
}
set -u
: "${IK_IMAGE:?IK_IMAGE must be set by env.sh}"
echo "$IK_IMAGE"

docker_args=(
  --name "$CONT_NAME"
  --network llms
  -p "$PORT:$PORT"
  -v "$MODEL_FOLDER:/model"
  --user "$(id -u):$(id -g)"
  --gpus=all
  --restart on-failure:3
  # The health check runs inside the container and reaches the server through
  # the container's own name on the network.
  --health-cmd "curl -f http://$CONT_NAME:$PORT/v1/models || exit 1"
  --health-interval 5s
  --health-timeout 5s
  --health-start-period 20s
)

server_args=(
  --model "/model/$MODEL"
  # Multimodal projector file; the option is spelled --mmproj.
  --mmproj "/model/$MODEL_MM"
  --alias "$MODEL_NAME"
  -ctk q8_0 -ctv q8_0
  --reasoning-budget 0
  # Bind to the container's own name so it matches the address the health
  # check probes (the previous "ik-" prefix did not match CONT_NAME).
  --host "$CONT_NAME"
  --port "$PORT" --no-mmap -dio
)

docker container create "${docker_args[@]}" \
  "$IK_IMAGE" \
  /llama.cpp/build/bin/llama-server "${server_args[@]}"
|
||||
Loading…
x
Reference in New Issue
Block a user