Added gemma4 to spark
This commit is contained in:
parent
cb2346e432
commit
12d2a7c573
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
vllm_env.sh
|
||||||
23
create_new_vllm_image.sh
Executable file
23
create_new_vllm_image.sh
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/bash
#
# Build a dated vLLM image that runs as a non-root user, then record its tag
# so other scripts can pick it up.
#
# The image is derived from BASE_IMAGE, adds a `vllm` user and group
# (uid/gid 1000), and runs as that user from /home/vllm. It is tagged
# "$IMG_PRE:<today's date>".
#
# Outputs: ./vllm_env.sh in the current directory (overwritten), exporting
#          VLLM_IMAGE with the tag that was just built.

# Abort on the first failure so a broken build never updates vllm_env.sh.
set -euo pipefail

export BASE_IMAGE="vllm/vllm-openai:v0.20.1-cu129"
# Assign before exporting so a failing `date` is not masked by `export`.
DATE=$(date +%Y-%m-%d)
export DATE
export IMG_PRE="vllm-dw"
export IMG_NAME="$IMG_PRE:$DATE"

# Feed the Dockerfile on stdin: nothing is copied into the image, so no build
# context is needed, and no Dockerfile in the working directory is
# overwritten or deleted.
docker build -t "$IMG_NAME" - << EOF
FROM $BASE_IMAGE
RUN groupadd -g 1000 vllm && useradd -u 1000 -g 1000 -m -s /bin/bash vllm
WORKDIR /home/vllm
USER vllm
EOF

#Updated env for other scripts (only reached when the build succeeded)
cat > vllm_env.sh << EOF
#!/bin/bash
#updated from create_new_vllm_image.sh
export VLLM_IMAGE="$IMG_NAME"
EOF
|
||||||
24
spark/gemma-4-26B-A4B-mxfp4.sh
Executable file
24
spark/gemma-4-26B-A4B-mxfp4.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
#
# Create (but do not start) a Docker container that serves the
# Gemma-4-26B-A4B-it MXFP4 GGUF model with llama-server.
#
# Requires: env.sh one directory above this script, which must set IK_IMAGE
#           to the container image to run.
# Side effects: creates a container named "llama-<model name>" on the `llms`
#               Docker network, publishing PORT on the host.

MODEL_NAME="Gemma-4-26B-A4B-it-mxfp4"
CONT_NAME="llama-$MODEL_NAME"
MODEL_FOLDER="/home/jared/.cache/huggingface/hub/models--unsloth--gemma-4-26B-A4B-it-GGUF"
MODEL="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/gemma-4-26B-A4B-it-MXFP4_MOE.gguf"
MODEL_MM="snapshots/3365c68df1a83799b846d05324ebfadbb8cc70b3/mmproj-F16.gguf"
# Single definition of the port used for publishing, the health check and the
# server's listen port.
PORT=10987

#grab container image name
# Resolve env.sh relative to this script so it works from any directory.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || exit 1
env_file="$script_dir/../env.sh"
if [[ ! -r "$env_file" ]]; then
  printf 'error: cannot read %s\n' "$env_file" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$env_file"

# Strict mode is enabled only after sourcing, so env.sh is evaluated exactly
# as it was before this script checked anything.
set -euo pipefail

# Without an image name, `docker container create` would treat the next
# argument (the llama-server path) as the image.
: "${IK_IMAGE:?IK_IMAGE must be set by env.sh}"
echo "$IK_IMAGE"

docker_args=(
  --name "$CONT_NAME"
  --network llms
  -p "$PORT:$PORT"
  -v "$MODEL_FOLDER:/model"
  --user "$(id -u):$(id -g)"
  --gpus=all
  --restart on-failure:3
  --health-cmd "curl -f http://$CONT_NAME:$PORT/v1/models || exit 1"
  --health-interval 5s
  --health-timeout 5s
  --health-start-period 20s
)

server_args=(
  --model "/model/$MODEL"
  # Long-form multimodal projector option.
  --mmproj "/model/$MODEL_MM"
  --alias "$MODEL_NAME"
  -ctk q8_0 -ctv q8_0
  --reasoning-budget 0
  # Listen on the container's own name, the same address the health check
  # probes.
  --host "$CONT_NAME"
  --port "$PORT" --no-mmap -dio
)

docker container create "${docker_args[@]}" \
  "$IK_IMAGE" \
  /llama.cpp/build/bin/llama-server "${server_args[@]}"
|
||||||
Loading…
x
Reference in New Issue
Block a user