diff --git a/GLM-4.7-Flash-Uncensored.sh b/GLM-4.7-Flash-Uncensored.sh
new file mode 100755
index 0000000..23586bc
--- /dev/null
+++ b/GLM-4.7-Flash-Uncensored.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Creates (but does not start) a Docker container that serves the
+# GLM-4.7-Flash-Uncensored GGUF model through llama.cpp's llama-server.
+# Run it from the repository root: ./env/llama_env.sh and
+# ./create_new_image.sh are resolved relative to the current directory.
+
+# Alias the model is served under, and the container name derived from it.
+MODEL_NAME="GLM-4.7-Flash-Uncen-Hrt"
+CONT_NAME="LLM-$MODEL_NAME"
+
+# Host directory mounted at /model in the container; MODEL is relative to it.
+MODEL_FOLDER="/home/jared/.cache"
+MODEL="huggingface/hub/models--DavidAU--GLM-4.7-Flash-Uncensored-Heretic-NEO-CODE-Imatrix-MAX-GGUF/snapshots/af1cb75ecde13d21c19ceaa5fbd4352f8233e0e5/GLM-4.7-Flash-Uncen-Hrt-NEO-CODE-MAX-imat-D_AU-Q8_0.gguf"
+#MODEL_MM="huggingface/hub/models--HauhauCS--Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive/snapshots/f12a584fecbeb5f20001130d8ecd66c9327ae685/mmproj-Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-f16.gguf"
+
+PORT=8356
+
+# Build the image first if its env file has not been generated yet.
+if [ ! -f "./env/llama_env.sh" ]; then
+    echo "llama_env.sh not found, running create_new_image.sh..."
+    ./create_new_image.sh || { echo "create_new_image.sh failed" >&2; exit 1; }
+fi
+# The env file is expected to define IMAGE, the image the container is created from.
+source ./env/llama_env.sh || { echo "cannot source ./env/llama_env.sh" >&2; exit 1; }
+: "${IMAGE:?IMAGE is not set by ./env/llama_env.sh}"
+echo "$IMAGE"
+# llama-server binds to the container name; the health check polls that same address.
+docker container create --name "$CONT_NAME" --network llms -p "$PORT:$PORT" -v "$MODEL_FOLDER:/model" --user "$(id -u):$(id -g)" --gpus=all --restart on-failure:3 \
+    --health-cmd "curl -f http://$CONT_NAME:$PORT/v1/models || exit 1" \
+    --health-interval 5s \
+    --health-timeout 5s \
+    --health-start-period 20s \
+    "$IMAGE" \
+    /llama.cpp/build/bin/llama-server \
+    --model "/model/$MODEL" \
+    --alias "$MODEL_NAME" \
+    -ctk q8_0 -ctv q8_0 \
+    --host "$CONT_NAME" \
+    --port "$PORT" --no-mmap -dio \
+    || { echo "failed to create container $CONT_NAME" >&2; exit 1; }
+
+echo "Done!"
+echo "$CONT_NAME"
diff --git a/README.md b/README.md
index 61dc4cc..500a1a5 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,3 @@
 # ik_llama_docker
 
-Builds docker images to run ik_llama quantized models. 
-
-## How to
-First generate the base docker image by running ./create_new_image.sh
-
-Run the specific model script to create the docker container ./qwen3.5-122b.sh
\ No newline at end of file
+Builds docker images to run ik_llama quantized models, llama.cpp compatible models, and vllm backend models.
\ No newline at end of file
diff --git a/qwen3.5-397B-A17B-IQ2_KL-CPU.sh b/qwen3.5-397B-A17B-IQ2_KL-CPU.sh
new file mode 100755
index 0000000..4a8178a
--- /dev/null
+++ b/qwen3.5-397B-A17B-IQ2_KL-CPU.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Creates a Docker container that serves the Qwen3.5-397B-A17B IQ2_KL GGUF
+# model through ik_llama.cpp's llama-server, then starts it attached.
+# Run it from the repository root: ./env/ik_env.sh and
+# ./create_new_IK_image.sh are resolved relative to the current directory.
+# NOTE(review): the file name says CPU, yet --gpus=all and -ngl 999 are
+# passed below - confirm which is intended.
+
+# Alias the model is served under, and the container name derived from it.
+MODEL_NAME="Qwen3.5-397B-A17B"
+CONT_NAME="LLM-$MODEL_NAME"
+
+# Host directory mounted at /model in the container; MODEL is the first of
+# four split GGUF files (00001-of-00004), relative to that directory.
+MODEL="huggingface/hub/models--ubergarm--Qwen3.5-397B-A17B-GGUF/snapshots/bb9c5e48cdcad8716ef0dfff1f48f43b34b89cb2/IQ2_KL/Qwen3.5-397B-A17B-IQ2_KL-00001-of-00004.gguf"
+MODEL_FOLDER="/home/jared/.cache"
+
+PORT=10987
+
+# Build the ik_llama image first if its env file has not been generated yet.
+if [ ! -f "./env/ik_env.sh" ]; then
+    echo "./env/ik_env.sh not found, running create_new_IK_image.sh..."
+    ./create_new_IK_image.sh || { echo "create_new_IK_image.sh failed" >&2; exit 1; }
+fi
+# The env file is expected to define IMAGE, the image the container is created from.
+source ./env/ik_env.sh || { echo "cannot source ./env/ik_env.sh" >&2; exit 1; }
+: "${IMAGE:?IMAGE is not set by ./env/ik_env.sh}"
+echo "$IMAGE"
+docker container create --name "$CONT_NAME" --network llms -p "$PORT:$PORT" -v "$MODEL_FOLDER:/model" --user "$(id -u):$(id -g)" --gpus=all --restart on-failure:3 \
+    --health-cmd "curl -f http://$CONT_NAME:$PORT/v1/models || exit 1" \
+    --health-interval 5s \
+    --health-timeout 5s \
+    --health-start-period 50s \
+    "$IMAGE" \
+    /ik_llama.cpp/build/bin/llama-server \
+    --model "/model/$MODEL" \
+    --alias "$MODEL_NAME" \
+    -ctk q8_0 -ctv q8_0 \
+    -ngl 999 \
+    --parallel 2 --threads 24 \
+    --host "$CONT_NAME" \
+    --port "$PORT" --jinja --no-mmap \
+    || { echo "failed to create container $CONT_NAME" >&2; exit 1; }
+
+echo "Done!"
+echo "$CONT_NAME"
+
+# Start the new container with its output attached to this terminal.
+docker container start -a "$CONT_NAME"