Hi
There is no option in the NVIDIA settings to set a target temperature. That’s why I wrote a script in which you can set a target temperature, a minimum fan speed and a maximum fan speed. If you are interested, just copy the code, paste it into a text file, adjust the settings as you wish (number of GPUs, target temperature, start fan speed, minimum fan speed, maximum fan speed, check interval) and save it. Make it executable with “chmod +x filename” and start it with “./filename”. If you have any questions or problems, or if you find a bug, just let me know. I’m not a professional coder, but the script works in my case without any problems.
#!/bin/bash
# Number of your GPUs, has to be at least 1
numberOfGpus="6"
# Target temperature in degree Celsius, has to be between 30 and 90 (both inclusive)
targetTemp="64"
# Start fan speed in %, has to be at least 35 and within minimum..maximum fan speed (both inclusive)
startSpeedPercentage="50"
# Minimum fan speed in %, has to be at least 25 and not greater than maximum fan speed
minSpeedPercentage="35"
# Maximum fan speed in %, has to be between 50 and 100 (both inclusive) and not less than minimum fan speed
maxSpeedPercentage="95"
# Check interval in seconds, has to be between 3 and 60 (both inclusive). Recommended 25 - 35
interval="30"

#######################################
# Checks the six settings above and rewrites each one as a plain base-10
# number.
# Globals:   numberOfGpus, targetTemp, startSpeedPercentage,
#            minSpeedPercentage, maxSpeedPercentage, interval (read/written)
# Outputs:   the reason for a rejection on stderr
# Returns:   0 when every setting is usable, 1 otherwise
#######################################
validateSettings() {
  local name
  local -a names=(numberOfGpus targetTemp startSpeedPercentage
    minSpeedPercentage maxSpeedPercentage interval)

  for name in "${names[@]}"; do
    if [[ -z "${!name}" ]]; then
      echo "Invalid varibles, empty variables not allowed - exit..." >&2
      return 1
    fi
  done

  for name in "${names[@]}"; do
    if ! [[ "${!name}" =~ ^[0-9]+$ ]]; then
      echo "Invalid variables, integers only - exit..." >&2
      return 1
    fi
    # A value such as "064" passes the digit check, but shell arithmetic
    # would read it as octal later on; store it back in base 10.
    printf -v "$name" '%d' "$((10#${!name}))"
  done

  if (( numberOfGpus < 1 \
    || targetTemp > 90 || targetTemp < 30 \
    || startSpeedPercentage < 35 \
    || startSpeedPercentage > maxSpeedPercentage \
    || startSpeedPercentage < minSpeedPercentage \
    || minSpeedPercentage < 25 \
    || minSpeedPercentage > maxSpeedPercentage \
    || maxSpeedPercentage > 100 || maxSpeedPercentage < 50 \
    || interval > 60 || interval < 3 )); then
    echo "Ivalid variables - exit..." >&2
    return 1
  fi
  return 0
}

validateSettings || exit 1
# Print the active settings, one per line, followed by two empty lines.
printf '%s\n' \
  "Number of GPUs: ${numberOfGpus}" \
  "Temperature target: ${targetTemp} degree Celsius" \
  "Start speed: ${startSpeedPercentage}%" \
  "Minimum speed: ${minSpeedPercentage}%" \
  "Maximum speed: ${maxSpeedPercentage}%" \
  "Interval: ${interval} seconds" \
  "" \
  ""
#######################################
# Enables manual fan control on every GPU and sets each fan to the start
# speed. The GPU index is also used as the fan index.
# Globals:   numberOfGpus, startSpeedPercentage (read)
#            speedPercentage<N> (written, one per GPU index N)
# Outputs:   a warning on stderr for every nvidia-settings call that fails
#######################################
initFans() {
  local gpu
  for (( gpu = numberOfGpus - 1; gpu >= 0; gpu-- )); do
    # The attribute strings are quoted: unquoted, "[gpu:N]/..." is a glob
    # pattern that the shell could expand against existing paths.
    nvidia-settings -a "[gpu:${gpu}]/GPUFanControlState=1" > /dev/null \
      || echo "GPU${gpu}: nvidia-settings could not enable manual fan control" >&2
    nvidia-settings -a "[fan:${gpu}]/GPUTargetFanSpeed=${startSpeedPercentage}" > /dev/null \
      || echo "GPU${gpu}: nvidia-settings could not set the start fan speed" >&2
    # Remember the speed that was requested for this GPU.
    printf -v "speedPercentage${gpu}" '%s' "$startSpeedPercentage"
  done
}

initFans
#######################################
# Prints the fan speed (%) a GPU should use next.
# Arguments: $1 - current temperature
#            $2 - target temperature
#            $3 - current fan speed
#            $4 - minimum fan speed
#            $5 - maximum fan speed
# Outputs:   the new fan speed on stdout
#######################################
nextFanSpeed() {
  local temp=$1 target=$2 speed=$3 min=$4 max=$5
  local next=$speed

  if (( temp > target && speed < max )); then
    if (( temp - target >= 5 )); then
      # 5 degrees or more above target: take a 10 % step, capped at the
      # maximum so the large step is not lost close to the cap.
      next=$(( speed + 10 ))
      if (( next > max )); then
        next=$max
      fi
    else
      next=$(( speed + 1 ))
    fi
  elif (( temp < target && speed > min )); then
    next=$(( speed - 1 ))
  fi
  printf '%s\n' "$next"
}

#######################################
# Reads one GPU's temperature, adjusts its fan by at most one step and
# prints a status report. The GPU index is also used as the fan index.
# Arguments: $1 - GPU index
# Globals:   targetTemp, minSpeedPercentage, maxSpeedPercentage (read)
#            speedPercentage<N> (read/written, N = GPU index)
# Outputs:   status report on stdout, read failures on stderr
# Returns:   1 when nvidia-smi did not report a numeric temperature, else 0
#######################################
regulateGpu() {
  local gpu=$1
  local speedVar="speedPercentage${gpu}"
  local speed=${!speedVar}
  local temp newSpeed

  temp=$(nvidia-smi -i "$gpu" --query-gpu=temperature.gpu --format=csv,noheader)
  temp=${temp//[[:space:]]/}
  # Anything but a plain number (e.g. an error message) must not reach the
  # arithmetic below; leave the fan as it is and report the problem.
  if ! [[ "$temp" =~ ^[0-9]+$ ]]; then
    echo "GPU${gpu}: could not read temperature (got '${temp}'), fan speed left unchanged" >&2
    return 1
  fi

  if (( temp <= targetTemp )); then
    echo "Temperature OK"
  elif (( speed >= maxSpeedPercentage )); then
    echo "TARGET TEMPERATURE CANNOT BE REACHED!"
  fi

  newSpeed=$(nextFanSpeed "$temp" "$targetTemp" "$speed" \
    "$minSpeedPercentage" "$maxSpeedPercentage")
  if (( newSpeed != speed )); then
    speed=$newSpeed
    printf -v "$speedVar" '%s' "$speed"
    nvidia-settings -a "[fan:${gpu}]/GPUTargetFanSpeed=${speed}" > /dev/null
  fi

  echo "Target temperature: $targetTemp degree Celsius"
  echo "GPU${gpu}: ${temp} degree Celsius"
  echo "GPU${gpu} Fan: ${speed}%"
  echo "Temperature difference: $(( temp - targetTemp )) degree Celsuis"
  echo ""
  return 0
}

# Regulate all GPUs from the highest index down to 0, then wait for the
# check interval and start over. Runs until the script is interrupted.
while (( numberOfGpus > 0 )); do
  for (( gpu = numberOfGpus - 1; gpu >= 0; gpu-- )); do
    regulateGpu "$gpu"
  done
  echo "------------------------------------------"
  sleep "$interval"
done