forked from ServeurpersoCom/acestep.cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathquantize.sh
More file actions
executable file
·35 lines (28 loc) · 882 Bytes
/
quantize.sh
File metadata and controls
executable file
·35 lines (28 loc) · 882 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
#
# Batch-quantize the BF16 GGUF models listed further down in this script.
# Paths are relative, so run it from the directory that contains both
# build/ and models/.

# Stop at the first command that exits non-zero.
set -o errexit

# Quantizer executable, invoked as: <input.gguf> <output.gguf> <type>
Q='./build/quantize'
#######################################
# Quantize one BF16 GGUF model to the requested type, unless the target
# file already exists.
# Globals:
#   Q (read) - path of the quantizer executable
# Arguments:
#   $1 - input model; its name must end in "-BF16.gguf"
#   $2 - quantization type (e.g. Q8_0); also used in the output name
# Outputs:
#   "[Skip] <out>" on stdout when the target already exists;
#   "[Error] ..." on stderr when the input is rejected
# Returns:
#   0 on success or skip; 1 for a rejected input; otherwise the
#   quantizer's own non-zero exit status
#######################################
quantize() {
  local bf16="$1" type="$2"
  local -r suffix="-BF16.gguf"

  # Without the suffix the derived output name would equal the input name,
  # which would either report a bogus skip or overwrite the source model.
  if [[ "$bf16" != *"$suffix" ]]; then
    printf '[Error] %s: input name must end in %s\n' "$bf16" "$suffix" >&2
    return 1
  fi

  # Anchor the replacement at the end of the name only.
  local out="${bf16%"$suffix"}-${type}.gguf"

  # Checked before the input so an already-produced target still counts as
  # done even if the BF16 source has since been removed.
  if [[ -f "$out" ]]; then
    echo "[Skip] $out"
    return 0
  fi

  # Also catches an unmatched glob passed through as a literal pattern.
  if [[ ! -f "$bf16" ]]; then
    printf '[Error] %s: input model not found\n' "$bf16" >&2
    return 1
  fi

  local rc=0
  "$Q" "$bf16" "$out" "$type" || rc=$?
  if (( rc != 0 )); then
    # Drop whatever the failed run left behind; otherwise the next run
    # would see the file and skip it as if it were complete.
    rm -f -- "$out"
    return "$rc"
  fi
}
# --- Embedding model -------------------------------------------------------
# Only Q8_0: it is run single-shot and precision matters.
quantize models/Qwen3-Embedding-0.6B-BF16.gguf Q8_0

# --- Small / medium language models ----------------------------------------
# Only Q8_0: these are too small to survive aggressive quantization.
for lm_size in 0.6B 1.7B; do
  quantize "models/acestep-5Hz-lm-${lm_size}-BF16.gguf" Q8_0
done

# --- Large language model --------------------------------------------------
# Full range, minus Q4_K_M (confirmed broken for audio codes).
lm_large_types=(Q5_K_M Q6_K Q8_0)
for qtype in "${lm_large_types[@]}"; do
  quantize models/acestep-5Hz-lm-4B-BF16.gguf "$qtype"
done

# --- DiT models ------------------------------------------------------------
# Full range for every acestep-v15-*-BF16.gguf found under models/.
# NOTE: if nothing matches, bash passes the literal pattern to quantize.
dit_types=(Q4_K_M Q5_K_M Q6_K Q8_0)
for dit_model in models/acestep-v15-*-BF16.gguf; do
  for qtype in "${dit_types[@]}"; do
    quantize "$dit_model" "$qtype"
  done
done

# --- VAE -------------------------------------------------------------------
# Never quantized (stays BF16).