{
    "meta": {
        "title": "GGUF Quantization Quality Reference",
        "description": "Machine-readable reference for 19 GGUF LLM quantization types with exact bits-per-weight, family (K-quant / I-quant / legacy / float) and quality tier — pick a quant that fits VRAM and stays accurate.",
        "generated": "2026-06-20T01:38:38+00:00",
        "version": "1.0",
        "license": "https://creativecommons.org/licenses/by/4.0/",
        "license_name": "CC BY 4.0",
        "source": "https://d-central.tech/gguf-quantization-quality/",
        "record_count": 19,
        "disclaimer": "Bits-per-weight are the documented super-block values from llama.cpp/HF; named variants (Q4_K_M etc.) are tensor-level mixes that sit slightly above the base type. Output quality varies by model and architecture; treat tiers as guidance."
    },
    "rows": [
        {
            "type": "F32",
            "family": "Floating point",
            "bpw": 32,
            "quality_tier": "Reference",
            "note": "Full single precision; the unquantized training baseline. Rarely used for local inference (huge files)."
        },
        {
            "type": "F16",
            "family": "Floating point",
            "bpw": 16,
            "quality_tier": "Reference",
            "note": "Half precision; the near-lossless inference baseline that quants are measured against."
        },
        {
            "type": "BF16",
            "family": "Floating point",
            "bpw": 16,
            "quality_tier": "Reference",
            "note": "Brain-float 16; same size as F16 with a wider exponent range; a common training/inference format."
        },
        {
            "type": "Q8_0",
            "family": "Legacy",
            "bpw": 8.5,
            "quality_tier": "Near-lossless",
            "note": "8-bit round-to-nearest; virtually indistinguishable from F16. Large files; a safe maximum-quality quant."
        },
        {
            "type": "Q6_K",
            "family": "K-quant",
            "bpw": 6.5625,
            "quality_tier": "Excellent",
            "note": "6-bit K-quant; near-indistinguishable from F16 in most evaluations. The top choice when VRAM allows."
        },
        {
            "type": "Q5_K",
            "family": "K-quant",
            "bpw": 5.5,
            "quality_tier": "Very good",
            "note": "Base 5-bit K-quant. Q5_K_M (a Q5_K/Q6_K tensor mix) is a high-quality pick for modest extra size."
        },
        {
            "type": "Q4_K",
            "family": "K-quant",
            "bpw": 4.5,
            "quality_tier": "Good",
            "note": "Base 4-bit K-quant. Q4_K_M (a Q4_K/Q6_K mix, ~4.8 bpw effective) is the recommended default for most local models."
        },
        {
            "type": "Q4_0",
            "family": "Legacy",
            "bpw": 4.5,
            "quality_tier": "Medium",
            "note": "Legacy round-to-nearest 4-bit; superseded by Q4_K_M, which is higher quality at similar size."
        },
        {
            "type": "IQ4_XS",
            "family": "I-quant",
            "bpw": 4.25,
            "quality_tier": "Good",
            "note": "Importance-matrix 4-bit; excellent quality-per-bit, often matching Q4_K_S at a smaller size. Slower on some CPUs."
        },
        {
            "type": "IQ4_NL",
            "family": "I-quant",
            "bpw": 4.5,
            "quality_tier": "Good",
            "note": "Importance-matrix 4-bit non-linear; slightly larger than IQ4_XS (32-weight blocks rather than 256-weight super-blocks), using non-uniformly spaced quantization levels."
        },
        {
            "type": "IQ3_S",
            "family": "I-quant",
            "bpw": 3.44,
            "quality_tier": "Medium",
            "note": "Importance-matrix 3-bit; better quality-per-bit than Q3_K at a comparable size."
        },
        {
            "type": "Q3_K",
            "family": "K-quant",
            "bpw": 3.4375,
            "quality_tier": "Medium-low",
            "note": "Base 3-bit K-quant (Q3_K_S/M/L are tensor mixes around this). Visible quality loss on smaller models."
        },
        {
            "type": "IQ3_XXS",
            "family": "I-quant",
            "bpw": 3.06,
            "quality_tier": "Low-medium",
            "note": "Importance-matrix 3-bit, very small; best reserved for larger models."
        },
        {
            "type": "Q2_K",
            "family": "K-quant",
            "bpw": 2.625,
            "quality_tier": "Low",
            "note": "Smallest K-quant; noticeable quality loss. Use only for tight VRAM on large (30B+) models."
        },
        {
            "type": "IQ2_S",
            "family": "I-quant",
            "bpw": 2.5,
            "quality_tier": "Low",
            "note": "Importance-matrix 2-bit; preserves more quality per bit than Q2_K. Large models only."
        },
        {
            "type": "IQ2_XS",
            "family": "I-quant",
            "bpw": 2.31,
            "quality_tier": "Very low",
            "note": "Importance-matrix 2-bit, extra small; viable only on very large models."
        },
        {
            "type": "IQ2_XXS",
            "family": "I-quant",
            "bpw": 2.06,
            "quality_tier": "Very low",
            "note": "Importance-matrix 2-bit, smallest practical 2-bit; large models only, with real quality loss."
        },
        {
            "type": "IQ1_M",
            "family": "I-quant",
            "bpw": 1.75,
            "quality_tier": "Lowest",
            "note": "Importance-matrix 1-bit; extreme compression, only usable on the largest (70B+) models."
        },
        {
            "type": "IQ1_S",
            "family": "I-quant",
            "bpw": 1.56,
            "quality_tier": "Lowest",
            "note": "Importance-matrix 1-bit, smallest; experimental, heavy quality loss, 70B+ only."
        }
    ]
}