{
    "meta": {
        "title": "D-Central — Local LLM Model Database",
        "description": "Open-weight large language models suitable for local / self-hosted deployment. Covers family, parameter count, context window, license, approximate VRAM at Q4/Q8/FP16, Ollama tag, and CLOUD-Act-free status.",
        "version": "1.0",
        "date_as_of": "2026-06-15",
        "license": "https://creativecommons.org/licenses/by/4.0/",
        "license_name": "CC BY 4.0",
        "source": "https://d-central.tech/data/local-llm-model-database/",
        "record_count": 33,
        "disclaimer": "VRAM figures are approximate estimates (cross-checked against Ollama reported sizes where available). The open-weight LLM landscape changes rapidly; verify all specs and licenses at source before deployment. Not legal advice."
    },
    "rows": [
        {
            "model_id": "smollm2-1.7b",
            "model_name": "SmolLM2 1.7B Instruct",
            "family": "SmolLM",
            "developer": "HuggingFace",
            "params_b": 1.7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 8,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted.",
            "vram_q4_gb": 1.8,
            "vram_q8_gb": 2,
            "vram_fp16_gb": 3.6,
            "vram_note": "Q4 confirmed via Ollama library (smollm2:1.7b = 1.8 GB).",
            "ollama_tag": "smollm2:1.7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "Compact on-device model; 135M, 360M, and 1.7B variants available. 8K context. Apache 2.0 verified from HF model card. Designed to run on-device / in-browser. Ollama tag: smollm2:1.7b. HF card: HuggingFaceTB/SmolLM2-1.7B-Instruct."
        },
        {
            "model_id": "llama3.2-1b",
            "model_name": "Llama 3.2 1B Instruct",
            "family": "Llama",
            "developer": "Meta",
            "params_b": 1,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.2 Community License",
            "license_url": "https://www.llama.com/llama3_2/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission.",
            "vram_q4_gb": 0.9,
            "vram_q8_gb": 1.3,
            "vram_fp16_gb": 2.2,
            "vram_note": "Estimate; actual varies with context and overhead.",
            "ollama_tag": "llama3.2:1b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "Lightweight edge model; runs on low-end hardware (< 2 GB VRAM). 128K context verified on HF card. License: Llama 3.2 Community License (commercial OK, 700M MAU cap). HF card: meta-llama/Llama-3.2-1B-Instruct."
        },
        {
            "model_id": "llama3.2-3b",
            "model_name": "Llama 3.2 3B Instruct",
            "family": "Llama",
            "developer": "Meta",
            "params_b": 3,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.2 Community License",
            "license_url": "https://www.llama.com/llama3_2/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission.",
            "vram_q4_gb": 2,
            "vram_q8_gb": 3.5,
            "vram_fp16_gb": 6.3,
            "vram_note": "Estimate; actual varies with context and overhead.",
            "ollama_tag": "llama3.2:3b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "Best-value small model; competitive with much larger models on many tasks. 128K context. License: Llama 3.2 Community License. HF card: meta-llama/Llama-3.2-3B-Instruct."
        },
        {
            "model_id": "llama3.1-8b",
            "model_name": "Llama 3.1 8B Instruct",
            "family": "Llama",
            "developer": "Meta",
            "params_b": 8,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.1 Community License",
            "license_url": "https://www.llama.com/llama3_1/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission.",
            "vram_q4_gb": 5,
            "vram_q8_gb": 8.5,
            "vram_fp16_gb": 16.2,
            "vram_note": "Estimate; Ollama typically reports ~5 GB for llama3.1:8b Q4.",
            "ollama_tag": "llama3.1:8b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context confirmed from HF model card (meta-llama/Llama-3.1-8B-Instruct). License: Llama 3.1 Community License; commercial use OK under 700M MAU. Flagship small-model tier of the Llama 3.1 series; multilingual support."
        },
        {
            "model_id": "llama3.1-70b",
            "model_name": "Llama 3.1 70B Instruct",
            "family": "Llama",
            "developer": "Meta",
            "params_b": 70,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.1 Community License",
            "license_url": "https://www.llama.com/llama3_1/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission.",
            "vram_q4_gb": 43,
            "vram_q8_gb": 75,
            "vram_fp16_gb": 141.5,
            "vram_note": "Requires ~48 GB VRAM at Q4 (e.g. dual 24 GB GPUs, a single 48 GB RTX 6000 Ada, or an 80 GB H100).",
            "ollama_tag": "llama3.1:70b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context; strong reasoning and multilingual. Requires multi-GPU or high-VRAM workstation at Q4. 405B variant also available (llama3.1:405b, ~245 GB Q4 — requires data-centre hardware). License: Llama 3.1 Community License."
        },
        {
            "model_id": "llama3.3-70b",
            "model_name": "Llama 3.3 70B Instruct",
            "family": "Llama",
            "developer": "Meta",
            "params_b": 70,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.3 Community License",
            "license_url": "https://www.llama.com/llama3_3/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission.",
            "vram_q4_gb": 43,
            "vram_q8_gb": 75,
            "vram_fp16_gb": 141.5,
            "vram_note": "Same memory footprint as Llama 3.1 70B; improved benchmark scores vs. 3.1 70B.",
            "ollama_tag": "llama3.3:70b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "Outperforms Llama 3.1 70B on instruction following and reasoning benchmarks. Same 128K context; same hardware requirements. December 2024 release. License: Llama 3.3 Community License."
        },
        {
            "model_id": "qwen2.5-7b",
            "model_name": "Qwen2.5 7B Instruct",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 4.5,
            "vram_q8_gb": 7.5,
            "vram_fp16_gb": 14.2,
            "vram_note": "Estimate based on standard formula. Verify against Ollama qwen2.5:7b reported size.",
            "ollama_tag": "qwen2.5:7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. Apache 2.0 confirmed from the HF model card (Qwen/Qwen2.5-7B-Instruct); all Qwen2.5 sizes except 3B (Qwen Research License) and 72B (Tongyi Qianwen License) are Apache 2.0. Pre-trained on 18 trillion tokens. Strong coding + math performance at 7B scale."
        },
        {
            "model_id": "qwen2.5-14b",
            "model_name": "Qwen2.5 14B Instruct",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 14,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 8.5,
            "vram_q8_gb": 14.8,
            "vram_fp16_gb": 28.5,
            "vram_note": "Estimate based on standard formula.",
            "ollama_tag": "qwen2.5:14b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. Apache 2.0. Fits 16 GB VRAM at Q4. Competitive with models twice its size on coding benchmarks."
        },
        {
            "model_id": "qwen2.5-32b",
            "model_name": "Qwen2.5 32B Instruct",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 32,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 19,
            "vram_q8_gb": 33,
            "vram_fp16_gb": 65,
            "vram_note": "Estimate. 24 GB single GPU (Q4) or dual 16 GB setup.",
            "ollama_tag": "qwen2.5:32b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. Apache 2.0. 32B sweet spot for local deployment on 24 GB GPU (RTX 3090/4090). Excellent instruction following."
        },
        {
            "model_id": "qwen2.5-72b",
            "model_name": "Qwen2.5 72B Instruct",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 72,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Tongyi Qianwen License",
            "license_url": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Qwen\" attribution required. >100M MAU: request Alibaba permission. Note: the smaller Qwen2.5 models use Apache 2.0, except the 3B, which uses the non-commercial Qwen Research License.",
            "vram_q4_gb": 44,
            "vram_q8_gb": 75,
            "vram_fp16_gb": 146,
            "vram_note": "Needs roughly 48 GB or more of VRAM at Q4: a multi-GPU setup (e.g. dual RTX 6000 Ada) or a single 80 GB H100.",
            "ollama_tag": "qwen2.5:72b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. License: Tongyi Qianwen License (NOT Apache 2.0 — confirmed from HF LICENSE file). Commercial use permitted with attribution; 100M MAU threshold. Other Qwen2.5 sizes use Apache 2.0, except the 3B (Qwen Research License, non-commercial)."
        },
        {
            "model_id": "qwen3-8b",
            "model_name": "Qwen3 8B",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 8,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 32,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 5.2,
            "vram_q8_gb": 8.8,
            "vram_fp16_gb": 16.4,
            "vram_note": "Q4 confirmed via Ollama library (qwen3:8b = 5.2 GB).",
            "ollama_tag": "qwen3:8b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen3-8B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "Native 32K context (extends to ~131K with YaRN — Ollama may display 40K after RoPE scaling). Apache 2.0 confirmed from HF model card. Released April 2025. Qwen3 family includes hybrid thinking/non-thinking modes. HF card: Qwen/Qwen3-8B."
        },
        {
            "model_id": "qwen3-14b",
            "model_name": "Qwen3 14B",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 14,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 32,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 9.3,
            "vram_q8_gb": 14.8,
            "vram_fp16_gb": 28.5,
            "vram_note": "Q4 confirmed via Ollama library (qwen3:14b = 9.3 GB).",
            "ollama_tag": "qwen3:14b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen3-14B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "Native 32K context (extends to ~131K with YaRN). Apache 2.0. Fits a single 16 GB GPU at Q4. Strong reasoning model with hybrid thinking mode. Ollama: qwen3:14b."
        },
        {
            "model_id": "qwen3-32b",
            "model_name": "Qwen3 32B",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 32,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 32,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 20,
            "vram_q8_gb": 33,
            "vram_fp16_gb": 65,
            "vram_note": "Q4 confirmed via Ollama library (qwen3:32b = 20 GB).",
            "ollama_tag": "qwen3:32b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen3-32B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "Native 32K context; 32B dense with hybrid thinking. Apache 2.0. Requires 24 GB GPU at Q4. Released April 2025."
        },
        {
            "model_id": "qwen3-30b-a3b",
            "model_name": "Qwen3 30B-A3B (MoE)",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 30,
            "params_active_b": 3,
            "architecture": "moe",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 19,
            "vram_q8_gb": 32,
            "vram_fp16_gb": 62,
            "vram_note": "Q4 confirmed via Ollama library (qwen3:30b = 19 GB). All 30B params must be loaded despite only 3B active per token.",
            "ollama_tag": "qwen3:30b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen3-30B-A3B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "MoE: 30B total, 3B active per forward pass. Native 32K context, extendable to ~131K tokens (128K) with YaRN. Apache 2.0. Inference speed similar to a 3B dense model while maintaining quality closer to a 30B dense. Ollama: qwen3:30b."
        },
        {
            "model_id": "qwen3-235b-a22b",
            "model_name": "Qwen3 235B-A22B (MoE)",
            "family": "Qwen",
            "developer": "Alibaba / Qwen Team",
            "params_b": 235,
            "params_active_b": 22,
            "architecture": "moe",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use permitted under Apache 2.0.",
            "vram_q4_gb": 142,
            "vram_q8_gb": null,
            "vram_fp16_gb": null,
            "vram_note": "Q4 confirmed via Ollama library (qwen3:235b = 142 GB). Requires data-centre hardware (multi-GPU A100/H100). Q8 and FP16 not practical for most deployments.",
            "ollama_tag": "qwen3:235b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/Qwen/Qwen3-235B-A22B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "MoE: 235B total, 22B active per forward pass. First open-weight model to match commercial frontier models on ArenaHard (April 2025). Apache 2.0. 128K context with YaRN (native 32K). Data-centre hardware required (min ~4x A100 80GB at Q4). Ollama: qwen3:235b."
        },
        {
            "model_id": "gemma3-4b",
            "model_name": "Gemma 3 4B Instruct",
            "family": "Gemma",
            "developer": "Google DeepMind",
            "params_b": 4,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Google Gemma Terms of Use",
            "license_url": "https://ai.google.dev/gemma/terms",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use permitted with conditions. Review Google Gemma Terms. Prohibited uses include harmful AI applications; no sub-licensing.",
            "vram_q4_gb": 2.5,
            "vram_q8_gb": 4.3,
            "vram_fp16_gb": 8.2,
            "vram_note": "Estimate. Gemma 3 4B confirmed text+vision via Ollama gemma3:4b.",
            "ollama_tag": "gemma3:4b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text+vision",
            "hf_model_card_url": "https://huggingface.co/google/gemma-3-4b-it",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. Multimodal: accepts images and text. 140+ languages. Google Gemma Terms (commercial OK with conditions). Released March 2025. Ollama: gemma3:4b (marked latest). See also Gemma 4 for Apache 2.0 licensed successor."
        },
        {
            "model_id": "gemma3-12b",
            "model_name": "Gemma 3 12B Instruct",
            "family": "Gemma",
            "developer": "Google DeepMind",
            "params_b": 12,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Google Gemma Terms of Use",
            "license_url": "https://ai.google.dev/gemma/terms",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use permitted with conditions. Review Google Gemma Terms.",
            "vram_q4_gb": 7.5,
            "vram_q8_gb": 12.8,
            "vram_fp16_gb": 24.4,
            "vram_note": "Fits 8–12 GB GPU at Q4.",
            "ollama_tag": "gemma3:12b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text+vision",
            "hf_model_card_url": "https://huggingface.co/google/gemma-3-12b-it",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. Text+vision multimodal. Google Gemma Terms (commercial permitted). Fits a 12 GB GPU at Q4; 8 GB cards are marginal because the ~7.5 GB of weights leave little room for context. Released March 2025."
        },
        {
            "model_id": "gemma3-27b",
            "model_name": "Gemma 3 27B Instruct",
            "family": "Gemma",
            "developer": "Google DeepMind",
            "params_b": 27,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Google Gemma Terms of Use",
            "license_url": "https://ai.google.dev/gemma/terms",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use permitted with conditions. Review Google Gemma Terms.",
            "vram_q4_gb": 16.5,
            "vram_q8_gb": 27.5,
            "vram_fp16_gb": 54.7,
            "vram_note": "Requires 24 GB GPU at Q4 (e.g. RTX 3090/4090).",
            "ollama_tag": "gemma3:27b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text+vision",
            "hf_model_card_url": "https://huggingface.co/google/gemma-3-27b-it",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. Text+vision. Google's \"most capable model that runs on a single GPU\" at release. Google Gemma Terms (commercial OK). Released March 2025."
        },
        {
            "model_id": "gemma4-e4b",
            "model_name": "Gemma 4 E4B (Edge)",
            "family": "Gemma",
            "developer": "Google DeepMind",
            "params_b": 4.5,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. First Gemma model under Apache 2.0 — full commercial freedom with no MAU caps.",
            "vram_q4_gb": 9.6,
            "vram_q8_gb": null,
            "vram_fp16_gb": null,
            "vram_note": "Q4 confirmed via Ollama library (gemma4:e4b = 9.6 GB). Larger than expected for ~4.5B effective params due to Per-Layer Embeddings (PLE) — each decoder layer has its own large embedding table. Q8/FP16 not typically used for edge deployment.",
            "ollama_tag": "gemma4:e4b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text+vision+audio",
            "hf_model_card_url": "https://huggingface.co/google/gemma-4-E4B",
            "release_year": 2026,
            "last_verified": "2026-06",
            "notes": "Apache 2.0 — Google's first open Apache-licensed model family. Released April 2, 2026. \"E\" = edge; ~4.5B effective compute params + Per-Layer Embeddings (PLE) inflate weight file to 9.6 GB. 128K context. Multimodal: text, vision, audio. Ollama: gemma4:e4b (default gemma4:latest). Also available: gemma4:12b (7.6 GB), gemma4:26b (18 GB MoE), gemma4:31b (20 GB)."
        },
        {
            "model_id": "gemma4-31b",
            "model_name": "Gemma 4 31B",
            "family": "Gemma",
            "developer": "Google DeepMind",
            "params_b": 31,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 256,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under Apache 2.0.",
            "vram_q4_gb": 20,
            "vram_q8_gb": 32.5,
            "vram_fp16_gb": 63,
            "vram_note": "Q4 confirmed via Ollama library (gemma4:31b = 20 GB). Fits 24 GB GPU at Q4.",
            "ollama_tag": "gemma4:31b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text+vision+audio",
            "hf_model_card_url": "https://huggingface.co/google/gemma-4-31b-it",
            "release_year": 2026,
            "last_verified": "2026-06",
            "notes": "256K context. Apache 2.0. Multimodal (text, vision, audio). Dense 31B — best single-GPU Gemma 4 variant. Released April 2026. Also available: gemma4:26b (18 GB, 26B-A4B MoE)."
        },
        {
            "model_id": "mistral-7b-v0.3",
            "model_name": "Mistral 7B v0.3 Instruct",
            "family": "Mistral",
            "developer": "Mistral AI",
            "params_b": 7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 32,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under Apache 2.0.",
            "vram_q4_gb": 4.5,
            "vram_q8_gb": 7.5,
            "vram_fp16_gb": 14.2,
            "vram_note": "Estimate; runs comfortably on 6–8 GB VRAM at Q4.",
            "ollama_tag": "mistral:7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "32K context. Apache 2.0 (verified from Mistral AI documentation). Full attention — the sliding-window attention of v0.1 was dropped from v0.2 onward. v0.3 adds function-calling support. Ollama: mistral:7b. The original Mistral 7B (v0.1, September 2023) helped spark the open-weight model renaissance."
        },
        {
            "model_id": "mistral-nemo-12b",
            "model_name": "Mistral Nemo 12B Instruct",
            "family": "Mistral",
            "developer": "Mistral AI + NVIDIA",
            "params_b": 12,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under Apache 2.0.",
            "vram_q4_gb": 7.1,
            "vram_q8_gb": 12.5,
            "vram_fp16_gb": 24.2,
            "vram_note": "Q4 confirmed via Ollama library (mistral-nemo:12b = 7.1 GB).",
            "ollama_tag": "mistral-nemo:12b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. Apache 2.0. Joint Mistral AI + NVIDIA release (July 2024). Tekken tokenizer (131K vocabulary, multilingual). Strong multilingual; fits 8 GB GPU at Q4. Ollama: mistral-nemo:12b."
        },
        {
            "model_id": "mixtral-8x7b",
            "model_name": "Mixtral 8x7B Instruct v0.1",
            "family": "Mistral",
            "developer": "Mistral AI",
            "params_b": 46.7,
            "params_active_b": 12.9,
            "architecture": "moe",
            "context_window_k": 32,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under Apache 2.0.",
            "vram_q4_gb": 26,
            "vram_q8_gb": 47.2,
            "vram_fp16_gb": 93.8,
            "vram_note": "MoE: all 8 experts (46.7B total) loaded into VRAM; only 2 active per token. Requires ~32 GB VRAM at Q4.",
            "ollama_tag": "mixtral:8x7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
            "release_year": 2023,
            "last_verified": "2026-06",
            "notes": "32K context. MoE: 46.7B total params, 12.9B active per token (inference cost similar to a 13B dense model). Apache 2.0. Released December 2023. Requires multi-GPU or high-VRAM workstation (≥32 GB VRAM at Q4). Ollama: mixtral:8x7b."
        },
        {
            "model_id": "phi3.5-mini-3.8b",
            "model_name": "Phi-3.5 mini 3.8B Instruct",
            "family": "Phi",
            "developer": "Microsoft",
            "params_b": 3.8,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "MIT",
            "license_url": "https://opensource.org/licenses/MIT",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under MIT.",
            "vram_q4_gb": 2.2,
            "vram_q8_gb": 4.1,
            "vram_fp16_gb": 7.8,
            "vram_note": "Q4 confirmed via Ollama library (phi3.5:3.8b = 2.2 GB).",
            "ollama_tag": "phi3.5:3.8b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context (confirmed from HF model card, released August 2024). MIT license. Excellent quality-per-GB ratio; outperforms many larger models on reasoning benchmarks. Ollama: phi3.5:3.8b (2.2 GB Q4)."
        },
        {
            "model_id": "phi4-14b",
            "model_name": "Phi-4 14B Instruct",
            "family": "Phi",
            "developer": "Microsoft",
            "params_b": 14.7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 16,
            "license": "MIT",
            "license_url": "https://opensource.org/licenses/MIT",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Full commercial use under MIT.",
            "vram_q4_gb": 9.1,
            "vram_q8_gb": 15.5,
            "vram_fp16_gb": 29.8,
            "vram_note": "Q4 confirmed via Ollama library (phi4:14b = 9.1 GB).",
            "ollama_tag": "phi4:14b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/microsoft/phi-4",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "16K context (confirmed from Ollama phi4:14b; note: shorter than Phi-3.5). MIT license. Released December 2024. 14.7B params (rounded as 14B in Ollama). Phi-4-Reasoning-Plus (also MIT, 32K context) is a subsequent reasoning-focused fine-tune."
        },
        {
            "model_id": "deepseek-r1-distill-qwen-7b",
            "model_name": "DeepSeek-R1-Distill-Qwen-7B",
            "family": "DeepSeek",
            "developer": "DeepSeek AI",
            "params_b": 7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "MIT",
            "license_url": "https://opensource.org/licenses/MIT",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. MIT license confirmed from HF model card (deepseek-ai/DeepSeek-R1-Distill-Qwen-7B).",
            "vram_q4_gb": 4.7,
            "vram_q8_gb": 7.5,
            "vram_fp16_gb": 14.2,
            "vram_note": "Q4 confirmed via Ollama library (deepseek-r1:7b = 4.7 GB).",
            "ollama_tag": "deepseek-r1:7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context (confirmed from HF model card; based on Qwen2.5-Math-7B backbone). MIT license confirmed from HF model card. Chain-of-thought reasoning distilled from DeepSeek-R1. Released January 2025. Qwen-based distills (1.5B, 7B, 14B, 32B) carry MIT; Llama-based (8B, 70B) carry Llama 3.3 Community License."
        },
        {
            "model_id": "deepseek-r1-distill-qwen-14b",
            "model_name": "DeepSeek-R1-Distill-Qwen-14B",
            "family": "DeepSeek",
            "developer": "DeepSeek AI",
            "params_b": 14,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "MIT",
            "license_url": "https://opensource.org/licenses/MIT",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. MIT license.",
            "vram_q4_gb": 9,
            "vram_q8_gb": 14.8,
            "vram_fp16_gb": 28.5,
            "vram_note": "Q4 confirmed via Ollama library (deepseek-r1:14b = 9.0 GB).",
            "ollama_tag": "deepseek-r1:14b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. MIT. Based on Qwen2.5-14B backbone. Strong reasoning, especially math. Released January 2025."
        },
        {
            "model_id": "deepseek-r1-distill-qwen-32b",
            "model_name": "DeepSeek-R1-Distill-Qwen-32B",
            "family": "DeepSeek",
            "developer": "DeepSeek AI",
            "params_b": 32,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "MIT",
            "license_url": "https://opensource.org/licenses/MIT",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. MIT license.",
            "vram_q4_gb": 20,
            "vram_q8_gb": 33,
            "vram_fp16_gb": 65,
            "vram_note": "Q4 confirmed via Ollama library (deepseek-r1:32b = 20 GB).",
            "ollama_tag": "deepseek-r1:32b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. MIT. Based on Qwen2.5-32B backbone. Highest quality Qwen-distill; requires 24 GB GPU at Q4."
        },
        {
            "model_id": "deepseek-r1-distill-llama-70b",
            "model_name": "DeepSeek-R1-Distill-Llama-70B",
            "family": "DeepSeek",
            "developer": "DeepSeek AI",
            "params_b": 70,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "Llama 3.3 Community License",
            "license_url": "https://www.llama.com/llama3_3/license/",
            "license_permits_commercial": true,
            "license_commercial_note": "Commercial use OK. \"Built with Llama\" attribution required. >700M MAU: request Meta permission. Note: carries Llama 3.3 license (base model), NOT MIT.",
            "vram_q4_gb": 43,
            "vram_q8_gb": 75,
            "vram_fp16_gb": 141.5,
            "vram_note": "Q4 confirmed via Ollama library (deepseek-r1:70b = 43 GB).",
            "ollama_tag": "deepseek-r1:70b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
            "release_year": 2025,
            "last_verified": "2026-06",
            "notes": "128K context. Based on Llama 3.3 backbone — carries Llama 3.3 Community License (NOT MIT, unlike Qwen-based distills). Strong reasoning at 70B. Requires multi-GPU at Q4 (~48 GB VRAM)."
        },
        {
            "model_id": "olmo2-7b",
            "model_name": "OLMo 2 7B Instruct",
            "family": "OLMo",
            "developer": "Allen AI (AI2)",
            "params_b": 7,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 4,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Fully open: weights, training data, training code, and logs released under Apache 2.0.",
            "vram_q4_gb": 4.5,
            "vram_q8_gb": 7.5,
            "vram_fp16_gb": 14.2,
            "vram_note": "Q4 confirmed via Ollama library (olmo2:7b = 4.5 GB).",
            "ollama_tag": "olmo2:7b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/allenai/OLMo-2-7B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "4K native context (8K with RoPE scaling, per AI2 docs). The most fully open model family: training data, training code, intermediate checkpoints, and logs also published. 5T training tokens. Released November 2024. Apache 2.0. Competing with Llama 3.1 at 7B on English benchmarks."
        },
        {
            "model_id": "olmo2-13b",
            "model_name": "OLMo 2 13B Instruct",
            "family": "OLMo",
            "developer": "Allen AI (AI2)",
            "params_b": 13,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 4,
            "license": "Apache 2.0",
            "license_url": "https://www.apache.org/licenses/LICENSE-2.0",
            "license_permits_commercial": true,
            "license_commercial_note": "No restrictions. Fully open under Apache 2.0.",
            "vram_q4_gb": 8.4,
            "vram_q8_gb": 13.5,
            "vram_fp16_gb": 26.5,
            "vram_note": "Q4 confirmed via Ollama library (olmo2:13b = 8.4 GB).",
            "ollama_tag": "olmo2:13b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/allenai/OLMo-2-13B-Instruct",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "4K native context. Outperforms Qwen 2.5 14B Instruct on several English benchmarks. Fully open: training data + code published. Apache 2.0. Released November 2024."
        },
        {
            "model_id": "command-r-35b",
            "model_name": "Command R 35B (08-2024)",
            "family": "Command",
            "developer": "Cohere",
            "params_b": 35,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "CC-BY-NC-4.0",
            "license_url": "https://creativecommons.org/licenses/by-nc/4.0/",
            "license_permits_commercial": false,
            "license_commercial_note": "Non-commercial use only (CC-BY-NC-4.0). Attribution required. Contact Cohere for commercial licensing.",
            "vram_q4_gb": 19,
            "vram_q8_gb": 35.5,
            "vram_fp16_gb": 70.9,
            "vram_note": "Q4 confirmed via Ollama library (command-r:35b = 19 GB).",
            "ollama_tag": "command-r:35b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/CohereForAI/c4ai-command-r-08-2024",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. CC-BY-NC-4.0 — NOT commercially usable without a separate Cohere agreement. Strong at RAG and tool use. Ollama: command-r:35b (19 GB Q4). Paired with Command R+ 104B for maximum capability."
        },
        {
            "model_id": "command-r-plus-104b",
            "model_name": "Command R+ 104B",
            "family": "Command",
            "developer": "Cohere",
            "params_b": 104,
            "params_active_b": null,
            "architecture": "dense",
            "context_window_k": 128,
            "license": "CC-BY-NC-4.0",
            "license_url": "https://creativecommons.org/licenses/by-nc/4.0/",
            "license_permits_commercial": false,
            "license_commercial_note": "Non-commercial use only (CC-BY-NC-4.0). Contact Cohere for commercial licensing.",
            "vram_q4_gb": 59,
            "vram_q8_gb": 110,
            "vram_fp16_gb": 210.5,
            "vram_note": "Q4 confirmed via Ollama library (command-r-plus:104b = 59 GB). Requires multi-GPU at Q4.",
            "ollama_tag": "command-r-plus:104b",
            "cloud_act_free_if_selfhosted": true,
            "modality": "text",
            "hf_model_card_url": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
            "release_year": 2024,
            "last_verified": "2026-06",
            "notes": "128K context. CC-BY-NC-4.0 — NOT commercially usable without separate agreement. 104B enterprise-scale model; designed for complex RAG, tool use, and multi-step reasoning. Requires multi-GPU workstation at Q4."
        }
    ]
}