{
    "meta": {
        "title": "D-Central — Local AI Runtime Comparison",
        "description": "Comparison of 16 local AI inference runtimes for running open-weight LLMs on your own hardware: category, interface, model formats, OpenAI-compatible API, GPU support, license and OS.",
        "generated": "2026-06-21T03:30:19+00:00",
        "version": "1.0",
        "license": "https://creativecommons.org/licenses/by/4.0/",
        "license_name": "CC BY 4.0",
        "source": "https://d-central.tech/local-ai-runtime-comparison/",
        "record_count": 16,
        "provenance": "Compiled from the projects' own repositories and documentation; D-Central is one node in the open-source AI ecosystem, not its maintainer.",
        "disclaimer": "Local-AI tooling moves fast: confirm the current license, formats and API against the project repo before deciding. Many tools wrap llama.cpp under the hood."
    },
    "rows": [
        {
            "id": "ollama",
            "name": "Ollama",
            "category": "engine",
            "interface": "CLI + server",
            "model_formats": "GGUF (can import safetensors)",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm), Apple (Metal)",
            "license": "MIT",
            "os": "Linux / macOS / Windows",
            "best_for": "simplest local model runner",
            "source": "github.com/ollama/ollama"
        },
        {
            "id": "llama-cpp",
            "name": "llama.cpp",
            "category": "engine",
            "interface": "CLI + server (also a C/C++ library)",
            "model_formats": "GGUF",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm), Apple (Metal), Vulkan, CPU",
            "license": "MIT",
            "os": "Linux / macOS / Windows",
            "best_for": "underlying GGUF inference engine many tools wrap",
            "source": "github.com/ggml-org/llama.cpp"
        },
        {
            "id": "lm-studio",
            "name": "LM Studio",
            "category": "app",
            "interface": "Desktop GUI (+ local server, CLI 'lms')",
            "model_formats": "GGUF, MLX",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm/Vulkan), Apple (Metal)",
            "license": "Closed (proprietary); free for personal use",
            "os": "Linux / macOS / Windows",
            "best_for": "non-technical desktop users",
            "source": "lmstudio.ai"
        },
        {
            "id": "vllm",
            "name": "vLLM",
            "category": "server",
            "interface": "Server (API) + Python library",
            "model_formats": "HF safetensors, GPTQ, AWQ",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA) first, AMD (ROCm)",
            "license": "Apache-2.0",
            "os": "Linux (Windows via WSL/Docker)",
            "best_for": "max-throughput GPU serving",
            "source": "github.com/vllm-project/vllm"
        },
        {
            "id": "text-generation-webui",
            "name": "text-generation-webui (oobabooga)",
            "category": "app",
            "interface": "Web UI (+ API)",
            "model_formats": "GGUF, EXL2, GPTQ, AWQ, HF safetensors",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm), Apple (Metal), CPU",
            "license": "AGPL-3.0",
            "os": "Linux / macOS / Windows",
            "best_for": "power users wanting many backends and quant formats",
            "source": "github.com/oobabooga/text-generation-webui"
        },
        {
            "id": "jan",
            "name": "Jan",
            "category": "app",
            "interface": "Desktop GUI (+ local server)",
            "model_formats": "GGUF (llama.cpp engine)",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (Vulkan), Apple (Metal)",
            "license": "Apache-2.0",
            "os": "Linux / macOS / Windows",
            "best_for": "open-source offline desktop ChatGPT alternative",
            "source": "github.com/menloresearch/jan"
        },
        {
            "id": "gpt4all",
            "name": "GPT4All",
            "category": "app",
            "interface": "Desktop GUI (+ local API server)",
            "model_formats": "GGUF",
            "openai_api": "yes",
            "gpu_support": "NVIDIA/AMD (Vulkan), Apple (Metal), CPU",
            "license": "MIT",
            "os": "Linux / macOS / Windows",
            "best_for": "privacy-focused desktop chat with local documents",
            "source": "github.com/nomic-ai/gpt4all"
        },
        {
            "id": "koboldcpp",
            "name": "KoboldCpp",
            "category": "app",
            "interface": "GUI launcher + Web UI + Server (API)",
            "model_formats": "GGUF",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (Vulkan), Apple (Metal), CPU",
            "license": "AGPL-3.0",
            "os": "Linux / macOS / Windows",
            "best_for": "single-binary story writing and roleplay",
            "source": "github.com/LostRuins/koboldcpp"
        },
        {
            "id": "localai",
            "name": "LocalAI",
            "category": "server",
            "interface": "Server (API)",
            "model_formats": "GGUF (plus multimodal/whisper/diffusers backends)",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm), Apple (Metal), CPU",
            "license": "MIT",
            "os": "Linux / macOS / Windows (Docker-first)",
            "best_for": "self-hosted drop-in OpenAI API replacement",
            "source": "github.com/mudler/LocalAI"
        },
        {
            "id": "llamafile",
            "name": "Llamafile",
            "category": "engine",
            "interface": "CLI + server (single-file executable)",
            "model_formats": "GGUF",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA), AMD (ROCm), Apple (Metal), CPU",
            "license": "Apache-2.0",
            "os": "Linux / macOS / Windows / BSD",
            "best_for": "single portable file that runs across OSes",
            "source": "github.com/mozilla-ai/llamafile"
        },
        {
            "id": "open-webui",
            "name": "Open WebUI",
            "category": "frontend",
            "interface": "Web UI",
            "model_formats": "N/A (uses a backend)",
            "openai_api": "no",
            "gpu_support": "N/A (backend-dependent)",
            "license": "BSD-3-Clause (v0.6.6+ adds a branding clause + CLA)",
            "os": "Linux / macOS / Windows (Docker)",
            "best_for": "self-hosted chat UI in front of Ollama or OpenAI-compatible APIs",
            "source": "github.com/open-webui/open-webui"
        },
        {
            "id": "mlx-lm",
            "name": "MLX / mlx-lm",
            "category": "library",
            "interface": "Python library + CLI",
            "model_formats": "MLX (converts from HF safetensors)",
            "openai_api": "yes",
            "gpu_support": "Apple (Metal) only",
            "license": "MIT",
            "os": "macOS (Apple Silicon) only",
            "best_for": "Apple Silicon native inference",
            "source": "github.com/ml-explore/mlx-lm"
        },
        {
            "id": "hugging-face-tgi",
            "name": "Hugging Face TGI",
            "category": "server",
            "interface": "Server (API)",
            "model_formats": "HF safetensors, GPTQ, AWQ",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA) first, AMD (ROCm), Intel Gaudi",
            "license": "Apache-2.0",
            "os": "Linux (Docker-first)",
            "best_for": "production serving of Hugging Face models",
            "source": "github.com/huggingface/text-generation-inference"
        },
        {
            "id": "exllamav2-tabbyapi",
            "name": "ExLlamaV2 / TabbyAPI",
            "category": "engine",
            "interface": "Python library + Server (API via TabbyAPI)",
            "model_formats": "EXL2, GPTQ",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA)",
            "license": "MIT (ExLlamaV2); AGPL-3.0 (TabbyAPI)",
            "os": "Linux / Windows",
            "best_for": "fast EXL2 quantized inference on NVIDIA GPUs",
            "source": "github.com/turboderp-org/exllamav2"
        },
        {
            "id": "sglang",
            "name": "SGLang",
            "category": "server",
            "interface": "Server (API) + Python library",
            "model_formats": "HF safetensors, GPTQ, AWQ, FP8",
            "openai_api": "yes",
            "gpu_support": "NVIDIA (CUDA) first, AMD (ROCm)",
            "license": "Apache-2.0",
            "os": "Linux",
            "best_for": "high-throughput structured/agentic GPU serving",
            "source": "github.com/sgl-project/sglang"
        },
        {
            "id": "anythingllm",
            "name": "AnythingLLM",
            "category": "app",
            "interface": "Desktop GUI + Web UI (Docker)",
            "model_formats": "GGUF (built-in engine; also connects to external providers)",
            "openai_api": "no",
            "gpu_support": "NVIDIA (CUDA), Apple (Metal), CPU",
            "license": "MIT",
            "os": "Linux / macOS / Windows",
            "best_for": "all-in-one local RAG desktop app",
            "source": "github.com/Mintplex-Labs/anything-llm"
        }
    ]
}