llama.cpp verification source 2026-05-22
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run
This commit is contained in:
21
.devops/nix/apps.nix
Normal file
21
.devops/nix/apps.nix
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
perSystem =
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
apps =
|
||||
let
|
||||
inherit (config.packages) default;
|
||||
binaries = [
|
||||
"llama-cli"
|
||||
"llama-embedding"
|
||||
"llama-server"
|
||||
"llama-quantize"
|
||||
];
|
||||
mkApp = name: {
|
||||
type = "app";
|
||||
program = "${default}/bin/${name}";
|
||||
};
|
||||
in
|
||||
lib.genAttrs binaries mkApp;
|
||||
};
|
||||
}
|
||||
52
.devops/nix/devshells.nix
Normal file
52
.devops/nix/devshells.nix
Normal file
@@ -0,0 +1,52 @@
|
||||
{ inputs, ... }:
|
||||
|
||||
{
|
||||
perSystem =
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
system,
|
||||
...
|
||||
}:
|
||||
{
|
||||
devShells =
|
||||
let
|
||||
pkgs = import inputs.nixpkgs { inherit system; };
|
||||
stdenv = pkgs.stdenv;
|
||||
scripts = config.packages.python-scripts;
|
||||
in
|
||||
lib.pipe (config.packages) [
|
||||
(lib.concatMapAttrs (
|
||||
name: package: {
|
||||
${name} = pkgs.mkShell {
|
||||
name = "${name}";
|
||||
inputsFrom = [ package ];
|
||||
shellHook = ''
|
||||
echo "Entering ${name} devShell"
|
||||
'';
|
||||
};
|
||||
"${name}-extra" =
|
||||
if (name == "python-scripts") then
|
||||
null
|
||||
else
|
||||
pkgs.mkShell {
|
||||
name = "${name}-extra";
|
||||
inputsFrom = [
|
||||
package
|
||||
scripts
|
||||
];
|
||||
# Extra packages that *may* be used by some scripts
|
||||
packages = [
|
||||
pkgs.python3Packages.tiktoken
|
||||
];
|
||||
shellHook = ''
|
||||
echo "Entering ${name} devShell"
|
||||
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
|
||||
'';
|
||||
};
|
||||
}
|
||||
))
|
||||
(lib.filterAttrs (name: value: value != null))
|
||||
];
|
||||
};
|
||||
}
|
||||
37
.devops/nix/docker.nix
Normal file
37
.devops/nix/docker.nix
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
lib,
|
||||
dockerTools,
|
||||
buildEnv,
|
||||
llama-cpp,
|
||||
interactive ? true,
|
||||
coreutils,
|
||||
}:
|
||||
|
||||
# A tar that can be fed into `docker load`:
|
||||
#
|
||||
# $ nix build .#llamaPackages.docker
|
||||
# $ docker load < result
|
||||
|
||||
# For details and variations cf.
|
||||
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
|
||||
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
|
||||
# - https://nixery.dev/
|
||||
|
||||
# Approximate (compressed) sizes, at the time of writing, are:
|
||||
#
|
||||
# .#llamaPackages.docker: 125M;
|
||||
# .#llamaPackagesCuda.docker: 537M;
|
||||
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
|
||||
|
||||
dockerTools.buildLayeredImage {
|
||||
name = llama-cpp.pname;
|
||||
tag = "latest";
|
||||
|
||||
contents =
|
||||
[ llama-cpp ]
|
||||
++ lib.optionals interactive [
|
||||
coreutils
|
||||
dockerTools.binSh
|
||||
dockerTools.caCertificates
|
||||
];
|
||||
}
|
||||
39
.devops/nix/jetson-support.nix
Normal file
39
.devops/nix/jetson-support.nix
Normal file
@@ -0,0 +1,39 @@
|
||||
{ inputs, ... }:
|
||||
{
|
||||
perSystem =
|
||||
{
|
||||
config,
|
||||
system,
|
||||
lib,
|
||||
pkgsCuda,
|
||||
...
|
||||
}:
|
||||
{
|
||||
legacyPackages =
|
||||
let
|
||||
caps.llamaPackagesXavier = "7.2";
|
||||
caps.llamaPackagesOrin = "8.7";
|
||||
caps.llamaPackagesTX2 = "6.2";
|
||||
caps.llamaPackagesNano = "5.3";
|
||||
|
||||
pkgsFor =
|
||||
cap:
|
||||
import inputs.nixpkgs {
|
||||
inherit system;
|
||||
config = {
|
||||
cudaSupport = true;
|
||||
cudaCapabilities = [ cap ];
|
||||
cudaEnableForwardCompat = false;
|
||||
inherit (pkgsCuda.config) allowUnfreePredicate;
|
||||
};
|
||||
};
|
||||
in
|
||||
builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
|
||||
|
||||
packages = lib.optionalAttrs (system == "aarch64-linux") {
|
||||
jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
|
||||
jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
|
||||
jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
|
||||
};
|
||||
};
|
||||
}
|
||||
45
.devops/nix/nixpkgs-instances.nix
Normal file
45
.devops/nix/nixpkgs-instances.nix
Normal file
@@ -0,0 +1,45 @@
|
||||
{ inputs, ... }:
|
||||
{
|
||||
# The _module.args definitions are passed on to modules as arguments. E.g.
|
||||
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
|
||||
# `_module.args.pkgs` (defined in this case by flake-parts).
|
||||
perSystem =
|
||||
{ lib, system, ... }:
|
||||
{
|
||||
_module.args = {
|
||||
# Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
|
||||
# again, the below creates several nixpkgs instances which the
|
||||
# flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
|
||||
#
|
||||
# This is currently "slow" and "expensive", on a certain scale.
|
||||
# This also isn't "right" in that this hinders dependency injection at
|
||||
# the level of flake inputs. This might get removed in the foreseeable
|
||||
# future.
|
||||
#
|
||||
# Note that you can use these expressions without Nix
|
||||
# (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
|
||||
|
||||
pkgsCuda = import inputs.nixpkgs {
|
||||
inherit system;
|
||||
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
|
||||
# and ucx are built with CUDA support)
|
||||
config.cudaSupport = true;
|
||||
config.allowUnfreePredicate =
|
||||
p:
|
||||
builtins.all (
|
||||
license:
|
||||
license.free
|
||||
|| builtins.elem license.shortName [
|
||||
"CUDA EULA"
|
||||
"cuDNN EULA"
|
||||
]
|
||||
) (p.meta.licenses or (lib.toList p.meta.license));
|
||||
};
|
||||
# Ensure dependencies use ROCm consistently
|
||||
pkgsRocm = import inputs.nixpkgs {
|
||||
inherit system;
|
||||
config.rocmSupport = true;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
38
.devops/nix/package-gguf-py.nix
Normal file
38
.devops/nix/package-gguf-py.nix
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
lib,
|
||||
llamaVersion,
|
||||
numpy,
|
||||
tqdm,
|
||||
requests,
|
||||
sentencepiece,
|
||||
pyyaml,
|
||||
poetry-core,
|
||||
buildPythonPackage,
|
||||
pytestCheckHook,
|
||||
}:
|
||||
|
||||
buildPythonPackage {
|
||||
pname = "gguf";
|
||||
version = llamaVersion;
|
||||
pyproject = true;
|
||||
nativeBuildInputs = [ poetry-core ];
|
||||
propagatedBuildInputs = [
|
||||
numpy
|
||||
tqdm
|
||||
sentencepiece
|
||||
pyyaml
|
||||
requests
|
||||
];
|
||||
src = lib.cleanSource ../../gguf-py;
|
||||
pythonImportsCheck = [
|
||||
"numpy"
|
||||
"gguf"
|
||||
];
|
||||
nativeCheckInputs = [ pytestCheckHook ];
|
||||
doCheck = true;
|
||||
meta = with lib; {
|
||||
description = "Python package for writing binary files in the GGUF format";
|
||||
license = licenses.mit;
|
||||
maintainers = [ maintainers.ditsuke ];
|
||||
};
|
||||
}
|
||||
248
.devops/nix/package.nix
Normal file
248
.devops/nix/package.nix
Normal file
@@ -0,0 +1,248 @@
|
||||
{
|
||||
lib,
|
||||
glibc,
|
||||
config,
|
||||
stdenv,
|
||||
runCommand,
|
||||
cmake,
|
||||
ninja,
|
||||
pkg-config,
|
||||
git,
|
||||
mpi,
|
||||
blas,
|
||||
cudaPackages,
|
||||
autoAddDriverRunpath,
|
||||
darwin,
|
||||
rocmPackages,
|
||||
vulkan-headers,
|
||||
vulkan-loader,
|
||||
openssl,
|
||||
shaderc,
|
||||
spirv-headers,
|
||||
useBlas ?
|
||||
builtins.all (x: !x) [
|
||||
useCuda
|
||||
useMetalKit
|
||||
useRocm
|
||||
useVulkan
|
||||
]
|
||||
&& blas.meta.available,
|
||||
useCuda ? config.cudaSupport,
|
||||
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
|
||||
# Increases the runtime closure size by ~700M
|
||||
useMpi ? false,
|
||||
useRocm ? config.rocmSupport,
|
||||
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
|
||||
useVulkan ? false,
|
||||
useRpc ? false,
|
||||
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
||||
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
# otherwise we get libstdc++ errors downstream.
|
||||
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
|
||||
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
|
||||
precompileMetalShaders ? false,
|
||||
useWebUi ? true,
|
||||
}:
|
||||
|
||||
let
|
||||
inherit (lib)
|
||||
cmakeBool
|
||||
cmakeFeature
|
||||
optionalAttrs
|
||||
optionals
|
||||
strings
|
||||
;
|
||||
|
||||
stdenv = throw "Use effectiveStdenv instead";
|
||||
|
||||
suffices =
|
||||
lib.optionals useBlas [ "BLAS" ]
|
||||
++ lib.optionals useCuda [ "CUDA" ]
|
||||
++ lib.optionals useMetalKit [ "MetalKit" ]
|
||||
++ lib.optionals useMpi [ "MPI" ]
|
||||
++ lib.optionals useRocm [ "ROCm" ]
|
||||
++ lib.optionals useVulkan [ "Vulkan" ];
|
||||
|
||||
pnameSuffix =
|
||||
strings.optionalString (suffices != [ ])
|
||||
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
||||
descriptionSuffix = strings.optionalString (
|
||||
suffices != [ ]
|
||||
) ", accelerated with ${strings.concatStringsSep ", " suffices}";
|
||||
|
||||
xcrunHost = runCommand "xcrunHost" { } ''
|
||||
mkdir -p $out/bin
|
||||
ln -s /usr/bin/xcrun $out/bin
|
||||
'';
|
||||
|
||||
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
|
||||
# separately
|
||||
darwinBuildInputs =
|
||||
with darwin.apple_sdk.frameworks;
|
||||
[
|
||||
Accelerate
|
||||
CoreVideo
|
||||
CoreGraphics
|
||||
]
|
||||
++ optionals useMetalKit [ MetalKit ];
|
||||
|
||||
cudaBuildInputs = with cudaPackages; [
|
||||
cuda_cudart
|
||||
cuda_cccl # <nv/target>
|
||||
libcublas
|
||||
];
|
||||
|
||||
rocmBuildInputs = with rocmPackages; [
|
||||
clr
|
||||
hipblas
|
||||
rocblas
|
||||
];
|
||||
|
||||
vulkanBuildInputs = [
|
||||
vulkan-headers
|
||||
vulkan-loader
|
||||
shaderc
|
||||
spirv-headers
|
||||
];
|
||||
in
|
||||
|
||||
effectiveStdenv.mkDerivation (finalAttrs: {
|
||||
pname = "llama-cpp${pnameSuffix}";
|
||||
version = llamaVersion;
|
||||
|
||||
# Note: none of the files discarded here are visible in the sandbox or
|
||||
# affect the output hash. This also means they can be modified without
|
||||
# triggering a rebuild.
|
||||
src = lib.cleanSourceWith {
|
||||
filter =
|
||||
name: type:
|
||||
let
|
||||
noneOf = builtins.all (x: !x);
|
||||
baseName = baseNameOf name;
|
||||
in
|
||||
noneOf [
|
||||
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
|
||||
(lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
|
||||
(lib.hasPrefix "." baseName) # Skip hidden files and directories
|
||||
(baseName == "flake.lock")
|
||||
];
|
||||
src = lib.cleanSource ../../.;
|
||||
};
|
||||
|
||||
postPatch = ''
|
||||
'';
|
||||
|
||||
# With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
|
||||
# `default.metallib` may be compiled with Metal compiler from XCode
|
||||
# and we need to escape sandbox on MacOS to access Metal compiler.
|
||||
# `xcrun` is used find the path of the Metal compiler, which is varible
|
||||
# and not on $PATH
|
||||
# see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
|
||||
__noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
cmake
|
||||
ninja
|
||||
pkg-config
|
||||
git
|
||||
]
|
||||
++ optionals useCuda [
|
||||
cudaPackages.cuda_nvcc
|
||||
|
||||
autoAddDriverRunpath
|
||||
]
|
||||
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
|
||||
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
|
||||
|
||||
buildInputs =
|
||||
optionals effectiveStdenv.isDarwin darwinBuildInputs
|
||||
++ optionals useCuda cudaBuildInputs
|
||||
++ optionals useMpi [ mpi ]
|
||||
++ optionals useRocm rocmBuildInputs
|
||||
++ optionals useBlas [ blas ]
|
||||
++ optionals useVulkan vulkanBuildInputs
|
||||
++ [ openssl ];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
(cmakeBool "LLAMA_BUILD_SERVER" true)
|
||||
(cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
|
||||
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
|
||||
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
||||
(cmakeBool "GGML_NATIVE" false)
|
||||
(cmakeBool "GGML_BLAS" useBlas)
|
||||
(cmakeBool "GGML_CUDA" useCuda)
|
||||
(cmakeBool "GGML_HIP" useRocm)
|
||||
(cmakeBool "GGML_METAL" useMetalKit)
|
||||
(cmakeBool "GGML_VULKAN" useVulkan)
|
||||
(cmakeBool "GGML_STATIC" enableStatic)
|
||||
(cmakeBool "GGML_RPC" useRpc)
|
||||
]
|
||||
++ optionals useCuda [
|
||||
(
|
||||
with cudaPackages.flags;
|
||||
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
|
||||
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
|
||||
)
|
||||
)
|
||||
]
|
||||
++ optionals useRocm [
|
||||
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
||||
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
|
||||
]
|
||||
++ optionals useMetalKit [
|
||||
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
|
||||
(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
|
||||
];
|
||||
|
||||
# Environment variables needed for ROCm
|
||||
env = optionalAttrs useRocm {
|
||||
ROCM_PATH = "${rocmPackages.clr}";
|
||||
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
|
||||
};
|
||||
|
||||
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
|
||||
# if they haven't been added yet.
|
||||
postInstall = ''
|
||||
mkdir -p $out/include
|
||||
cp $src/include/llama.h $out/include/
|
||||
'';
|
||||
|
||||
meta = {
|
||||
# Configurations we don't want even the CI to evaluate. Results in the
|
||||
# "unsupported platform" messages. This is mostly a no-op, because
|
||||
# cudaPackages would've refused to evaluate anyway.
|
||||
badPlatforms = optionals useCuda lib.platforms.darwin;
|
||||
|
||||
# Configurations that are known to result in build failures. Can be
|
||||
# overridden by importing Nixpkgs with `allowBroken = true`.
|
||||
broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
||||
|
||||
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
||||
homepage = "https://github.com/ggml-org/llama.cpp/";
|
||||
license = lib.licenses.mit;
|
||||
|
||||
# Accommodates `nix run` and `lib.getExe`
|
||||
mainProgram = "llama-cli";
|
||||
|
||||
# These people might respond, on the best effort basis, if you ping them
|
||||
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
||||
# Consider adding yourself to this list if you want to ensure this flake
|
||||
# stays maintained and you're willing to invest your time. Do not add
|
||||
# other people without their consent. Consider removing people after
|
||||
# they've been unreachable for long periods of time.
|
||||
|
||||
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
||||
# an attrset following the same format as in
|
||||
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
||||
maintainers = with lib.maintainers; [
|
||||
philiptaron
|
||||
SomeoneSerge
|
||||
];
|
||||
|
||||
# Extend `badPlatforms` instead
|
||||
platforms = lib.platforms.all;
|
||||
};
|
||||
})
|
||||
66
.devops/nix/python-scripts.nix
Normal file
66
.devops/nix/python-scripts.nix
Normal file
@@ -0,0 +1,66 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
buildPythonPackage,
|
||||
poetry-core,
|
||||
mkShell,
|
||||
python3Packages,
|
||||
gguf-py,
|
||||
}@inputs:
|
||||
|
||||
let
|
||||
llama-python-deps = with python3Packages; [
|
||||
numpy
|
||||
sentencepiece
|
||||
transformers
|
||||
protobuf
|
||||
torchWithoutCuda
|
||||
gguf-py
|
||||
tqdm
|
||||
|
||||
# for scripts/compare-llama-bench.py
|
||||
gitpython
|
||||
tabulate
|
||||
|
||||
# for examples/pydantic-models-to-grammar-examples.py
|
||||
docstring-parser
|
||||
pydantic
|
||||
|
||||
];
|
||||
|
||||
llama-python-test-deps = with python3Packages; [
|
||||
# Server bench
|
||||
matplotlib
|
||||
|
||||
# server tests
|
||||
openai
|
||||
pytest
|
||||
prometheus-client
|
||||
];
|
||||
in
|
||||
|
||||
buildPythonPackage ({
|
||||
pname = "llama-scripts";
|
||||
version = "0.0.0";
|
||||
pyproject = true;
|
||||
|
||||
# NOTE: The files filtered out here are not visible in the build sandbox, neither
|
||||
# do they affect the output hash. They can be modified without triggering a rebuild.
|
||||
src = lib.cleanSourceWith {
|
||||
filter =
|
||||
name: type:
|
||||
let
|
||||
any = builtins.any (x: x);
|
||||
baseName = builtins.baseNameOf name;
|
||||
in
|
||||
any [
|
||||
(lib.hasSuffix ".py" name)
|
||||
(baseName == "README.md")
|
||||
(baseName == "pyproject.toml")
|
||||
];
|
||||
src = lib.cleanSource ../../.;
|
||||
};
|
||||
nativeBuildInputs = [ poetry-core ];
|
||||
nativeCheckInputs = llama-python-test-deps;
|
||||
dependencies = llama-python-deps;
|
||||
})
|
||||
35
.devops/nix/scope.nix
Normal file
35
.devops/nix/scope.nix
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
lib,
|
||||
newScope,
|
||||
python3,
|
||||
llamaVersion ? "0.0.0",
|
||||
}:
|
||||
|
||||
let
|
||||
pythonPackages = python3.pkgs;
|
||||
in
|
||||
|
||||
# We're using `makeScope` instead of just writing out an attrset
|
||||
# because it allows users to apply overlays later using `overrideScope'`.
|
||||
# Cf. https://noogle.dev/f/lib/makeScope
|
||||
|
||||
lib.makeScope newScope (self: {
|
||||
inherit llamaVersion;
|
||||
gguf-py = self.callPackage ./package-gguf-py.nix {
|
||||
inherit (pythonPackages)
|
||||
numpy
|
||||
tqdm
|
||||
sentencepiece
|
||||
pyyaml
|
||||
pytestCheckHook
|
||||
requests
|
||||
buildPythonPackage
|
||||
poetry-core
|
||||
;
|
||||
};
|
||||
python-scripts = self.callPackage ./python-scripts.nix { inherit (pythonPackages) buildPythonPackage poetry-core; };
|
||||
llama-cpp = self.callPackage ./package.nix { };
|
||||
docker = self.callPackage ./docker.nix { };
|
||||
docker-min = self.callPackage ./docker.nix { interactive = false; };
|
||||
sif = self.callPackage ./sif.nix { };
|
||||
})
|
||||
27
.devops/nix/sif.nix
Normal file
27
.devops/nix/sif.nix
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
lib,
|
||||
singularity-tools,
|
||||
llama-cpp,
|
||||
bashInteractive,
|
||||
interactive ? false,
|
||||
}:
|
||||
|
||||
let
|
||||
optionalInt = cond: x: if cond then x else 0;
|
||||
in
|
||||
singularity-tools.buildImage rec {
|
||||
inherit (llama-cpp) name;
|
||||
contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
|
||||
|
||||
# These are excessive (but safe) for most variants. Building singularity
|
||||
# images requires superuser privileges, so we build them inside a VM in a
|
||||
# writable image of pre-determined size.
|
||||
#
|
||||
# ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
|
||||
#
|
||||
# Expected image sizes:
|
||||
# - cpu/blas: 150M,
|
||||
# - cuda, all gencodes: 560M,
|
||||
diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
|
||||
memSize = diskSize;
|
||||
}
|
||||
Reference in New Issue
Block a user