ollama source for Momentry Core verification

This commit is contained in:
Accusys
2026-05-22 17:19:10 +08:00
commit 0b31ff9135
2020 changed files with 1413145 additions and 0 deletions

544
server/internal/cache/blob/cache.go vendored Normal file
View File

@@ -0,0 +1,544 @@
// Package blob implements a content-addressable disk cache for blobs and
// manifests.
package blob
import (
"bytes"
"crypto/sha256"
"errors"
"fmt"
"hash"
"io"
"io/fs"
"iter"
"os"
"path/filepath"
"strings"
"time"
"github.com/ollama/ollama/server/internal/internal/names"
)
// Entry contains metadata about a blob in the cache. It is the value
// returned by [DiskCache.Get].
type Entry struct {
	Digest Digest    // digest the blob was looked up by
	Size   int64     // size of the blob file in bytes
	Time   time.Time // when added to the cache
}
// DiskCache caches blobs and manifests on disk.
//
// The cache is rooted at a directory, which is created if it does not exist.
//
// Blobs are stored in the "blobs" subdirectory, and manifests are stored in the
// "manifests" subdirectory. An example directory structure might look like:
//
//	<dir>/
//	  blobs/
//	    sha256-<digest> - <blob data>
//	  manifests/
//	    <host>/
//	      <namespace>/
//	        <name>/
//	          <tag> - <manifest data>
//
// Name casing is preserved in the cache, but is not significant when resolving
// names. For example, "Foo" and "foo" are considered the same name.
//
// Concurrency: DiskCache holds no locks of its own, so it does not prevent
// duplicated effort by concurrent callers. Because blobs are immutable,
// duplicate writes should result in the same file being written to disk.
//
// NOTE(review): an earlier version of this comment stated both that the
// cache is and is not safe for concurrent use; confirm the intended guarantee.
type DiskCache struct {
	// dir is the top-level directory where blobs and manifest
	// pointers are stored.
	dir string
	// now supplies the timestamps applied to files written by the cache.
	now func() time.Time
	// testHookBeforeFinalWrite, if set, is handed to the checkWriter used
	// when copying a file into the cache.
	testHookBeforeFinalWrite func(f *os.File)
}
// PutBytes stores data in c under digest d. It is shorthand for
// c.Put(d, bytes.NewReader([]byte(data)), int64(len(data))).
func PutBytes[S string | []byte](c *DiskCache, d Digest, data S) error {
	raw := []byte(data)
	return c.Put(d, bytes.NewReader(raw), int64(len(raw)))
}
// Open returns a cache rooted at dir, creating dir and its "blobs" and
// "manifests" subdirectories when they are missing. It fails if dir is empty
// or names something that exists but is not a directory.
func Open(dir string) (*DiskCache, error) {
	if dir == "" {
		return nil, errors.New("blob: empty directory name")
	}
	// A failed Stat is not reported here; the directory is created below
	// if it needs to be.
	if st, statErr := os.Stat(dir); statErr == nil && !st.IsDir() {
		return nil, fmt.Errorf("%q is not a directory", dir)
	}
	// Create the root first, then the two fixed subdirectories.
	for _, p := range []string{dir, filepath.Join(dir, "blobs"), filepath.Join(dir, "manifests")} {
		if err := os.MkdirAll(p, 0o777); err != nil {
			return nil, err
		}
	}
	// TODO(bmizerany): support shards
	return &DiskCache{dir: dir, now: time.Now}, nil
}
// readAndSum reads at most limit bytes from the named file and returns them
// along with the SHA-256 digest of the bytes that were read.
func readAndSum(filename string, limit int64) (data []byte, _ Digest, err error) {
	src, err := os.Open(filename)
	if err != nil {
		return nil, Digest{}, err
	}
	defer src.Close()

	hasher := sha256.New()
	// The limit wraps the tee, so the hash only sees bytes that are
	// handed back to the caller.
	limited := io.LimitReader(io.TeeReader(src, hasher), limit)
	if data, err = io.ReadAll(limited); err != nil {
		return nil, Digest{}, err
	}

	var sum Digest
	hasher.Sum(sum.sum[:0])
	return data, sum, nil
}
//lint:ignore U1000 used for debugging purposes as needed in tests
var debug = false

// debugger returns a step recorder bound to *err. Each call with a non-empty
// step appends it to an in-memory trail (only the most recent 100 steps are
// kept). Calling the recorder with an empty string while *err is non-nil
// wraps *err with the recorded trail; if *err is nil at that point, the empty
// string is recorded as a step like any other.
//
// When the package-level debug flag is false the returned function does
// nothing.
//
//lint:ignore U1000 used for debugging purposes as needed in tests
func debugger(err *error) func(step string) {
	if !debug {
		return func(string) {}
	}
	const maxSteps = 100
	var trail []string
	record := func(step string) {
		finalize := step == "" && *err != nil
		if finalize {
			*err = fmt.Errorf("%q: %w", trail, *err)
			return
		}
		trail = append(trail, step)
		if len(trail) > maxSteps {
			// Drop the oldest step so a runaway caller cannot grow
			// the trail without bound.
			copy(trail, trail[1:])
			trail = trail[:maxSteps]
		}
	}
	return record
}
// Resolve maps a reference to a digest. Accepted forms are:
//
//	@<digest>
//	<name>@<digest>
//	<name>
//
// When the reference carries a digest, that digest is parsed and returned
// and the name part is not consulted.
//
// Otherwise the manifest file for the name is read and hashed, and the
// resulting digest is returned. If the manifest file cannot be read the
// error from reading it is returned (for a missing manifest this is expected
// to satisfy [fs.ErrNotExist]).
//
// Because a manifest file may change without the cache knowing (for example
// after being edited by hand), the bytes that were read are also written to
// the blob store through PutBytes so that a later [DiskCache.Get] on the
// returned digest can succeed.
func (c *DiskCache) Resolve(name string) (Digest, error) {
	base, ref := splitNameDigest(name)
	if ref != "" {
		return ParseDigest(ref)
	}

	// Manifests need to be addressable by digest through Get, which means
	// they must exist as blobs. The blob store alone cannot be trusted
	// for that: a manifest may be modified outside of Ollama, and caches
	// created by older versions never stored manifests as blobs. So the
	// manifest file is read and hashed here and copied into the blob
	// store just in time.
	//
	// This should be cheap because manifests are small, and accessed
	// infrequently.
	path, err := c.manifestPath(base)
	if err != nil {
		return Digest{}, err
	}

	// At most 1 MiB of the manifest file is read.
	const maxManifestSize = 1 << 20
	content, sum, err := readAndSum(path, maxManifestSize)
	if err != nil {
		return Digest{}, err
	}

	// Ideally the "manifest" file would point at the blob file rather
	// than duplicate it, but that is not changing yet.
	if err = PutBytes(c, sum, content); err != nil {
		return Digest{}, err
	}
	return sum, nil
}
// Put stores the content read from r as the blob identified by d. The
// content must be exactly size bytes long and must hash to d.
//
// The work is delegated to copyNamedFile, which checks the size and digest of
// the data as it is written.
func (c *DiskCache) Put(d Digest, r io.Reader, size int64) error {
	dst := c.GetFile(d)
	return c.copyNamedFile(dst, r, d, size)
}
// Import imports a blob from the provided reader into the cache. It reads the
// entire content of the reader, calculates its digest, and stores it in the
// cache under that digest, which it returns.
//
// It returns an error if the temporary file cannot be created or written, if
// the number of bytes read from r differs from size, or if the temporary file
// cannot be renamed into the blob store.
//
// Import should be considered unsafe for use with untrusted data, such as data
// read from a network. The caller is responsible for ensuring the integrity of
// the data being imported.
func (c *DiskCache) Import(r io.Reader, size int64) (Digest, error) {
	// The temporary file is created in the default temp directory; users
	// that want to change it can set TMPDIR.
	f, err := os.CreateTemp("", "blob-")
	if err != nil {
		return Digest{}, err
	}
	// Deferred calls run last-in first-out, so on every return path the
	// file is closed before its temporary name is removed. After a
	// successful rename the name no longer exists and Remove is a no-op;
	// after the explicit Close below the deferred Close only returns an
	// error, which is ignored.
	defer os.Remove(f.Name())
	defer f.Close()

	// Copy the blob to the temporary file, hashing it on the way.
	h := sha256.New()
	n, err := io.Copy(f, io.TeeReader(r, h))
	if err != nil {
		return Digest{}, err
	}
	if n != size {
		return Digest{}, fmt.Errorf("blob: expected %d bytes, got %d", size, n)
	}

	// Compute the digest of what was written.
	var d Digest
	h.Sum(d.sum[:0])

	// Close explicitly so that a failure to flush the data is reported
	// before the file is moved into place.
	if err := f.Close(); err != nil {
		return Digest{}, err
	}

	// Rename the temporary file to the final file.
	name := c.GetFile(d)
	if err := os.Rename(f.Name(), name); err != nil {
		return Digest{}, err
	}
	os.Chtimes(name, c.now(), c.now()) // best effort; mainly for tests
	return d, nil
}
// Get reports the cache entry for the blob with digest d. It returns the
// error from stat-ing the blob file if that fails, and [fs.ErrNotExist] if
// the blob file exists but is empty.
func (c *DiskCache) Get(d Digest) (Entry, error) {
	info, err := os.Stat(c.GetFile(d))
	switch {
	case err != nil:
		return Entry{}, err
	case info.Size() == 0:
		// A zero-length blob file is reported as missing.
		return Entry{}, fs.ErrNotExist
	}
	return Entry{Digest: d, Size: info.Size(), Time: info.ModTime()}, nil
}
// Link makes the blob identified by d retrievable by name through
// [DiskCache.Resolve]. It does so by copying the blob's content into the
// manifest file for name, creating the manifest's parent directories as
// needed.
//
// It returns an error if the name is invalid, if the blob cannot be opened,
// or if creating the manifest file fails.
func (c *DiskCache) Link(name string, d Digest) error {
	dst, err := c.manifestPath(name)
	if err != nil {
		return err
	}

	blob, err := os.Open(c.GetFile(d))
	if err != nil {
		return err
	}
	defer blob.Close()

	// TODO(bmizerany): test this happens only if the blob was found to
	// avoid leaving debris
	if err = os.MkdirAll(filepath.Dir(dst), 0o777); err != nil {
		return err
	}

	st, err := blob.Stat()
	if err != nil {
		return err
	}

	// Copy the blob into the manifest location, verifying it against d.
	return c.copyNamedFile(dst, blob, d, st.Size())
}
// Unlink removes the manifest for name from the cache.
//
// It returns ok true if a manifest was removed. If no manifest exists for the
// name it returns ok false and a nil error. If the name is invalid or the
// removal fails for any other reason, it returns ok false and the error.
func (c *DiskCache) Unlink(name string) (ok bool, _ error) {
	manifest, err := c.manifestPath(name)
	if err != nil {
		return false, err
	}
	err = os.Remove(manifest)
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, fs.ErrNotExist):
		// Nothing to remove is not an error.
		return false, nil
	default:
		// The manifest was not removed, so ok must be false.
		return false, err
	}
}
// GetFile returns the absolute path of the file that holds, or would hold,
// the blob with digest d. The file is named "sha256-<hex>" and lives in the
// cache's "blobs" directory. GetFile does not check that the file exists.
//
// The returned path should not be stored, used outside the lifetime of the
// cache, or interpreted in any way.
func (c *DiskCache) GetFile(d Digest) string {
	return absJoin(c.dir, "blobs", fmt.Sprintf("sha256-%x", d.sum))
}
// Links returns a sequence of link names, in the order produced by links.
// Each manifest path is converted to its name form with pathToName; the
// resulting names are not guaranteed to be valid, so callers should validate
// them before use. If listing fails, the sequence yields a single error and
// stops.
func (c *DiskCache) Links() iter.Seq2[string, error] {
	return func(yield func(string, error) bool) {
		c.links()(func(path string, err error) bool {
			if err != nil {
				yield("", err)
				return false
			}
			return yield(pathToName(path), nil)
		})
	}
}
// pathToName converts a manifest path to a name. It is the inverse of
// nameToPath. The path is assumed to be in filepath.ToSlash format.
//
// A leading "manifests/" is removed and the last '/' is replaced with ':',
// so "manifests/h/n/m/t" becomes "h/n/m:t". A '/' at index 0 is never
// replaced, and a path without any other '/' is returned as is. All other
// bytes are preserved exactly.
func pathToName(s string) string {
	s = strings.TrimPrefix(s, "manifests/")
	if i := strings.LastIndexByte(s, '/'); i > 0 {
		return s[:i] + ":" + s[i+1:]
	}
	return s
}
// manifestPath finds the first manifest file on disk that matches the given
// name using a case-insensitive comparison. If no manifest file is found, it
// returns the path where the manifest file would be if it existed.
//
// If two manifest files exist on disk that match the given name using a
// case-insensitive comparison, the first one yielded by links is returned.
//
// It returns errInvalidName (from nameToPath) if name is not a fully
// qualified name, or the error reported while listing links.
func (c *DiskCache) manifestPath(name string) (string, error) {
	np, err := nameToPath(name)
	if err != nil {
		return "", err
	}
	maybe := filepath.Join("manifests", np)
	// links yields slash-separated paths (they come from fs.Glob), while
	// filepath.Join uses the OS separator. Compare in slash form so the
	// match also works where the separator is not '/'.
	want := filepath.ToSlash(maybe)
	for l, err := range c.links() {
		if err != nil {
			return "", err
		}
		if strings.EqualFold(want, l) {
			return filepath.Join(c.dir, filepath.FromSlash(l)), nil
		}
	}
	return filepath.Join(c.dir, maybe), nil
}
// links returns a sequence of the manifest paths in the cache, relative to
// the cache directory, in the order reported by fs.Glob for the pattern
// "manifests/*/*/*/*". If the glob fails, a single error is yielded.
func (c *DiskCache) links() iter.Seq2[string, error] {
	// TODO(bmizerany): reuse empty dirnames if exist
	return func(yield func(string, error) bool) {
		matches, err := fs.Glob(os.DirFS(c.dir), "manifests/*/*/*/*")
		if err != nil {
			yield("", err)
			return
		}
		for i := 0; i < len(matches); i++ {
			if !yield(matches[i], nil) {
				return
			}
		}
	}
}
// checkWriter is an io.Writer that forwards data to an underlying writer
// while checking that the stream does not exceed size bytes and that, once
// size bytes have been seen, the content hashes to d.
type checkWriter struct {
	size int64     // expected total number of bytes
	d    Digest    // expected digest of the complete content
	f    *os.File  // file passed to testHookBeforeFinalWrite
	h    hash.Hash // running hash of everything passed to Write
	w    io.Writer // underlying writer; set by creator
	n    int64     // number of bytes written to w so far
	err  error     // first error recorded by seterr; later Writes return it
	// testHookBeforeFinalWrite, if non-nil, is called with f after the
	// hash check passes and before the final bytes are written to w.
	testHookBeforeFinalWrite func(*os.File)
}
// seterr remembers err as the writer's sticky error unless one has already
// been recorded, and returns err unchanged so it can be used in a return
// statement.
func (w *checkWriter) seterr(err error) error {
	if w.err != nil {
		return err
	}
	w.err = err
	return err
}
// Write writes p to the underlying hash and writer. The last write to the
// underlying writer is guaranteed to be the last byte of p as verified by the
// hash.
//
// It returns an error, and writes nothing to the underlying writer, if a
// previous Write failed, if p would push the total past the expected size, or
// if the total reaches the expected size and the hash does not match the
// expected digest.
func (w *checkWriter) Write(p []byte) (int, error) {
	// Once an error has been recorded every later Write fails with it.
	if w.err != nil {
		return 0, w.err
	}
	// Hash before writing so the digest can be checked before the final
	// bytes reach the underlying writer.
	_, err := w.h.Write(p)
	if err != nil {
		return 0, w.seterr(err)
	}
	nextSize := w.n + int64(len(p))
	if nextSize == w.size {
		// last write. check hash.
		sum := w.h.Sum(nil)
		if !bytes.Equal(sum, w.d.sum[:]) {
			return 0, w.seterr(fmt.Errorf("file content changed underfoot"))
		}
		if w.testHookBeforeFinalWrite != nil {
			w.testHookBeforeFinalWrite(w.f)
		}
	}
	// Refuse data beyond the expected size.
	if nextSize > w.size {
		return 0, w.seterr(fmt.Errorf("content exceeds expected size: %d > %d", nextSize, w.size))
	}
	n, err := w.w.Write(p)
	w.n += int64(n)
	return n, w.seterr(err)
}
// copyNamedFile copies file into name, expecting it to have the given Digest
// and size, if that file is not present already.
//
// If name already exists with exactly size bytes, nothing is copied and nil
// is returned. Otherwise the content is streamed through a checkWriter, and
// on a copy error or a short read the destination is truncated to zero
// length before the error is returned.
//
// NOTE(review): the same-size shortcut does not compare content. Link also
// routes manifest files through this function, so relinking a name to a
// different blob of identical size looks like it would be skipped — confirm.
func (c *DiskCache) copyNamedFile(name string, file io.Reader, out Digest, size int64) error {
	info, err := os.Stat(name)
	if err == nil && info.Size() == size {
		// File already exists with correct size. This is good enough.
		// We can skip expensive hash checks.
		//
		// TODO: Do the hash check, but give caller a way to skip it.
		return nil
	}
	// Copy file to cache directory.
	mode := os.O_RDWR | os.O_CREATE
	if err == nil && info.Size() > size { // shouldn't happen but fix in case
		mode |= os.O_TRUNC
	}
	f, err := os.OpenFile(name, mode, 0o666)
	if err != nil {
		return err
	}
	// Safety net for the early returns; the success path also closes f
	// explicitly below so that a close error can be acted on.
	defer f.Close()
	if size == 0 {
		// File now exists with correct size.
		// Only one possible zero-length file, so contents are OK too.
		// Early return here makes sure there's a "last byte" for code below.
		return nil
	}
	// From here on, if any of the I/O writing the file fails,
	// we make a best-effort attempt to truncate the file f
	// before returning, to avoid leaving bad bytes in the file.
	// Copy file to f, but also into h to double-check hash.
	cw := &checkWriter{
		d:                        out,
		size:                     size,
		h:                        sha256.New(),
		f:                        f,
		w:                        f,
		testHookBeforeFinalWrite: c.testHookBeforeFinalWrite,
	}
	n, err := io.Copy(cw, file)
	if err != nil {
		f.Truncate(0)
		return err
	}
	// The source ended before size bytes were seen.
	if n < size {
		f.Truncate(0)
		return io.ErrUnexpectedEOF
	}
	if err := f.Close(); err != nil {
		// Data might not have been written,
		// but file may look like it is the right size.
		// To be extra careful, remove cached file.
		os.Remove(name)
		return err
	}
	os.Chtimes(name, c.now(), c.now()) // mainly for tests
	return nil
}
// splitNameDigest splits s at its last '@' into the name before it and the
// digest after it. If s contains no '@', the whole of s is the name and the
// digest is empty.
func splitNameDigest(s string) (name, digest string) {
	if at := strings.LastIndex(s, "@"); at >= 0 {
		return s[:at], s[at+1:]
	}
	return s, ""
}
// errInvalidName is returned by nameToPath for names that are not fully
// qualified.
var errInvalidName = errors.New("invalid name")

// nameToPath converts a fully qualified name into the relative path
// <host>/<namespace>/<model>/<tag>, joined with the OS path separator. It
// returns errInvalidName if the parsed name is not fully qualified.
func nameToPath(name string) (_ string, err error) {
	parsed := names.Parse(name)
	if parsed.IsFullyQualified() {
		return filepath.Join(parsed.Host(), parsed.Namespace(), parsed.Model(), parsed.Tag()), nil
	}
	return "", errInvalidName
}
// absJoin joins the path elements and returns the result as an absolute
// path. It panics if the absolute path cannot be determined.
func absJoin(pp ...string) string {
	joined := filepath.Join(pp...)
	abs, err := filepath.Abs(joined)
	if err == nil {
		return abs
	}
	panic(err) // this should never happen
}

688
server/internal/cache/blob/cache_test.go vendored Normal file
View File

@@ -0,0 +1,688 @@
package blob
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"slices"
"strings"
"testing"
"time"
"github.com/ollama/ollama/server/internal/testutil"
)
// init sets the package-level debug flag when the test binary starts, which
// makes debugger return a recording function instead of a no-op.
func init() {
	debug = true
}
// epoch is the fixed starting time (2021-01-01 00:00:00 UTC) used as the
// cache clock in these tests. The IsZero check guards against the value
// accidentally being the zero time.
var epoch = func() time.Time {
	d := time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC)
	if d.IsZero() {
		panic("time zero")
	}
	return d
}()
// TestOpenErrors checks that Open succeeds for a fresh temporary directory
// and fails with the expected message for an empty name and for a path that
// is not a directory (the test executable itself).
func TestOpenErrors(t *testing.T) {
	exe, err := os.Executable()
	if err != nil {
		panic(err)
	}
	// err is a substring expected in the error; empty means success.
	cases := []struct {
		dir string
		err string
	}{
		{t.TempDir(), ""},
		{"", "empty directory name"},
		{exe, "not a directory"},
	}
	for _, tt := range cases {
		t.Run(tt.dir, func(t *testing.T) {
			_, err := Open(tt.dir)
			if tt.err == "" {
				if err != nil {
					t.Fatal(err)
				}
				return
			}
			if err == nil {
				t.Fatal("expected error")
			}
			if !strings.Contains(err.Error(), tt.err) {
				t.Fatalf("err = %v, want %q", err, tt.err)
			}
		})
	}
}
// TestGetFile checks that, for a cache opened with the relative directory
// ".", GetFile returns a path that is clean, absolute, and located under the
// cache directory.
func TestGetFile(t *testing.T) {
	t.Chdir(t.TempDir())
	c, err := Open(".")
	if err != nil {
		t.Fatal(err)
	}
	d := mkdigest("1")
	got := c.GetFile(d)
	cleaned := filepath.Clean(got)
	if cleaned != got {
		t.Fatalf("got is unclean: %q", got)
	}
	if !filepath.IsAbs(got) {
		t.Fatal("got is not absolute")
	}
	abs, _ := filepath.Abs(c.dir)
	if !strings.HasPrefix(got, abs) {
		t.Fatalf("got is not local to %q", c.dir)
	}
}
// TestBasic walks through the basic cache operations on a fresh cache:
// resolving invalid, unknown, and digest-qualified names, getting missing
// blobs, rejecting a Put whose content does not match its digest, and
// checking that repeating a successful Put does not change the entry's time.
func TestBasic(t *testing.T) {
	c, err := Open(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	// Drive the cache clock from a local variable the test can advance.
	now := epoch
	c.now = func() time.Time { return now }
	checkEntry := entryChecker(t, c)
	checkFailed := func(err error) {
		if err == nil {
			t.Helper()
			t.Fatal("expected error")
		}
	}
	_, err = c.Resolve("invalid")
	checkFailed(err)
	_, err = c.Resolve("h/n/m:t")
	checkFailed(err)
	// A reference with an explicit digest resolves to that digest.
	dx := mkdigest("x")
	d, err := c.Resolve(fmt.Sprintf("h/n/m:t@%s", dx))
	if err != nil {
		t.Fatal(err)
	}
	if d != dx {
		t.Fatalf("d = %v, want %v", d, dx)
	}
	_, err = c.Get(Digest{})
	checkFailed(err)
	// not committed yet
	_, err = c.Get(dx)
	checkFailed(err)
	// Content that does not hash to dx is rejected.
	err = PutBytes(c, dx, "!")
	checkFailed(err)
	err = PutBytes(c, dx, "x")
	if err != nil {
		t.Fatal(err)
	}
	checkEntry(dx, 1, now)
	t0 := now
	now = now.Add(1*time.Hour + 1*time.Minute)
	err = PutBytes(c, dx, "x")
	if err != nil {
		t.Fatal(err)
	}
	// check not updated
	checkEntry(dx, 1, t0)
}
// sleepFunc advances a test clock by d and returns the new time.
type sleepFunc func(d time.Duration) time.Time

// openTester opens a cache in a fresh temporary directory whose clock starts
// at epoch, and returns it together with a function that moves that clock
// forward. Calling the function with 0 reports the current test time.
func openTester(t *testing.T) (*DiskCache, sleepFunc) {
	t.Helper()
	c, err := Open(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	clock := epoch
	c.now = func() time.Time { return clock }
	advance := func(d time.Duration) time.Time {
		clock = clock.Add(d)
		return clock
	}
	return c, advance
}
// TestManifestPath checks that linking the same name to the same blob a
// second time, after the test clock has advanced, leaves the manifest file's
// modification time at the time of the first link.
func TestManifestPath(t *testing.T) {
	check := testutil.Checker(t)
	c, sleep := openTester(t)
	d1 := mkdigest("1")
	err := PutBytes(c, d1, "1")
	check(err)
	err = c.Link("h/n/m:t", d1)
	check(err)
	t0 := sleep(0)
	sleep(1 * time.Hour)
	err = c.Link("h/n/m:t", d1) // nop expected
	check(err)
	file := must(c.manifestPath("h/n/m:t"))
	info, err := os.Stat(file)
	check(err)
	testutil.CheckTime(t, info.ModTime(), t0)
}
// TestManifestExistsWithoutBlob writes a manifest file directly, without a
// corresponding blob, and checks that Resolve returns the digest of the
// manifest's content and that the content can then be found with Get.
func TestManifestExistsWithoutBlob(t *testing.T) {
	t.Chdir(t.TempDir())
	check := testutil.Checker(t)
	c, err := Open(".")
	check(err)
	checkEntry := entryChecker(t, c)
	// Place the manifest on disk by hand, bypassing Link.
	man := must(c.manifestPath("h/n/m:t"))
	os.MkdirAll(filepath.Dir(man), 0o777)
	testutil.WriteFile(t, man, "1")
	got, err := c.Resolve("h/n/m:t")
	check(err)
	want := mkdigest("1")
	if got != want {
		t.Fatalf("got = %v, want %v", got, want)
	}
	e, err := c.Get(got)
	check(err)
	checkEntry(got, 1, e.Time)
}
// TestPut exercises Put with valid content and with a series of failures
// (wrong content, a reader that errors, a mismatched size, a bad final byte,
// and a file made read-only just before the final write). After each failure
// it checks that the blob is reported as not existing.
func TestPut(t *testing.T) {
	c, sleep := openTester(t)
	check := testutil.Checker(t)
	checkEntry := entryChecker(t, c)
	d := mkdigest("hello, world")
	err := PutBytes(c, d, "hello")
	if err == nil {
		t.Fatal("expected error")
	}
	got, err := c.Get(d)
	if !errors.Is(err, fs.ErrNotExist) {
		t.Fatalf("expected error, got %v", got)
	}
	// Put a valid blob
	err = PutBytes(c, d, "hello, world")
	check(err)
	checkEntry(d, 12, sleep(0))
	// Put a blob with content that does not hash to the digest
	err = PutBytes(c, d, "hello")
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	// Put the valid blob back and check it
	err = PutBytes(c, d, "hello, world")
	check(err)
	checkEntry(d, 12, sleep(0))
	// Put a blob that errors during Read
	err = c.Put(d, &errOnBangReader{s: "!"}, 1)
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	// Put valid blob back and check it
	err = PutBytes(c, d, "hello, world")
	check(err)
	checkEntry(d, 12, sleep(0))
	// Put a blob with mismatched size
	err = c.Put(d, strings.NewReader("hello, world"), 11)
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	// Final byte does not match the digest (testing commit phase)
	err = PutBytes(c, d, "hello, world$")
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	// Swap the destination for an empty, read-only handle right before the
	// final write so that the write itself fails.
	reset := c.setTestHookBeforeFinalWrite(func(f *os.File) {
		// change mode to read-only
		f.Truncate(0)
		f.Chmod(0o400)
		f.Close()
		f1, err := os.OpenFile(f.Name(), os.O_RDONLY, 0)
		if err != nil {
			t.Fatal(err)
		}
		t.Cleanup(func() { f1.Close() })
		*f = *f1
	})
	defer reset()
	err = PutBytes(c, d, "hello, world")
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	reset()
}
// TestImport checks that Import returns the digest of the imported content
// and stores it as a blob, and that importing the same content a second time
// yields the same digest and entry.
func TestImport(t *testing.T) {
	c, _ := openTester(t)
	checkEntry := entryChecker(t, c)
	want := mkdigest("x")
	got, err := c.Import(strings.NewReader("x"), 1)
	if err != nil {
		t.Fatal(err)
	}
	if want != got {
		t.Fatalf("digest = %v, want %v", got, want)
	}
	checkEntry(want, 1, epoch)
	// Import the same content again.
	got, err = c.Import(strings.NewReader("x"), 1)
	if err != nil {
		t.Fatal(err)
	}
	if want != got {
		t.Fatalf("digest = %v, want %v", got, want)
	}
	checkEntry(want, 1, epoch)
}
// setTestHookBeforeFinalWrite installs h as the cache's
// testHookBeforeFinalWrite and returns a function that puts the previous
// hook back.
func (c *DiskCache) setTestHookBeforeFinalWrite(h func(*os.File)) (reset func()) {
	prev := c.testHookBeforeFinalWrite
	reset = func() { c.testHookBeforeFinalWrite = prev }
	c.testHookBeforeFinalWrite = h
	return reset
}
// TestPutGetZero checks that a blob whose file has been truncated to zero
// length after a successful Put is reported by Get as fs.ErrNotExist.
func TestPutGetZero(t *testing.T) {
	c, sleep := openTester(t)
	check := testutil.Checker(t)
	checkEntry := entryChecker(t, c)
	d := mkdigest("x")
	err := PutBytes(c, d, "x")
	check(err)
	checkEntry(d, 1, sleep(0))
	// Empty the blob file behind the cache's back.
	err = os.Truncate(c.GetFile(d), 0)
	check(err)
	_, err = c.Get(d)
	if !errors.Is(err, fs.ErrNotExist) {
		t.Fatalf("err = %v, want fs.ErrNotExist", err)
	}
}
// TestPutZero checks that a Put with a declared size of zero succeeds
// without consuming the reader's content, and that the resulting blob is
// reported as not existing.
func TestPutZero(t *testing.T) {
	c, _ := openTester(t)
	d := mkdigest("x")
	err := c.Put(d, strings.NewReader("x"), 0) // size == 0 (not size of content)
	testutil.Check(t, err)
	checkNotExists(t, c, d)
}
// TestCommit checks Link and Resolve together: linking to a missing blob
// fails with fs.ErrNotExist, a linked name resolves to its blob's digest,
// relinking the name to a different blob replaces the manifest content, and
// relinking to the same blob leaves the manifest's modification time alone.
func TestCommit(t *testing.T) {
	check := testutil.Checker(t)
	c, err := Open(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	checkEntry := entryChecker(t, c)
	now := epoch
	c.now = func() time.Time { return now }
	d1 := mkdigest("1")
	// The blob has not been stored yet, so the link must fail.
	err = c.Link("h/n/m:t", d1)
	if !errors.Is(err, fs.ErrNotExist) {
		t.Fatalf("err = %v, want fs.ErrNotExist", err)
	}
	err = PutBytes(c, d1, "1")
	check(err)
	err = c.Link("h/n/m:t", d1)
	check(err)
	got, err := c.Resolve("h/n/m:t")
	check(err)
	if got != d1 {
		t.Fatalf("d = %v, want %v", got, d1)
	}
	// commit again, more than 1 byte
	d2 := mkdigest("22")
	err = PutBytes(c, d2, "22")
	check(err)
	err = c.Link("h/n/m:t", d2)
	check(err)
	checkEntry(d2, 2, now)
	filename := must(c.manifestPath("h/n/m:t"))
	data, err := os.ReadFile(filename)
	check(err)
	if string(data) != "22" {
		t.Fatalf("data = %q, want %q", data, "22")
	}
	t0 := now
	now = now.Add(1 * time.Hour)
	err = c.Link("h/n/m:t", d2) // same contents; nop
	check(err)
	info, err := os.Stat(filename)
	check(err)
	testutil.CheckTime(t, info.ModTime(), t0)
}
// TestManifestInvalidBlob checks that Link fails when the blob is missing,
// and that it fails with an "underfoot" error when the blob file's content
// has been replaced with data that no longer matches its digest.
func TestManifestInvalidBlob(t *testing.T) {
	c, _ := openTester(t)
	d := mkdigest("1")
	err := c.Link("h/n/m:t", d)
	if err == nil {
		t.Fatal("expected error")
	}
	checkNotExists(t, c, d)
	err = PutBytes(c, d, "1")
	testutil.Check(t, err)
	// Corrupt the stored blob behind the cache's back.
	err = os.WriteFile(c.GetFile(d), []byte("invalid"), 0o666)
	if err != nil {
		t.Fatal(err)
	}
	err = c.Link("h/n/m:t", d)
	// Guard against a nil error so a regression fails the test instead of
	// panicking on err.Error().
	if err == nil {
		t.Fatalf("err = nil, want error to contain %q", "underfoot")
	}
	if !strings.Contains(err.Error(), "underfoot") {
		t.Fatalf("err = %v, want error to contain %q", err, "underfoot")
	}
}
// TestManifestNameReuse runs testManifestNameReuse twice: once with the
// default temporary directory and once after useCaseInsensitiveTempDir has
// had the chance to redirect TMPDIR or skip the subtest.
//
// NOTE(review): the subtest labels look swapped relative to what each one
// does (the helper returns early when the temp dir is case-sensitive) —
// confirm the intended naming.
func TestManifestNameReuse(t *testing.T) {
	t.Run("case-insensitive", func(t *testing.T) {
		// This should run on all file system types.
		testManifestNameReuse(t)
	})
	t.Run("case-sensitive", func(t *testing.T) {
		useCaseInsensitiveTempDir(t)
		testManifestNameReuse(t)
	})
}
// testManifestNameReuse checks that names differing only in case share one
// manifest: linking "H/N/M:T" after "h/n/m:t" reuses the existing file, both
// spellings resolve to the most recently linked digest, and after an Unlink
// a new link is stored using the casing of the new name.
func testManifestNameReuse(t *testing.T) {
	check := testutil.Checker(t)
	c, _ := openTester(t)
	d1 := mkdigest("1")
	err := PutBytes(c, d1, "1")
	check(err)
	err = c.Link("h/n/m:t", d1)
	check(err)
	d2 := mkdigest("22")
	err = PutBytes(c, d2, "22")
	check(err)
	// Same name in a different case; expected to replace the first link.
	err = c.Link("H/N/M:T", d2)
	check(err)
	var g [2]Digest
	g[0], err = c.Resolve("h/n/m:t")
	check(err)
	g[1], err = c.Resolve("H/N/M:T")
	check(err)
	w := [2]Digest{d2, d2}
	if g != w {
		t.Fatalf("g = %v, want %v", g, w)
	}
	var got []string
	for l, err := range c.links() {
		if err != nil {
			t.Fatal(err)
		}
		got = append(got, l)
	}
	want := []string{"manifests/h/n/m/t"}
	if !slices.Equal(got, want) {
		t.Fatalf("got = %v, want %v", got, want)
	}
	// relink with different case
	unlinked, err := c.Unlink("h/n/m:t")
	check(err)
	if !unlinked {
		t.Fatal("expected unlinked")
	}
	err = c.Link("h/n/m:T", d1)
	check(err)
	got = got[:0]
	for l, err := range c.links() {
		if err != nil {
			t.Fatal(err)
		}
		got = append(got, l)
	}
	// we should have only one link that is same case as the last link
	want = []string{"manifests/h/n/m/T"}
	if !slices.Equal(got, want) {
		t.Fatalf("got = %v, want %v", got, want)
	}
}
// TestManifestFile checks manifestPath against a table of names. A non-empty
// want is the expected path relative to the cache directory (in slash form);
// an empty want means an error is expected.
func TestManifestFile(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"", ""},
		// valid names
		{"h/n/m:t", "/manifests/h/n/m/t"},
		{"hh/nn/mm:tt", "/manifests/hh/nn/mm/tt"},
		{"%/%/%/%", ""},
		// already a path
		{"h/n/m/t", ""},
		// refs are not names
		{"h/n/m:t@sha256-1", ""},
		{"m@sha256-1", ""},
		{"n/m:t@sha256-1", ""},
	}
	c, _ := openTester(t)
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			got, err := c.manifestPath(tt.in)
			if err != nil && tt.want != "" {
				t.Fatalf("unexpected error: %v", err)
			}
			if err == nil && tt.want == "" {
				t.Fatalf("expected error")
			}
			// Compare in slash form with the cache directory stripped.
			dir := filepath.ToSlash(c.dir)
			got = filepath.ToSlash(got)
			got = strings.TrimPrefix(got, dir)
			if got != tt.want {
				t.Fatalf("got = %q, want %q", got, tt.want)
			}
		})
	}
}
// TestNames checks that Links yields the names of all linked manifests, in
// name form and in the expected order.
func TestNames(t *testing.T) {
	c, _ := openTester(t)
	check := testutil.Checker(t)
	check(PutBytes(c, mkdigest("1"), "1"))
	check(PutBytes(c, mkdigest("2"), "2"))
	check(c.Link("h/n/m:t", mkdigest("1")))
	check(c.Link("h/n/m:u", mkdigest("2")))
	var got []string
	for l, err := range c.Links() {
		if err != nil {
			t.Fatal(err)
		}
		got = append(got, l)
	}
	want := []string{"h/n/m:t", "h/n/m:u"}
	if !slices.Equal(got, want) {
		t.Fatalf("got = %v, want %v", got, want)
	}
}
// mkdigest returns the Digest holding the SHA-256 sum of s.
func mkdigest(s string) Digest {
	sum := sha256.Sum256([]byte(s))
	return Digest{sum}
}
// checkNotExists fails the test unless Get reports fs.ErrNotExist for d.
func checkNotExists(t *testing.T, c *DiskCache, d Digest) {
	t.Helper()
	if _, err := c.Get(d); !errors.Is(err, fs.ErrNotExist) {
		t.Fatalf("err = %v, want fs.ErrNotExist", err)
	}
}
// entryChecker returns a function that verifies, in a subtest, that the
// cache entry for a digest has the given size and time, and that the blob
// file on disk agrees. If the subtest fails, the cache contents are dumped
// to the test log.
func entryChecker(t *testing.T, c *DiskCache) func(Digest, int64, time.Time) {
	t.Helper()
	return func(d Digest, size int64, mod time.Time) {
		t.Helper()
		t.Run("checkEntry:"+d.String(), func(t *testing.T) {
			t.Helper()
			defer func() {
				if t.Failed() {
					dumpCacheContents(t, c)
				}
			}()
			e, err := c.Get(d)
			// Get reports empty blobs as missing; tolerate that when an
			// empty blob is what the caller expects.
			if size == 0 && errors.Is(err, fs.ErrNotExist) {
				err = nil
			}
			if err != nil {
				t.Fatal(err)
			}
			if e.Digest != d {
				t.Errorf("e.Digest = %v, want %v", e.Digest, d)
			}
			if e.Size != size {
				t.Fatalf("e.Size = %v, want %v", e.Size, size)
			}
			testutil.CheckTime(t, e.Time, mod)
			// Cross-check the entry against the file itself.
			info, err := os.Stat(c.GetFile(d))
			if err != nil {
				t.Fatal(err)
			}
			if info.Size() != size {
				t.Fatalf("info.Size = %v, want %v", info.Size(), size)
			}
			testutil.CheckTime(t, info.ModTime(), mod)
		})
	}
}
// must returns v when err is nil and panics with err otherwise.
func must[T any](v T, err error) T {
	if err == nil {
		return v
	}
	panic(err)
}
// TestNameToPath checks that a fully qualified name converts without error.
func TestNameToPath(t *testing.T) {
	if _, err := nameToPath("h/n/m:t"); err != nil {
		t.Fatal(err)
	}
}
// errOnBangReader is an io.Reader over s that yields one byte per Read and
// fails with a "bang" error when the next byte is '!'.
type errOnBangReader struct {
	s string // content to read
	n int    // index of the next byte of s to return
}

// Read copies the next byte of e.s into p[0]. It returns io.ErrShortBuffer
// if p is empty, io.EOF once all of e.s has been consumed, and an error with
// the message "bang" when the next byte is '!'.
func (e *errOnBangReader) Read(p []byte) (int, error) {
	if len(p) < 1 {
		return 0, io.ErrShortBuffer
	}
	// End of input is decided by the source string, not by the size of
	// the caller's buffer.
	if e.n >= len(e.s) {
		return 0, io.EOF
	}
	if e.s[e.n] == '!' {
		return 0, errors.New("bang")
	}
	p[0] = e.s[e.n]
	e.n++
	return 1, nil
}
// dumpCacheContents logs one line per file and directory under the cache
// directory, showing its mode, size, modification time, and path. Errors
// encountered while walking stop the walk and are not reported.
func dumpCacheContents(t *testing.T, c *DiskCache) {
	t.Helper()
	var b strings.Builder
	fsys := os.DirFS(c.dir)
	fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		t.Helper()
		if err != nil {
			return err
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		// Format like ls:
		//
		// ; ls -la
		// drwxr-xr-x 224 Jan 13 14:22 blob/sha256-123
		// drwxr-xr-x 224 Jan 13 14:22 manifest/h/n/m
		fmt.Fprintf(&b, " %s % 4d %s %s\n",
			info.Mode(),
			info.Size(),
			info.ModTime().Format("Jan 2 15:04"),
			path,
		)
		return nil
	})
	t.Log()
	t.Logf("cache contents:\n%s", b.String())
}

View File

@@ -0,0 +1,93 @@
package blob
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
)
// isCaseSensitive reports whether dir treats file names that differ only in
// case as different files. It probes by creating "_casecheck" in dir and
// checking whether "_CASECHECK" then appears to exist; the probe file is
// removed when the function returns. It panics if either name already exists
// or if the probe file cannot be written.
func isCaseSensitive(dir string) bool {
	lower := filepath.Join(dir, "_casecheck")
	upper := filepath.Join(dir, "_CASECHECK")
	defer os.Remove(lower)

	present := func(p string) bool {
		_, err := os.Stat(p)
		return err == nil
	}
	for _, p := range []string{lower, upper} {
		if present(p) {
			panic(fmt.Sprintf("_casecheck already exists in %q; remove and try again.", dir))
		}
	}
	if err := os.WriteFile(lower, nil, 0o666); err != nil {
		panic(err)
	}
	// On a case-insensitive file system the upper-case name resolves to
	// the file just created.
	return !present(upper)
}
// isCI reports whether the CI environment variable is set to a non-empty
// value.
func isCI() bool {
	ci := os.Getenv("CI")
	return len(ci) > 0
}
// volumeHint is the message used by useCaseInsensitiveTempDir when it skips
// a test outside of CI because no suitable temporary directory was found.
const volumeHint = `
Unable to locate case-insensitive TMPDIR on darwin.
To run tests, create the case-insensitive volume /Volumes/data:
$ sudo diskutil apfs addVolume disk1 APFSX data -mountpoint /Volumes/data
or run with:
CI=1 go test ./...
`
// useCaseInsensitiveTempDir makes sure the test runs with a temporary
// directory for which isCaseSensitive reports true, or skips the test.
//
// If the default temporary directory already qualifies, it returns true. On
// darwin, if the volume /Volumes/data exists, TMPDIR is pointed at its "tmp"
// subdirectory and true is returned. Otherwise the test is skipped, except
// in CI on platforms other than darwin, where false is returned and the test
// is left to run.
func useCaseInsensitiveTempDir(t *testing.T) bool {
	if isCaseSensitive(os.TempDir()) {
		// Use the default temp dir if it is already case-sensitive.
		return true
	}
	if runtime.GOOS == "darwin" {
		// If darwin, check for the special case-sensitive volume and
		// use it if available.
		const volume = "/Volumes/data"
		_, err := os.Stat(volume)
		if err == nil {
			tmpdir := filepath.Join(volume, "tmp")
			os.MkdirAll(tmpdir, 0o700)
			t.Setenv("TMPDIR", tmpdir)
			return true
		}
		if isCI() {
			// Special case darwin in CI; it is not case-sensitive
			// by default, and we will be testing other platforms
			// that are case-sensitive, so we'll have the test
			// being skipped covered there.
			t.Skip("Skipping test in CI for darwin; TMPDIR is not case-insensitive.")
		}
	}
	if !isCI() {
		// Require devs to always tests with a case-insensitive TMPDIR.
		// TODO(bmizerany): Print platform-specific instructions or
		// link to docs on that topic.
		//
		// t.Skip ends the test as soon as it is called, so the whole
		// hint has to be passed in a single call; skipping line by line
		// would only ever log the first (empty) line.
		t.Skip(strings.TrimSpace(volumeHint))
	}
	return false
}

73
server/internal/cache/blob/chunked.go vendored Normal file
View File

@@ -0,0 +1,73 @@
package blob
import (
"crypto/sha256"
"errors"
"io"
"os"
)
// Chunk is a byte range within a blob. Size counts both Start and End, so
// the range is inclusive at both ends.
type Chunk struct {
	Start int64
	End   int64
}

// Size returns the number of bytes in the chunk, End - Start + 1.
func (c Chunk) Size() int64 {
	span := c.End - c.Start
	return span + 1
}
// Chunker writes to a blob in chunks.
// Its zero value is invalid. Use [DiskCache.Chunked] to create a new Chunker.
type Chunker struct {
	digest Digest   // digest of the complete blob, as passed to Chunked
	size   int64    // size of the complete blob, as passed to Chunked
	f      *os.File // nil means pre-validated
}
// Chunked returns a Chunker for storing the blob with digest d and the given
// total size piece by piece. If the blob file already exists with exactly
// that size, the returned Chunker has no file attached and its Put calls do
// nothing. Otherwise the blob file is opened for writing, being created if
// necessary.
//
// Use [Chunker.Put] to write data to the blob at specific offsets.
func (c *DiskCache) Chunked(d Digest, size int64) (*Chunker, error) {
	path := c.GetFile(d)
	if st, err := os.Stat(path); err == nil && st.Size() == size {
		return &Chunker{}, nil
	}
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0o666)
	if err != nil {
		return nil, err
	}
	return &Chunker{digest: d, size: size, f: f}, nil
}
// Put copies chunk.Size() bytes from r into the blob, starting at offset
// chunk.Start. The bytes are checked against digest d as they are written.
// If r ends before chunk.Size() bytes have been read, Put returns
// io.ErrUnexpectedEOF; any other error is returned as is. On a Chunker with
// no file attached Put does nothing and returns nil.
func (c *Chunker) Put(chunk Chunk, d Digest, r io.Reader) error {
	if c.f == nil {
		return nil
	}
	want := chunk.Size()
	cw := &checkWriter{
		d:    d,
		size: want,
		h:    sha256.New(),
		f:    c.f,
		w:    io.NewOffsetWriter(c.f, chunk.Start),
	}
	if _, err := io.CopyN(cw, r, want); err != nil {
		if errors.Is(err, io.EOF) {
			return io.ErrUnexpectedEOF
		}
		return err
	}
	return nil
}
// Close closes the underlying file and returns any error from doing so. A
// Chunker with no file attached (the pre-validated case returned by
// [DiskCache.Chunked]) has nothing to close, so Close returns nil.
func (c *Chunker) Close() error {
	if c.f == nil {
		return nil
	}
	return c.f.Close()
}

99
server/internal/cache/blob/digest.go vendored Normal file
View File

@@ -0,0 +1,99 @@
package blob
import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"slices"
"strings"
)
// ErrInvalidDigest is returned by ParseDigest for input that is not a valid
// digest string.
var ErrInvalidDigest = errors.New("invalid digest")

// Digest is a blob identifier that is the SHA-256 hash of a blob's content.
//
// It is comparable and can be used as a map key.
type Digest struct {
	sum [32]byte
}

// ParseDigest parses a digest from a string or byte slice. If the input is
// not a valid digest it returns the zero Digest and ErrInvalidDigest.
//
// The input may be in one of two forms:
//
//   - ("sha256-<hex>"), where <hex> is a 64-character hexadecimal string.
//   - ("sha256:<hex>"), where <hex> is a 64-character hexadecimal string.
//
// The [Digest.String] method will return the canonical form of the
// digest, "sha256:<hex>".
func ParseDigest[S ~[]byte | ~string](v S) (Digest, error) {
	text := string(v)
	sep := strings.IndexAny(text, ":-")
	if sep < 0 {
		return Digest{}, ErrInvalidDigest
	}
	algo, hexsum := text[:sep], text[sep+1:]
	if algo != "sha256" || len(hexsum) != 2*sha256.Size {
		return Digest{}, ErrInvalidDigest
	}
	var parsed Digest
	if _, err := hex.Decode(parsed.sum[:], []byte(hexsum)); err != nil {
		return Digest{}, ErrInvalidDigest
	}
	return parsed, nil
}

// DigestFromBytes returns the Digest of v, i.e. its SHA-256 sum.
func DigestFromBytes[S ~[]byte | ~string](v S) Digest {
	return Digest{sum: sha256.Sum256([]byte(v))}
}

// String returns the string representation of the digest in the conventional
// form "sha256:<hex>".
func (d Digest) String() string {
	return fmt.Sprintf("sha256:%x", d.sum[:])
}

// Short returns the first four bytes of the digest as eight hexadecimal
// characters, without any prefix.
func (d Digest) Short() string {
	prefix := d.sum[:4]
	return fmt.Sprintf("%x", prefix)
}

// Sum returns a copy of the raw 32-byte sum.
func (d Digest) Sum() [32]byte {
	return d.sum
}

// Compare orders digests by their raw bytes. It returns a negative number, zero,
// or a positive number when d sorts before, equal to, or after other.
func (d Digest) Compare(other Digest) int {
	return slices.Compare(d.sum[:], other.sum[:])
}

// IsValid reports whether the digest is anything other than the zero Digest.
func (d Digest) IsValid() bool {
	var zero Digest
	return d != zero
}

// MarshalText implements the encoding.TextMarshaler interface. It returns the
// same text as [Digest.String] and never returns an error.
func (d Digest) MarshalText() ([]byte, error) {
	text := d.String()
	return []byte(text), nil
}

// UnmarshalText implements the encoding.TextUnmarshaler interface, and only
// works for a zero digest. If [Digest.IsValid] returns true, it returns an
// error. It also returns an error if text cannot be parsed by ParseDigest.
func (d *Digest) UnmarshalText(text []byte) error {
	if d.IsValid() {
		return errors.New("digest: illegal UnmarshalText on valid digest")
	}
	parsed, err := ParseDigest(string(text))
	if err != nil {
		return err
	}
	*d = parsed
	return nil
}

View File

@@ -0,0 +1,63 @@
package blob
import (
"encoding/json"
"testing"
)
// TestParseDigest checks ParseDigest against a table of inputs. Valid inputs
// must parse and render in the canonical "sha256:<hex>" form; invalid inputs
// must be rejected with an error.
func TestParseDigest(t *testing.T) {
	cases := []struct {
		in    string
		valid bool
	}{
		{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", true},
		{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", true},
		// too short
		{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde", false},
		{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde", false},
		// too long
		{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0", false},
		{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0", false},
		// invalid prefix
		{"sha255-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
		{"sha255:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
		{"sha256!0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
		// invalid hex
		{"sha256-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", false},
		{"sha256:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", false},
	}
	for _, tt := range cases {
		got, err := ParseDigest(tt.in)
		if !tt.valid {
			// Invalid inputs must actually be rejected.
			if err == nil {
				t.Errorf("ParseDigest(%q) = %v, nil; want error", tt.in, got)
			}
			continue
		}
		if err != nil {
			t.Errorf("ParseDigest(%q) = %v, %v; want valid", tt.in, got, err)
		}
		// Every input starts with a 7-byte "sha256" prefix plus separator.
		want := "sha256:" + tt.in[7:]
		if got.String() != want {
			t.Errorf("ParseDigest(%q).String() = %q, want %q", tt.in, got.String(), want)
		}
	}
}
// TestDigestMarshalText checks the JSON round trip of a Digest: the
// "sha256-<hex>" form is accepted on input, output uses the canonical
// "sha256:<hex>" form, and an unparsable string is rejected.
func TestDigestMarshalText(t *testing.T) {
	const s = `"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"`
	var d Digest
	if err := json.Unmarshal([]byte(s), &d); err != nil {
		t.Errorf("json.Unmarshal: %v", err)
	}
	out, err := json.Marshal(d)
	if err != nil {
		t.Errorf("json.Marshal: %v", err)
	}
	want := `"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"`
	if string(out) != want {
		t.Errorf("json.Marshal: got %s, want %s", out, want)
	}
	if err := json.Unmarshal([]byte(`"invalid"`), &Digest{}); err == nil {
		t.Errorf("json.Unmarshal: expected error")
	}
}