ollama source for Momentry Core verification
This commit is contained in:
544
server/internal/cache/blob/cache.go
vendored
Normal file
544
server/internal/cache/blob/cache.go
vendored
Normal file
@@ -0,0 +1,544 @@
|
||||
// Package blob implements a content-addressable disk cache for blobs and
|
||||
// manifests.
|
||||
package blob
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"io/fs"
|
||||
"iter"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/server/internal/internal/names"
|
||||
)
|
||||
|
||||
// Entry contains metadata about a blob in the cache.
|
||||
type Entry struct {
|
||||
Digest Digest
|
||||
Size int64
|
||||
Time time.Time // when added to the cache
|
||||
}
|
||||
|
||||
// DiskCache caches blobs and manifests on disk.
//
// The cache is rooted at a directory, which is created if it does not exist.
//
// Blobs are stored in the "blobs" subdirectory, and manifests are stored in the
// "manifests" subdirectory. An example directory structure might look like:
//
//	<dir>/
//	  blobs/
//	    sha256-<digest> - <blob data>
//	  manifests/
//	    <host>/
//	      <namespace>/
//	        <name>/
//	          <tag> - <manifest data>
//
// Name casing is preserved in the cache, but is not significant when resolving
// names. For example, "Foo" and "foo" are considered the same name.
//
// The cache is not fully safe for concurrent use. It is meant to tolerate
// concurrent writes of the same blob but does not prevent duplicated effort.
// Because blobs are immutable, duplicate writes should result in the same
// file being written to disk.
//
// NOTE(review): no locking is visible in this file; confirm the intended
// concurrency guarantee before relying on it.
type DiskCache struct {
	// dir is the top-level directory under which the "blobs" and
	// "manifests" subdirectories live.
	dir string

	// now reports the current time; Open sets it to time.Now. It is used
	// to stamp files after they are written.
	now func() time.Time

	// testHookBeforeFinalWrite, when non-nil, is handed to the checkWriter
	// created by copyNamedFile and called just before the final write.
	testHookBeforeFinalWrite func(f *os.File)
}
|
||||
|
||||
// PutBytes is a convenience function for c.Put(d, strings.NewReader(s), int64(len(s))).
|
||||
func PutBytes[S string | []byte](c *DiskCache, d Digest, data S) error {
|
||||
return c.Put(d, bytes.NewReader([]byte(data)), int64(len(data)))
|
||||
}
|
||||
|
||||
// Open opens a cache rooted at the given directory. If the directory does not
|
||||
// exist, it is created. If the directory is not a directory, an error is
|
||||
// returned.
|
||||
func Open(dir string) (*DiskCache, error) {
|
||||
if dir == "" {
|
||||
return nil, errors.New("blob: empty directory name")
|
||||
}
|
||||
|
||||
info, err := os.Stat(dir)
|
||||
if err == nil && !info.IsDir() {
|
||||
return nil, fmt.Errorf("%q is not a directory", dir)
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0o777); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
subdirs := []string{"blobs", "manifests"}
|
||||
for _, subdir := range subdirs {
|
||||
if err := os.MkdirAll(filepath.Join(dir, subdir), 0o777); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(bmizerany): support shards
|
||||
c := &DiskCache{
|
||||
dir: dir,
|
||||
now: time.Now,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func readAndSum(filename string, limit int64) (data []byte, _ Digest, err error) {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, Digest{}, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
h := sha256.New()
|
||||
r := io.TeeReader(f, h)
|
||||
data, err = io.ReadAll(io.LimitReader(r, limit))
|
||||
if err != nil {
|
||||
return nil, Digest{}, err
|
||||
}
|
||||
var d Digest
|
||||
h.Sum(d.sum[:0])
|
||||
return data, d, nil
|
||||
}
|
||||
|
||||
//lint:ignore U1000 used for debugging purposes as needed in tests
var debug = false

// debugger returns a recorder for the steps taken on the way to an error.
// Each call with a non-empty step appends it to an in-order trail. Calling
// the recorder with an empty string while *err is non-nil wraps *err with
// the trail recorded so far.
//
// When debug is false the returned function does nothing.
//
//lint:ignore U1000 used for debugging purposes as needed in tests
func debugger(err *error) func(step string) {
	if !debug {
		return func(string) {}
	}

	const maxSteps = 100
	var trail []string
	return func(step string) {
		if step == "" && *err != nil {
			*err = fmt.Errorf("%q: %w", trail, *err)
			return
		}
		trail = append(trail, step)
		if excess := len(trail) - maxSteps; excess > 0 {
			// drop the oldest entries in case a bug records a flood of steps
			trail = trail[:copy(trail, trail[excess:])]
		}
	}
}
|
||||
|
||||
// Resolve resolves a name to a digest. The name is expected to
|
||||
// be in either of the following forms:
|
||||
//
|
||||
// @<digest>
|
||||
// <name>@<digest>
|
||||
// <name>
|
||||
//
|
||||
// If a digest is provided, it is returned as is and nothing else happens.
|
||||
//
|
||||
// If a name is provided for a manifest that exists in the cache, the digest
|
||||
// of the manifest is returned. If there is no manifest in the cache, it
|
||||
// returns [fs.ErrNotExist].
|
||||
//
|
||||
// To cover the case where a manifest may change without the cache knowing
|
||||
// (e.g. it was reformatted or modified by hand), the manifest data read and
|
||||
// hashed is passed to a PutBytes call to ensure that the manifest is in the
|
||||
// blob store. This is done to ensure that future calls to [Get] succeed in
|
||||
// these cases.
|
||||
func (c *DiskCache) Resolve(name string) (Digest, error) {
|
||||
name, digest := splitNameDigest(name)
|
||||
if digest != "" {
|
||||
return ParseDigest(digest)
|
||||
}
|
||||
|
||||
// We want to address manifests files by digest using Get. That requires
|
||||
// them to be blobs. This cannot be directly accomplished by looking in
|
||||
// the blob store because manifests can change without Ollama knowing
|
||||
// (e.g. a user modifies a manifests by hand then pushes it to update
|
||||
// their model). We also need to support the blob caches inherited from
|
||||
// older versions of Ollama, which do not store manifests in the blob
|
||||
// store, so for these cases, we need to handle adding the manifests to
|
||||
// the blob store, just in time.
|
||||
//
|
||||
// So now we read the manifests file, hash it, and copy it to the blob
|
||||
// store if it's not already there.
|
||||
//
|
||||
// This should be cheap because manifests are small, and accessed
|
||||
// infrequently.
|
||||
file, err := c.manifestPath(name)
|
||||
if err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
|
||||
data, d, err := readAndSum(file, 1<<20)
|
||||
if err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
|
||||
// Ideally we'd read the "manifest" file as a manifest to the blob file,
|
||||
// but we are not changing this yet, so copy the manifest to the blob
|
||||
// store so it can be addressed by digest subsequent calls to Get.
|
||||
if err := PutBytes(c, d, data); err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Put writes a new blob to the cache, identified by its digest. The operation
|
||||
// reads content from r, which must precisely match both the specified size and
|
||||
// digest.
|
||||
//
|
||||
// Concurrent write safety is achieved through file locking. The implementation
|
||||
// guarantees write integrity by enforcing size limits and content validation
|
||||
// before allowing the file to reach its final state.
|
||||
func (c *DiskCache) Put(d Digest, r io.Reader, size int64) error {
|
||||
return c.copyNamedFile(c.GetFile(d), r, d, size)
|
||||
}
|
||||
|
||||
// Import imports a blob from the provided reader into the cache. It reads the
|
||||
// entire content of the reader, calculates its digest, and stores it in the
|
||||
// cache.
|
||||
//
|
||||
// Import should be considered unsafe for use with untrusted data, such as data
|
||||
// read from a network. The caller is responsible for ensuring the integrity of
|
||||
// the data being imported.
|
||||
func (c *DiskCache) Import(r io.Reader, size int64) (Digest, error) {
|
||||
// users that want to change the temp dir can set TEMPDIR.
|
||||
f, err := os.CreateTemp("", "blob-")
|
||||
if err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
defer os.Remove(f.Name())
|
||||
|
||||
// Copy the blob to a temporary file.
|
||||
h := sha256.New()
|
||||
r = io.TeeReader(r, h)
|
||||
n, err := io.Copy(f, r)
|
||||
if err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
if n != size {
|
||||
return Digest{}, fmt.Errorf("blob: expected %d bytes, got %d", size, n)
|
||||
}
|
||||
|
||||
// Check the digest.
|
||||
var d Digest
|
||||
h.Sum(d.sum[:0])
|
||||
if err := f.Close(); err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
name := c.GetFile(d)
|
||||
// Rename the temporary file to the final file.
|
||||
if err := os.Rename(f.Name(), name); err != nil {
|
||||
return Digest{}, err
|
||||
}
|
||||
os.Chtimes(name, c.now(), c.now()) // mainly for tests
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Get retrieves a blob from the cache using the provided digest. The operation
|
||||
// fails if the digest is malformed or if any errors occur during blob
|
||||
// retrieval.
|
||||
func (c *DiskCache) Get(d Digest) (Entry, error) {
|
||||
name := c.GetFile(d)
|
||||
info, err := os.Stat(name)
|
||||
if err != nil {
|
||||
return Entry{}, err
|
||||
}
|
||||
if info.Size() == 0 {
|
||||
return Entry{}, fs.ErrNotExist
|
||||
}
|
||||
return Entry{
|
||||
Digest: d,
|
||||
Size: info.Size(),
|
||||
Time: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Link creates a symbolic reference in the cache that maps the provided name
|
||||
// to a blob identified by its digest, making it retrievable by name using
|
||||
// [Resolve].
|
||||
//
|
||||
// It returns an error if either the name or digest is invalid, or if link
|
||||
// creation encounters any issues.
|
||||
func (c *DiskCache) Link(name string, d Digest) error {
|
||||
manifest, err := c.manifestPath(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(c.GetFile(d), os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// TODO(bmizerany): test this happens only if the blob was found to
|
||||
// avoid leaving debris
|
||||
if err := os.MkdirAll(filepath.Dir(manifest), 0o777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
info, err := f.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy manifest to cache directory.
|
||||
return c.copyNamedFile(manifest, f, d, info.Size())
|
||||
}
|
||||
|
||||
// Unlink unlinks the manifest by name from the cache. If the name is not
|
||||
// found. If a manifest is removed ok will be true, otherwise false. If an
|
||||
// error occurs, it returns ok false, and the error.
|
||||
func (c *DiskCache) Unlink(name string) (ok bool, _ error) {
|
||||
manifest, err := c.manifestPath(name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
err = os.Remove(manifest)
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return false, nil
|
||||
}
|
||||
return true, err
|
||||
}
|
||||
|
||||
// GetFile returns the absolute path to the file, in the cache, for the given
|
||||
// digest. It does not check if the file exists.
|
||||
//
|
||||
// The returned path should not be stored, used outside the lifetime of the
|
||||
// cache, or interpreted in any way.
|
||||
func (c *DiskCache) GetFile(d Digest) string {
|
||||
filename := fmt.Sprintf("sha256-%x", d.sum)
|
||||
return absJoin(c.dir, "blobs", filename)
|
||||
}
|
||||
|
||||
// Links returns a sequence of link names. The sequence is in lexical order.
|
||||
// Names are converted from their relative path form to their name form but are
|
||||
// not guaranteed to be valid. Callers should validate the names before using.
|
||||
func (c *DiskCache) Links() iter.Seq2[string, error] {
|
||||
return func(yield func(string, error) bool) {
|
||||
for path, err := range c.links() {
|
||||
if err != nil {
|
||||
yield("", err)
|
||||
return
|
||||
}
|
||||
if !yield(pathToName(path), nil) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pathToName converts a path to a name. It is the inverse of nameToPath. The
// path is assumed to be in filepath.ToSlash format.
//
// A leading "manifests/" is dropped and the last '/' of what remains is
// replaced with ':'. A path whose only slash is its first byte, or that has
// no slash at all, is returned as is.
func pathToName(s string) string {
	s = strings.TrimPrefix(s, "manifests/")
	// '/' is a single byte, so a byte search is enough and the rest of the
	// string is carried over untouched.
	if i := strings.LastIndexByte(s, '/'); i > 0 {
		return s[:i] + ":" + s[i+1:]
	}
	return s
}
|
||||
|
||||
// manifestPath finds the first manifest file on disk that matches the given
|
||||
// name using a case-insensitive comparison. If no manifest file is found, it
|
||||
// returns the path where the manifest file would be if it existed.
|
||||
//
|
||||
// If two manifest files exists on disk that match the given name using a
|
||||
// case-insensitive comparison, the one that sorts first, lexically, is
|
||||
// returned.
|
||||
func (c *DiskCache) manifestPath(name string) (string, error) {
|
||||
np, err := nameToPath(name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
maybe := filepath.Join("manifests", np)
|
||||
for l, err := range c.links() {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if strings.EqualFold(maybe, l) {
|
||||
return filepath.Join(c.dir, l), nil
|
||||
}
|
||||
}
|
||||
return filepath.Join(c.dir, maybe), nil
|
||||
}
|
||||
|
||||
// links returns a sequence of links in the cache in lexical order.
|
||||
func (c *DiskCache) links() iter.Seq2[string, error] {
|
||||
// TODO(bmizerany): reuse empty dirnames if exist
|
||||
return func(yield func(string, error) bool) {
|
||||
fsys := os.DirFS(c.dir)
|
||||
manifests, err := fs.Glob(fsys, "manifests/*/*/*/*")
|
||||
if err != nil {
|
||||
yield("", err)
|
||||
return
|
||||
}
|
||||
for _, manifest := range manifests {
|
||||
if !yield(manifest, nil) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type checkWriter struct {
|
||||
size int64
|
||||
d Digest
|
||||
f *os.File
|
||||
h hash.Hash
|
||||
|
||||
w io.Writer // underlying writer; set by creator
|
||||
n int64
|
||||
err error
|
||||
|
||||
testHookBeforeFinalWrite func(*os.File)
|
||||
}
|
||||
|
||||
func (w *checkWriter) seterr(err error) error {
|
||||
if w.err == nil {
|
||||
w.err = err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Write writes p to the underlying hash and writer. The last write to the
|
||||
// underlying writer is guaranteed to be the last byte of p as verified by the
|
||||
// hash.
|
||||
func (w *checkWriter) Write(p []byte) (int, error) {
|
||||
if w.err != nil {
|
||||
return 0, w.err
|
||||
}
|
||||
|
||||
_, err := w.h.Write(p)
|
||||
if err != nil {
|
||||
return 0, w.seterr(err)
|
||||
}
|
||||
nextSize := w.n + int64(len(p))
|
||||
if nextSize == w.size {
|
||||
// last write. check hash.
|
||||
sum := w.h.Sum(nil)
|
||||
if !bytes.Equal(sum, w.d.sum[:]) {
|
||||
return 0, w.seterr(fmt.Errorf("file content changed underfoot"))
|
||||
}
|
||||
if w.testHookBeforeFinalWrite != nil {
|
||||
w.testHookBeforeFinalWrite(w.f)
|
||||
}
|
||||
}
|
||||
if nextSize > w.size {
|
||||
return 0, w.seterr(fmt.Errorf("content exceeds expected size: %d > %d", nextSize, w.size))
|
||||
}
|
||||
n, err := w.w.Write(p)
|
||||
w.n += int64(n)
|
||||
return n, w.seterr(err)
|
||||
}
|
||||
|
||||
// copyNamedFile copies file into name, expecting it to have the given Digest
|
||||
// and size, if that file is not present already.
|
||||
func (c *DiskCache) copyNamedFile(name string, file io.Reader, out Digest, size int64) error {
|
||||
info, err := os.Stat(name)
|
||||
if err == nil && info.Size() == size {
|
||||
// File already exists with correct size. This is good enough.
|
||||
// We can skip expensive hash checks.
|
||||
//
|
||||
// TODO: Do the hash check, but give caller a way to skip it.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Copy file to cache directory.
|
||||
mode := os.O_RDWR | os.O_CREATE
|
||||
if err == nil && info.Size() > size { // shouldn't happen but fix in case
|
||||
mode |= os.O_TRUNC
|
||||
}
|
||||
f, err := os.OpenFile(name, mode, 0o666)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
if size == 0 {
|
||||
// File now exists with correct size.
|
||||
// Only one possible zero-length file, so contents are OK too.
|
||||
// Early return here makes sure there's a "last byte" for code below.
|
||||
return nil
|
||||
}
|
||||
|
||||
// From here on, if any of the I/O writing the file fails,
|
||||
// we make a best-effort attempt to truncate the file f
|
||||
// before returning, to avoid leaving bad bytes in the file.
|
||||
|
||||
// Copy file to f, but also into h to double-check hash.
|
||||
cw := &checkWriter{
|
||||
d: out,
|
||||
size: size,
|
||||
h: sha256.New(),
|
||||
f: f,
|
||||
w: f,
|
||||
|
||||
testHookBeforeFinalWrite: c.testHookBeforeFinalWrite,
|
||||
}
|
||||
n, err := io.Copy(cw, file)
|
||||
if err != nil {
|
||||
f.Truncate(0)
|
||||
return err
|
||||
}
|
||||
if n < size {
|
||||
f.Truncate(0)
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
// Data might not have been written,
|
||||
// but file may look like it is the right size.
|
||||
// To be extra careful, remove cached file.
|
||||
os.Remove(name)
|
||||
return err
|
||||
}
|
||||
os.Chtimes(name, c.now(), c.now()) // mainly for tests
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// splitNameDigest splits s at its last '@' into the name before it and the
// digest after it. Without an '@', the whole of s is the name and the
// digest is empty.
func splitNameDigest(s string) (name, digest string) {
	at := strings.LastIndex(s, "@")
	if at == -1 {
		return s, ""
	}
	name, digest = s[:at], s[at+1:]
	return name, digest
}
|
||||
|
||||
var errInvalidName = errors.New("invalid name")
|
||||
|
||||
func nameToPath(name string) (_ string, err error) {
|
||||
n := names.Parse(name)
|
||||
if !n.IsFullyQualified() {
|
||||
return "", errInvalidName
|
||||
}
|
||||
return filepath.Join(n.Host(), n.Namespace(), n.Model(), n.Tag()), nil
|
||||
}
|
||||
|
||||
// absJoin joins the path elements pp and returns the result as an absolute
// path. It panics if the absolute path cannot be determined.
func absJoin(pp ...string) string {
	joined := filepath.Join(pp...)
	abs, err := filepath.Abs(joined)
	if err == nil {
		return abs
	}
	panic(err) // this should never happen
}
|
||||
688
server/internal/cache/blob/cache_test.go
vendored
Normal file
688
server/internal/cache/blob/cache_test.go
vendored
Normal file
@@ -0,0 +1,688 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/server/internal/testutil"
|
||||
)
|
||||
|
||||
func init() {
|
||||
debug = true
|
||||
}
|
||||
|
||||
var epoch = func() time.Time {
|
||||
d := time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
if d.IsZero() {
|
||||
panic("time zero")
|
||||
}
|
||||
return d
|
||||
}()
|
||||
|
||||
func TestOpenErrors(t *testing.T) {
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
dir string
|
||||
err string
|
||||
}{
|
||||
{t.TempDir(), ""},
|
||||
{"", "empty directory name"},
|
||||
{exe, "not a directory"},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.dir, func(t *testing.T) {
|
||||
_, err := Open(tt.dir)
|
||||
if tt.err == "" {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), tt.err) {
|
||||
t.Fatalf("err = %v, want %q", err, tt.err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetFile(t *testing.T) {
|
||||
t.Chdir(t.TempDir())
|
||||
|
||||
c, err := Open(".")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
d := mkdigest("1")
|
||||
got := c.GetFile(d)
|
||||
cleaned := filepath.Clean(got)
|
||||
if cleaned != got {
|
||||
t.Fatalf("got is unclean: %q", got)
|
||||
}
|
||||
if !filepath.IsAbs(got) {
|
||||
t.Fatal("got is not absolute")
|
||||
}
|
||||
abs, _ := filepath.Abs(c.dir)
|
||||
if !strings.HasPrefix(got, abs) {
|
||||
t.Fatalf("got is not local to %q", c.dir)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasic(t *testing.T) {
|
||||
c, err := Open(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
now := epoch
|
||||
c.now = func() time.Time { return now }
|
||||
|
||||
checkEntry := entryChecker(t, c)
|
||||
checkFailed := func(err error) {
|
||||
if err == nil {
|
||||
t.Helper()
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
_, err = c.Resolve("invalid")
|
||||
checkFailed(err)
|
||||
|
||||
_, err = c.Resolve("h/n/m:t")
|
||||
checkFailed(err)
|
||||
|
||||
dx := mkdigest("x")
|
||||
|
||||
d, err := c.Resolve(fmt.Sprintf("h/n/m:t@%s", dx))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if d != dx {
|
||||
t.Fatalf("d = %v, want %v", d, dx)
|
||||
}
|
||||
|
||||
_, err = c.Get(Digest{})
|
||||
checkFailed(err)
|
||||
|
||||
// not committed yet
|
||||
_, err = c.Get(dx)
|
||||
checkFailed(err)
|
||||
|
||||
err = PutBytes(c, dx, "!")
|
||||
checkFailed(err)
|
||||
|
||||
err = PutBytes(c, dx, "x")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkEntry(dx, 1, now)
|
||||
|
||||
t0 := now
|
||||
now = now.Add(1*time.Hour + 1*time.Minute)
|
||||
err = PutBytes(c, dx, "x")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// check not updated
|
||||
checkEntry(dx, 1, t0)
|
||||
}
|
||||
|
||||
type sleepFunc func(d time.Duration) time.Time
|
||||
|
||||
func openTester(t *testing.T) (*DiskCache, sleepFunc) {
|
||||
t.Helper()
|
||||
c, err := Open(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
now := epoch
|
||||
c.now = func() time.Time { return now }
|
||||
return c, func(d time.Duration) time.Time {
|
||||
now = now.Add(d)
|
||||
return now
|
||||
}
|
||||
}
|
||||
|
||||
func TestManifestPath(t *testing.T) {
|
||||
check := testutil.Checker(t)
|
||||
|
||||
c, sleep := openTester(t)
|
||||
|
||||
d1 := mkdigest("1")
|
||||
err := PutBytes(c, d1, "1")
|
||||
check(err)
|
||||
|
||||
err = c.Link("h/n/m:t", d1)
|
||||
check(err)
|
||||
|
||||
t0 := sleep(0)
|
||||
sleep(1 * time.Hour)
|
||||
err = c.Link("h/n/m:t", d1) // nop expected
|
||||
check(err)
|
||||
|
||||
file := must(c.manifestPath("h/n/m:t"))
|
||||
info, err := os.Stat(file)
|
||||
check(err)
|
||||
testutil.CheckTime(t, info.ModTime(), t0)
|
||||
}
|
||||
|
||||
func TestManifestExistsWithoutBlob(t *testing.T) {
|
||||
t.Chdir(t.TempDir())
|
||||
|
||||
check := testutil.Checker(t)
|
||||
|
||||
c, err := Open(".")
|
||||
check(err)
|
||||
|
||||
checkEntry := entryChecker(t, c)
|
||||
|
||||
man := must(c.manifestPath("h/n/m:t"))
|
||||
os.MkdirAll(filepath.Dir(man), 0o777)
|
||||
testutil.WriteFile(t, man, "1")
|
||||
|
||||
got, err := c.Resolve("h/n/m:t")
|
||||
check(err)
|
||||
|
||||
want := mkdigest("1")
|
||||
if got != want {
|
||||
t.Fatalf("got = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
e, err := c.Get(got)
|
||||
check(err)
|
||||
checkEntry(got, 1, e.Time)
|
||||
}
|
||||
|
||||
func TestPut(t *testing.T) {
|
||||
c, sleep := openTester(t)
|
||||
|
||||
check := testutil.Checker(t)
|
||||
checkEntry := entryChecker(t, c)
|
||||
|
||||
d := mkdigest("hello, world")
|
||||
err := PutBytes(c, d, "hello")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
|
||||
got, err := c.Get(d)
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
t.Fatalf("expected error, got %v", got)
|
||||
}
|
||||
|
||||
// Put a valid blob
|
||||
err = PutBytes(c, d, "hello, world")
|
||||
check(err)
|
||||
checkEntry(d, 12, sleep(0))
|
||||
|
||||
// Put a blob with content that does not hash to the digest
|
||||
err = PutBytes(c, d, "hello")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
|
||||
// Put the valid blob back and check it
|
||||
err = PutBytes(c, d, "hello, world")
|
||||
check(err)
|
||||
checkEntry(d, 12, sleep(0))
|
||||
|
||||
// Put a blob that errors during Read
|
||||
err = c.Put(d, &errOnBangReader{s: "!"}, 1)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
|
||||
// Put valid blob back and check it
|
||||
err = PutBytes(c, d, "hello, world")
|
||||
check(err)
|
||||
checkEntry(d, 12, sleep(0))
|
||||
|
||||
// Put a blob with mismatched size
|
||||
err = c.Put(d, strings.NewReader("hello, world"), 11)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
|
||||
// Final byte does not match the digest (testing commit phase)
|
||||
err = PutBytes(c, d, "hello, world$")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
|
||||
reset := c.setTestHookBeforeFinalWrite(func(f *os.File) {
|
||||
// change mode to read-only
|
||||
f.Truncate(0)
|
||||
f.Chmod(0o400)
|
||||
f.Close()
|
||||
f1, err := os.OpenFile(f.Name(), os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() { f1.Close() })
|
||||
*f = *f1
|
||||
})
|
||||
defer reset()
|
||||
|
||||
err = PutBytes(c, d, "hello, world")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
reset()
|
||||
}
|
||||
|
||||
func TestImport(t *testing.T) {
|
||||
c, _ := openTester(t)
|
||||
|
||||
checkEntry := entryChecker(t, c)
|
||||
|
||||
want := mkdigest("x")
|
||||
got, err := c.Import(strings.NewReader("x"), 1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if want != got {
|
||||
t.Fatalf("digest = %v, want %v", got, want)
|
||||
}
|
||||
checkEntry(want, 1, epoch)
|
||||
|
||||
got, err = c.Import(strings.NewReader("x"), 1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if want != got {
|
||||
t.Fatalf("digest = %v, want %v", got, want)
|
||||
}
|
||||
checkEntry(want, 1, epoch)
|
||||
}
|
||||
|
||||
func (c *DiskCache) setTestHookBeforeFinalWrite(h func(*os.File)) (reset func()) {
|
||||
old := c.testHookBeforeFinalWrite
|
||||
c.testHookBeforeFinalWrite = h
|
||||
return func() { c.testHookBeforeFinalWrite = old }
|
||||
}
|
||||
|
||||
func TestPutGetZero(t *testing.T) {
|
||||
c, sleep := openTester(t)
|
||||
|
||||
check := testutil.Checker(t)
|
||||
checkEntry := entryChecker(t, c)
|
||||
|
||||
d := mkdigest("x")
|
||||
err := PutBytes(c, d, "x")
|
||||
check(err)
|
||||
checkEntry(d, 1, sleep(0))
|
||||
|
||||
err = os.Truncate(c.GetFile(d), 0)
|
||||
check(err)
|
||||
|
||||
_, err = c.Get(d)
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
t.Fatalf("err = %v, want fs.ErrNotExist", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutZero(t *testing.T) {
|
||||
c, _ := openTester(t)
|
||||
d := mkdigest("x")
|
||||
err := c.Put(d, strings.NewReader("x"), 0) // size == 0 (not size of content)
|
||||
testutil.Check(t, err)
|
||||
checkNotExists(t, c, d)
|
||||
}
|
||||
|
||||
func TestCommit(t *testing.T) {
|
||||
check := testutil.Checker(t)
|
||||
|
||||
c, err := Open(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkEntry := entryChecker(t, c)
|
||||
|
||||
now := epoch
|
||||
c.now = func() time.Time { return now }
|
||||
|
||||
d1 := mkdigest("1")
|
||||
err = c.Link("h/n/m:t", d1)
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
t.Fatalf("err = %v, want fs.ErrNotExist", err)
|
||||
}
|
||||
|
||||
err = PutBytes(c, d1, "1")
|
||||
check(err)
|
||||
|
||||
err = c.Link("h/n/m:t", d1)
|
||||
check(err)
|
||||
|
||||
got, err := c.Resolve("h/n/m:t")
|
||||
check(err)
|
||||
if got != d1 {
|
||||
t.Fatalf("d = %v, want %v", got, d1)
|
||||
}
|
||||
|
||||
// commit again, more than 1 byte
|
||||
d2 := mkdigest("22")
|
||||
err = PutBytes(c, d2, "22")
|
||||
check(err)
|
||||
err = c.Link("h/n/m:t", d2)
|
||||
check(err)
|
||||
checkEntry(d2, 2, now)
|
||||
|
||||
filename := must(c.manifestPath("h/n/m:t"))
|
||||
data, err := os.ReadFile(filename)
|
||||
check(err)
|
||||
if string(data) != "22" {
|
||||
t.Fatalf("data = %q, want %q", data, "22")
|
||||
}
|
||||
|
||||
t0 := now
|
||||
now = now.Add(1 * time.Hour)
|
||||
err = c.Link("h/n/m:t", d2) // same contents; nop
|
||||
check(err)
|
||||
info, err := os.Stat(filename)
|
||||
check(err)
|
||||
testutil.CheckTime(t, info.ModTime(), t0)
|
||||
}
|
||||
|
||||
func TestManifestInvalidBlob(t *testing.T) {
|
||||
c, _ := openTester(t)
|
||||
d := mkdigest("1")
|
||||
err := c.Link("h/n/m:t", d)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
checkNotExists(t, c, d)
|
||||
|
||||
err = PutBytes(c, d, "1")
|
||||
testutil.Check(t, err)
|
||||
err = os.WriteFile(c.GetFile(d), []byte("invalid"), 0o666)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = c.Link("h/n/m:t", d)
|
||||
if !strings.Contains(err.Error(), "underfoot") {
|
||||
t.Fatalf("err = %v, want error to contain %q", err, "underfoot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestManifestNameReuse(t *testing.T) {
|
||||
t.Run("case-insensitive", func(t *testing.T) {
|
||||
// This should run on all file system types.
|
||||
testManifestNameReuse(t)
|
||||
})
|
||||
t.Run("case-sensitive", func(t *testing.T) {
|
||||
useCaseInsensitiveTempDir(t)
|
||||
testManifestNameReuse(t)
|
||||
})
|
||||
}
|
||||
|
||||
func testManifestNameReuse(t *testing.T) {
|
||||
check := testutil.Checker(t)
|
||||
|
||||
c, _ := openTester(t)
|
||||
|
||||
d1 := mkdigest("1")
|
||||
err := PutBytes(c, d1, "1")
|
||||
check(err)
|
||||
err = c.Link("h/n/m:t", d1)
|
||||
check(err)
|
||||
|
||||
d2 := mkdigest("22")
|
||||
err = PutBytes(c, d2, "22")
|
||||
check(err)
|
||||
err = c.Link("H/N/M:T", d2)
|
||||
check(err)
|
||||
|
||||
var g [2]Digest
|
||||
g[0], err = c.Resolve("h/n/m:t")
|
||||
check(err)
|
||||
g[1], err = c.Resolve("H/N/M:T")
|
||||
check(err)
|
||||
|
||||
w := [2]Digest{d2, d2}
|
||||
if g != w {
|
||||
t.Fatalf("g = %v, want %v", g, w)
|
||||
}
|
||||
|
||||
var got []string
|
||||
for l, err := range c.links() {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got = append(got, l)
|
||||
}
|
||||
want := []string{"manifests/h/n/m/t"}
|
||||
if !slices.Equal(got, want) {
|
||||
t.Fatalf("got = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// relink with different case
|
||||
unlinked, err := c.Unlink("h/n/m:t")
|
||||
check(err)
|
||||
if !unlinked {
|
||||
t.Fatal("expected unlinked")
|
||||
}
|
||||
err = c.Link("h/n/m:T", d1)
|
||||
check(err)
|
||||
|
||||
got = got[:0]
|
||||
for l, err := range c.links() {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got = append(got, l)
|
||||
}
|
||||
|
||||
// we should have only one link that is same case as the last link
|
||||
want = []string{"manifests/h/n/m/T"}
|
||||
if !slices.Equal(got, want) {
|
||||
t.Fatalf("got = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManifestFile(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"", ""},
|
||||
|
||||
// valid names
|
||||
{"h/n/m:t", "/manifests/h/n/m/t"},
|
||||
{"hh/nn/mm:tt", "/manifests/hh/nn/mm/tt"},
|
||||
|
||||
{"%/%/%/%", ""},
|
||||
|
||||
// already a path
|
||||
{"h/n/m/t", ""},
|
||||
|
||||
// refs are not names
|
||||
{"h/n/m:t@sha256-1", ""},
|
||||
{"m@sha256-1", ""},
|
||||
{"n/m:t@sha256-1", ""},
|
||||
}
|
||||
|
||||
c, _ := openTester(t)
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
got, err := c.manifestPath(tt.in)
|
||||
if err != nil && tt.want != "" {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if err == nil && tt.want == "" {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
dir := filepath.ToSlash(c.dir)
|
||||
got = filepath.ToSlash(got)
|
||||
got = strings.TrimPrefix(got, dir)
|
||||
if got != tt.want {
|
||||
t.Fatalf("got = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNames(t *testing.T) {
|
||||
c, _ := openTester(t)
|
||||
check := testutil.Checker(t)
|
||||
|
||||
check(PutBytes(c, mkdigest("1"), "1"))
|
||||
check(PutBytes(c, mkdigest("2"), "2"))
|
||||
|
||||
check(c.Link("h/n/m:t", mkdigest("1")))
|
||||
check(c.Link("h/n/m:u", mkdigest("2")))
|
||||
|
||||
var got []string
|
||||
for l, err := range c.Links() {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got = append(got, l)
|
||||
}
|
||||
want := []string{"h/n/m:t", "h/n/m:u"}
|
||||
if !slices.Equal(got, want) {
|
||||
t.Fatalf("got = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func mkdigest(s string) Digest {
|
||||
return Digest{sha256.Sum256([]byte(s))}
|
||||
}
|
||||
|
||||
func checkNotExists(t *testing.T, c *DiskCache, d Digest) {
|
||||
t.Helper()
|
||||
_, err := c.Get(d)
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
t.Fatalf("err = %v, want fs.ErrNotExist", err)
|
||||
}
|
||||
}
|
||||
|
||||
func entryChecker(t *testing.T, c *DiskCache) func(Digest, int64, time.Time) {
|
||||
t.Helper()
|
||||
return func(d Digest, size int64, mod time.Time) {
|
||||
t.Helper()
|
||||
t.Run("checkEntry:"+d.String(), func(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
defer func() {
|
||||
if t.Failed() {
|
||||
dumpCacheContents(t, c)
|
||||
}
|
||||
}()
|
||||
|
||||
e, err := c.Get(d)
|
||||
if size == 0 && errors.Is(err, fs.ErrNotExist) {
|
||||
err = nil
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if e.Digest != d {
|
||||
t.Errorf("e.Digest = %v, want %v", e.Digest, d)
|
||||
}
|
||||
if e.Size != size {
|
||||
t.Fatalf("e.Size = %v, want %v", e.Size, size)
|
||||
}
|
||||
|
||||
testutil.CheckTime(t, e.Time, mod)
|
||||
info, err := os.Stat(c.GetFile(d))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if info.Size() != size {
|
||||
t.Fatalf("info.Size = %v, want %v", info.Size(), size)
|
||||
}
|
||||
testutil.CheckTime(t, info.ModTime(), mod)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// must returns v when err is nil and panics with err otherwise.
func must[T any](v T, err error) T {
	if err == nil {
		return v
	}
	panic(err)
}
|
||||
|
||||
func TestNameToPath(t *testing.T) {
|
||||
_, err := nameToPath("h/n/m:t")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// errOnBangReader is an io.Reader for tests. It serves the bytes of s one at
// a time and fails with a "bang" error when the next byte to serve is '!'.
type errOnBangReader struct {
	s string // data to serve
	n int    // index in s of the next byte to serve
}

// Read copies at most one byte of s into p.
//
// It returns io.ErrShortBuffer if p is empty, io.EOF once all of s has been
// served, and a "bang" error (without consuming the byte) when the next byte
// is '!'.
func (e *errOnBangReader) Read(p []byte) (int, error) {
	if len(p) < 1 {
		return 0, io.ErrShortBuffer
	}
	// End of data is determined by the source string, not by the size of
	// the caller's buffer.
	if e.n >= len(e.s) {
		return 0, io.EOF
	}
	if e.s[e.n] == '!' {
		return 0, errors.New("bang")
	}
	p[0] = e.s[e.n]
	e.n++
	return 1, nil
}
|
||||
|
||||
func dumpCacheContents(t *testing.T, c *DiskCache) {
|
||||
t.Helper()
|
||||
|
||||
var b strings.Builder
|
||||
fsys := os.DirFS(c.dir)
|
||||
fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
|
||||
t.Helper()
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Format like ls:
|
||||
//
|
||||
// ; ls -la
|
||||
// drwxr-xr-x 224 Jan 13 14:22 blob/sha256-123
|
||||
// drwxr-xr-x 224 Jan 13 14:22 manifest/h/n/m
|
||||
|
||||
fmt.Fprintf(&b, " %s % 4d %s %s\n",
|
||||
info.Mode(),
|
||||
info.Size(),
|
||||
info.ModTime().Format("Jan 2 15:04"),
|
||||
path,
|
||||
)
|
||||
return nil
|
||||
})
|
||||
t.Log()
|
||||
t.Logf("cache contents:\n%s", b.String())
|
||||
}
|
||||
93
server/internal/cache/blob/casecheck_test.go
vendored
Normal file
93
server/internal/cache/blob/casecheck_test.go
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// isCaseSensitive reports whether the file system holding dir distinguishes
// file names by case. It probes by creating "_casecheck" in dir and looking
// it up as "_CASECHECK"; the probe file is removed before returning.
//
// It panics if either probe name already exists or the probe file cannot be
// created.
func isCaseSensitive(dir string) bool {
	lower := filepath.Join(dir, "_casecheck")
	upper := filepath.Join(dir, "_CASECHECK")

	// Registered before anything else so the probe file is removed on every
	// exit path, including panics.
	defer os.Remove(lower)

	present := func(name string) bool {
		if _, err := os.Stat(name); err != nil {
			return false
		}
		return true
	}

	for _, name := range []string{lower, upper} {
		if present(name) {
			panic(fmt.Sprintf("_casecheck already exists in %q; remove and try again.", dir))
		}
	}

	if err := os.WriteFile(lower, nil, 0o666); err != nil {
		panic(err)
	}

	// On a case-insensitive file system the upper-case name resolves to the
	// file that was just created.
	return !present(upper)
}
|
||||
|
||||
// isCI reports whether the CI environment variable is set to a non-empty
// value.
func isCI() bool {
	ci := os.Getenv("CI")
	return len(ci) > 0
}
|
||||
|
||||
const volumeHint = `
|
||||
|
||||
Unable to locate case-insensitive TMPDIR on darwin.
|
||||
|
||||
To run tests, create the case-insensitive volume /Volumes/data:
|
||||
|
||||
$ sudo diskutil apfs addVolume disk1 APFSX data -mountpoint /Volumes/data
|
||||
|
||||
or run with:
|
||||
|
||||
CI=1 go test ./...
|
||||
|
||||
`
|
||||
|
||||
// useCaseInsensitiveTempDir sets TMPDIR to a case-insensitive directory
|
||||
// can find one, otherwise it skips the test if the CI environment variable is
|
||||
// set, or GOOS is not darwin.
|
||||
func useCaseInsensitiveTempDir(t *testing.T) bool {
|
||||
if isCaseSensitive(os.TempDir()) {
|
||||
// Use the default temp dir if it is already case-sensitive.
|
||||
return true
|
||||
}
|
||||
if runtime.GOOS == "darwin" {
|
||||
// If darwin, check for the special case-sensitive volume and
|
||||
// use it if available.
|
||||
const volume = "/Volumes/data"
|
||||
_, err := os.Stat(volume)
|
||||
if err == nil {
|
||||
tmpdir := filepath.Join(volume, "tmp")
|
||||
os.MkdirAll(tmpdir, 0o700)
|
||||
t.Setenv("TMPDIR", tmpdir)
|
||||
return true
|
||||
}
|
||||
if isCI() {
|
||||
// Special case darwin in CI; it is not case-sensitive
|
||||
// by default, and we will be testing other platforms
|
||||
// that are case-sensitive, so we'll have the test
|
||||
// being skipped covered there.
|
||||
t.Skip("Skipping test in CI for darwin; TMPDIR is not case-insensitive.")
|
||||
}
|
||||
}
|
||||
|
||||
if !isCI() {
|
||||
// Require devs to always tests with a case-insensitive TMPDIR.
|
||||
|
||||
// TODO(bmizerany): Print platform-specific instructions or
|
||||
// link to docs on that topic.
|
||||
lines := strings.Split(volumeHint, "\n")
|
||||
for _, line := range lines {
|
||||
t.Skip(line)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
73
server/internal/cache/blob/chunked.go
vendored
Normal file
73
server/internal/cache/blob/chunked.go
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Chunk represents a range of bytes in a blob. Size counts both Start and
// End, so the range is inclusive at both ends.
type Chunk struct {
	Start int64
	End   int64
}

// Size returns the number of bytes in the chunk: End minus Start plus one.
func (c Chunk) Size() int64 {
	span := c.End - c.Start
	return span + 1
}
|
||||
|
||||
// Chunker writes to a blob in chunks.
|
||||
// Its zero value is invalid. Use [DiskCache.Chunked] to create a new Chunker.
|
||||
type Chunker struct {
|
||||
digest Digest
|
||||
size int64
|
||||
f *os.File // nil means pre-validated
|
||||
}
|
||||
|
||||
// Chunked returns a new Chunker, ready for use storing a blob of the given
|
||||
// size in chunks.
|
||||
//
|
||||
// Use [Chunker.Put] to write data to the blob at specific offsets.
|
||||
func (c *DiskCache) Chunked(d Digest, size int64) (*Chunker, error) {
|
||||
name := c.GetFile(d)
|
||||
info, err := os.Stat(name)
|
||||
if err == nil && info.Size() == size {
|
||||
return &Chunker{}, nil
|
||||
}
|
||||
f, err := os.OpenFile(name, os.O_CREATE|os.O_WRONLY, 0o666)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Chunker{digest: d, size: size, f: f}, nil
|
||||
}
|
||||
|
||||
// Put copies chunk.Size() bytes from r to the blob at the given offset,
|
||||
// merging the data with the existing blob. It returns an error if any. As a
|
||||
// special case, if r has less than chunk.Size() bytes, Put returns
|
||||
// io.ErrUnexpectedEOF.
|
||||
func (c *Chunker) Put(chunk Chunk, d Digest, r io.Reader) error {
|
||||
if c.f == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cw := &checkWriter{
|
||||
d: d,
|
||||
size: chunk.Size(),
|
||||
h: sha256.New(),
|
||||
f: c.f,
|
||||
w: io.NewOffsetWriter(c.f, chunk.Start),
|
||||
}
|
||||
|
||||
_, err := io.CopyN(cw, r, chunk.Size())
|
||||
if err != nil && errors.Is(err, io.EOF) {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Close closes the underlying file.
|
||||
func (c *Chunker) Close() error {
|
||||
return c.f.Close()
|
||||
}
|
||||
99
server/internal/cache/blob/digest.go
vendored
Normal file
99
server/internal/cache/blob/digest.go
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrInvalidDigest is returned by [ParseDigest] when its input is not a
// well-formed digest string.
var ErrInvalidDigest = errors.New("invalid digest")

// Digest is a blob identifier that is the SHA-256 hash of a blob's content.
//
// It is comparable and can be used as a map key.
type Digest struct {
	sum [32]byte
}

// ParseDigest parses a digest from a string or byte slice. If the input is
// not a valid digest, it returns the zero Digest and [ErrInvalidDigest].
//
// The input may be in one of two forms:
//
//   - ("sha256-<hex>"), where <hex> is a 64-character hexadecimal string.
//   - ("sha256:<hex>"), where <hex> is a 64-character hexadecimal string.
//
// The [Digest.String] method will return the canonical form of the
// digest, "sha256:<hex>".
func ParseDigest[S ~[]byte | ~string](v S) (Digest, error) {
	s := string(v)

	// The algorithm name ends at the first ':' or '-'.
	sep := strings.IndexAny(s, ":-")
	if sep < 0 || s[:sep] != "sha256" {
		return Digest{}, ErrInvalidDigest
	}

	var parsed Digest
	hexSum := s[sep+1:]
	if len(hexSum) != 2*len(parsed.sum) {
		return Digest{}, ErrInvalidDigest
	}
	if _, err := hex.Decode(parsed.sum[:], []byte(hexSum)); err != nil {
		return Digest{}, ErrInvalidDigest
	}
	return parsed, nil
}

// DigestFromBytes returns the Digest holding the SHA-256 sum of v.
func DigestFromBytes[S ~[]byte | ~string](v S) Digest {
	sum := sha256.Sum256([]byte(v))
	return Digest{sum: sum}
}

// String returns the string representation of the digest in the conventional
// form "sha256:<hex>".
func (d Digest) String() string {
	return fmt.Sprintf("sha256:%x", d.sum[:])
}

// Short returns the first four bytes of the sum as eight hexadecimal
// characters, without the "sha256:" prefix.
func (d Digest) Short() string {
	head := d.sum[:4]
	return fmt.Sprintf("%x", head)
}

// Sum returns a copy of the raw 32-byte sum.
func (d Digest) Sum() [32]byte {
	return d.sum
}

// Compare orders digests by the bytes of their sums. The result is negative,
// zero, or positive when d sorts before, equal to, or after other.
func (d Digest) Compare(other Digest) int {
	a, b := d.sum[:], other.sum[:]
	return slices.Compare(a, b)
}

// IsValid reports whether the digest is not the zero Digest. It does not
// verify that the sum is the hash of any particular content.
func (d Digest) IsValid() bool {
	var zero Digest
	return d != zero
}

// MarshalText implements the encoding.TextMarshaler interface. It encodes
// the digest in the form returned by [Digest.String] and never returns an
// error, including for the zero Digest.
func (d Digest) MarshalText() ([]byte, error) {
	text := d.String()
	return []byte(text), nil
}

// UnmarshalText implements the encoding.TextUnmarshaler interface, and only
// works for a zero digest. If [Digest.IsValid] returns true, it returns an
// error. Text that [ParseDigest] rejects leaves the digest unchanged.
func (d *Digest) UnmarshalText(text []byte) error {
	if d.IsValid() {
		return errors.New("digest: illegal UnmarshalText on valid digest")
	}
	parsed, err := ParseDigest(text)
	if err != nil {
		return err
	}
	*d = parsed
	return nil
}
|
||||
63
server/internal/cache/blob/digest_test.go
vendored
Normal file
63
server/internal/cache/blob/digest_test.go
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseDigest(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
valid bool
|
||||
}{
|
||||
{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", true},
|
||||
{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", true},
|
||||
|
||||
// too short
|
||||
{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde", false},
|
||||
{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde", false},
|
||||
|
||||
// too long
|
||||
{"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0", false},
|
||||
{"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0", false},
|
||||
|
||||
// invalid prefix
|
||||
{"sha255-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
|
||||
{"sha255:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
|
||||
{"sha256!0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", false},
|
||||
|
||||
// invalid hex
|
||||
{"sha256-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", false},
|
||||
{"sha256:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", false},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
got, err := ParseDigest(tt.in)
|
||||
if tt.valid && err != nil {
|
||||
t.Errorf("ParseDigest(%q) = %v, %v; want valid", tt.in, got, err)
|
||||
}
|
||||
want := "sha256:" + tt.in[7:]
|
||||
if tt.valid && got.String() != want {
|
||||
t.Errorf("ParseDigest(%q).String() = %q, want %q", tt.in, got.String(), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDigestMarshalText(t *testing.T) {
|
||||
const s = `"sha256-0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"`
|
||||
var d Digest
|
||||
if err := json.Unmarshal([]byte(s), &d); err != nil {
|
||||
t.Errorf("json.Unmarshal: %v", err)
|
||||
}
|
||||
out, err := json.Marshal(d)
|
||||
if err != nil {
|
||||
t.Errorf("json.Marshal: %v", err)
|
||||
}
|
||||
want := `"sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"`
|
||||
if string(out) != want {
|
||||
t.Errorf("json.Marshal: got %s, want %s", out, want)
|
||||
}
|
||||
if err := json.Unmarshal([]byte(`"invalid"`), &Digest{}); err == nil {
|
||||
t.Errorf("json.Unmarshal: expected error")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user