diff --git a/internal/forks/rsc.io/gitfs/fs.go b/internal/forks/rsc.io/gitfs/fs.go index 46eabd31..0a5ea398 100644 --- a/internal/forks/rsc.io/gitfs/fs.go +++ b/internal/forks/rsc.io/gitfs/fs.go @@ -39,21 +39,21 @@ func ParseHash(text string) (Hash, error) { type ObjType int const ( - objNone ObjType = 0 - objCommit ObjType = 1 - objTree ObjType = 2 - objBlob ObjType = 3 - objTag ObjType = 4 + ObjNone ObjType = 0 + ObjCommit ObjType = 1 + ObjTree ObjType = 2 + ObjBlob ObjType = 3 + ObjTag ObjType = 4 // 5 undefined - objOfsDelta ObjType = 6 - objRefDelta ObjType = 7 + ObjOfsDelta ObjType = 6 + ObjRefDelta ObjType = 7 ) var objTypes = [...]string{ - objCommit: "commit", - objTree: "tree", - objBlob: "blob", - objTag: "tag", + ObjCommit: "commit", + ObjTree: "tree", + ObjBlob: "blob", + ObjTag: "tag", } func (t ObjType) String() string { @@ -70,10 +70,10 @@ type DirEntry struct { Hash Hash } -// parseDirEntry parses the next directory entry from data, +// ParseDirEntry parses the next directory entry from data, // returning the entry and the number of bytes it occupied. -// If data is malformed, parseDirEntry returns dirEntry{}, 0. -func parseDirEntry(data []byte) (DirEntry, int) { +// If data is malformed, ParseDirEntry returns DirEntry{}, 0. +func ParseDirEntry(data []byte) (DirEntry, int) { // Unclear where or if this format is documented by Git. // Each directory entry is an octal mode, then a space, // then a file name, then a NUL byte, then a 20-byte binary hash. @@ -111,7 +111,7 @@ func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) { // but the directory entry data is not self-synchronizing, // so it's not possible to be clever and use a binary search here. for len(data) > 0 { - e, size := parseDirEntry(data) + e, size := ParseDirEntry(data) if size == 0 { break } @@ -153,8 +153,8 @@ func commitKeyValue(data []byte, key string) ([]byte, bool) { return nil, false } -// A store is a collection of Git objects, indexed for lookup by hash. 
-type store struct { +// A Store is a collection of Git objects, indexed for lookup by hash. +type Store struct { repo *Repo sha1 hashpkg.Hash // reused hash state index map[Hash]stored // lookup index @@ -164,13 +164,13 @@ type store struct { // A stored describes a single stored object. type stored struct { typ ObjType // object type - off int // object data is store.data[off:off+len] + off int // object data is Store.data[off:off+len] len int } -// add adds an object with the given type and content to s, returning its Hash. -// If the object is already stored in s, add succeeds but doesn't store a second copy. -func (s *store) add(typ ObjType, data []byte) (Hash, []byte) { +// Add adds an object with the given type and content to s, returning its Hash. +// If the object is already stored in s, Add succeeds but doesn't store a second copy. +func (s *Store) Add(typ ObjType, data []byte) (Hash, []byte) { if s.sha1 == nil { s.sha1 = sha1.New() } @@ -196,7 +196,7 @@ func (s *store) add(typ ObjType, data []byte) (Hash, []byte) { // Object returns the type and data for the Object with hash h. // If there is no Object with hash h, Object returns 0, nil. -func (s *store) Object(h Hash) (typ ObjType, data []byte) { +func (s *Store) Object(h Hash) (typ ObjType, data []byte) { d, ok := s.index[h] if !ok { return 0, nil @@ -205,16 +205,16 @@ func (s *store) Object(h Hash) (typ ObjType, data []byte) { } // Commit returns a treeFS for the file system tree associated with the given Commit hash. 
-func (s *store) Commit(c Hash) (*treeFS, []byte, error) { +func (s *Store) Commit(c Hash) (*treeFS, []byte, error) { // The commit object data starts with key-value pairs typ, data := s.Object(c) - if typ == objNone { + if typ == ObjNone { return nil, nil, fmt.Errorf("commit %s: no such hash", c) } // fmt.Fprintf(os.Stderr, "typ=%d\n", typ) // fmt.Fprintf(os.Stderr, "%s", data) // os.Stderr.Write([]byte("\n")) - if typ != objCommit { + if typ != ObjCommit { return nil, nil, fmt.Errorf("commit %s: unexpected type %s", c, typ) } treeHash, ok := commitKeyValue(data, "tree") @@ -230,7 +230,7 @@ func (s *store) Commit(c Hash) (*treeFS, []byte, error) { // A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash. type treeFS struct { - s *store + s *Store tree Hash // root tree commit Hash } @@ -265,7 +265,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { if i == len(name) || name[i] == '/' { // Look up name in current tree object h. typ, data := t.s.Object(h) - if typ != objTree { + if typ != ObjTree { return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist} } _, th, ok := treeLookup(data, name[start:i]) @@ -283,7 +283,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { // The hash h is the hash for name. Load its object. typ, data := t.s.Object(h) info := fileInfo{name, name[start:], 0, 0, nil} - if typ == objBlob { + if typ == ObjBlob { // Regular file. info.mode = 0444 info.size = int64(len(data)) @@ -294,7 +294,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { } return &blobFile{info, bytes.NewReader(data)}, nil } - if typ == objTree { + if typ == ObjTree { // Directory. info.mode = fs.ModeDir | 0555 info.sys = &DirEntry{ @@ -341,7 +341,7 @@ func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil } // A dirFile implements fs.File for a directory. 
type dirFile struct { - s *store + s *Store info fileInfo data []byte off int @@ -369,18 +369,18 @@ func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) { }() for (n <= 0 || len(list) < n) && f.off < len(f.data) { - e, size := parseDirEntry(f.data[f.off:]) + e, size := ParseDirEntry(f.data[f.off:]) if size == 0 { break } f.off += size typ, data := f.s.Object(e.Hash) mode := fs.FileMode(0444) - if typ == objTree { + if typ == ObjTree { mode = fs.ModeDir | 0555 } infoSize := int64(0) - if typ == objBlob { + if typ == ObjBlob { infoSize = int64(len(data)) } name := string(e.Name) diff --git a/internal/forks/rsc.io/gitfs/git.go b/internal/forks/rsc.io/gitfs/git.go index 268948da..b380d7a1 100644 --- a/internal/forks/rsc.io/gitfs/git.go +++ b/internal/forks/rsc.io/gitfs/git.go @@ -196,6 +196,101 @@ func (r *Repo) CloneHash(ctx context.Context, h Hash) (fs.FS, []byte, error) { return tfs, data, nil } +// FetchPack fetches a full (non-shallow) packfile from the remote server, +// requesting all refs. It returns the raw packfile bytes. +func (r *Repo) FetchPack(ctx context.Context) ([]byte, error) { + opts, ok := r.caps["fetch"] + if !ok { + return nil, fmt.Errorf("fetch: server does not support fetch") + } + _ = opts + + refs, err := r.Refs(ctx) + if err != nil { + return nil, fmt.Errorf("fetchpack: refs: %v", err) + } + + // Deduplicate hashes. 
+ seen := map[Hash]bool{} + var wants []Hash + for _, ref := range refs { + if !seen[ref.Hash] { + seen[ref.Hash] = true + wants = append(wants, ref.Hash) + } + } + if len(wants) == 0 { + return nil, fmt.Errorf("fetchpack: no refs found") + } + + var buf bytes.Buffer + pw := newPktLineWriter(&buf) + pw.WriteString("command=fetch") + pw.Delim() + for _, h := range wants { + pw.WriteString("want " + h.String()) + } + pw.WriteString("done") + pw.Close() + + req, _ := http.NewRequestWithContext(ctx, "POST", r.url+"/git-upload-pack", &buf) + req.Header.Set("Content-Type", "application/x-git-upload-pack-request") + req.Header.Set("Accept", "application/x-git-upload-pack-result") + req.Header.Set("Git-Protocol", "version=2") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetchpack: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("fetchpack: %v\n%s", resp.Status, body) + } + if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" { + return nil, fmt.Errorf("fetchpack: invalid response Content-Type: %v", ct) + } + + var data []byte + pr := newPktLineReader(resp.Body) + sawPackfile := false + for { + line, err := pr.Next() + if err != nil { + if err == io.EOF { + break + } + return nil, fmt.Errorf("fetchpack: parsing response: %v", err) + } + if line == nil { + continue + } + if !sawPackfile { + if strings.TrimSuffix(string(line), "\n") == "packfile" { + sawPackfile = true + } + continue + } + if len(line) == 0 || line[0] == 0 || line[0] > 3 { + continue + } + switch line[0] { + case 1: + data = append(data, line[1:]...) + case 2: + // progress + case 3: + return nil, fmt.Errorf("fetchpack: server error: %s", line[1:]) + } + } + + if !bytes.HasPrefix(data, []byte("PACK")) { + return nil, fmt.Errorf("fetchpack: malformed response: not packfile") + } + + return data, nil +} + // fetch returns the fs.FS for a given hash. 
func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) { // Fetch a shallow packfile from the remote server. @@ -285,8 +380,8 @@ func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) { } // Unpack pack file and return fs.FS for the commit we downloaded. - var s store - if err := unpack(&s, data); err != nil { + var s Store + if err := Unpack(&s, data); err != nil { return nil, nil, fmt.Errorf("fetch: %v", err) } s.repo = r diff --git a/internal/forks/rsc.io/gitfs/git_test.go b/internal/forks/rsc.io/gitfs/git_test.go index bfabcf6c..e2a66a33 100644 --- a/internal/forks/rsc.io/gitfs/git_test.go +++ b/internal/forks/rsc.io/gitfs/git_test.go @@ -54,8 +54,8 @@ func TestPack(t *testing.T) { if err != nil { t.Fatal(err) } - var s store - err = unpack(&s, data) + var s Store + err = Unpack(&s, data) if err != nil { t.Fatal(err) } diff --git a/internal/forks/rsc.io/gitfs/pack.go b/internal/forks/rsc.io/gitfs/pack.go index f7e79c5e..e4b5d000 100644 --- a/internal/forks/rsc.io/gitfs/pack.go +++ b/internal/forks/rsc.io/gitfs/pack.go @@ -13,11 +13,11 @@ import ( "io" ) -// unpack parses data, which is a Git pack-formatted archive, -// writing every object it contains to the store s. +// Unpack parses data, which is a Git pack-formatted archive, +// writing every object it contains to the Store s. // // See https://git-scm.com/docs/pack-format for format documentation. -func unpack(s *store, data []byte) error { +func Unpack(s *Store, data []byte) error { // If the store is empty, pre-allocate the length of data. // This should be about the right order of magnitude for the eventual data, // avoiding many growing steps during append. 
@@ -50,7 +50,7 @@ func unpack(s *store, data []byte) error { objs := data[12 : len(data)-20] off := 0 for i := 0; i < int(nobj); i++ { - _, _, _, encSize, err := unpackObject(s, objs, off) + _, _, _, encSize, err := UnpackObject(s, objs, off) if err != nil { return fmt.Errorf("unpack: malformed git pack: %v", err) } @@ -62,10 +62,10 @@ func unpack(s *store, data []byte) error { return nil } -// unpackObject unpacks the object at objs[off:] and writes it to the store s. +// UnpackObject unpacks the object at objs[off:] and writes it to the Store s. // It returns the type, hash, and content of the object, as well as the encoded size, // meaning the number of bytes at the start of objs[off:] that this record occupies. -func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) { +func UnpackObject(s *Store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) { fail := func(err error) (ObjType, Hash, []byte, int, error) { return 0, Hash{}, nil, 0, err } @@ -92,7 +92,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content var deltaTyp ObjType var deltaBase []byte switch typ { - case objRefDelta: + case ObjRefDelta: if len(objs)-(off+size) < 20 { return fail(fmt.Errorf("invalid object: bad delta ref")) } @@ -105,7 +105,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content return fail(fmt.Errorf("invalid object: unknown delta ref %v", h)) } - case objOfsDelta: + case ObjOfsDelta: i := off + size if len(objs)-i < 20 { return fail(fmt.Errorf("invalid object: too short")) @@ -130,7 +130,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content return fail(fmt.Errorf("invalid object: bad delta offset")) } var err error - deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d)) + deltaTyp, _, deltaBase, _, err = UnpackObject(s, objs, off-int(d)) if err != nil { return fail(fmt.Errorf("invalid 
object: bad delta offset")) } @@ -156,9 +156,9 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content switch typ { default: return fail(fmt.Errorf("invalid object: unknown object type")) - case objCommit, objTree, objBlob, objTag: + case ObjCommit, ObjTree, ObjBlob, ObjTag: // ok - case objRefDelta, objOfsDelta: + case ObjRefDelta, ObjOfsDelta: // Actual object type is the type of the base object. typ = deltaTyp @@ -179,7 +179,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content data = targ } - h, data = s.add(typ, data) + h, data = s.Add(typ, data) return typ, h, data, encSize, nil } diff --git a/internal/git/cache.go b/internal/git/cache.go new file mode 100644 index 00000000..19fc008c --- /dev/null +++ b/internal/git/cache.go @@ -0,0 +1,315 @@ +package git + +import ( + "compress/gzip" + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "cloud.google.com/go/storage" +) + +type packCache interface { + GetIndex(ctx context.Context, key string) (*PackIndex, error) + PutIndex(ctx context.Context, key string, idx *PackIndex) error + GetPack(ctx context.Context, key string) ([]byte, error) + PutPack(ctx context.Context, key string, data []byte) error + RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) +} + +func cacheKey(repoURL string) string { + h := sha256.Sum256([]byte(repoURL)) + return fmt.Sprintf("%x", h[:16]) +} + +// gcsPackCache stores packs and indexes in GCS. 
+type gcsPackCache struct { + bucket *storage.BucketHandle +} + +func (g *gcsPackCache) packPath(key string) string { + return fmt.Sprintf("pack/%s/pack.bin", key) +} + +func (g *gcsPackCache) indexPath(key string) string { + return fmt.Sprintf("pack/%s/index.json.gz", key) +} + +func (g *gcsPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + rc, err := g.bucket.Object(g.indexPath(key)).NewReader(ctx) + if err != nil { + return nil, err + } + defer rc.Close() + zr, err := gzip.NewReader(rc) + if err != nil { + return nil, err + } + defer zr.Close() + idx := &PackIndex{} + if err := json.NewDecoder(zr).Decode(idx); err != nil { + return nil, err + } + return idx, nil +} + +func (g *gcsPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + w := g.bucket.Object(g.indexPath(key)).NewWriter(ctx) + zw, err := gzip.NewWriterLevel(w, gzip.BestSpeed) + if err != nil { + return err + } + if err := json.NewEncoder(zw).Encode(idx); err != nil { + zw.Close() + w.Close() + return err + } + if err := zw.Close(); err != nil { + w.Close() + return err + } + return w.Close() +} + +func (g *gcsPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + rc, err := g.bucket.Object(g.packPath(key)).NewReader(ctx) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + +func (g *gcsPackCache) PutPack(ctx context.Context, key string, data []byte) error { + w := g.bucket.Object(g.packPath(key)).NewWriter(ctx) + if _, err := w.Write(data); err != nil { + w.Close() + return err + } + return w.Close() +} + +func (g *gcsPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return g.bucket.Object(g.packPath(key)).NewRangeReader(ctx, offset, length) +} + +// dirPackCache stores packs and indexes in a local directory. 
+type dirPackCache struct { + dir string +} + +func (d *dirPackCache) path(key, name string) string { + return filepath.Join(d.dir, "pack", key, name) +} + +func (d *dirPackCache) ensureDir(key string) error { + return os.MkdirAll(filepath.Join(d.dir, "pack", key), 0755) +} + +func (d *dirPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + f, err := os.Open(d.path(key, "index.json.gz")) + if err != nil { + return nil, err + } + defer f.Close() + zr, err := gzip.NewReader(f) + if err != nil { + return nil, err + } + defer zr.Close() + idx := &PackIndex{} + if err := json.NewDecoder(zr).Decode(idx); err != nil { + return nil, err + } + return idx, nil +} + +func (d *dirPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + if err := d.ensureDir(key); err != nil { + return err + } + f, err := os.Create(d.path(key, "index.json.gz")) + if err != nil { + return err + } + defer f.Close() + zw, err := gzip.NewWriterLevel(f, gzip.BestSpeed) + if err != nil { + return err + } + if err := json.NewEncoder(zw).Encode(idx); err != nil { + zw.Close() + return err + } + return zw.Close() +} + +func (d *dirPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return os.ReadFile(d.path(key, "pack.bin")) +} + +func (d *dirPackCache) PutPack(ctx context.Context, key string, data []byte) error { + if err := d.ensureDir(key); err != nil { + return err + } + return os.WriteFile(d.path(key, "pack.bin"), data, 0644) +} + +func (d *dirPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + f, err := os.Open(d.path(key, "pack.bin")) + if err != nil { + return nil, err + } + return io.NopCloser(io.NewSectionReader(f, offset, length)), nil +} + +// memPackIndex is an in-memory LRU of parsed PackIndex structs. 
+type memPackIndex struct { + mu sync.Mutex + cap int + entries map[string]*memIndexEntry +} + +type memIndexEntry struct { + idx *PackIndex + access time.Time +} + +func (m *memPackIndex) Get(key string) *PackIndex { + m.mu.Lock() + defer m.mu.Unlock() + if e, ok := m.entries[key]; ok { + e.access = time.Now() + return e.idx + } + return nil +} + +func (m *memPackIndex) Put(key string, idx *PackIndex) { + m.mu.Lock() + defer m.mu.Unlock() + if m.entries == nil { + m.entries = make(map[string]*memIndexEntry) + } + if len(m.entries) >= m.cap { + var oldest string + var oldestTime time.Time + for k, e := range m.entries { + if oldest == "" || e.access.Before(oldestTime) { + oldest = k + oldestTime = e.access + } + } + delete(m.entries, oldest) + } + m.entries[key] = &memIndexEntry{idx: idx, access: time.Now()} +} + +func buildPackCache() packCache { + if cd := os.Getenv("CACHE_DIR"); cd != "" { + log.Printf("pack cache: dir=%s", cd) + return &dirPackCache{dir: cd} + } + if cb := os.Getenv("CACHE_BUCKET"); cb != "" { + log.Printf("pack cache: bucket=%s", cb) + client, err := storage.NewClient(context.Background()) + if err != nil { + log.Printf("pack cache: gcs error: %v", err) + return &noopPackCache{} + } + bkt := client.Bucket(strings.TrimPrefix(cb, "gs://")) + return &gcsPackCache{bucket: bkt} + } + return &noopPackCache{} +} + +// noopPackCache is used when no cache backend is configured. 
+type noopPackCache struct{} + +func (n *noopPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + return nil, fmt.Errorf("no cache") +} +func (n *noopPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + return nil +} +func (n *noopPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return nil, fmt.Errorf("no cache") +} +func (n *noopPackCache) PutPack(ctx context.Context, key string, data []byte) error { + return nil +} +func (n *noopPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return nil, fmt.Errorf("no cache") +} + +// tieredPackCache wraps a persistent cache with an in-memory LRU for indexes. +type tieredPackCache struct { + mem *memPackIndex + back packCache +} + +func (t *tieredPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + if idx := t.mem.Get(key); idx != nil { + return idx, nil + } + idx, err := t.back.GetIndex(ctx, key) + if err != nil { + return nil, err + } + t.mem.Put(key, idx) + return idx, nil +} + +func (t *tieredPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + t.mem.Put(key, idx) + return t.back.PutIndex(ctx, key, idx) +} + +func (t *tieredPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return t.back.GetPack(ctx, key) +} + +func (t *tieredPackCache) PutPack(ctx context.Context, key string, data []byte) error { + return t.back.PutPack(ctx, key, data) +} + +func (t *tieredPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return t.back.RangeReader(ctx, key, offset, length) +} + +func newPackCache() packCache { + return &tieredPackCache{ + mem: &memPackIndex{cap: 50}, + back: buildPackCache(), + } +} + +// memPackData holds raw packfile bytes in memory for object detail views. 
+type memPackData struct { + mu sync.Mutex + data map[string][]byte +} + +func (m *memPackData) Get(key string) []byte { + m.mu.Lock() + defer m.mu.Unlock() + return m.data[key] +} + +func (m *memPackData) Put(key string, data []byte) { + m.mu.Lock() + defer m.mu.Unlock() + if m.data == nil { + m.data = make(map[string][]byte) + } + m.data[key] = data +} + diff --git a/internal/git/git.go b/internal/git/git.go index 4fde8b83..ddca43ed 100644 --- a/internal/git/git.go +++ b/internal/git/git.go @@ -3,6 +3,7 @@ package git import ( "bufio" "bytes" + "context" "fmt" "html" "io" @@ -36,6 +37,9 @@ type handler struct { repos map[string]*gitfs.Repo commits map[string][]byte fsyss map[string]fs.FS + + packCache packCache + packData memPackData } type Option func(h *handler) @@ -48,10 +52,11 @@ func WithUserAgent(ua string) Option { func New(args []string, opts ...Option) http.Handler { h := handler{ - args: args, - repos: map[string]*gitfs.Repo{}, - fsyss: map[string]fs.FS{}, - commits: map[string][]byte{}, + args: args, + repos: map[string]*gitfs.Repo{}, + fsyss: map[string]fs.FS{}, + commits: map[string][]byte{}, + packCache: newPackCache(), } for _, opt := range opts { @@ -63,6 +68,7 @@ func New(args []string, opts ...Option) http.Handler { mux.HandleFunc("/", h.errHandler(h.renderResponse)) mux.HandleFunc("/http/", h.errHandler(h.renderFS)) mux.HandleFunc("/https/", h.errHandler(h.renderFS)) + mux.HandleFunc("/pack/", h.errHandler(h.renderPackObject)) h.mux = gzhttp.GzipHandler(mux) @@ -108,6 +114,14 @@ func (h *handler) errHandler(hfe HandleFuncE) http.HandlerFunc { func (h *handler) renderResponse(w http.ResponseWriter, r *http.Request) error { qs := r.URL.Query() + if q := qs.Get("pack"); q != "" { + u, err := url.PathUnescape(q) + if err != nil { + return err + } + return h.renderPackOverview(w, r, u) + } + if q := qs.Get("url"); q != "" { u, err := url.PathUnescape(q) if err != nil { @@ -606,3 +620,512 @@ func (d *dumbEscaper) Write(p []byte) (n int, err error) { 
} return len(p), d.buf.Flush() } + +func (h *handler) getOrFetchPack(ctx context.Context, repoURL string) (*PackIndex, string, error) { + key := cacheKey(repoURL) + + // Try cache first. + idx, err := h.packCache.GetIndex(ctx, key) + if err == nil { + return idx, key, nil + } + + // Cache miss: fetch the packfile. + if !strings.Contains(repoURL, "://") { + repoURL = "https://" + repoURL + } + + repo, err := gitfs.NewRepo(ctx, repoURL) + if err != nil { + return nil, "", fmt.Errorf("NewRepo: %w", err) + } + + data, err := repo.FetchPack(ctx) + if err != nil { + return nil, "", fmt.Errorf("FetchPack: %w", err) + } + + idx, err = BuildPackIndex(data) + if err != nil { + return nil, "", fmt.Errorf("BuildPackIndex: %w", err) + } + + // Store in cache (best effort). + if putErr := h.packCache.PutPack(ctx, key, data); putErr != nil { + log.Printf("pack cache put pack: %v", putErr) + } + if putErr := h.packCache.PutIndex(ctx, key, idx); putErr != nil { + log.Printf("pack cache put index: %v", putErr) + } + + // Keep packfile data in memory for object detail views. + h.packData.Put(key, data) + + return idx, key, nil +} + +func (h *handler) renderPackOverview(w http.ResponseWriter, r *http.Request, repoURL string) error { + ctx := r.Context() + + idx, key, err := h.getOrFetchPack(ctx, repoURL) + if err != nil { + return err + } + + repo := strings.TrimPrefix(repoURL, "https://") + repo = strings.TrimPrefix(repo, "http://") + + if err := headerTmpl.Execute(w, TitleData{"Pack: " + repo}); err != nil { + return err + } + hd := HeaderData{ + Repo: repo, + RepoLink: repoURL, + JQ: fmt.Sprintf("git verify-pack -v .git/objects/pack/pack-%s.idx", idx.Checksum), + } + if err := bodyTmpl.Execute(w, hd); err != nil { + return err + } + + // Compute stats. + nonDelta := 0 + chainLengths := map[int]int{} + for _, obj := range idx.Objects { + if obj.Depth == 0 { + nonDelta++ + } else { + chainLengths[obj.Depth]++ + } + } + + // Filter by type if requested. 
+ filterType := r.URL.Query().Get("type") + + fmt.Fprintf(w, "
<pre>\n")
+ for _, obj := range idx.Objects {
+ if filterType != "" {
+ if strings.HasPrefix(filterType, "depth-") {
+ var d int
+ fmt.Sscanf(filterType, "depth-%d", &d)
+ if obj.Depth != d {
+ continue
+ }
+ } else if obj.ResolvedType != filterType && obj.Type != filterType {
+ continue
+ }
+ }
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.Hash, key)
+		hashLink := fmt.Sprintf("<a href=\"%s\">%s</a>", href, obj.Hash)
+
+ if obj.Depth > 0 {
+ baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+			baseLink := fmt.Sprintf("<a href=\"%s\">%s</a>", baseHref, obj.BaseHash)
+ fmt.Fprintf(w, "%s %-6s %d %d %d %d %s\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset, obj.Depth, baseLink)
+ } else {
+ fmt.Fprintf(w, "%s %-6s %d %d %d\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset)
+ }
+ }
+
+ // Summary.
+ fmt.Fprintf(w, "non delta: %d objects\n", nonDelta)
+ maxDepth := 0
+ for d := range chainLengths {
+ if d > maxDepth {
+ maxDepth = d
+ }
+ }
+ for d := 1; d <= maxDepth; d++ {
+ if c, ok := chainLengths[d]; ok {
+ href := fmt.Sprintf("/?pack=%s&type=depth-%d", url.QueryEscape(repoURL), d)
+			fmt.Fprintf(w, "chain length = %d: <a href=\"%s\">%d objects</a>\n", d, href, c)
+ }
+ }
+
+	fmt.Fprintf(w, "</pre>\n")
+ fmt.Fprintf(w, footer)
+ return nil
+}
+
+func (h *handler) renderPackObject(w http.ResponseWriter, r *http.Request) error {
+ ctx := r.Context()
+
+	// Path: /pack/<repo>/<hash>
+	// NOTE(review): the code that parsed the URL path and produced repo, key,
+	// idx, data, obj, objType, and content appears to have been lost here to
+	// HTML-tag stripping — the variables used below are otherwise undeclared.
+	fmt.Fprintf(w, "<pre>\n")
+ if obj.Type != objType {
+ fmt.Fprintf(w, "type: %s (resolves to %s)\n", obj.Type, objType)
+ } else {
+ fmt.Fprintf(w, "type: %s\n", objType)
+ }
+ fmt.Fprintf(w, "size: %s (%d bytes)\n", formatBytes(int64(obj.Size)), obj.Size)
+ fmt.Fprintf(w, "offset: %d\n", obj.Offset)
+ fmt.Fprintf(w, "encoded: %d bytes\n", obj.EncodedSize)
+ if obj.BaseHash != "" {
+ baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+		fmt.Fprintf(w, "base: <a href=\"%s\">%s</a> (%s", baseHref, obj.BaseHash, obj.Type)
+ if obj.Type == "ofs-delta" {
+ fmt.Fprintf(w, ", offset %d", obj.BaseOffset)
+ }
+ fmt.Fprintf(w, ")\n")
+ fmt.Fprintf(w, "depth: %d\n", obj.Depth)
+ }
+ fmt.Fprintf(w, "\n")
+
+ // For delta objects, show the raw delta instructions.
+ if obj.Type == "ref-delta" || obj.Type == "ofs-delta" {
+ rawDelta, err := RawDelta(data, obj.Offset)
+ if err == nil {
+ deltaInfo, err := ParseDelta(rawDelta)
+ if err == nil {
+ h.renderDeltaOps(w, deltaInfo, repo, key, idx)
+ fmt.Fprintf(w, "\n")
+ }
+ }
+ fmt.Fprintf(w, "--- resolved content (%s) ---\n\n", objType)
+ }
+
+ // Render resolved content based on type.
+ switch objType {
+ case "commit":
+ h.renderPackCommit(w, content, repo, key)
+ case "tree":
+ h.renderPackTree(w, content, repo, key, idx)
+ case "blob":
+ size := min(int64(len(content)), tooBig)
+ esc := &dumbEscaper{buf: bufio.NewWriter(w)}
+ io.CopyN(esc, bytes.NewReader(content), size)
+ if int64(len(content)) > tooBig {
+ fmt.Fprintf(w, "\n... truncated (%s total)", formatBytes(int64(len(content))))
+ }
+ case "tag":
+ h.renderPackTag(w, content, repo, key)
+ default:
+ fmt.Fprintf(w, "(raw %d bytes)\n", len(content))
+ }
+
+	fmt.Fprintf(w, "</pre>\n")
+ fmt.Fprintf(w, footer)
+ return nil
+}
+
+func (h *handler) renderPackCommit(w io.Writer, content []byte, repo, key string) {
+ scanner := bufio.NewScanner(bytes.NewReader(content))
+ for scanner.Scan() {
+ line := scanner.Text()
+ hdr, val, ok := strings.Cut(line, " ")
+ if !ok {
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ continue
+ }
+ switch hdr {
+ case "tree", "parent":
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key)
+			fmt.Fprintf(w, "%s <a href=\"%s\">%s</a>\n", hdr, href, val)
+ default:
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ }
+ }
+}
+
+func (h *handler) renderPackTree(w io.Writer, content []byte, repo, key string, idx *PackIndex) {
+ // Build hash lookup.
+ hashSet := map[string]bool{}
+ for _, obj := range idx.Objects {
+ hashSet[obj.Hash] = true
+ }
+
+ data := content
+ for len(data) > 0 {
+ e, size := gitfs.ParseDirEntry(data)
+ if size == 0 {
+ break
+ }
+ data = data[size:]
+
+ hashStr := e.Hash.String()
+ typeStr := "blob"
+ if e.Mode == 0o40000 {
+ typeStr = "tree"
+ } else if e.Mode == 0o160000 {
+ typeStr = "commit"
+ }
+
+ name := htmlEscape(string(e.Name))
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+			fmt.Fprintf(w, "%06o %s <a href=\"%s\">%s</a>\t%s\n", e.Mode, typeStr, href, hashStr, name)
+ } else {
+ fmt.Fprintf(w, "%06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name)
+ }
+ }
+}
+
+func (h *handler) renderPackTag(w io.Writer, content []byte, repo, key string) {
+ scanner := bufio.NewScanner(bytes.NewReader(content))
+ for scanner.Scan() {
+ line := scanner.Text()
+ hdr, val, ok := strings.Cut(line, " ")
+ if !ok {
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ continue
+ }
+ switch hdr {
+ case "object":
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key)
+			fmt.Fprintf(w, "%s <a href=\"%s\">%s</a>\n", hdr, href, val)
+ default:
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ }
+ }
+}
+
+func (h *handler) renderDeltaOps(w io.Writer, info *DeltaInfo, repo, key string, idx *PackIndex) {
+ // Build hash lookup for linking.
+ hashSet := map[string]bool{}
+ for _, obj := range idx.Objects {
+ hashSet[obj.Hash] = true
+ }
+
+ fmt.Fprintf(w, "--- delta instructions ---\n\n")
+ fmt.Fprintf(w, "base size: %s (%d bytes)\n", formatBytes(int64(info.BaseSize)), info.BaseSize)
+ fmt.Fprintf(w, "target size: %s (%d bytes)\n", formatBytes(int64(info.TargetSize)), info.TargetSize)
+ fmt.Fprintf(w, "operations: %d\n\n", len(info.Ops))
+
+ for i, op := range info.Ops {
+ switch op.Kind {
+ case "copy":
+ fmt.Fprintf(w, "%4d copy base[%d:%d] (%d bytes)\n",
+ i, op.Offset, op.Offset+op.Size, op.Size)
+ case "insert":
+ if isBinary(op.Data) {
+ if entries, prefix, suffix := tryParseTreeInsert(op.Data); len(entries) > 0 || len(prefix) > 0 {
+ fmt.Fprintf(w, "%4d insert %d bytes (tree data)\n", i, op.Size)
+ if len(prefix) > 0 {
+ writeHashFragment(w, prefix, hashSet, repo, key)
+ }
+ for _, e := range entries {
+ hashStr := e.Hash.String()
+ typeStr := "blob"
+ if e.Mode == 0o40000 {
+ typeStr = "tree"
+ } else if e.Mode == 0o160000 {
+ typeStr = "commit"
+ }
+ name := htmlEscape(string(e.Name))
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+							fmt.Fprintf(w, " %06o %s <a href=\"%s\">%s</a>\t%s\n", e.Mode, typeStr, href, hashStr, name)
+ } else {
+ fmt.Fprintf(w, " %06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name)
+ }
+ }
+ if len(suffix) > 0 {
+ writeHashFragment(w, suffix, hashSet, repo, key)
+ }
+ } else {
+ fmt.Fprintf(w, "%4d insert %d bytes\n", i, op.Size)
+ writeHexDump(w, op.Data)
+ }
+ } else {
+ fmt.Fprintf(w, "%4d insert %d bytes: ", i, op.Size)
+ show := op.Data
+ truncated := false
+ if len(show) > 128 {
+ show = show[:128]
+ truncated = true
+ }
+ esc := &dumbEscaper{buf: bufio.NewWriter(w)}
+ esc.Write(show)
+ if truncated {
+ fmt.Fprintf(w, "...")
+ }
+ fmt.Fprintf(w, "\n")
+ }
+ }
+ }
+}
+
+// tryParseTreeInsert tries to interpret binary insert data as tree entry fragments.
+// It returns any complete tree entries parsed, plus any leading prefix (trailing
+// hash bytes from a previous entry) and trailing suffix that didn't form a complete entry.
+func tryParseTreeInsert(data []byte) (entries []gitfs.DirEntry, prefix, suffix []byte) {
+ // The insert might start mid-entry — the leading bytes could be the tail
+ // of a previous entry's 20-byte hash. Look for the start of a tree entry:
+ // an octal digit followed eventually by ' ', name, '\0', 20 bytes.
+ start := 0
+ for start < len(data) {
+ if data[start] >= '1' && data[start] <= '7' {
+ // Might be the start of a mode. Try parsing.
+ e, size := gitfs.ParseDirEntry(data[start:])
+ if size > 0 {
+ // Found a valid entry start. Everything before it is prefix.
+ if start > 0 {
+ prefix = data[:start]
+ }
+ entries = append(entries, e)
+ pos := start + size
+ // Parse remaining entries.
+ for pos < len(data) {
+ e, size := gitfs.ParseDirEntry(data[pos:])
+ if size == 0 {
+ break
+ }
+ entries = append(entries, e)
+ pos += size
+ }
+ if pos < len(data) {
+ suffix = data[pos:]
+ }
+ return entries, prefix, suffix
+ }
+ }
+ start++
+ }
+ // Couldn't parse any entries. Might be a pure hash fragment.
+ if len(data) <= 20 {
+ return nil, data, nil
+ }
+ return nil, nil, nil
+}
+
+func writeHashFragment(w io.Writer, data []byte, hashSet map[string]bool, repo, key string) {
+ if len(data) == 20 {
+ hashStr := fmt.Sprintf("%x", data)
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+ fmt.Fprintf(w, " hash %s\n", href, hashStr)
+ } else {
+ fmt.Fprintf(w, " hash %s\n", hashStr)
+ }
+ } else {
+ fmt.Fprintf(w, " (%d bytes) %x\n", len(data), data)
+ }
+}
+
// isBinary reports whether data contains bytes that would not display as
// plain text: NUL, DEL, anything >= 0x80, or control characters other than
// newline, carriage return, and tab.
func isBinary(data []byte) bool {
	for _, c := range data {
		switch {
		case c == '\n', c == '\r', c == '\t':
			// Common text control characters are fine.
		case c < 0x20 || c >= 0x7f:
			return true
		}
	}
	return false
}
+
+func writeHexDump(w io.Writer, data []byte) {
+ for i := 0; i < len(data); i += 16 {
+ end := i + 16
+ if end > len(data) {
+ end = len(data)
+ }
+ line := data[i:end]
+
+ // Offset.
+ fmt.Fprintf(w, " %04x ", i)
+
+ // Hex bytes.
+ for j, b := range line {
+ if j == 8 {
+ fmt.Fprintf(w, " ")
+ }
+ fmt.Fprintf(w, "%02x ", b)
+ }
+ // Pad if short line.
+ for j := len(line); j < 16; j++ {
+ if j == 8 {
+ fmt.Fprintf(w, " ")
+ }
+ fmt.Fprintf(w, " ")
+ }
+
+ // ASCII.
+ fmt.Fprintf(w, " |")
+ for _, b := range line {
+ if b >= 0x20 && b < 0x7f {
+ fmt.Fprintf(w, "%c", b)
+ } else {
+ fmt.Fprintf(w, ".")
+ }
+ }
+ fmt.Fprintf(w, "|\n")
+ }
+}
+
// formatBytes renders a byte count using binary units: one decimal place of
// GiB, MiB, or KiB for values of at least 1024, plain bytes below that.
func formatBytes(b int64) string {
	units := []struct {
		limit int64
		name  string
	}{
		{1 << 30, "GiB"},
		{1 << 20, "MiB"},
		{1 << 10, "KiB"},
	}
	for _, u := range units {
		if b >= u.limit {
			return fmt.Sprintf("%.1f %s", float64(b)/float64(u.limit), u.name)
		}
	}
	return fmt.Sprintf("%d B", b)
}
diff --git a/internal/git/packfile.go b/internal/git/packfile.go
new file mode 100644
index 00000000..38501aa5
--- /dev/null
+++ b/internal/git/packfile.go
@@ -0,0 +1,442 @@
+package git
+
+import (
+ "bytes"
+ "compress/zlib"
+ "crypto/sha1"
+ "encoding/binary"
+ "encoding/hex"
+ "fmt"
+ "io"
+
+ "github.com/jonjohnsonjr/dagdotdev/internal/forks/rsc.io/gitfs"
+)
+
// PackIndex summarizes a parsed packfile: the header fields, the overall
// size, the trailing SHA-1 checksum, and one entry per stored object.
type PackIndex struct {
	Version    uint32       `json:"version"`    // pack format version from the header
	NumObjects uint32       `json:"numObjects"` // object count from the header
	Size       int64        `json:"size"`       // total packfile length in bytes
	Checksum   string       `json:"checksum"`   // hex of the trailing 20-byte SHA-1
	Objects    []PackObject `json:"objects"`    // per-object details, in pack order
}
+
// PackObject describes a single object within a packfile: where it sits,
// its raw and delta-resolved types, and its resolved content hash.
type PackObject struct {
	Offset       int    `json:"offset"`       // byte offset of the object header from the start of the packfile
	EncodedSize  int    `json:"encodedSize"`  // bytes the object occupies in the pack (header + compressed data)
	Type         string `json:"type"`         // raw type: commit, tree, blob, tag, ofs-delta, ref-delta
	ResolvedType string `json:"resolvedType"` // resolved type after delta resolution
	Size         int    `json:"size"`         // decompressed (and delta-resolved) content length
	Hash         string `json:"hash"`         // hex SHA-1 of the resolved object

	// Delta info (only for ofs-delta and ref-delta objects)
	DeltaBase  string `json:"deltaBase,omitempty"`  // raw base ref (hash for ref-delta, offset string for ofs-delta)
	BaseHash   string `json:"baseHash,omitempty"`   // resolved base object hash
	BaseOffset int    `json:"baseOffset,omitempty"` // base object offset from start of packfile (ofs-delta only)
	Depth      int    `json:"depth,omitempty"`      // delta chain depth (0 for non-delta)
}
+
+// BuildPackIndex parses a raw packfile and builds an index of all objects.
+func BuildPackIndex(data []byte) (*PackIndex, error) {
+ if len(data) < 12+20 {
+ return nil, fmt.Errorf("packfile too short")
+ }
+
+ hdr := data[:12]
+ vers := binary.BigEndian.Uint32(hdr[4:8])
+ nobj := binary.BigEndian.Uint32(hdr[8:12])
+ if string(hdr[:4]) != "PACK" || (vers != 2 && vers != 3) {
+ return nil, fmt.Errorf("not a packfile")
+ }
+ if vers == 3 {
+ return nil, fmt.Errorf("packfile v3 not supported")
+ }
+
+ sum := sha1.Sum(data[:len(data)-20])
+ if !bytes.Equal(sum[:], data[len(data)-20:]) {
+ return nil, fmt.Errorf("packfile checksum mismatch")
+ }
+
+ idx := &PackIndex{
+ Version: vers,
+ NumObjects: nobj,
+ Size: int64(len(data)),
+ Checksum: hex.EncodeToString(data[len(data)-20:]),
+ Objects: make([]PackObject, 0, nobj),
+ }
+
+ // We need a store to resolve delta chains and compute hashes.
+ var s gitfs.Store
+ objs := data[12 : len(data)-20]
+ off := 0
+
+ for i := 0; i < int(nobj); i++ {
+ obj, encSize, err := indexObject(&s, objs, off)
+ if err != nil {
+ return nil, fmt.Errorf("object %d at offset %d: %v", i, off+12, err)
+ }
+ obj.Offset = off + 12 // offset from start of packfile
+ obj.EncodedSize = encSize
+ idx.Objects = append(idx.Objects, obj)
+ off += encSize
+ }
+
+ // Compute delta chain depths.
+ byHash := map[string]int{} // hash -> index into Objects
+ for i, obj := range idx.Objects {
+ byHash[obj.Hash] = i
+ }
+ for i := range idx.Objects {
+ if idx.Objects[i].BaseHash == "" {
+ continue
+ }
+ depth := 1
+ baseHash := idx.Objects[i].BaseHash
+ for {
+ bi, ok := byHash[baseHash]
+ if !ok || idx.Objects[bi].BaseHash == "" {
+ break
+ }
+ depth++
+ baseHash = idx.Objects[bi].BaseHash
+ }
+ idx.Objects[i].Depth = depth
+ }
+
+ return idx, nil
+}
+
// indexObject parses the object whose header begins at objs[off] and returns
// its structural description plus the number of bytes it occupies in the
// pack. objs is the object section of the packfile (12-byte header and
// 20-byte trailing checksum stripped), so the caller adds 12 to offsets.
// Delta objects are resolved against s; ref-delta bases must already be in
// the store, which holds for well-formed packs where bases precede deltas.
func indexObject(s *gitfs.Store, objs []byte, off int) (PackObject, int, error) {
	if off < 0 || off >= len(objs) {
		return PackObject{}, 0, fmt.Errorf("invalid offset")
	}

	// Header varint: the object type sits in bits 4-6 of the first byte; the
	// inflated size is the low 4 bits plus all continuation groups shifted
	// past them (hence the u&15 | u>>7<<4 reassembly below).
	u, size := binary.Uvarint(objs[off:])
	if size <= 0 {
		return PackObject{}, 0, fmt.Errorf("bad varint")
	}
	typ := gitfs.ObjType((u >> 4) & 7)
	n := int(u&15 | u>>7<<4)

	obj := PackObject{}

	switch typ {
	case gitfs.ObjRefDelta:
		// ref-delta: a 20-byte binary base hash follows the header.
		if len(objs)-(off+size) < 20 {
			return PackObject{}, 0, fmt.Errorf("bad ref-delta")
		}
		var h gitfs.Hash
		copy(h[:], objs[off+size:])
		size += 20
		obj.Type = "ref-delta"
		obj.DeltaBase = h.String()

	case gitfs.ObjOfsDelta:
		// ofs-delta: a big-endian base-128 backwards offset follows the
		// header, with a bias of 1<<7 added per continuation byte.
		i := off + size
		if len(objs)-i < 20 {
			return PackObject{}, 0, fmt.Errorf("bad ofs-delta")
		}
		d := int64(objs[i] & 0x7f)
		for objs[i]&0x80 != 0 {
			i++
			d = d<<7 | int64(objs[i]&0x7f)
			d += 1 << 7
		}
		i++
		size = i - off
		obj.Type = "ofs-delta"
		obj.BaseOffset = off - int(d) + 12 // offset from start of packfile
		obj.DeltaBase = fmt.Sprintf("-%d", int(d))

	case gitfs.ObjCommit:
		obj.Type = "commit"
	case gitfs.ObjTree:
		obj.Type = "tree"
	case gitfs.ObjBlob:
		obj.Type = "blob"
	case gitfs.ObjTag:
		obj.Type = "tag"
	default:
		return PackObject{}, 0, fmt.Errorf("unknown type %d", typ)
	}

	// Decompress to get the actual size and compute hash.
	br := bytes.NewReader(objs[off+size:])
	zr, err := zlib.NewReader(br)
	if err != nil {
		return PackObject{}, 0, fmt.Errorf("zlib: %v", err)
	}
	content, err := io.ReadAll(zr)
	if err != nil {
		return PackObject{}, 0, fmt.Errorf("zlib read: %v", err)
	}
	if len(content) != n {
		return PackObject{}, 0, fmt.Errorf("size mismatch: %d != %d", len(content), n)
	}
	// Encoded size = header/base-ref bytes plus compressed bytes consumed.
	encSize := len(objs[off:]) - br.Len()

	// For non-delta objects, the hash is straightforward.
	// For delta objects, we need to resolve the chain via the store.
	switch typ {
	case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag:
		h, _ := s.Add(typ, content)
		obj.Hash = h.String()
		obj.Size = len(content)
		obj.ResolvedType = obj.Type

	case gitfs.ObjRefDelta:
		// The base must already be in the store (it precedes us in the pack).
		baseTyp, baseData := s.Object(gitfs.Hash(mustParseHash(obj.DeltaBase)))
		if baseTyp == gitfs.ObjNone {
			return PackObject{}, 0, fmt.Errorf("unknown ref-delta base %s", obj.DeltaBase)
		}
		resolved, err := applyPackDelta(baseData, content)
		if err != nil {
			return PackObject{}, 0, fmt.Errorf("apply ref-delta: %v", err)
		}
		h, _ := s.Add(baseTyp, resolved)
		obj.Hash = h.String()
		obj.Size = len(resolved)
		obj.ResolvedType = baseTyp.String()
		obj.BaseHash = obj.DeltaBase

	case gitfs.ObjOfsDelta:
		// Re-unpack the base at its backwards offset; gitfs.UnpackObject
		// yields its type, hash, and content (presumably resolving any
		// further deltas itself — TODO confirm against the fork).
		baseOff := off - mustParseOfsOffset(obj.DeltaBase)
		baseTyp, baseHash, baseContent, _, err := gitfs.UnpackObject(s, objs, baseOff)
		if err != nil {
			return PackObject{}, 0, fmt.Errorf("resolve ofs-delta base: %v", err)
		}
		resolved, err := applyPackDelta(baseContent, content)
		if err != nil {
			return PackObject{}, 0, fmt.Errorf("apply ofs-delta: %v", err)
		}
		h, _ := s.Add(baseTyp, resolved)
		obj.Hash = h.String()
		obj.Size = len(resolved)
		obj.ResolvedType = baseTyp.String()
		obj.BaseHash = baseHash.String()
	}

	return obj, encSize, nil
}
+
// mustParseHash decodes a 40-character hex hash into its 20-byte form.
// Per the Must convention it panics on malformed input; callers only pass
// hashes generated by this package, so a failure indicates a bug (the old
// version silently returned a partial or zero hash).
func mustParseHash(s string) [20]byte {
	b, err := hex.DecodeString(s)
	if err != nil || len(b) != 20 {
		panic(fmt.Sprintf("mustParseHash(%q): invalid hash", s))
	}
	var h [20]byte
	copy(h[:], b)
	return h
}
+
// mustParseOfsOffset parses the negative-offset string that indexObject
// produces for ofs-delta objects (e.g. "-1234") and returns its magnitude.
// Per the Must convention it panics on malformed input, which would be a
// bug in this package (the old version ignored the Sscanf error and could
// silently return 0).
func mustParseOfsOffset(s string) int {
	var n int
	if _, err := fmt.Sscanf(s, "-%d", &n); err != nil {
		panic(fmt.Sprintf("mustParseOfsOffset(%q): %v", s, err))
	}
	return n
}
+
+// resolveBaseType follows the delta chain to find the base object type.
+func resolveBaseType(s *gitfs.Store, objs []byte, off int) (gitfs.ObjType, error) {
+ u, size := binary.Uvarint(objs[off:])
+ if size <= 0 {
+ return 0, fmt.Errorf("bad varint")
+ }
+ typ := gitfs.ObjType((u >> 4) & 7)
+ switch typ {
+ case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag:
+ return typ, nil
+ case gitfs.ObjOfsDelta:
+ i := off + size
+ d := int64(objs[i] & 0x7f)
+ for objs[i]&0x80 != 0 {
+ i++
+ d = d<<7 | int64(objs[i]&0x7f)
+ d += 1 << 7
+ }
+ return resolveBaseType(s, objs, off-int(d))
+ case gitfs.ObjRefDelta:
+ var h gitfs.Hash
+ copy(h[:], objs[off+size:])
+ baseTyp, _ := s.Object(h)
+ return baseTyp, nil
+ }
+ return 0, fmt.Errorf("unknown type %d", typ)
+}
+
// applyPackDelta applies a delta to a base to produce the target.
//
// NOTE(review): the text below appears to have been corrupted in transit.
// Two versions of the copy/insert command loop are interleaved, the `1<<i`
// shift expressions have been truncated at the `<` in two places, and the
// tail of this function runs into the header of a second function — one
// that, judging from its surviving body, takes (data []byte, offset int),
// validates the offset, decodes the pack object header at that offset, and
// returns the raw decompressed delta bytes via io.ReadAll(zr). Its name and
// signature are lost. Recover both functions from version control rather
// than repairing this text by hand; the corruption points are marked inline.
func applyPackDelta(base, delta []byte) ([]byte, error) {
	// Delta starts with base size and target size as varints.
	baseSize, s := binary.Uvarint(delta)
	delta = delta[s:]
	if baseSize != uint64(len(base)) {
		return nil, fmt.Errorf("base size mismatch: %d != %d", baseSize, len(base))
	}
	targSize, s := binary.Uvarint(delta)
	delta = delta[s:]

	targ := make([]byte, targSize)
	dst := targ
	for len(delta) > 0 {
		cmd := delta[0]
		delta = delta[1:]
		switch {
		case cmd == 0:
			return nil, fmt.Errorf("invalid delta cmd")
		case cmd&0x80 != 0:
			var off, size int64
			for i := uint(0); i < 4; i++ {
				// NOTE(review): corruption point 1 — the condition was
				// presumably `cmd&(1<<i) != 0` and intervening lines are
				// missing; the loop body below restarts the command loop.
				if cmd&(1< 0 {
				cmd := delta[0]
				delta = delta[1:]

				switch {
				case cmd == 0:
					return nil, fmt.Errorf("invalid delta cmd 0")

				case cmd&0x80 != 0:
					// Copy from base.
					var off, size int64
					for i := uint(0); i < 4; i++ {
						// NOTE(review): corruption point 2 — this line fuses
						// the copy-command bit test with a bounds check from
						// the second (unnamed) function.
						if cmd&(1<= len(data)-20 {
	return nil, fmt.Errorf("invalid offset %d", offset)
	}
	objs := data[12 : len(data)-20]
	off := offset - 12

	u, size := binary.Uvarint(objs[off:])
	if size <= 0 {
		return nil, fmt.Errorf("bad varint")
	}
	typ := gitfs.ObjType((u >> 4) & 7)

	switch typ {
	case gitfs.ObjRefDelta:
		size += 20
	case gitfs.ObjOfsDelta:
		i := off + size
		for objs[i]&0x80 != 0 {
			i++
		}
		i++
		size = i - off
	default:
		return nil, fmt.Errorf("not a delta object (type %s)", typ)
	}

	br := bytes.NewReader(objs[off+size:])
	zr, err := zlib.NewReader(br)
	if err != nil {
		return nil, fmt.Errorf("zlib: %v", err)
	}
	return io.ReadAll(zr)
}
+
+// DecompressObject decompresses a single object from packfile data, identified by hash.
+// It does a full unpack to populate the store (required for ref-delta resolution),
+// then looks up the object by hash.
+func DecompressObject(data []byte, hash string) (objType string, content []byte, err error) {
+ if len(data) < 12+20 {
+ return "", nil, fmt.Errorf("packfile too short")
+ }
+
+ var s gitfs.Store
+ if err := gitfs.Unpack(&s, data); err != nil {
+ return "", nil, err
+ }
+
+ h, err := gitfs.ParseHash(hash)
+ if err != nil {
+ return "", nil, fmt.Errorf("invalid hash %q: %v", hash, err)
+ }
+
+ typ, objData := s.Object(h)
+ if typ == gitfs.ObjNone {
+ return "", nil, fmt.Errorf("object %s not found", hash)
+ }
+
+ return typ.String(), objData, nil
+}
diff --git a/internal/git/templates.go b/internal/git/templates.go
index a221125b..120c83b6 100644
--- a/internal/git/templates.go
+++ b/internal/git/templates.go
@@ -55,6 +55,19 @@ body {
Explore the raw packfile format of a git repo:
+ ++