diff --git a/internal/forks/rsc.io/gitfs/fs.go b/internal/forks/rsc.io/gitfs/fs.go index 46eabd31..0a5ea398 100644 --- a/internal/forks/rsc.io/gitfs/fs.go +++ b/internal/forks/rsc.io/gitfs/fs.go @@ -39,21 +39,21 @@ func ParseHash(text string) (Hash, error) { type ObjType int const ( - objNone ObjType = 0 - objCommit ObjType = 1 - objTree ObjType = 2 - objBlob ObjType = 3 - objTag ObjType = 4 + ObjNone ObjType = 0 + ObjCommit ObjType = 1 + ObjTree ObjType = 2 + ObjBlob ObjType = 3 + ObjTag ObjType = 4 // 5 undefined - objOfsDelta ObjType = 6 - objRefDelta ObjType = 7 + ObjOfsDelta ObjType = 6 + ObjRefDelta ObjType = 7 ) var objTypes = [...]string{ - objCommit: "commit", - objTree: "tree", - objBlob: "blob", - objTag: "tag", + ObjCommit: "commit", + ObjTree: "tree", + ObjBlob: "blob", + ObjTag: "tag", } func (t ObjType) String() string { @@ -70,10 +70,10 @@ type DirEntry struct { Hash Hash } -// parseDirEntry parses the next directory entry from data, +// ParseDirEntry parses the next directory entry from data, // returning the entry and the number of bytes it occupied. -// If data is malformed, parseDirEntry returns dirEntry{}, 0. -func parseDirEntry(data []byte) (DirEntry, int) { +// If data is malformed, ParseDirEntry returns DirEntry{}, 0. +func ParseDirEntry(data []byte) (DirEntry, int) { // Unclear where or if this format is documented by Git. // Each directory entry is an octal mode, then a space, // then a file name, then a NUL byte, then a 20-byte binary hash. @@ -111,7 +111,7 @@ func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) { // but the directory entry data is not self-synchronizing, // so it's not possible to be clever and use a binary search here. for len(data) > 0 { - e, size := parseDirEntry(data) + e, size := ParseDirEntry(data) if size == 0 { break } @@ -153,8 +153,8 @@ func commitKeyValue(data []byte, key string) ([]byte, bool) { return nil, false } -// A store is a collection of Git objects, indexed for lookup by hash. 
-type store struct { +// A Store is a collection of Git objects, indexed for lookup by hash. +type Store struct { repo *Repo sha1 hashpkg.Hash // reused hash state index map[Hash]stored // lookup index @@ -164,13 +164,13 @@ type store struct { // A stored describes a single stored object. type stored struct { typ ObjType // object type - off int // object data is store.data[off:off+len] + off int // object data is Store.data[off:off+len] len int } -// add adds an object with the given type and content to s, returning its Hash. -// If the object is already stored in s, add succeeds but doesn't store a second copy. -func (s *store) add(typ ObjType, data []byte) (Hash, []byte) { +// Add adds an object with the given type and content to s, returning its Hash. +// If the object is already stored in s, Add succeeds but doesn't store a second copy. +func (s *Store) Add(typ ObjType, data []byte) (Hash, []byte) { if s.sha1 == nil { s.sha1 = sha1.New() } @@ -196,7 +196,7 @@ func (s *store) add(typ ObjType, data []byte) (Hash, []byte) { // Object returns the type and data for the Object with hash h. // If there is no Object with hash h, Object returns 0, nil. -func (s *store) Object(h Hash) (typ ObjType, data []byte) { +func (s *Store) Object(h Hash) (typ ObjType, data []byte) { d, ok := s.index[h] if !ok { return 0, nil @@ -205,16 +205,16 @@ func (s *store) Object(h Hash) (typ ObjType, data []byte) { } // Commit returns a treeFS for the file system tree associated with the given Commit hash. 
-func (s *store) Commit(c Hash) (*treeFS, []byte, error) { +func (s *Store) Commit(c Hash) (*treeFS, []byte, error) { // The commit object data starts with key-value pairs typ, data := s.Object(c) - if typ == objNone { + if typ == ObjNone { return nil, nil, fmt.Errorf("commit %s: no such hash", c) } // fmt.Fprintf(os.Stderr, "typ=%d\n", typ) // fmt.Fprintf(os.Stderr, "%s", data) // os.Stderr.Write([]byte("\n")) - if typ != objCommit { + if typ != ObjCommit { return nil, nil, fmt.Errorf("commit %s: unexpected type %s", c, typ) } treeHash, ok := commitKeyValue(data, "tree") @@ -230,7 +230,7 @@ func (s *store) Commit(c Hash) (*treeFS, []byte, error) { // A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash. type treeFS struct { - s *store + s *Store tree Hash // root tree commit Hash } @@ -265,7 +265,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { if i == len(name) || name[i] == '/' { // Look up name in current tree object h. typ, data := t.s.Object(h) - if typ != objTree { + if typ != ObjTree { return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist} } _, th, ok := treeLookup(data, name[start:i]) @@ -283,7 +283,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { // The hash h is the hash for name. Load its object. typ, data := t.s.Object(h) info := fileInfo{name, name[start:], 0, 0, nil} - if typ == objBlob { + if typ == ObjBlob { // Regular file. info.mode = 0444 info.size = int64(len(data)) @@ -294,7 +294,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) { } return &blobFile{info, bytes.NewReader(data)}, nil } - if typ == objTree { + if typ == ObjTree { // Directory. info.mode = fs.ModeDir | 0555 info.sys = &DirEntry{ @@ -341,7 +341,7 @@ func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil } // A dirFile implements fs.File for a directory. 
type dirFile struct { - s *store + s *Store info fileInfo data []byte off int @@ -369,18 +369,18 @@ func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) { }() for (n <= 0 || len(list) < n) && f.off < len(f.data) { - e, size := parseDirEntry(f.data[f.off:]) + e, size := ParseDirEntry(f.data[f.off:]) if size == 0 { break } f.off += size typ, data := f.s.Object(e.Hash) mode := fs.FileMode(0444) - if typ == objTree { + if typ == ObjTree { mode = fs.ModeDir | 0555 } infoSize := int64(0) - if typ == objBlob { + if typ == ObjBlob { infoSize = int64(len(data)) } name := string(e.Name) diff --git a/internal/forks/rsc.io/gitfs/git.go b/internal/forks/rsc.io/gitfs/git.go index 268948da..b380d7a1 100644 --- a/internal/forks/rsc.io/gitfs/git.go +++ b/internal/forks/rsc.io/gitfs/git.go @@ -196,6 +196,101 @@ func (r *Repo) CloneHash(ctx context.Context, h Hash) (fs.FS, []byte, error) { return tfs, data, nil } +// FetchPack fetches a full (non-shallow) packfile from the remote server, +// requesting all refs. It returns the raw packfile bytes. +func (r *Repo) FetchPack(ctx context.Context) ([]byte, error) { + opts, ok := r.caps["fetch"] + if !ok { + return nil, fmt.Errorf("fetch: server does not support fetch") + } + _ = opts + + refs, err := r.Refs(ctx) + if err != nil { + return nil, fmt.Errorf("fetchpack: refs: %v", err) + } + + // Deduplicate hashes. 
+ seen := map[Hash]bool{} + var wants []Hash + for _, ref := range refs { + if !seen[ref.Hash] { + seen[ref.Hash] = true + wants = append(wants, ref.Hash) + } + } + if len(wants) == 0 { + return nil, fmt.Errorf("fetchpack: no refs found") + } + + var buf bytes.Buffer + pw := newPktLineWriter(&buf) + pw.WriteString("command=fetch") + pw.Delim() + for _, h := range wants { + pw.WriteString("want " + h.String()) + } + pw.WriteString("done") + pw.Close() + + req, _ := http.NewRequestWithContext(ctx, "POST", r.url+"/git-upload-pack", &buf) + req.Header.Set("Content-Type", "application/x-git-upload-pack-request") + req.Header.Set("Accept", "application/x-git-upload-pack-result") + req.Header.Set("Git-Protocol", "version=2") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetchpack: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("fetchpack: %v\n%s", resp.Status, body) + } + if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" { + return nil, fmt.Errorf("fetchpack: invalid response Content-Type: %v", ct) + } + + var data []byte + pr := newPktLineReader(resp.Body) + sawPackfile := false + for { + line, err := pr.Next() + if err != nil { + if err == io.EOF { + break + } + return nil, fmt.Errorf("fetchpack: parsing response: %v", err) + } + if line == nil { + continue + } + if !sawPackfile { + if strings.TrimSuffix(string(line), "\n") == "packfile" { + sawPackfile = true + } + continue + } + if len(line) == 0 || line[0] == 0 || line[0] > 3 { + continue + } + switch line[0] { + case 1: + data = append(data, line[1:]...) + case 2: + // progress + case 3: + return nil, fmt.Errorf("fetchpack: server error: %s", line[1:]) + } + } + + if !bytes.HasPrefix(data, []byte("PACK")) { + return nil, fmt.Errorf("fetchpack: malformed response: not packfile") + } + + return data, nil +} + // fetch returns the fs.FS for a given hash. 
func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) { // Fetch a shallow packfile from the remote server. @@ -285,8 +380,8 @@ func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) { } // Unpack pack file and return fs.FS for the commit we downloaded. - var s store - if err := unpack(&s, data); err != nil { + var s Store + if err := Unpack(&s, data); err != nil { return nil, nil, fmt.Errorf("fetch: %v", err) } s.repo = r diff --git a/internal/forks/rsc.io/gitfs/git_test.go b/internal/forks/rsc.io/gitfs/git_test.go index bfabcf6c..e2a66a33 100644 --- a/internal/forks/rsc.io/gitfs/git_test.go +++ b/internal/forks/rsc.io/gitfs/git_test.go @@ -54,8 +54,8 @@ func TestPack(t *testing.T) { if err != nil { t.Fatal(err) } - var s store - err = unpack(&s, data) + var s Store + err = Unpack(&s, data) if err != nil { t.Fatal(err) } diff --git a/internal/forks/rsc.io/gitfs/pack.go b/internal/forks/rsc.io/gitfs/pack.go index f7e79c5e..e4b5d000 100644 --- a/internal/forks/rsc.io/gitfs/pack.go +++ b/internal/forks/rsc.io/gitfs/pack.go @@ -13,11 +13,11 @@ import ( "io" ) -// unpack parses data, which is a Git pack-formatted archive, -// writing every object it contains to the store s. +// Unpack parses data, which is a Git pack-formatted archive, +// writing every object it contains to the Store s. // // See https://git-scm.com/docs/pack-format for format documentation. -func unpack(s *store, data []byte) error { +func Unpack(s *Store, data []byte) error { // If the store is empty, pre-allocate the length of data. // This should be about the right order of magnitude for the eventual data, // avoiding many growing steps during append. 
@@ -50,7 +50,7 @@ func unpack(s *store, data []byte) error { objs := data[12 : len(data)-20] off := 0 for i := 0; i < int(nobj); i++ { - _, _, _, encSize, err := unpackObject(s, objs, off) + _, _, _, encSize, err := UnpackObject(s, objs, off) if err != nil { return fmt.Errorf("unpack: malformed git pack: %v", err) } @@ -62,10 +62,10 @@ func unpack(s *store, data []byte) error { return nil } -// unpackObject unpacks the object at objs[off:] and writes it to the store s. +// UnpackObject unpacks the object at objs[off:] and writes it to the Store s. // It returns the type, hash, and content of the object, as well as the encoded size, // meaning the number of bytes at the start of objs[off:] that this record occupies. -func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) { +func UnpackObject(s *Store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) { fail := func(err error) (ObjType, Hash, []byte, int, error) { return 0, Hash{}, nil, 0, err } @@ -92,7 +92,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content var deltaTyp ObjType var deltaBase []byte switch typ { - case objRefDelta: + case ObjRefDelta: if len(objs)-(off+size) < 20 { return fail(fmt.Errorf("invalid object: bad delta ref")) } @@ -105,7 +105,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content return fail(fmt.Errorf("invalid object: unknown delta ref %v", h)) } - case objOfsDelta: + case ObjOfsDelta: i := off + size if len(objs)-i < 20 { return fail(fmt.Errorf("invalid object: too short")) @@ -130,7 +130,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content return fail(fmt.Errorf("invalid object: bad delta offset")) } var err error - deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d)) + deltaTyp, _, deltaBase, _, err = UnpackObject(s, objs, off-int(d)) if err != nil { return fail(fmt.Errorf("invalid 
object: bad delta offset")) } @@ -156,9 +156,9 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content switch typ { default: return fail(fmt.Errorf("invalid object: unknown object type")) - case objCommit, objTree, objBlob, objTag: + case ObjCommit, ObjTree, ObjBlob, ObjTag: // ok - case objRefDelta, objOfsDelta: + case ObjRefDelta, ObjOfsDelta: // Actual object type is the type of the base object. typ = deltaTyp @@ -179,7 +179,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content data = targ } - h, data = s.add(typ, data) + h, data = s.Add(typ, data) return typ, h, data, encSize, nil } diff --git a/internal/git/cache.go b/internal/git/cache.go new file mode 100644 index 00000000..19fc008c --- /dev/null +++ b/internal/git/cache.go @@ -0,0 +1,315 @@ +package git + +import ( + "compress/gzip" + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "cloud.google.com/go/storage" +) + +type packCache interface { + GetIndex(ctx context.Context, key string) (*PackIndex, error) + PutIndex(ctx context.Context, key string, idx *PackIndex) error + GetPack(ctx context.Context, key string) ([]byte, error) + PutPack(ctx context.Context, key string, data []byte) error + RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) +} + +func cacheKey(repoURL string) string { + h := sha256.Sum256([]byte(repoURL)) + return fmt.Sprintf("%x", h[:16]) +} + +// gcsPackCache stores packs and indexes in GCS. 
+type gcsPackCache struct { + bucket *storage.BucketHandle +} + +func (g *gcsPackCache) packPath(key string) string { + return fmt.Sprintf("pack/%s/pack.bin", key) +} + +func (g *gcsPackCache) indexPath(key string) string { + return fmt.Sprintf("pack/%s/index.json.gz", key) +} + +func (g *gcsPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + rc, err := g.bucket.Object(g.indexPath(key)).NewReader(ctx) + if err != nil { + return nil, err + } + defer rc.Close() + zr, err := gzip.NewReader(rc) + if err != nil { + return nil, err + } + defer zr.Close() + idx := &PackIndex{} + if err := json.NewDecoder(zr).Decode(idx); err != nil { + return nil, err + } + return idx, nil +} + +func (g *gcsPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + w := g.bucket.Object(g.indexPath(key)).NewWriter(ctx) + zw, err := gzip.NewWriterLevel(w, gzip.BestSpeed) + if err != nil { + return err + } + if err := json.NewEncoder(zw).Encode(idx); err != nil { + zw.Close() + w.Close() + return err + } + if err := zw.Close(); err != nil { + w.Close() + return err + } + return w.Close() +} + +func (g *gcsPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + rc, err := g.bucket.Object(g.packPath(key)).NewReader(ctx) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + +func (g *gcsPackCache) PutPack(ctx context.Context, key string, data []byte) error { + w := g.bucket.Object(g.packPath(key)).NewWriter(ctx) + if _, err := w.Write(data); err != nil { + w.Close() + return err + } + return w.Close() +} + +func (g *gcsPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return g.bucket.Object(g.packPath(key)).NewRangeReader(ctx, offset, length) +} + +// dirPackCache stores packs and indexes in a local directory. 
+type dirPackCache struct { + dir string +} + +func (d *dirPackCache) path(key, name string) string { + return filepath.Join(d.dir, "pack", key, name) +} + +func (d *dirPackCache) ensureDir(key string) error { + return os.MkdirAll(filepath.Join(d.dir, "pack", key), 0755) +} + +func (d *dirPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + f, err := os.Open(d.path(key, "index.json.gz")) + if err != nil { + return nil, err + } + defer f.Close() + zr, err := gzip.NewReader(f) + if err != nil { + return nil, err + } + defer zr.Close() + idx := &PackIndex{} + if err := json.NewDecoder(zr).Decode(idx); err != nil { + return nil, err + } + return idx, nil +} + +func (d *dirPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + if err := d.ensureDir(key); err != nil { + return err + } + f, err := os.Create(d.path(key, "index.json.gz")) + if err != nil { + return err + } + defer f.Close() + zw, err := gzip.NewWriterLevel(f, gzip.BestSpeed) + if err != nil { + return err + } + if err := json.NewEncoder(zw).Encode(idx); err != nil { + zw.Close() + return err + } + return zw.Close() +} + +func (d *dirPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return os.ReadFile(d.path(key, "pack.bin")) +} + +func (d *dirPackCache) PutPack(ctx context.Context, key string, data []byte) error { + if err := d.ensureDir(key); err != nil { + return err + } + return os.WriteFile(d.path(key, "pack.bin"), data, 0644) +} + +func (d *dirPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + f, err := os.Open(d.path(key, "pack.bin")) + if err != nil { + return nil, err + } + return io.NopCloser(io.NewSectionReader(f, offset, length)), nil +} + +// memPackIndex is an in-memory LRU of parsed PackIndex structs. 
+type memPackIndex struct { + mu sync.Mutex + cap int + entries map[string]*memIndexEntry +} + +type memIndexEntry struct { + idx *PackIndex + access time.Time +} + +func (m *memPackIndex) Get(key string) *PackIndex { + m.mu.Lock() + defer m.mu.Unlock() + if e, ok := m.entries[key]; ok { + e.access = time.Now() + return e.idx + } + return nil +} + +func (m *memPackIndex) Put(key string, idx *PackIndex) { + m.mu.Lock() + defer m.mu.Unlock() + if m.entries == nil { + m.entries = make(map[string]*memIndexEntry) + } + if len(m.entries) >= m.cap { + var oldest string + var oldestTime time.Time + for k, e := range m.entries { + if oldest == "" || e.access.Before(oldestTime) { + oldest = k + oldestTime = e.access + } + } + delete(m.entries, oldest) + } + m.entries[key] = &memIndexEntry{idx: idx, access: time.Now()} +} + +func buildPackCache() packCache { + if cd := os.Getenv("CACHE_DIR"); cd != "" { + log.Printf("pack cache: dir=%s", cd) + return &dirPackCache{dir: cd} + } + if cb := os.Getenv("CACHE_BUCKET"); cb != "" { + log.Printf("pack cache: bucket=%s", cb) + client, err := storage.NewClient(context.Background()) + if err != nil { + log.Printf("pack cache: gcs error: %v", err) + return &noopPackCache{} + } + bkt := client.Bucket(strings.TrimPrefix(cb, "gs://")) + return &gcsPackCache{bucket: bkt} + } + return &noopPackCache{} +} + +// noopPackCache is used when no cache backend is configured. 
+type noopPackCache struct{} + +func (n *noopPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + return nil, fmt.Errorf("no cache") +} +func (n *noopPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + return nil +} +func (n *noopPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return nil, fmt.Errorf("no cache") +} +func (n *noopPackCache) PutPack(ctx context.Context, key string, data []byte) error { + return nil +} +func (n *noopPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return nil, fmt.Errorf("no cache") +} + +// tieredPackCache wraps a persistent cache with an in-memory LRU for indexes. +type tieredPackCache struct { + mem *memPackIndex + back packCache +} + +func (t *tieredPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) { + if idx := t.mem.Get(key); idx != nil { + return idx, nil + } + idx, err := t.back.GetIndex(ctx, key) + if err != nil { + return nil, err + } + t.mem.Put(key, idx) + return idx, nil +} + +func (t *tieredPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error { + t.mem.Put(key, idx) + return t.back.PutIndex(ctx, key, idx) +} + +func (t *tieredPackCache) GetPack(ctx context.Context, key string) ([]byte, error) { + return t.back.GetPack(ctx, key) +} + +func (t *tieredPackCache) PutPack(ctx context.Context, key string, data []byte) error { + return t.back.PutPack(ctx, key, data) +} + +func (t *tieredPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) { + return t.back.RangeReader(ctx, key, offset, length) +} + +func newPackCache() packCache { + return &tieredPackCache{ + mem: &memPackIndex{cap: 50}, + back: buildPackCache(), + } +} + +// memPackData holds raw packfile bytes in memory for object detail views. 
+type memPackData struct { + mu sync.Mutex + data map[string][]byte +} + +func (m *memPackData) Get(key string) []byte { + m.mu.Lock() + defer m.mu.Unlock() + return m.data[key] +} + +func (m *memPackData) Put(key string, data []byte) { + m.mu.Lock() + defer m.mu.Unlock() + if m.data == nil { + m.data = make(map[string][]byte) + } + m.data[key] = data +} + diff --git a/internal/git/git.go b/internal/git/git.go index 4fde8b83..ddca43ed 100644 --- a/internal/git/git.go +++ b/internal/git/git.go @@ -3,6 +3,7 @@ package git import ( "bufio" "bytes" + "context" "fmt" "html" "io" @@ -36,6 +37,9 @@ type handler struct { repos map[string]*gitfs.Repo commits map[string][]byte fsyss map[string]fs.FS + + packCache packCache + packData memPackData } type Option func(h *handler) @@ -48,10 +52,11 @@ func WithUserAgent(ua string) Option { func New(args []string, opts ...Option) http.Handler { h := handler{ - args: args, - repos: map[string]*gitfs.Repo{}, - fsyss: map[string]fs.FS{}, - commits: map[string][]byte{}, + args: args, + repos: map[string]*gitfs.Repo{}, + fsyss: map[string]fs.FS{}, + commits: map[string][]byte{}, + packCache: newPackCache(), } for _, opt := range opts { @@ -63,6 +68,7 @@ func New(args []string, opts ...Option) http.Handler { mux.HandleFunc("/", h.errHandler(h.renderResponse)) mux.HandleFunc("/http/", h.errHandler(h.renderFS)) mux.HandleFunc("/https/", h.errHandler(h.renderFS)) + mux.HandleFunc("/pack/", h.errHandler(h.renderPackObject)) h.mux = gzhttp.GzipHandler(mux) @@ -108,6 +114,14 @@ func (h *handler) errHandler(hfe HandleFuncE) http.HandlerFunc { func (h *handler) renderResponse(w http.ResponseWriter, r *http.Request) error { qs := r.URL.Query() + if q := qs.Get("pack"); q != "" { + u, err := url.PathUnescape(q) + if err != nil { + return err + } + return h.renderPackOverview(w, r, u) + } + if q := qs.Get("url"); q != "" { u, err := url.PathUnescape(q) if err != nil { @@ -606,3 +620,512 @@ func (d *dumbEscaper) Write(p []byte) (n int, err error) { 
} return len(p), d.buf.Flush() } + +func (h *handler) getOrFetchPack(ctx context.Context, repoURL string) (*PackIndex, string, error) { + key := cacheKey(repoURL) + + // Try cache first. + idx, err := h.packCache.GetIndex(ctx, key) + if err == nil { + return idx, key, nil + } + + // Cache miss: fetch the packfile. + if !strings.Contains(repoURL, "://") { + repoURL = "https://" + repoURL + } + + repo, err := gitfs.NewRepo(ctx, repoURL) + if err != nil { + return nil, "", fmt.Errorf("NewRepo: %w", err) + } + + data, err := repo.FetchPack(ctx) + if err != nil { + return nil, "", fmt.Errorf("FetchPack: %w", err) + } + + idx, err = BuildPackIndex(data) + if err != nil { + return nil, "", fmt.Errorf("BuildPackIndex: %w", err) + } + + // Store in cache (best effort). + if putErr := h.packCache.PutPack(ctx, key, data); putErr != nil { + log.Printf("pack cache put pack: %v", putErr) + } + if putErr := h.packCache.PutIndex(ctx, key, idx); putErr != nil { + log.Printf("pack cache put index: %v", putErr) + } + + // Keep packfile data in memory for object detail views. + h.packData.Put(key, data) + + return idx, key, nil +} + +func (h *handler) renderPackOverview(w http.ResponseWriter, r *http.Request, repoURL string) error { + ctx := r.Context() + + idx, key, err := h.getOrFetchPack(ctx, repoURL) + if err != nil { + return err + } + + repo := strings.TrimPrefix(repoURL, "https://") + repo = strings.TrimPrefix(repo, "http://") + + if err := headerTmpl.Execute(w, TitleData{"Pack: " + repo}); err != nil { + return err + } + hd := HeaderData{ + Repo: repo, + RepoLink: repoURL, + JQ: fmt.Sprintf("git verify-pack -v .git/objects/pack/pack-%s.idx", idx.Checksum), + } + if err := bodyTmpl.Execute(w, hd); err != nil { + return err + } + + // Compute stats. + nonDelta := 0 + chainLengths := map[int]int{} + for _, obj := range idx.Objects { + if obj.Depth == 0 { + nonDelta++ + } else { + chainLengths[obj.Depth]++ + } + } + + // Filter by type if requested. 
+	filterType := r.URL.Query().Get("type")
+
+	fmt.Fprintf(w, "<pre>\n")
+	for _, obj := range idx.Objects {
+		if filterType != "" {
+			if strings.HasPrefix(filterType, "depth-") {
+				var d int
+				fmt.Sscanf(filterType, "depth-%d", &d)
+				if obj.Depth != d {
+					continue
+				}
+			} else if obj.ResolvedType != filterType && obj.Type != filterType {
+				continue
+			}
+		}
+		href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.Hash, key)
+		hashLink := fmt.Sprintf("<a href=\"%s\">%s</a>", href, obj.Hash)
+
+		if obj.Depth > 0 {
+			baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+			baseLink := fmt.Sprintf("<a href=\"%s\">%s</a>", baseHref, obj.BaseHash)
+			fmt.Fprintf(w, "%s %-6s %d %d %d %d %s\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset, obj.Depth, baseLink)
+		} else {
+			fmt.Fprintf(w, "%s %-6s %d %d %d\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset)
+		}
+	}
+
+	// Summary.
+	fmt.Fprintf(w, "non delta: %d objects\n", nonDelta)
+	maxDepth := 0
+	for d := range chainLengths {
+		if d > maxDepth {
+			maxDepth = d
+		}
+	}
+	for d := 1; d <= maxDepth; d++ {
+		if c, ok := chainLengths[d]; ok {
+			href := fmt.Sprintf("/?pack=%s&type=depth-%d", url.QueryEscape(repoURL), d)
+			fmt.Fprintf(w, "chain length = %d: <a href=\"%s\">%d objects</a>\n", d, href, c)
+		}
+	}
+
+	fmt.Fprintf(w, "
\n") + fmt.Fprintf(w, footer) + return nil +} + +func (h *handler) renderPackObject(w http.ResponseWriter, r *http.Request) error { + ctx := r.Context() + + // Path: /pack// + p := strings.TrimPrefix(r.URL.Path, "/pack/") + lastSlash := strings.LastIndex(p, "/") + if lastSlash < 0 { + return fmt.Errorf("invalid pack object path: %s", r.URL.Path) + } + repo := p[:lastSlash] + hash := p[lastSlash+1:] + key := r.URL.Query().Get("key") + if key == "" { + key = cacheKey(repo) + } + + // Get the index to find the object. + idx, err := h.packCache.GetIndex(ctx, key) + if err != nil { + // Try fetching. + repoURL := repo + if !strings.Contains(repoURL, "://") { + repoURL = "https://" + repoURL + } + idx, key, err = h.getOrFetchPack(ctx, repoURL) + if err != nil { + return fmt.Errorf("get pack index: %w", err) + } + } + + // Find the object by hash. + var obj *PackObject + for i := range idx.Objects { + if idx.Objects[i].Hash == hash { + obj = &idx.Objects[i] + break + } + } + if obj == nil { + return fmt.Errorf("object %s not found in pack", hash) + } + + // Get the packfile data to decompress the object. + data := h.packData.Get(key) + if data == nil { + data, err = h.packCache.GetPack(ctx, key) + if err != nil { + return fmt.Errorf("get pack data: %w", err) + } + h.packData.Put(key, data) + } + + objType, content, err := DecompressObject(data, obj.Hash) + if err != nil { + return fmt.Errorf("decompress: %w", err) + } + + if err := headerTmpl.Execute(w, TitleData{hash[:12] + " - Pack Object"}); err != nil { + return err + } + hd := HeaderData{ + Repo: repo, + RepoLink: fmt.Sprintf("/?pack=%s", url.QueryEscape(repo)), + JQ: fmt.Sprintf("git cat-file -p %s", hash), + } + if err := bodyTmpl.Execute(w, hd); err != nil { + return err + } + + fmt.Fprintf(w, "
\n")
+	if obj.Type != objType {
+		fmt.Fprintf(w, "type:    %s (resolves to %s)\n", obj.Type, objType)
+	} else {
+		fmt.Fprintf(w, "type:    %s\n", objType)
+	}
+	fmt.Fprintf(w, "size:    %s (%d bytes)\n", formatBytes(int64(obj.Size)), obj.Size)
+	fmt.Fprintf(w, "offset:  %d\n", obj.Offset)
+	fmt.Fprintf(w, "encoded: %d bytes\n", obj.EncodedSize)
+	if obj.BaseHash != "" {
+		baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+		fmt.Fprintf(w, "base:    <a href=\"%s\">%s</a> (%s", baseHref, obj.BaseHash, obj.Type)
+		if obj.Type == "ofs-delta" {
+			fmt.Fprintf(w, ", offset %d", obj.BaseOffset)
+		}
+		fmt.Fprintf(w, ")\n")
+		fmt.Fprintf(w, "depth:   %d\n", obj.Depth)
+	}
+	fmt.Fprintf(w, "\n")
+
+	// For delta objects, show the raw delta instructions.
+	if obj.Type == "ref-delta" || obj.Type == "ofs-delta" {
+		rawDelta, err := RawDelta(data, obj.Offset)
+		if err == nil {
+			deltaInfo, err := ParseDelta(rawDelta)
+			if err == nil {
+				h.renderDeltaOps(w, deltaInfo, repo, key, idx)
+				fmt.Fprintf(w, "\n")
+			}
+		}
+		fmt.Fprintf(w, "--- resolved content (%s) ---\n\n", objType)
+	}
+
+	// Render resolved content based on type.
+	switch objType {
+	case "commit":
+		h.renderPackCommit(w, content, repo, key)
+	case "tree":
+		h.renderPackTree(w, content, repo, key, idx)
+	case "blob":
+		size := min(int64(len(content)), tooBig)
+		esc := &dumbEscaper{buf: bufio.NewWriter(w)}
+		io.CopyN(esc, bytes.NewReader(content), size)
+		if int64(len(content)) > tooBig {
+			fmt.Fprintf(w, "\n... truncated (%s total)", formatBytes(int64(len(content))))
+		}
+	case "tag":
+		h.renderPackTag(w, content, repo, key)
+	default:
+		fmt.Fprintf(w, "(raw %d bytes)\n", len(content))
+	}
+
+	fmt.Fprintf(w, "
\n") + fmt.Fprintf(w, footer) + return nil +} + +func (h *handler) renderPackCommit(w io.Writer, content []byte, repo, key string) { + scanner := bufio.NewScanner(bytes.NewReader(content)) + for scanner.Scan() { + line := scanner.Text() + hdr, val, ok := strings.Cut(line, " ") + if !ok { + fmt.Fprintf(w, "%s\n", htmlEscape(line)) + continue + } + switch hdr { + case "tree", "parent": + href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key) + fmt.Fprintf(w, "%s %s\n", hdr, href, val) + default: + fmt.Fprintf(w, "%s\n", htmlEscape(line)) + } + } +} + +func (h *handler) renderPackTree(w io.Writer, content []byte, repo, key string, idx *PackIndex) { + // Build hash lookup. + hashSet := map[string]bool{} + for _, obj := range idx.Objects { + hashSet[obj.Hash] = true + } + + data := content + for len(data) > 0 { + e, size := gitfs.ParseDirEntry(data) + if size == 0 { + break + } + data = data[size:] + + hashStr := e.Hash.String() + typeStr := "blob" + if e.Mode == 0o40000 { + typeStr = "tree" + } else if e.Mode == 0o160000 { + typeStr = "commit" + } + + name := htmlEscape(string(e.Name)) + if hashSet[hashStr] { + href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key) + fmt.Fprintf(w, "%06o %s %s\t%s\n", e.Mode, typeStr, href, hashStr, name) + } else { + fmt.Fprintf(w, "%06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name) + } + } +} + +func (h *handler) renderPackTag(w io.Writer, content []byte, repo, key string) { + scanner := bufio.NewScanner(bytes.NewReader(content)) + for scanner.Scan() { + line := scanner.Text() + hdr, val, ok := strings.Cut(line, " ") + if !ok { + fmt.Fprintf(w, "%s\n", htmlEscape(line)) + continue + } + switch hdr { + case "object": + href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key) + fmt.Fprintf(w, "%s %s\n", hdr, href, val) + default: + fmt.Fprintf(w, "%s\n", htmlEscape(line)) + } + } +} + +func (h *handler) renderDeltaOps(w io.Writer, info *DeltaInfo, repo, key string, idx 
*PackIndex) { + // Build hash lookup for linking. + hashSet := map[string]bool{} + for _, obj := range idx.Objects { + hashSet[obj.Hash] = true + } + + fmt.Fprintf(w, "--- delta instructions ---\n\n") + fmt.Fprintf(w, "base size: %s (%d bytes)\n", formatBytes(int64(info.BaseSize)), info.BaseSize) + fmt.Fprintf(w, "target size: %s (%d bytes)\n", formatBytes(int64(info.TargetSize)), info.TargetSize) + fmt.Fprintf(w, "operations: %d\n\n", len(info.Ops)) + + for i, op := range info.Ops { + switch op.Kind { + case "copy": + fmt.Fprintf(w, "%4d copy base[%d:%d] (%d bytes)\n", + i, op.Offset, op.Offset+op.Size, op.Size) + case "insert": + if isBinary(op.Data) { + if entries, prefix, suffix := tryParseTreeInsert(op.Data); len(entries) > 0 || len(prefix) > 0 { + fmt.Fprintf(w, "%4d insert %d bytes (tree data)\n", i, op.Size) + if len(prefix) > 0 { + writeHashFragment(w, prefix, hashSet, repo, key) + } + for _, e := range entries { + hashStr := e.Hash.String() + typeStr := "blob" + if e.Mode == 0o40000 { + typeStr = "tree" + } else if e.Mode == 0o160000 { + typeStr = "commit" + } + name := htmlEscape(string(e.Name)) + if hashSet[hashStr] { + href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key) + fmt.Fprintf(w, " %06o %s %s\t%s\n", e.Mode, typeStr, href, hashStr, name) + } else { + fmt.Fprintf(w, " %06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name) + } + } + if len(suffix) > 0 { + writeHashFragment(w, suffix, hashSet, repo, key) + } + } else { + fmt.Fprintf(w, "%4d insert %d bytes\n", i, op.Size) + writeHexDump(w, op.Data) + } + } else { + fmt.Fprintf(w, "%4d insert %d bytes: ", i, op.Size) + show := op.Data + truncated := false + if len(show) > 128 { + show = show[:128] + truncated = true + } + esc := &dumbEscaper{buf: bufio.NewWriter(w)} + esc.Write(show) + if truncated { + fmt.Fprintf(w, "...") + } + fmt.Fprintf(w, "\n") + } + } + } +} + +// tryParseTreeInsert tries to interpret binary insert data as tree entry fragments. 
+// It returns any complete tree entries parsed, plus any leading prefix (trailing +// hash bytes from a previous entry) and trailing suffix that didn't form a complete entry. +func tryParseTreeInsert(data []byte) (entries []gitfs.DirEntry, prefix, suffix []byte) { + // The insert might start mid-entry — the leading bytes could be the tail + // of a previous entry's 20-byte hash. Look for the start of a tree entry: + // an octal digit followed eventually by ' ', name, '\0', 20 bytes. + start := 0 + for start < len(data) { + if data[start] >= '1' && data[start] <= '7' { + // Might be the start of a mode. Try parsing. + e, size := gitfs.ParseDirEntry(data[start:]) + if size > 0 { + // Found a valid entry start. Everything before it is prefix. + if start > 0 { + prefix = data[:start] + } + entries = append(entries, e) + pos := start + size + // Parse remaining entries. + for pos < len(data) { + e, size := gitfs.ParseDirEntry(data[pos:]) + if size == 0 { + break + } + entries = append(entries, e) + pos += size + } + if pos < len(data) { + suffix = data[pos:] + } + return entries, prefix, suffix + } + } + start++ + } + // Couldn't parse any entries. Might be a pure hash fragment. 
+ if len(data) <= 20 { + return nil, data, nil + } + return nil, nil, nil +} + +func writeHashFragment(w io.Writer, data []byte, hashSet map[string]bool, repo, key string) { + if len(data) == 20 { + hashStr := fmt.Sprintf("%x", data) + if hashSet[hashStr] { + href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key) + fmt.Fprintf(w, " hash %s\n", href, hashStr) + } else { + fmt.Fprintf(w, " hash %s\n", hashStr) + } + } else { + fmt.Fprintf(w, " (%d bytes) %x\n", len(data), data) + } +} + +func isBinary(data []byte) bool { + for _, b := range data { + if b == 0 || b >= 0x7f { + return true + } + if b < 0x20 && b != '\n' && b != '\r' && b != '\t' { + return true + } + } + return false +} + +func writeHexDump(w io.Writer, data []byte) { + for i := 0; i < len(data); i += 16 { + end := i + 16 + if end > len(data) { + end = len(data) + } + line := data[i:end] + + // Offset. + fmt.Fprintf(w, " %04x ", i) + + // Hex bytes. + for j, b := range line { + if j == 8 { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, "%02x ", b) + } + // Pad if short line. + for j := len(line); j < 16; j++ { + if j == 8 { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, " ") + } + + // ASCII. 
+ fmt.Fprintf(w, " |") + for _, b := range line { + if b >= 0x20 && b < 0x7f { + fmt.Fprintf(w, "%c", b) + } else { + fmt.Fprintf(w, ".") + } + } + fmt.Fprintf(w, "|\n") + } +} + +func formatBytes(b int64) string { + switch { + case b >= 1<<30: + return fmt.Sprintf("%.1f GiB", float64(b)/(1<<30)) + case b >= 1<<20: + return fmt.Sprintf("%.1f MiB", float64(b)/(1<<20)) + case b >= 1<<10: + return fmt.Sprintf("%.1f KiB", float64(b)/(1<<10)) + default: + return fmt.Sprintf("%d B", b) + } +} diff --git a/internal/git/packfile.go b/internal/git/packfile.go new file mode 100644 index 00000000..38501aa5 --- /dev/null +++ b/internal/git/packfile.go @@ -0,0 +1,442 @@ +package git + +import ( + "bytes" + "compress/zlib" + "crypto/sha1" + "encoding/binary" + "encoding/hex" + "fmt" + "io" + + "github.com/jonjohnsonjr/dagdotdev/internal/forks/rsc.io/gitfs" +) + +type PackIndex struct { + Version uint32 `json:"version"` + NumObjects uint32 `json:"numObjects"` + Size int64 `json:"size"` + Checksum string `json:"checksum"` + Objects []PackObject `json:"objects"` +} + +type PackObject struct { + Offset int `json:"offset"` + EncodedSize int `json:"encodedSize"` + Type string `json:"type"` // raw type: commit, tree, blob, tag, ofs-delta, ref-delta + ResolvedType string `json:"resolvedType"` // resolved type after delta resolution + Size int `json:"size"` + Hash string `json:"hash"` + + // Delta info (only for ofs-delta and ref-delta objects) + DeltaBase string `json:"deltaBase,omitempty"` // raw base ref (hash for ref-delta, offset string for ofs-delta) + BaseHash string `json:"baseHash,omitempty"` // resolved base object hash + BaseOffset int `json:"baseOffset,omitempty"` + Depth int `json:"depth,omitempty"` // delta chain depth (0 for non-delta) +} + +// BuildPackIndex parses a raw packfile and builds an index of all objects. 
+func BuildPackIndex(data []byte) (*PackIndex, error) { + if len(data) < 12+20 { + return nil, fmt.Errorf("packfile too short") + } + + hdr := data[:12] + vers := binary.BigEndian.Uint32(hdr[4:8]) + nobj := binary.BigEndian.Uint32(hdr[8:12]) + if string(hdr[:4]) != "PACK" || (vers != 2 && vers != 3) { + return nil, fmt.Errorf("not a packfile") + } + if vers == 3 { + return nil, fmt.Errorf("packfile v3 not supported") + } + + sum := sha1.Sum(data[:len(data)-20]) + if !bytes.Equal(sum[:], data[len(data)-20:]) { + return nil, fmt.Errorf("packfile checksum mismatch") + } + + idx := &PackIndex{ + Version: vers, + NumObjects: nobj, + Size: int64(len(data)), + Checksum: hex.EncodeToString(data[len(data)-20:]), + Objects: make([]PackObject, 0, nobj), + } + + // We need a store to resolve delta chains and compute hashes. + var s gitfs.Store + objs := data[12 : len(data)-20] + off := 0 + + for i := 0; i < int(nobj); i++ { + obj, encSize, err := indexObject(&s, objs, off) + if err != nil { + return nil, fmt.Errorf("object %d at offset %d: %v", i, off+12, err) + } + obj.Offset = off + 12 // offset from start of packfile + obj.EncodedSize = encSize + idx.Objects = append(idx.Objects, obj) + off += encSize + } + + // Compute delta chain depths. + byHash := map[string]int{} // hash -> index into Objects + for i, obj := range idx.Objects { + byHash[obj.Hash] = i + } + for i := range idx.Objects { + if idx.Objects[i].BaseHash == "" { + continue + } + depth := 1 + baseHash := idx.Objects[i].BaseHash + for { + bi, ok := byHash[baseHash] + if !ok || idx.Objects[bi].BaseHash == "" { + break + } + depth++ + baseHash = idx.Objects[bi].BaseHash + } + idx.Objects[i].Depth = depth + } + + return idx, nil +} + +// indexObject parses the object at objs[off:] and returns structural info. 
+func indexObject(s *gitfs.Store, objs []byte, off int) (PackObject, int, error) { + if off < 0 || off >= len(objs) { + return PackObject{}, 0, fmt.Errorf("invalid offset") + } + + u, size := binary.Uvarint(objs[off:]) + if size <= 0 { + return PackObject{}, 0, fmt.Errorf("bad varint") + } + typ := gitfs.ObjType((u >> 4) & 7) + n := int(u&15 | u>>7<<4) + + obj := PackObject{} + + switch typ { + case gitfs.ObjRefDelta: + if len(objs)-(off+size) < 20 { + return PackObject{}, 0, fmt.Errorf("bad ref-delta") + } + var h gitfs.Hash + copy(h[:], objs[off+size:]) + size += 20 + obj.Type = "ref-delta" + obj.DeltaBase = h.String() + + case gitfs.ObjOfsDelta: + i := off + size + if len(objs)-i < 20 { + return PackObject{}, 0, fmt.Errorf("bad ofs-delta") + } + d := int64(objs[i] & 0x7f) + for objs[i]&0x80 != 0 { + i++ + d = d<<7 | int64(objs[i]&0x7f) + d += 1 << 7 + } + i++ + size = i - off + obj.Type = "ofs-delta" + obj.BaseOffset = off - int(d) + 12 // offset from start of packfile + obj.DeltaBase = fmt.Sprintf("-%d", int(d)) + + case gitfs.ObjCommit: + obj.Type = "commit" + case gitfs.ObjTree: + obj.Type = "tree" + case gitfs.ObjBlob: + obj.Type = "blob" + case gitfs.ObjTag: + obj.Type = "tag" + default: + return PackObject{}, 0, fmt.Errorf("unknown type %d", typ) + } + + // Decompress to get the actual size and compute hash. + br := bytes.NewReader(objs[off+size:]) + zr, err := zlib.NewReader(br) + if err != nil { + return PackObject{}, 0, fmt.Errorf("zlib: %v", err) + } + content, err := io.ReadAll(zr) + if err != nil { + return PackObject{}, 0, fmt.Errorf("zlib read: %v", err) + } + if len(content) != n { + return PackObject{}, 0, fmt.Errorf("size mismatch: %d != %d", len(content), n) + } + encSize := len(objs[off:]) - br.Len() + + // For non-delta objects, the hash is straightforward. + // For delta objects, we need to resolve the chain via the store. 
+ switch typ { + case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag: + h, _ := s.Add(typ, content) + obj.Hash = h.String() + obj.Size = len(content) + obj.ResolvedType = obj.Type + + case gitfs.ObjRefDelta: + baseTyp, baseData := s.Object(gitfs.Hash(mustParseHash(obj.DeltaBase))) + if baseTyp == gitfs.ObjNone { + return PackObject{}, 0, fmt.Errorf("unknown ref-delta base %s", obj.DeltaBase) + } + resolved, err := applyPackDelta(baseData, content) + if err != nil { + return PackObject{}, 0, fmt.Errorf("apply ref-delta: %v", err) + } + h, _ := s.Add(baseTyp, resolved) + obj.Hash = h.String() + obj.Size = len(resolved) + obj.ResolvedType = baseTyp.String() + obj.BaseHash = obj.DeltaBase + + case gitfs.ObjOfsDelta: + baseOff := off - mustParseOfsOffset(obj.DeltaBase) + baseTyp, baseHash, baseContent, _, err := gitfs.UnpackObject(s, objs, baseOff) + if err != nil { + return PackObject{}, 0, fmt.Errorf("resolve ofs-delta base: %v", err) + } + resolved, err := applyPackDelta(baseContent, content) + if err != nil { + return PackObject{}, 0, fmt.Errorf("apply ofs-delta: %v", err) + } + h, _ := s.Add(baseTyp, resolved) + obj.Hash = h.String() + obj.Size = len(resolved) + obj.ResolvedType = baseTyp.String() + obj.BaseHash = baseHash.String() + } + + return obj, encSize, nil +} + +func mustParseHash(s string) [20]byte { + b, _ := hex.DecodeString(s) + var h [20]byte + copy(h[:], b) + return h +} + +func mustParseOfsOffset(s string) int { + // s is like "-1234" + var n int + fmt.Sscanf(s, "-%d", &n) + return n +} + +// resolveBaseType follows the delta chain to find the base object type. 
+func resolveBaseType(s *gitfs.Store, objs []byte, off int) (gitfs.ObjType, error) { + u, size := binary.Uvarint(objs[off:]) + if size <= 0 { + return 0, fmt.Errorf("bad varint") + } + typ := gitfs.ObjType((u >> 4) & 7) + switch typ { + case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag: + return typ, nil + case gitfs.ObjOfsDelta: + i := off + size + d := int64(objs[i] & 0x7f) + for objs[i]&0x80 != 0 { + i++ + d = d<<7 | int64(objs[i]&0x7f) + d += 1 << 7 + } + return resolveBaseType(s, objs, off-int(d)) + case gitfs.ObjRefDelta: + var h gitfs.Hash + copy(h[:], objs[off+size:]) + baseTyp, _ := s.Object(h) + return baseTyp, nil + } + return 0, fmt.Errorf("unknown type %d", typ) +} + +// applyPackDelta applies a delta to a base to produce the target. +func applyPackDelta(base, delta []byte) ([]byte, error) { + // Delta starts with base size and target size as varints. + baseSize, s := binary.Uvarint(delta) + delta = delta[s:] + if baseSize != uint64(len(base)) { + return nil, fmt.Errorf("base size mismatch: %d != %d", baseSize, len(base)) + } + targSize, s := binary.Uvarint(delta) + delta = delta[s:] + + targ := make([]byte, targSize) + dst := targ + for len(delta) > 0 { + cmd := delta[0] + delta = delta[1:] + switch { + case cmd == 0: + return nil, fmt.Errorf("invalid delta cmd") + case cmd&0x80 != 0: + var off, size int64 + for i := uint(0); i < 4; i++ { + if cmd&(1< 0 { + cmd := delta[0] + delta = delta[1:] + + switch { + case cmd == 0: + return nil, fmt.Errorf("invalid delta cmd 0") + + case cmd&0x80 != 0: + // Copy from base. 
+ var off, size int64 + for i := uint(0); i < 4; i++ { + if cmd&(1<= len(data)-20 { + return nil, fmt.Errorf("invalid offset %d", offset) + } + objs := data[12 : len(data)-20] + off := offset - 12 + + u, size := binary.Uvarint(objs[off:]) + if size <= 0 { + return nil, fmt.Errorf("bad varint") + } + typ := gitfs.ObjType((u >> 4) & 7) + + switch typ { + case gitfs.ObjRefDelta: + size += 20 + case gitfs.ObjOfsDelta: + i := off + size + for objs[i]&0x80 != 0 { + i++ + } + i++ + size = i - off + default: + return nil, fmt.Errorf("not a delta object (type %s)", typ) + } + + br := bytes.NewReader(objs[off+size:]) + zr, err := zlib.NewReader(br) + if err != nil { + return nil, fmt.Errorf("zlib: %v", err) + } + return io.ReadAll(zr) +} + +// DecompressObject decompresses a single object from packfile data, identified by hash. +// It does a full unpack to populate the store (required for ref-delta resolution), +// then looks up the object by hash. +func DecompressObject(data []byte, hash string) (objType string, content []byte, err error) { + if len(data) < 12+20 { + return "", nil, fmt.Errorf("packfile too short") + } + + var s gitfs.Store + if err := gitfs.Unpack(&s, data); err != nil { + return "", nil, err + } + + h, err := gitfs.ParseHash(hash) + if err != nil { + return "", nil, fmt.Errorf("invalid hash %q: %v", hash, err) + } + + typ, objData := s.Object(h) + if typ == gitfs.ObjNone { + return "", nil, fmt.Errorf("object %s not found", hash) + } + + return typ.String(), objData, nil +} diff --git a/internal/git/templates.go b/internal/git/templates.go index a221125b..120c83b6 100644 --- a/internal/git/templates.go +++ b/internal/git/templates.go @@ -55,6 +55,19 @@ body {
  • github.com/wolfi-dev/os
  • +

    Pack Explorer

    +

    Explore the raw packfile format of a git repo:

    +
    + + +
    +

    +

    Examples

    + +

    FAQ

    How does this work?