diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index 9d082a0ef..36f55331b 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -85,9 +85,6 @@ type indexArgs struct { // Parallelism is the number of shards to compute in parallel. Parallelism int - // FileLimit is the maximum size of a file - FileLimit int - // UseDelta is true if we want to use the new delta indexer. This should // only be true for repositories we explicitly enable. UseDelta bool @@ -126,7 +123,7 @@ func (o *indexArgs) BuildOptions() *index.Options { }, IndexDir: o.IndexDir, Parallelism: o.Parallelism, - SizeMax: o.FileLimit, + SizeMax: MaxFileSize, LargeFiles: o.LargeFiles, CTagsMustSucceed: o.Symbols, DisableCTags: !o.Symbols, @@ -232,6 +229,17 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi return err } + for _, header := range []string{ + "X-Sourcegraph-Actor-UID: internal", + "X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), + } { + cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "--add", "http.extraHeader", header) + cmd.Stdin = &bytes.Buffer{} + if err := c.runCmd(cmd); err != nil { + return err + } + } + var fetchDuration time.Duration successfullyFetchedCommitsCount := 0 allFetchesSucceeded := true @@ -249,14 +257,13 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi fetchArgs := []string{ "-C", gitDir, "-c", "protocol.version=2", - "-c", "http.extraHeader=X-Sourcegraph-Actor-UID: internal", - "-c", "http.extraHeader=X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), "fetch", "--depth=1", "--no-tags", } - // If there are no exceptions to MaxFileSize (1MB), we can avoid fetching these large files. + // Git's blob:limit filter excludes blobs whose size is >= the given limit, + // while zoekt indexes files up to and including MaxFileSize bytes. 
if len(o.LargeFiles) == 0 { - fetchArgs = append(fetchArgs, "--filter=blob:limit=1m") + fetchArgs = append(fetchArgs, fmt.Sprintf("--filter=blob:limit=%d", int64(MaxFileSize)+1)) } fetchArgs = append(fetchArgs, o.CloneURL) diff --git a/cmd/zoekt-sourcegraph-indexserver/index_test.go b/cmd/zoekt-sourcegraph-indexserver/index_test.go index 6b886b6d2..45b4fc355 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/index_test.go @@ -487,7 +487,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 42", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", @@ -497,7 +499,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42", - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", + "zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", }, }, { name: "minimal-id", @@ -512,7 +514,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c 
init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", @@ -522,7 +526,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 123", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", + "zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", }, }, { name: "all", @@ -530,7 +534,6 @@ func TestIndex(t *testing.T) { Incremental: true, IndexDir: "/data/index", Parallelism: 4, - FileLimit: 123, IndexOptions: IndexOptions{ Name: "test/repo", CloneURL: "http://api.test/.internal/git/test/repo", @@ -545,7 +548,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", + "git -C $TMPDIR/test%2Frepo.git config --add 
http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", @@ -557,7 +562,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", "zoekt-git-index -submodules=false -incremental -branches HEAD,dev " + - "-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + + "-file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + "$TMPDIR/test%2Frepo.git", }, }, { @@ -566,7 +571,6 @@ func TestIndex(t *testing.T) { Incremental: true, IndexDir: "/data/index", Parallelism: 4, - FileLimit: 123, UseDelta: true, IndexOptions: IndexOptions{ RepoID: 0, @@ -594,7 +598,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD 
deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678", @@ -607,7 +613,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", "zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " + - "-delta -delta_threshold 22 -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + + "-delta -delta_threshold 22 -file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + "$TMPDIR/test%2Frepo.git", }, }} diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 5dfd1b009..c218ba0a7 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -169,8 +169,7 @@ var ( }) ) -// 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22 -// NOTE: if you change this, you must also update gitIndex to use the same value when fetching the repo. 
+// 1 MB; match https://sourcegraph.sourcegraph.com/r/github.com/sourcegraph/sourcegraph/-/blob/cmd/searcher/internal/search/store.go?L32 const MaxFileSize = 1 << 20 // set of repositories that we want to capture separate indexing metrics for @@ -732,7 +731,6 @@ func (s *Server) indexArgs(opts IndexOptions) *indexArgs { IndexDir: s.IndexDir, Parallelism: parallelism, Incremental: true, - FileLimit: MaxFileSize, ShardMerging: s.shardMerging, } } diff --git a/cmd/zoekt-sourcegraph-indexserver/main_test.go b/cmd/zoekt-sourcegraph-indexserver/main_test.go index 759dc4a07..2fab54c68 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/main_test.go @@ -44,7 +44,6 @@ func TestServer_defaultArgs(t *testing.T) { IndexDir: "/testdata/index", Parallelism: 6, Incremental: true, - FileLimit: 1 << 20, } got := s.indexArgs(IndexOptions{Name: "testName"}) if !cmp.Equal(got, want) { @@ -218,7 +217,6 @@ func TestCreateEmptyShard(t *testing.T) { Incremental: true, IndexDir: dir, Parallelism: 1, - FileLimit: 1, } if err := createEmptyShard(args); err != nil {