Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions cmd/zoekt-sourcegraph-indexserver/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,6 @@ type indexArgs struct {
// Parallelism is the number of shards to compute in parallel.
Parallelism int

// FileLimit is the maximum size of a file
FileLimit int

// UseDelta is true if we want to use the new delta indexer. This should
// only be true for repositories we explicitly enable.
UseDelta bool
Expand Down Expand Up @@ -126,7 +123,7 @@ func (o *indexArgs) BuildOptions() *index.Options {
},
IndexDir: o.IndexDir,
Parallelism: o.Parallelism,
SizeMax: o.FileLimit,
SizeMax: MaxFileSize,
LargeFiles: o.LargeFiles,
CTagsMustSucceed: o.Symbols,
DisableCTags: !o.Symbols,
Expand Down Expand Up @@ -232,6 +229,17 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi
return err
}

for _, header := range []string{
"X-Sourcegraph-Actor-UID: internal",
"X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID),
} {
cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "--add", "http.extraHeader", header)
cmd.Stdin = &bytes.Buffer{}
if err := c.runCmd(cmd); err != nil {
return err
}
}

var fetchDuration time.Duration
successfullyFetchedCommitsCount := 0
allFetchesSucceeded := true
Expand All @@ -249,14 +257,13 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi
fetchArgs := []string{
"-C", gitDir,
"-c", "protocol.version=2",
"-c", "http.extraHeader=X-Sourcegraph-Actor-UID: internal",
"-c", "http.extraHeader=X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID),
"fetch", "--depth=1", "--no-tags",
}

// If there are no exceptions to MaxFileSize (1MB), we can avoid fetching these large files.
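// Git's blob:limit filter omits blobs whose size is >= the given limit,
// while zoekt indexes files up to and including MaxFileSize bytes, so the
// limit passed to git must be MaxFileSize+1 to still fetch files of exactly
// MaxFileSize bytes.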
if len(o.LargeFiles) == 0 {
fetchArgs = append(fetchArgs, "--filter=blob:limit=1m")
fetchArgs = append(fetchArgs, fmt.Sprintf("--filter=blob:limit=%d", int64(MaxFileSize)+1))
}

fetchArgs = append(fetchArgs, o.CloneURL)
Expand Down
26 changes: 16 additions & 10 deletions cmd/zoekt-sourcegraph-indexserver/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,9 @@ func TestIndex(t *testing.T) {
},
want: []string{
"git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 42",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef",
"git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0",
Expand All @@ -497,7 +499,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42",
"zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git",
"zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git",
},
}, {
name: "minimal-id",
Expand All @@ -512,7 +514,9 @@ func TestIndex(t *testing.T) {
},
want: []string{
"git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef",
"git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0",
Expand All @@ -522,15 +526,14 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 123",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git",
"zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git",
},
}, {
name: "all",
args: indexArgs{
Incremental: true,
IndexDir: "/data/index",
Parallelism: 4,
FileLimit: 123,
IndexOptions: IndexOptions{
Name: "test/repo",
CloneURL: "http://api.test/.internal/git/test/repo",
Expand All @@ -545,7 +548,9 @@ func TestIndex(t *testing.T) {
},
want: []string{
"git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed",
"git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef",
"git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed",
"git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0",
Expand All @@ -557,7 +562,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -incremental -branches HEAD,dev " +
"-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"-file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"$TMPDIR/test%2Frepo.git",
},
}, {
Expand All @@ -566,7 +571,6 @@ func TestIndex(t *testing.T) {
Incremental: true,
IndexDir: "/data/index",
Parallelism: 4,
FileLimit: 123,
UseDelta: true,
IndexOptions: IndexOptions{
RepoID: 0,
Expand Down Expand Up @@ -594,7 +598,9 @@ func TestIndex(t *testing.T) {
},
want: []string{
"git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal",
"git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease",
"git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef",
"git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed",
"git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678",
Expand All @@ -607,7 +613,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " +
"-delta -delta_threshold 22 -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"-delta -delta_threshold 22 -file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"$TMPDIR/test%2Frepo.git",
},
}}
Expand Down
4 changes: 1 addition & 3 deletions cmd/zoekt-sourcegraph-indexserver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ var (
})
)

// 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22
// NOTE: if you change this, you must also update gitIndex to use the same value when fetching the repo.
// 1 MB; match https://sourcegraph.sourcegraph.com/r/github.com/sourcegraph/sourcegraph/-/blob/cmd/searcher/internal/search/store.go?L32
const MaxFileSize = 1 << 20

// set of repositories that we want to capture separate indexing metrics for
Expand Down Expand Up @@ -732,7 +731,6 @@ func (s *Server) indexArgs(opts IndexOptions) *indexArgs {
IndexDir: s.IndexDir,
Parallelism: parallelism,
Incremental: true,
FileLimit: MaxFileSize,
ShardMerging: s.shardMerging,
}
}
Expand Down
2 changes: 0 additions & 2 deletions cmd/zoekt-sourcegraph-indexserver/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ func TestServer_defaultArgs(t *testing.T) {
IndexDir: "/testdata/index",
Parallelism: 6,
Incremental: true,
FileLimit: 1 << 20,
}
got := s.indexArgs(IndexOptions{Name: "testName"})
if !cmp.Equal(got, want) {
Expand Down Expand Up @@ -218,7 +217,6 @@ func TestCreateEmptyShard(t *testing.T) {
Incremental: true,
IndexDir: dir,
Parallelism: 1,
FileLimit: 1,
}

if err := createEmptyShard(args); err != nil {
Expand Down
Loading