From 575a7c8f2b7d24cab3f409df2c39c8f7b9e8c296 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Sat, 18 Apr 2026 09:34:12 -0400 Subject: [PATCH 01/20] feat(operator): add audit wiring and token-protected operator UI Wire MongoDB copy audit events after target uploads; add /operator dashboard with audit feed, deployment metadata, and optional tag API. Made-with: Cursor --- app.go | 7 + configs/environment.go | 15 ++ services/audit_logger.go | 30 +-- services/github_write_to_target.go | 85 +++++- services/github_write_to_target_test.go | 18 +- services/operator_ui.go | 329 ++++++++++++++++++++++++ services/web/operator/index.html | 163 ++++++++++++ services/webhook_handler_new.go | 2 +- services/workflow_processor.go | 9 +- types/types.go | 26 +- 10 files changed, 649 insertions(+), 35 deletions(-) create mode 100644 services/operator_ui.go create mode 100644 services/web/operator/index.html diff --git a/app.go b/app.go index 00c50b4..22a70ab 100644 --- a/app.go +++ b/app.go @@ -174,6 +174,10 @@ func startWebServer(config *configs.Config, container *services.ServiceContainer // Config diagnostic endpoint — shows resolved config with secrets redacted mux.HandleFunc("/config", services.ConfigDiagnosticHandler(container, version)) + if config.OperatorUIToken != "" { + services.RegisterOperatorRoutes(mux, config, container, version) + } + // Info endpoint mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { @@ -189,6 +193,9 @@ func startWebServer(config *configs.Config, container *services.ServiceContainer if config.MetricsEnabled { _, _ = fmt.Fprintf(w, "Metrics: /metrics\n") } + if config.OperatorUIToken != "" { + _, _ = fmt.Fprintf(w, "Operator UI: /operator/\n") + } }) // Create server diff --git a/configs/environment.go b/configs/environment.go index f9f5b01..e76dc07 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -69,6 +69,12 @@ type Config struct { // Webhook retry configuration WebhookMaxRetries int // 
max retry attempts for failed webhook processing WebhookRetryInitialDelay int // initial delay between retries in seconds (doubles each attempt) + + // Operator web UI (optional) — protected by OPERATOR_UI_TOKEN when set + OperatorUIToken string + OperatorRepoSlug string // "owner/repo" for GitHub links and optional tag API + OperatorReleaseGitHubToken string // PAT with contents:write to create a version tag (optional) + OperatorReleaseTargetBranch string // branch SHA used when creating a tag (default main) } const ( @@ -117,6 +123,10 @@ const ( WebhookProcessingTimeoutSeconds = "WEBHOOK_PROCESSING_TIMEOUT_SECONDS" WebhookMaxRetries = "WEBHOOK_MAX_RETRIES" WebhookRetryInitialDelay = "WEBHOOK_RETRY_INITIAL_DELAY" //nolint:gosec // env var name, not a credential + OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name + OperatorRepoSlug = "OPERATOR_REPO_SLUG" + OperatorReleaseGitHubToken = "OPERATOR_RELEASE_GITHUB_TOKEN" // #nosec G101 -- env var name + OperatorReleaseTargetBranch = "OPERATOR_RELEASE_TARGET_BRANCH" ) // NewConfig returns a new Config instance with default values @@ -235,6 +245,11 @@ func LoadEnvironment(envFile string) (*Config, error) { config.WebhookMaxRetries = getIntEnvWithDefault(WebhookMaxRetries, config.WebhookMaxRetries) config.WebhookRetryInitialDelay = getIntEnvWithDefault(WebhookRetryInitialDelay, config.WebhookRetryInitialDelay) + config.OperatorUIToken = os.Getenv(OperatorUIToken) + config.OperatorRepoSlug = os.Getenv(OperatorRepoSlug) + config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) + config.OperatorReleaseTargetBranch = getEnvWithDefault(OperatorReleaseTargetBranch, "main") + if err := validateConfig(config); err != nil { return nil, err } diff --git a/services/audit_logger.go b/services/audit_logger.go index 7fa6fca..dc606ca 100644 --- a/services/audit_logger.go +++ b/services/audit_logger.go @@ -21,21 +21,21 @@ const ( // AuditEvent represents an audit log entry type AuditEvent struct { - ID 
string `bson:"_id,omitempty"` - Timestamp time.Time `bson:"timestamp"` - EventType AuditEventType `bson:"event_type"` - RuleName string `bson:"rule_name,omitempty"` - SourceRepo string `bson:"source_repo"` - SourcePath string `bson:"source_path"` - TargetRepo string `bson:"target_repo,omitempty"` - TargetPath string `bson:"target_path,omitempty"` - CommitSHA string `bson:"commit_sha,omitempty"` - PRNumber int `bson:"pr_number,omitempty"` - Success bool `bson:"success"` - ErrorMessage string `bson:"error_message,omitempty"` - DurationMs int64 `bson:"duration_ms,omitempty"` - FileSize int64 `bson:"file_size,omitempty"` - AdditionalData map[string]any `bson:"additional_data,omitempty"` + ID string `bson:"_id,omitempty" json:"id,omitempty"` + Timestamp time.Time `bson:"timestamp" json:"timestamp"` + EventType AuditEventType `bson:"event_type" json:"event_type"` + RuleName string `bson:"rule_name,omitempty" json:"rule_name,omitempty"` + SourceRepo string `bson:"source_repo" json:"source_repo"` + SourcePath string `bson:"source_path" json:"source_path"` + TargetRepo string `bson:"target_repo,omitempty" json:"target_repo,omitempty"` + TargetPath string `bson:"target_path,omitempty" json:"target_path,omitempty"` + CommitSHA string `bson:"commit_sha,omitempty" json:"commit_sha,omitempty"` + PRNumber int `bson:"pr_number,omitempty" json:"pr_number,omitempty"` + Success bool `bson:"success" json:"success"` + ErrorMessage string `bson:"error_message,omitempty" json:"error_message,omitempty"` + DurationMs int64 `bson:"duration_ms,omitempty" json:"duration_ms,omitempty"` + FileSize int64 `bson:"file_size,omitempty" json:"file_size,omitempty"` + AdditionalData map[string]any `bson:"additional_data,omitempty" json:"additional_data,omitempty"` } // AuditLogger handles audit logging to MongoDB diff --git a/services/github_write_to_target.go b/services/github_write_to_target.go index 2159c51..bde8b6d 100644 --- a/services/github_write_to_target.go +++ 
b/services/github_write_to_target.go @@ -54,7 +54,8 @@ func normalizeRefPath(branchPath string, fullPath bool) string { // AddFilesToTargetRepos uploads files to target repository branches. // It accepts the upload map as a parameter for concurrency safety. -func AddFilesToTargetRepos(ctx context.Context, config *configs.Config, filesToUpload map[types.UploadKey]types.UploadFileContent, prTemplateFetcher PRTemplateFetcher, metricsCollector *MetricsCollector) { +// When auditLogger is non-nil, each file copy is recorded (success or failure) for MongoDB audit. +func AddFilesToTargetRepos(ctx context.Context, config *configs.Config, filesToUpload map[types.UploadKey]types.UploadFileContent, prTemplateFetcher PRTemplateFetcher, metricsCollector *MetricsCollector, auditLogger AuditLogger) { if config.DryRun { for key, value := range filesToUpload { LogInfo("[DRY-RUN] Would upload files to target repo", @@ -71,13 +72,95 @@ func AddFilesToTargetRepos(ctx context.Context, config *configs.Config, filesToU } for key, value := range filesToUpload { + batchStart := time.Now() if err := uploadToTarget(ctx, config, key, value, prTemplateFetcher); err != nil { LogCritical("Failed to upload files", "repo", key.RepoName, "error", err) recordBatchFailure(metricsCollector, len(value.Content)) + auditLogCopyBatchFailure(ctx, auditLogger, key, value, err) + } else { + auditLogCopyBatchSuccess(ctx, auditLogger, key, value, time.Since(batchStart)) } } } +func auditLogCopyBatchSuccess(ctx context.Context, auditLogger AuditLogger, key types.UploadKey, value types.UploadFileContent, elapsed time.Duration) { + if auditLogger == nil || len(value.Content) == 0 { + return + } + n := len(value.Content) + perFileMs := elapsed.Milliseconds() / int64(n) + if perFileMs == 0 && elapsed > 0 { + perFileMs = 1 + } + for i := range value.Content { + f := value.Content[i] + meta := types.CopierFileMeta{} + if i < len(value.FileMeta) { + meta = value.FileMeta[i] + } + srcPath := meta.SourcePath + if 
srcPath == "" { + srcPath = f.GetPath() + } + ev := &AuditEvent{ + RuleName: meta.RuleName, + SourceRepo: meta.SourceRepo, + SourcePath: srcPath, + TargetRepo: key.RepoName, + TargetPath: f.GetName(), + PRNumber: meta.PRNumber, + Success: true, + DurationMs: perFileMs, + FileSize: int64(decodedFileBytes(&f)), + } + if err := auditLogger.LogCopyEvent(ctx, ev); err != nil { + LogWarning("audit LogCopyEvent failed", "error", err) + } + } +} + +func auditLogCopyBatchFailure(ctx context.Context, auditLogger AuditLogger, key types.UploadKey, value types.UploadFileContent, batchErr error) { + if auditLogger == nil || len(value.Content) == 0 { + return + } + msg := batchErr.Error() + for i := range value.Content { + f := value.Content[i] + meta := types.CopierFileMeta{} + if i < len(value.FileMeta) { + meta = value.FileMeta[i] + } + srcPath := meta.SourcePath + if srcPath == "" { + srcPath = f.GetPath() + } + ev := &AuditEvent{ + RuleName: meta.RuleName, + SourceRepo: meta.SourceRepo, + SourcePath: srcPath, + TargetRepo: key.RepoName, + TargetPath: f.GetName(), + PRNumber: meta.PRNumber, + Success: false, + ErrorMessage: msg, + } + if err := auditLogger.LogCopyEvent(ctx, ev); err != nil { + LogWarning("audit LogCopyEvent (failure) failed", "error", err) + } + } +} + +func decodedFileBytes(f *github.RepositoryContent) int { + if f == nil { + return 0 + } + c, err := f.GetContent() + if err != nil { + return 0 + } + return len(c) +} + // uploadToTarget handles a single upload-key: authenticates for the target org, // resolves commit parameters, and dispatches to the appropriate strategy. 
func uploadToTarget(ctx context.Context, config *configs.Config, key types.UploadKey, value types.UploadFileContent, prTemplateFetcher PRTemplateFetcher) error { diff --git a/services/github_write_to_target_test.go b/services/github_write_to_target_test.go index 05e1383..e61221f 100644 --- a/services/github_write_to_target_test.go +++ b/services/github_write_to_target_test.go @@ -83,7 +83,7 @@ func TestAddFilesToTargetRepos_Direct_Succeeds(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() require.Equal(t, 1, info["GET "+baseRefURL]) @@ -173,7 +173,7 @@ func TestAddFilesToTargetRepos_ViaPR_Succeeds(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil, nil) require.Equal(t, 1, test.CountByMethodAndURLRegexp("POST", regexp.MustCompile(`/app/installations/`+regexp.QuoteMeta(cfg.InstallationId)+`/access_tokens$`), @@ -244,7 +244,7 @@ func TestAddFilesToTargetRepos_Direct_SkipsEmptyCommit(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() // Should still fetch the ref and create the tree @@ -284,7 +284,7 @@ func TestAddFiles_DirectConflict_NonFastForward(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), test.TestConfig(), filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() require.Equal(t, 1, info["GET "+baseRefURL]) @@ -364,7 +364,7 @@ func 
TestAddFiles_ViaPR_MergeConflict_Dirty_NotMerged(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() require.Equal(t, 1, info["POST "+createRefURL]) @@ -417,7 +417,7 @@ func TestPriority_Strategy_ConfigOverridesEnv_And_MessageFallbacks(t *testing.T) {RepoName: repo, BranchPath: "refs/heads/" + baseBranch, CommitStrategy: typeCfg.CopierCommitStrategy}: {TargetBranch: baseBranch, Content: files}, } - services.AddFilesToTargetRepos(context.Background(), testCfg, filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), testCfg, filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() require.Equal(t, 1, info["GET "+baseRefURL]) @@ -495,7 +495,7 @@ func TestPriority_PRTitleDefaultsToCommitMessage_And_NoAutoMergeWhenConfigPresen {RepoName: repo, BranchPath: "refs/heads/" + baseBranch, RuleName: "", CommitStrategy: "pr"}: {TargetBranch: baseBranch, Content: files, CommitStrategy: "pr"}, } - services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil, nil) require.Equal(t, 1, test.CountByMethodAndURLRegexp("POST", regexp.MustCompile(`/pulls$`))) require.Equal(t, 0, test.CountByMethodAndURLRegexp("PUT", regexp.MustCompile(`/pulls/5/merge$`))) @@ -564,7 +564,7 @@ func TestAddFilesToTargetRepos_MixedStrategies_ProducesSeparateOperations(t *tes }, } - services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil) + services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() @@ -647,7 +647,7 @@ func TestAddFilesViaPR_ReusesExistingCopierPR(t *testing.T) { }, } - services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil) + 
services.AddFilesToTargetRepos(context.Background(), cfg, filesToUpload, nil, nil, nil) info := httpmock.GetCallCountInfo() diff --git a/services/operator_ui.go b/services/operator_ui.go new file mode 100644 index 0000000..c90b304 --- /dev/null +++ b/services/operator_ui.go @@ -0,0 +1,329 @@ +package services + +import ( + "bytes" + "context" + "crypto/subtle" + _ "embed" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "regexp" + "strconv" + "strings" + "time" + + "github.com/grove-platform/github-copier/configs" +) + +//go:embed web/operator/index.html +var operatorIndexHTML []byte + +var operatorVersionTagRe = regexp.MustCompile(`^v[0-9]+\.[0-9]+\.[0-9]+$`) + +// RegisterOperatorRoutes mounts the operator HTML UI and JSON APIs under /operator/. +// cfg.OperatorUIToken must be non-empty before calling (caller checks). +func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container *ServiceContainer, version string) { + o := &operatorUI{ + cfg: cfg, + container: container, + version: version, + } + mux.HandleFunc("/operator/api/audit/events", o.wrapAPI(o.handleAuditEvents)) + mux.HandleFunc("/operator/api/deployment", o.wrapAPI(o.handleDeployment)) + mux.HandleFunc("/operator/api/release", o.wrapAPI(o.handleRelease)) + mux.HandleFunc("/operator/", o.serveIndex) + mux.HandleFunc("/operator", func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, "/operator/", http.StatusFound) + }) +} + +type operatorUI struct { + cfg *configs.Config + container *ServiceContainer + version string +} + +func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if !operatorAuthOK(o.cfg.OperatorUIToken, bearerToken(r)) { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "unauthorized"}) + return + } + next(w, r) + } +} + +func bearerToken(r *http.Request) string { + h := 
r.Header.Get("Authorization") + const p = "Bearer " + if len(h) > len(p) && strings.EqualFold(h[:len(p)], p) { + return strings.TrimSpace(h[len(p):]) + } + return "" +} + +func operatorAuthOK(expected, got string) bool { + if expected == "" { + return false + } + e := []byte(expected) + g := []byte(got) + if len(e) != len(g) { + return false + } + return subtle.ConstantTimeCompare(e, g) == 1 +} + +func (o *operatorUI) serveIndex(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/operator/" { + http.NotFound(w, r) + return + } + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write(operatorIndexHTML) +} + +func (o *operatorUI) handleAuditEvents(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + limit := 50 + if q := r.URL.Query().Get("limit"); q != "" { + if n, err := strconv.Atoi(q); err == nil && n > 0 { + limit = n + } + } + if limit > 200 { + limit = 200 + } + ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second) + defer cancel() + if o.container.AuditLogger == nil { + _ = json.NewEncoder(w).Encode(map[string]any{"events": []any{}}) + return + } + events, err := o.container.AuditLogger.GetRecentEvents(ctx, limit) + if err != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{"events": events}) +} + +// OperatorDeploymentInfo is non-secret runtime and platform metadata for the operator UI. 
+type OperatorDeploymentInfo struct { + Version string `json:"version"` + GoogleCloudRegion string `json:"google_cloud_region,omitempty"` + CloudRunService string `json:"cloud_run_service,omitempty"` + CloudRunRevision string `json:"cloud_run_revision,omitempty"` + CloudRunConfig string `json:"cloud_run_configuration,omitempty"` + GoogleCloudProject string `json:"google_cloud_project,omitempty"` + Port string `json:"port"` + WebhookPath string `json:"webhook_path"` + DryRun bool `json:"dry_run"` + AuditEnabled bool `json:"audit_enabled"` + AuditDatabase string `json:"audit_database,omitempty"` + AuditCollection string `json:"audit_collection,omitempty"` + ConfigRepo string `json:"config_repo,omitempty"` + EffectiveConfig string `json:"effective_config_file,omitempty"` + OperatorRepoSlug string `json:"operator_repo_slug,omitempty"` + ReleaseAPIMode string `json:"release_api_mode"` + Env map[string]string `json:"cloud_env,omitempty"` +} + +func (o *operatorUI) handleDeployment(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + releaseMode := "disabled" + if o.cfg.OperatorReleaseGitHubToken != "" && o.cfg.OperatorRepoSlug != "" { + releaseMode = "tag_create_enabled" + } + info := OperatorDeploymentInfo{ + Version: o.version, + CloudRunService: os.Getenv("K_SERVICE"), + CloudRunRevision: os.Getenv("K_REVISION"), + CloudRunConfig: os.Getenv("K_CONFIGURATION"), + GoogleCloudProject: o.cfg.GoogleCloudProjectId, + Port: o.cfg.Port, + WebhookPath: o.cfg.WebserverPath, + DryRun: o.cfg.DryRun, + AuditEnabled: o.cfg.AuditEnabled, + AuditDatabase: o.cfg.AuditDatabase, + AuditCollection: o.cfg.AuditCollection, + ConfigRepo: o.cfg.ConfigRepoOwner + "/" + o.cfg.ConfigRepoName, + EffectiveConfig: o.cfg.EffectiveConfigFile(), + OperatorRepoSlug: o.cfg.OperatorRepoSlug, + ReleaseAPIMode: releaseMode, + Env: 
map[string]string{ + "ENV": firstEnv("ENV"), + }, + } + if region := os.Getenv("GOOGLE_CLOUD_REGION"); region != "" { + info.GoogleCloudRegion = region + } + _ = json.NewEncoder(w).Encode(info) +} + +type operatorReleaseRequest struct { + Version string `json:"version"` +} + +type operatorReleaseResponse struct { + OK bool `json:"ok,omitempty"` + Ref string `json:"ref,omitempty"` + TagSHA string `json:"tag_sha,omitempty"` + Message string `json:"message,omitempty"` + Error string `json:"error,omitempty"` + Notice string `json:"notice,omitempty"` +} + +func (o *operatorUI) handleRelease(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + body, err := io.ReadAll(io.LimitReader(r.Body, 4096)) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{Error: "read body"}) + return + } + var req operatorReleaseRequest + if err := json.Unmarshal(body, &req); err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{Error: "invalid json"}) + return + } + v := strings.TrimSpace(req.Version) + if !operatorVersionTagRe.MatchString(v) { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{Error: "version must match vMAJOR.MINOR.PATCH"}) + return + } + if o.cfg.OperatorReleaseGitHubToken == "" || o.cfg.OperatorRepoSlug == "" { + w.WriteHeader(http.StatusNotImplemented) + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{ + Error: "set OPERATOR_RELEASE_GITHUB_TOKEN and OPERATOR_REPO_SLUG to enable tag creation from the UI", + Notice: "Full releases (changelog + GitHub Release) still use ./scripts/release.sh locally.", + }) + return + } + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + ref, sha, err := githubCreateVersionTag(ctx, 
o.cfg.OperatorReleaseGitHubToken, o.cfg.OperatorRepoSlug, o.cfg.OperatorReleaseTargetBranch, v) + if err != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{Error: err.Error()}) + return + } + _ = json.NewEncoder(w).Encode(operatorReleaseResponse{ + OK: true, + Ref: ref, + TagSHA: sha, + Message: "Tag pushed to GitHub; if CI is configured for tag deploys, the pipeline should start shortly.", + Notice: "This does not update CHANGELOG.md — use scripts/release.sh for a documented release.", + }) +} + +func firstEnv(keys ...string) string { + for _, k := range keys { + if v := os.Getenv(k); v != "" { + return v + } + } + return "" +} + +func githubCreateVersionTag(ctx context.Context, pat, repoSlug, baseBranch, version string) (ref string, sha string, err error) { + parts := strings.SplitN(strings.TrimSpace(repoSlug), "/", 2) + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + return "", "", fmt.Errorf("invalid OPERATOR_REPO_SLUG (want owner/repo)") + } + owner, repo := parts[0], parts[1] + baseURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/git/ref/heads/%s", owner, repo, baseBranch) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL, nil) + if err != nil { + return "", "", err + } + req.Header.Set("Authorization", "Bearer "+pat) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + resp, err := githubHTTPClient().Do(req) + if err != nil { + return "", "", err + } + defer func() { _ = resp.Body.Close() }() + baseBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if resp.StatusCode != http.StatusOK { + return "", "", fmt.Errorf("github get branch ref: %s: %s", resp.Status, strings.TrimSpace(string(baseBody))) + } + var refObj struct { + Object struct { + SHA string `json:"sha"` + } `json:"object"` + } + if err := json.Unmarshal(baseBody, &refObj); err != nil { + return "", "", fmt.Errorf("parse branch ref: %w", 
err) + } + headSHA := refObj.Object.SHA + if headSHA == "" { + return "", "", fmt.Errorf("empty base sha for branch %s", baseBranch) + } + + tagRef := "refs/tags/" + version + payload := map[string]string{"ref": tagRef, "sha": headSHA} + buf, err := json.Marshal(payload) + if err != nil { + return "", "", err + } + postURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/git/refs", owner, repo) + postReq, err := http.NewRequestWithContext(ctx, http.MethodPost, postURL, bytes.NewReader(buf)) + if err != nil { + return "", "", err + } + postReq.Header.Set("Authorization", "Bearer "+pat) + postReq.Header.Set("Accept", "application/vnd.github+json") + postReq.Header.Set("Content-Type", "application/json") + postReq.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + postResp, err := githubHTTPClient().Do(postReq) + if err != nil { + return "", "", err + } + defer func() { _ = postResp.Body.Close() }() + postBody, _ := io.ReadAll(io.LimitReader(postResp.Body, 1<<20)) + if postResp.StatusCode != http.StatusCreated { + return "", "", fmt.Errorf("github create tag ref: %s: %s", postResp.Status, strings.TrimSpace(string(postBody))) + } + var created struct { + Ref string `json:"ref"` + Object struct { + SHA string `json:"sha"` + } `json:"object"` + } + if err := json.Unmarshal(postBody, &created); err != nil { + return "", "", fmt.Errorf("parse create ref response: %w", err) + } + return created.Ref, created.Object.SHA, nil +} + +func githubHTTPClient() *http.Client { + return &http.Client{Timeout: 25 * time.Second} +} diff --git a/services/web/operator/index.html b/services/web/operator/index.html new file mode 100644 index 0000000..8cb433e --- /dev/null +++ b/services/web/operator/index.html @@ -0,0 +1,163 @@ + + + + + + GitHub Copier — Operator + + + +

GitHub Copier — Operator

+

Recent copy audit events, deployment metadata, and optional tag-based release (same trigger as CI on version tags).

+ +
+

Access

+ +
+ + + +
+

Stored only in sessionStorage for this browser tab. Send Authorization: Bearer … on API calls.

+
+ +
+

Recent copy events

+
+
+ + +
+ +
+ +
+ + + + + + + + + + + + + +
TimeStatusRuleSourceTargetPRError
+
+
+ +
+

Deployment

+ + + +
+ +
+

Release (tag → deploy)

+

Production deploy runs on version tag pushes (vMAJOR.MINOR.PATCH). Changelog updates still require scripts/release.sh locally unless you only create a tag.

+ +
+ + +
+ + +
+ + + + diff --git a/services/webhook_handler_new.go b/services/webhook_handler_new.go index 9d8f4aa..23513e8 100644 --- a/services/webhook_handler_new.go +++ b/services/webhook_handler_new.go @@ -464,7 +464,7 @@ func fetchChangedFiles(ctx context.Context, config *configs.Config, container *S func uploadAndDeprecateFiles(ctx context.Context, config *configs.Config, container *ServiceContainer, sourceRepoOwner, sourceRepoName, sourceBranch string, prNumber int) { // Upload queued files filesToUpload := container.FileStateService.GetFilesToUpload() - AddFilesToTargetRepos(ctx, config, filesToUpload, container.PRTemplateFetcher, container.MetricsCollector) + AddFilesToTargetRepos(ctx, config, filesToUpload, container.PRTemplateFetcher, container.MetricsCollector, container.AuditLogger) container.FileStateService.ClearFilesToUpload() // Build deprecation map and update file in the source repo diff --git a/services/workflow_processor.go b/services/workflow_processor.go index c29d1b1..15a862b 100644 --- a/services/workflow_processor.go +++ b/services/workflow_processor.go @@ -150,7 +150,7 @@ func (wp *workflowProcessor) ProcessWorkflow( continue // fetch failed — already logged } mr.fileContent.Name = github.Ptr(mr.targetPath) - wp.queueUpload(ctx, mr.workflow, mr.fileContent, mr.targetPath, mr.prNumber, mr.sourceCommitSHA) + wp.queueUpload(ctx, mr.workflow, mr.fileContent, mr.targetPath, mr.prNumber, mr.sourceCommitSHA, mr.file.Path) filesMatched++ } @@ -411,6 +411,7 @@ func (wp *workflowProcessor) queueUpload( targetPath string, prNumber int, sourceCommitSHA string, + sourcePath string, ) { // Create upload key — includes CommitStrategy so that workflows with @@ -454,6 +455,12 @@ func (wp *workflowProcessor) queueUpload( // Add file to content content.Content = append(content.Content, *fileContent) + content.FileMeta = append(content.FileMeta, types.CopierFileMeta{ + RuleName: workflow.Name, + SourceRepo: workflow.Source.Repo, + SourcePath: sourcePath, + PRNumber: 
prNumber, + }) // Render templates with message context msgCtx := types.NewMessageContext() diff --git a/types/types.go b/types/types.go index 5f1a3c3..7a026f0 100644 --- a/types/types.go +++ b/types/types.go @@ -110,14 +110,24 @@ type UploadKey struct { } type UploadFileContent struct { - TargetBranch string `json:"target_branch"` - Content []github.RepositoryContent `json:"content"` - CommitStrategy CommitStrategy `json:"commit_strategy,omitempty"` - CommitMessage string `json:"commit_message,omitempty"` - PRTitle string `json:"pr_title,omitempty"` - PRBody string `json:"pr_body,omitempty"` - UsePRTemplate bool `json:"use_pr_template,omitempty"` // If true, fetch and merge PR template from target repo - AutoMergePR bool `json:"auto_merge_pr,omitempty"` + TargetBranch string `json:"target_branch"` + Content []github.RepositoryContent `json:"content"` + // FileMeta aligns 1:1 with Content — provenance for each file (audit, Slack, diagnostics). + FileMeta []CopierFileMeta `json:"file_meta,omitempty"` + CommitStrategy CommitStrategy `json:"commit_strategy,omitempty"` + CommitMessage string `json:"commit_message,omitempty"` + PRTitle string `json:"pr_title,omitempty"` + PRBody string `json:"pr_body,omitempty"` + UsePRTemplate bool `json:"use_pr_template,omitempty"` // If true, fetch and merge PR template from target repo + AutoMergePR bool `json:"auto_merge_pr,omitempty"` +} + +// CopierFileMeta carries per-file provenance for uploads (order matches UploadFileContent.Content). 
+type CopierFileMeta struct { + RuleName string `json:"rule_name,omitempty"` + SourceRepo string `json:"source_repo,omitempty"` + SourcePath string `json:"source_path,omitempty"` + PRNumber int `json:"pr_number,omitempty"` } // CommitStrategy represents the strategy for committing changes From 539d43cbe87ed08bb823fee9ac72182bdd7b1373 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Mon, 20 Apr 2026 14:09:52 -0400 Subject: [PATCH 02/20] feat(operator): comprehensive operator UI with writer-facing features Backend: - Add workflow config API (GET /operator/api/workflows) serving parsed config - Add webhook replay API (POST /operator/api/replay) with safety guards (in-flight dedup, merged-PR check, synthetic delivery ID) - Add per-delivery log viewer (GET /operator/api/logs) with context-tagged log capture via DeliveryLogBuffer ring buffer - Add PR number and file path filters to audit query API - Fix ObjectID decoding for mongo-driver v2 (ObjectIDAsHexString) - Fix constant-time token comparison leaking token length - Fix empty-commit PR creation bug (errTreeUnchanged sentinel) - Add CommitSHA to WebhookTraceEntry and CopierFileMeta - Enrich webhook trace detail with target repos and file counts - Add uptime + MongoDB health check to deployment API Frontend - operator UI rewrite: - 5-tab layout: Overview, Webhooks, Audit, Workflows, System - Writer/Operator mode toggle (hides infrastructure tabs for writers) - Sticky status bar: version, uptime, MongoDB health, last webhook, connection - Dark mode with full CSS variable theming - Collapsible sections with localStorage persistence - Grouped metrics with delta indicators, sparklines, health accent borders - Grouped deployment cards with copy-to-clipboard - Structured detail badges in webhook trace Detail column - Audit event detail drawer with all fields + replay button - File match tester: client-side glob/regex/move/copy matching against loaded workflow config with target path computation - PR lookup combining 
traces + audit into unified timeline view - Recent copies card-based feed - Workflow browser with search and visual cards - In-app help / getting started documentation - CSV export for audit events - Toast notifications for background auto-refresh errors - Connection heartbeat (60s ping with disconnected indicator) - Auto-refresh with persisted checkbox state - Shareable state URLs via history.replaceState - Deep-linkable filters, tabs, and mode (?tab=audit&pr=123&mode=writer) - Keyboard shortcuts (1-5 tabs, R refresh, D dark, T time, W mode, ? help) - Empty state messages, expandable errors, GitHub links throughout - Responsive audit table (hides SHA/Type columns on narrow screens) - Token show/hide toggle, release confirmation dialog, inline favicon - Relative/absolute time toggle with periodic update --- app.go | 26 +- configs/environment.go | 35 +- env-cloudrun.yaml | 4 + services/audit_logger.go | 84 +- services/audit_logger_test.go | 12 + services/delivery_tracker.go | 57 + services/delivery_tracker_test.go | 7 + services/github_write_to_target.go | 21 +- services/log_buffer.go | 116 ++ services/logger.go | 18 + services/operator_ui.go | 449 +++++++- services/service_container.go | 8 + services/web/operator/index.html | 1609 +++++++++++++++++++++++++--- services/webhook_handler_new.go | 133 ++- services/webhook_trace_buffer.go | 97 ++ services/workflow_processor.go | 1 + types/types.go | 1 + 17 files changed, 2483 insertions(+), 195 deletions(-) create mode 100644 services/log_buffer.go create mode 100644 services/webhook_trace_buffer.go diff --git a/app.go b/app.go index 22a70ab..df300b1 100644 --- a/app.go +++ b/app.go @@ -155,29 +155,23 @@ func startWebServer(config *configs.Config, container *services.ServiceContainer // Create HTTP handler with all routes mux := http.NewServeMux() - // Webhook endpoint - mux.HandleFunc(config.WebserverPath, func(w http.ResponseWriter, r *http.Request) { - handleWebhook(w, r, config, container) - }) - - // Liveness probe 
— lightweight, always 200 if process is running + // Register built-in paths before the configurable webhook route so a mis-set + // WEBSERVER_PATH can never shadow /health, /ready, /metrics, /config, or /operator. mux.HandleFunc("/health", services.HealthHandler(container.StartTime, version)) - - // Readiness probe — checks GitHub auth, MongoDB connectivity mux.HandleFunc("/ready", services.ReadinessHandler(container)) - - // Metrics endpoint (if enabled) if config.MetricsEnabled { mux.HandleFunc("/metrics", services.MetricsHandler(container.MetricsCollector, container.FileStateService)) } - - // Config diagnostic endpoint — shows resolved config with secrets redacted mux.HandleFunc("/config", services.ConfigDiagnosticHandler(container, version)) - - if config.OperatorUIToken != "" { + if config.OperatorUIEnabled { services.RegisterOperatorRoutes(mux, config, container, version) } + // GitHub webhook (configurable path, typically /events) + mux.HandleFunc(config.WebserverPath, func(w http.ResponseWriter, r *http.Request) { + handleWebhook(w, r, config, container) + }) + // Info endpoint mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { @@ -193,8 +187,8 @@ func startWebServer(config *configs.Config, container *services.ServiceContainer if config.MetricsEnabled { _, _ = fmt.Fprintf(w, "Metrics: /metrics\n") } - if config.OperatorUIToken != "" { - _, _ = fmt.Fprintf(w, "Operator UI: /operator/\n") + if config.OperatorUIEnabled { + _, _ = fmt.Fprintf(w, "Operator UI: /operator/ (set OPERATOR_UI_TOKEN for secured APIs)\n") } }) diff --git a/configs/environment.go b/configs/environment.go index e76dc07..42e246f 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -70,7 +70,8 @@ type Config struct { WebhookMaxRetries int // max retry attempts for failed webhook processing WebhookRetryInitialDelay int // initial delay between retries in seconds (doubles each attempt) - // Operator web UI (optional) — protected by 
OPERATOR_UI_TOKEN when set + // Operator web UI — off unless OPERATOR_UI_ENABLED=true (intended for local dev). + OperatorUIEnabled bool OperatorUIToken string OperatorRepoSlug string // "owner/repo" for GitHub links and optional tag API OperatorReleaseGitHubToken string // PAT with contents:write to create a version tag (optional) @@ -123,7 +124,8 @@ const ( WebhookProcessingTimeoutSeconds = "WEBHOOK_PROCESSING_TIMEOUT_SECONDS" WebhookMaxRetries = "WEBHOOK_MAX_RETRIES" WebhookRetryInitialDelay = "WEBHOOK_RETRY_INITIAL_DELAY" //nolint:gosec // env var name, not a credential - OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name + OperatorUIEnabled = "OPERATOR_UI_ENABLED" + OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name OperatorRepoSlug = "OPERATOR_REPO_SLUG" OperatorReleaseGitHubToken = "OPERATOR_RELEASE_GITHUB_TOKEN" // #nosec G101 -- env var name OperatorReleaseTargetBranch = "OPERATOR_RELEASE_TARGET_BRANCH" @@ -245,6 +247,7 @@ func LoadEnvironment(envFile string) (*Config, error) { config.WebhookMaxRetries = getIntEnvWithDefault(WebhookMaxRetries, config.WebhookMaxRetries) config.WebhookRetryInitialDelay = getIntEnvWithDefault(WebhookRetryInitialDelay, config.WebhookRetryInitialDelay) + config.OperatorUIEnabled = getBoolEnvWithDefault(OperatorUIEnabled, false) config.OperatorUIToken = os.Getenv(OperatorUIToken) config.OperatorRepoSlug = os.Getenv(OperatorRepoSlug) config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) @@ -338,5 +341,33 @@ func validateConfig(config *Config) error { } } + if err := validateWebserverPath(config.WebserverPath); err != nil { + return err + } + + return nil +} + +// validateWebserverPath rejects values that would collide with built-in HTTP routes. 
+func validateWebserverPath(p string) error { + p = strings.TrimSpace(p) + if p == "" { + return fmt.Errorf("WEBSERVER_PATH cannot be empty") + } + if !strings.HasPrefix(p, "/") { + return fmt.Errorf("WEBSERVER_PATH must start with / (got %q)", p) + } + if p == "/" { + return fmt.Errorf("WEBSERVER_PATH cannot be / (reserved; use a dedicated path such as /events)") + } + for _, reserved := range []string{"/health", "/ready", "/metrics", "/config", "/operator"} { + if strings.EqualFold(p, reserved) { + return fmt.Errorf("WEBSERVER_PATH cannot be %s (reserved for a built-in route)", reserved) + } + } + norm := strings.TrimSuffix(strings.ToLower(p), "/") + "/" + if strings.HasPrefix(norm, "/operator/") { + return fmt.Errorf("WEBSERVER_PATH cannot be under /operator/ (reserved for the operator UI)") + } return nil } diff --git a/env-cloudrun.yaml b/env-cloudrun.yaml index 2829e5e..b61726a 100644 --- a/env-cloudrun.yaml +++ b/env-cloudrun.yaml @@ -33,3 +33,7 @@ COPIER_LOG_NAME: "code-copier-log" # Feature Flags AUDIT_ENABLED: "true" METRICS_ENABLED: "true" + +# Operator UI is disabled by default (OPERATOR_UI_ENABLED unset/false). Intended for +# local runs only — do not set OPERATOR_UI_ENABLED in Cloud Run unless you explicitly +# want this surface on the internet-facing service. diff --git a/services/audit_logger.go b/services/audit_logger.go index dc606ca..0c264bb 100644 --- a/services/audit_logger.go +++ b/services/audit_logger.go @@ -48,25 +48,37 @@ type AuditLogger interface { GetEventsByRule(ctx context.Context, ruleName string, limit int) ([]AuditEvent, error) GetStatsByRule(ctx context.Context) (map[string]RuleStats, error) GetDailyVolume(ctx context.Context, days int) ([]DailyStats, error) + QueryAuditEvents(ctx context.Context, q AuditListQuery) ([]AuditEvent, error) Ping(ctx context.Context) error Close(ctx context.Context) error } +// AuditListQuery filters audit rows for operator dashboards and APIs. 
+type AuditListQuery struct { + Limit int + EventType string // empty = any; otherwise copy | deprecation | error + Success *bool // nil = any + RuleName string // exact match when non-empty + PRNumber *int // nil = any; exact match when set + PathSearch string // substring match on source_path OR target_path when non-empty + Since *time.Time // inclusive lower bound on timestamp when set +} + // RuleStats represents statistics for a specific rule type RuleStats struct { - RuleName string `bson:"_id"` - TotalCopies int `bson:"total_copies"` - SuccessCount int `bson:"success_count"` - FailureCount int `bson:"failure_count"` - AvgDuration float64 `bson:"avg_duration"` + RuleName string `bson:"_id" json:"rule_name"` + TotalCopies int `bson:"total_copies" json:"total_copies"` + SuccessCount int `bson:"success_count" json:"success_count"` + FailureCount int `bson:"failure_count" json:"failure_count"` + AvgDuration float64 `bson:"avg_duration" json:"avg_duration_ms"` } // DailyStats represents daily copy volume statistics type DailyStats struct { - Date string `bson:"_id"` - TotalCopies int `bson:"total_copies"` - SuccessCount int `bson:"success_count"` - FailureCount int `bson:"failure_count"` + Date string `bson:"_id" json:"date"` + TotalCopies int `bson:"total_copies" json:"total_copies"` + SuccessCount int `bson:"success_count" json:"success_count"` + FailureCount int `bson:"failure_count" json:"failure_count"` } // MongoAuditLogger implements AuditLogger using MongoDB @@ -92,7 +104,10 @@ func NewMongoAuditLogger(ctx context.Context, mongoURI, database, collection str SetConnectTimeout(5 * time.Second). SetTimeout(10 * time.Second). SetMaxPoolSize(10). - SetRetryWrites(true) + SetRetryWrites(true). 
+ SetBSONOptions(&options.BSONOptions{ + ObjectIDAsHexString: true, + }) client, err := mongo.Connect(clientOptions) if err != nil { return nil, fmt.Errorf("failed to connect to MongoDB: %w", err) @@ -168,7 +183,51 @@ func (mal *MongoAuditLogger) LogErrorEvent(ctx context.Context, event *AuditEven return err } -// GetRecentEvents retrieves recent audit events +// QueryAuditEvents retrieves audit events matching the given filter criteria. +func (mal *MongoAuditLogger) QueryAuditEvents(ctx context.Context, q AuditListQuery) ([]AuditEvent, error) { + limit := q.Limit + if limit <= 0 { + limit = 50 + } + if limit > 200 { + limit = 200 + } + filter := bson.M{} + if q.EventType != "" { + filter["event_type"] = AuditEventType(q.EventType) + } + if q.Success != nil { + filter["success"] = *q.Success + } + if q.RuleName != "" { + filter["rule_name"] = q.RuleName + } + if q.PRNumber != nil { + filter["pr_number"] = *q.PRNumber + } + if q.PathSearch != "" { + filter["$or"] = bson.A{ + bson.M{"source_path": bson.M{"$regex": q.PathSearch, "$options": "i"}}, + bson.M{"target_path": bson.M{"$regex": q.PathSearch, "$options": "i"}}, + } + } + if q.Since != nil { + filter["timestamp"] = bson.M{"$gte": *q.Since} + } + opts := options.Find().SetSort(bson.D{{Key: "timestamp", Value: -1}}).SetLimit(int64(limit)) + cursor, err := mal.collection.Find(ctx, filter, opts) + if err != nil { + return nil, err + } + defer func() { _ = cursor.Close(ctx) }() + + var events []AuditEvent + if err := cursor.All(ctx, &events); err != nil { + return nil, err + } + return events, nil +} + func (mal *MongoAuditLogger) GetRecentEvents(ctx context.Context, limit int) ([]AuditEvent, error) { opts := options.Find().SetSort(bson.D{{Key: "timestamp", Value: -1}}).SetLimit(int64(limit)) cursor, err := mal.collection.Find(ctx, bson.M{}, opts) @@ -318,5 +377,8 @@ func (nal *NoOpAuditLogger) GetStatsByRule(ctx context.Context) (map[string]Rule func (nal *NoOpAuditLogger) GetDailyVolume(ctx context.Context, days 
int) ([]DailyStats, error) { return []DailyStats{}, nil } +func (nal *NoOpAuditLogger) QueryAuditEvents(ctx context.Context, q AuditListQuery) ([]AuditEvent, error) { + return []AuditEvent{}, nil +} func (nal *NoOpAuditLogger) Ping(ctx context.Context) error { return nil } func (nal *NoOpAuditLogger) Close(ctx context.Context) error { return nil } diff --git a/services/audit_logger_test.go b/services/audit_logger_test.go index 1f89afa..7a1a3eb 100644 --- a/services/audit_logger_test.go +++ b/services/audit_logger_test.go @@ -173,6 +173,18 @@ func TestNoOpAuditLogger_GetStatsByRule(t *testing.T) { } } +func TestNoOpAuditLogger_QueryAuditEvents(t *testing.T) { + ctx := context.Background() + logger := &NoOpAuditLogger{} + got, err := logger.QueryAuditEvents(ctx, AuditListQuery{Limit: 10, EventType: string(AuditEventCopy)}) + if err != nil { + t.Fatalf("QueryAuditEvents: %v", err) + } + if len(got) != 0 { + t.Errorf("expected empty slice, got %d events", len(got)) + } +} + func TestNoOpAuditLogger_GetDailyVolume(t *testing.T) { logger := &NoOpAuditLogger{} ctx := context.Background() diff --git a/services/delivery_tracker.go b/services/delivery_tracker.go index fe36de0..0f46944 100644 --- a/services/delivery_tracker.go +++ b/services/delivery_tracker.go @@ -5,6 +5,15 @@ import ( "time" ) +const deliveryHistoryMax = 200 + +// DeliverySnapshot is one observed webhook delivery ID for operator diagnostics. +type DeliverySnapshot struct { + DeliveryID string `json:"delivery_id"` + SeenAt time.Time `json:"seen_at"` + Duplicate bool `json:"duplicate"` +} + // DeliveryTracker tracks processed GitHub webhook delivery IDs to prevent // duplicate processing. GitHub retries deliveries on timeout or error, and // the X-GitHub-Delivery header uniquely identifies each delivery. @@ -16,6 +25,9 @@ type DeliveryTracker struct { entries map[string]time.Time ttl time.Duration + // history is a bounded ring of recent TryRecord outcomes (new vs duplicate) for diagnostics. 
+ history []DeliverySnapshot + // stopCleanup signals the background goroutine to stop stopCleanup chan struct{} } @@ -26,6 +38,7 @@ func NewDeliveryTracker(ttl time.Duration) *DeliveryTracker { dt := &DeliveryTracker{ entries: make(map[string]time.Time), ttl: ttl, + history: make([]DeliverySnapshot, 0, 32), stopCleanup: make(chan struct{}), } go dt.cleanupLoop() @@ -40,15 +53,59 @@ func (dt *DeliveryTracker) TryRecord(deliveryID string) bool { if seenAt, exists := dt.entries[deliveryID]; exists { if time.Since(seenAt) < dt.ttl { + dt.appendHistoryLocked(deliveryID, true) return false // duplicate within TTL } // Expired entry — allow reprocessing } dt.entries[deliveryID] = time.Now() + dt.appendHistoryLocked(deliveryID, false) return true } +func (dt *DeliveryTracker) appendHistoryLocked(deliveryID string, duplicate bool) { + if deliveryID == "" { + return + } + dt.history = append(dt.history, DeliverySnapshot{ + DeliveryID: deliveryID, + SeenAt: time.Now().UTC(), + Duplicate: duplicate, + }) + if len(dt.history) > deliveryHistoryMax { + dt.history = dt.history[len(dt.history)-deliveryHistoryMax:] + } +} + +// HistoryLen returns how many recent delivery observations are buffered for diagnostics. +func (dt *DeliveryTracker) HistoryLen() int { + dt.mu.Lock() + defer dt.mu.Unlock() + return len(dt.history) +} + +// RecentDeliveries returns the last up to max observations (newest at end). +func (dt *DeliveryTracker) RecentDeliveries(max int) []DeliverySnapshot { + dt.mu.Lock() + defer dt.mu.Unlock() + if len(dt.history) == 0 { + return nil + } + if max <= 0 { + max = 100 + } + if max > deliveryHistoryMax { + max = deliveryHistoryMax + } + if len(dt.history) <= max { + out := make([]DeliverySnapshot, len(dt.history)) + copy(out, dt.history) + return out + } + return append([]DeliverySnapshot(nil), dt.history[len(dt.history)-max:]...) +} + // Len returns the current number of tracked delivery IDs (for diagnostics). 
func (dt *DeliveryTracker) Len() int { dt.mu.Lock() diff --git a/services/delivery_tracker_test.go b/services/delivery_tracker_test.go index 0ebb725..e3d7483 100644 --- a/services/delivery_tracker_test.go +++ b/services/delivery_tracker_test.go @@ -20,6 +20,13 @@ func TestDeliveryTracker_TryRecord(t *testing.T) { if dt.TryRecord("delivery-1") { t.Error("expected duplicate TryRecord to return false") } + hist := dt.RecentDeliveries(10) + if len(hist) < 2 { + t.Fatalf("expected history len >= 2, got %d", len(hist)) + } + if !hist[len(hist)-1].Duplicate { + t.Error("expected last history entry to be duplicate") + } // Different ID should succeed if !dt.TryRecord("delivery-2") { diff --git a/services/github_write_to_target.go b/services/github_write_to_target.go index bde8b6d..a88fb6b 100644 --- a/services/github_write_to_target.go +++ b/services/github_write_to_target.go @@ -2,6 +2,7 @@ package services import ( "context" + "errors" "fmt" "net/http" "strings" @@ -12,6 +13,10 @@ import ( "github.com/grove-platform/github-copier/types" ) +// errTreeUnchanged is returned by commitFilesToBranch when the new file tree is +// identical to the branch HEAD, meaning there is nothing to commit. +var errTreeUnchanged = errors.New("tree unchanged — nothing to commit") + // parseRepoPath parses a repository path in the format "owner/repo" and returns owner and repo separately. // If the path doesn't contain a slash, it returns defaultOwner and the path as repo name. 
func parseRepoPath(repoPath string, defaultOwner string) (owner, repo string) { @@ -106,6 +111,7 @@ func auditLogCopyBatchSuccess(ctx context.Context, auditLogger AuditLogger, key RuleName: meta.RuleName, SourceRepo: meta.SourceRepo, SourcePath: srcPath, + CommitSHA: meta.CommitSHA, TargetRepo: key.RepoName, TargetPath: f.GetName(), PRNumber: meta.PRNumber, @@ -138,6 +144,7 @@ func auditLogCopyBatchFailure(ctx context.Context, auditLogger AuditLogger, key RuleName: meta.RuleName, SourceRepo: meta.SourceRepo, SourcePath: srcPath, + CommitSHA: meta.CommitSHA, TargetRepo: key.RepoName, TargetPath: f.GetName(), PRNumber: meta.PRNumber, @@ -313,6 +320,11 @@ func addFilesViaPR(ctx context.Context, config *configs.Config, client *github.C // Push new files to the existing branch if err := commitFilesToBranch(ctx, config, client, key, files, existingBranch, commitMessage); err != nil { + if errors.Is(err, errTreeUnchanged) { + LogInfo("No changes to push to existing copier PR — files already up to date", + "pr_number", existingPR.GetNumber(), "repo", key.RepoName) + return nil + } return fmt.Errorf("commit to existing copier branch %s: %w", existingBranch, err) } @@ -342,6 +354,13 @@ func addFilesViaPR(ctx context.Context, config *configs.Config, client *github.C // 2. Commit files to temp branch if err := commitFilesToBranch(ctx, config, client, key, files, tempBranch, commitMessage); err != nil { + if errors.Is(err, errTreeUnchanged) { + LogInfo("No changes to commit — files already match target. 
Cleaning up temp branch.", + "repo", key.RepoName, "branch", tempBranch) + // Best-effort cleanup of the empty branch + _, _ = client.Git.DeleteRef(ctx, owner, repoName, "refs/heads/"+tempBranch) + return nil + } return err } @@ -388,7 +407,7 @@ func commitFilesToBranch(ctx context.Context, config *configs.Config, client *gi "branch", tempBranch, "tree_sha", tr.TreeSHA, ) - return nil + return errTreeUnchanged } if err = createCommit(ctx, client, config.ConfigRepoOwner, tempKey, tr.BaseSHA, tr.TreeSHA, commitMessage); err != nil { diff --git a/services/log_buffer.go b/services/log_buffer.go new file mode 100644 index 0000000..0fa3563 --- /dev/null +++ b/services/log_buffer.go @@ -0,0 +1,116 @@ +package services + +import ( + "context" + "sync" + "time" +) + +// Maximum entries per delivery and total across all deliveries. +const ( + logBufferMaxPerDelivery = 100 + logBufferMaxDeliveries = 50 +) + +// LogEntry is a single captured log line for operator diagnostics. +type LogEntry struct { + Time time.Time `json:"time"` + Level string `json:"level"` + Message string `json:"message"` + Fields map[string]any `json:"fields,omitempty"` +} + +// DeliveryLogBuffer stores recent log entries keyed by delivery ID for the operator UI. +type DeliveryLogBuffer struct { + mu sync.Mutex + entries map[string][]LogEntry + order []string // insertion order for eviction +} + +// NewDeliveryLogBuffer creates an empty delivery log buffer. +func NewDeliveryLogBuffer() *DeliveryLogBuffer { + return &DeliveryLogBuffer{ + entries: make(map[string][]LogEntry), + order: make([]string, 0, logBufferMaxDeliveries), + } +} + +// Append adds a log entry for a delivery ID. 
+func (b *DeliveryLogBuffer) Append(deliveryID string, entry LogEntry) { + if b == nil || deliveryID == "" { + return + } + if entry.Time.IsZero() { + entry.Time = time.Now().UTC() + } + b.mu.Lock() + defer b.mu.Unlock() + + logs, exists := b.entries[deliveryID] + if !exists { + b.order = append(b.order, deliveryID) + // Evict oldest delivery if over limit + if len(b.order) > logBufferMaxDeliveries { + evict := b.order[0] + b.order = b.order[1:] + delete(b.entries, evict) + } + } + logs = append(logs, entry) + if len(logs) > logBufferMaxPerDelivery { + logs = logs[len(logs)-logBufferMaxPerDelivery:] + } + b.entries[deliveryID] = logs +} + +// Get returns log entries for a delivery ID (nil if not found). +func (b *DeliveryLogBuffer) Get(deliveryID string) []LogEntry { + if b == nil { + return nil + } + b.mu.Lock() + defer b.mu.Unlock() + logs, ok := b.entries[deliveryID] + if !ok { + return nil + } + out := make([]LogEntry, len(logs)) + copy(out, logs) + return out +} + +// context key for log buffer +type logBufferCtxKey struct{} + +// ContextWithLogBuffer returns a context that carries a delivery ID for log capture. +func ContextWithLogBuffer(ctx context.Context, deliveryID string, buf *DeliveryLogBuffer) context.Context { + return context.WithValue(ctx, logBufferCtxKey{}, &logBufferCtxVal{deliveryID: deliveryID, buf: buf}) +} + +type logBufferCtxVal struct { + deliveryID string + buf *DeliveryLogBuffer +} + +// logBufferFromCtx extracts the log buffer from context (nil if not set). +func logBufferFromCtx(ctx context.Context) *logBufferCtxVal { + if ctx == nil { + return nil + } + val, _ := ctx.Value(logBufferCtxKey{}).(*logBufferCtxVal) + return val +} + +// appendToCtxBuffer appends a log entry to the context's delivery log buffer if present. 
+func appendToCtxBuffer(ctx context.Context, level, message string, fields map[string]any) { + val := logBufferFromCtx(ctx) + if val == nil || val.buf == nil { + return + } + val.buf.Append(val.deliveryID, LogEntry{ + Time: time.Now().UTC(), + Level: level, + Message: message, + Fields: fields, + }) +} diff --git a/services/logger.go b/services/logger.go index 0efa0fe..98ea6bc 100644 --- a/services/logger.go +++ b/services/logger.go @@ -208,12 +208,14 @@ func LogCritical(message string, args ...any) { func LogInfoCtx(ctx context.Context, message string, fields map[string]interface{}) { slog.InfoContext(ctx, message, mapToAttrs(fields)...) logToGCP(slog.LevelInfo, message, mapToAttrs(fields)...) + appendToCtxBuffer(ctx, "info", message, fieldsToAny(fields)) } // LogWarningCtx writes a warning-level log with context. func LogWarningCtx(ctx context.Context, message string, fields map[string]interface{}) { slog.WarnContext(ctx, message, mapToAttrs(fields)...) logToGCP(slog.LevelWarn, message, mapToAttrs(fields)...) + appendToCtxBuffer(ctx, "warn", message, fieldsToAny(fields)) } // LogErrorCtx writes an error-level log with context and an optional error. @@ -224,6 +226,22 @@ func LogErrorCtx(ctx context.Context, message string, err error, fields map[stri } slog.ErrorContext(ctx, message, attrs...) logToGCP(slog.LevelError, message, attrs...) + f := fieldsToAny(fields) + if err != nil { + f["error"] = err.Error() + } + appendToCtxBuffer(ctx, "error", message, f) +} + +func fieldsToAny(fields map[string]interface{}) map[string]any { + if fields == nil { + return nil + } + out := make(map[string]any, len(fields)) + for k, v := range fields { + out[k] = v + } + return out } // LogWebhookOperation logs webhook-related operations. 
diff --git a/services/operator_ui.go b/services/operator_ui.go index c90b304..6816eff 100644 --- a/services/operator_ui.go +++ b/services/operator_ui.go @@ -13,6 +13,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/grove-platform/github-copier/configs" @@ -24,31 +25,53 @@ var operatorIndexHTML []byte var operatorVersionTagRe = regexp.MustCompile(`^v[0-9]+\.[0-9]+\.[0-9]+$`) // RegisterOperatorRoutes mounts the operator HTML UI and JSON APIs under /operator/. -// cfg.OperatorUIToken must be non-empty before calling (caller checks). +// Call only when cfg.OperatorUIEnabled is true (local/dev). Secured APIs require +// OPERATOR_UI_TOKEN on the server plus Authorization: Bearer from the client. func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container *ServiceContainer, version string) { o := &operatorUI{ cfg: cfg, container: container, version: version, } + // Register specific paths before the /operator/ subtree so /operator/api/* is not handled by serveIndex. 
+ mux.HandleFunc("/operator/api/status", o.handleOperatorStatus) mux.HandleFunc("/operator/api/audit/events", o.wrapAPI(o.handleAuditEvents)) + mux.HandleFunc("/operator/api/audit/overview", o.wrapAPI(o.handleAuditOverview)) + mux.HandleFunc("/operator/api/observability/deliveries", o.wrapAPI(o.handleObservabilityDeliveries)) + mux.HandleFunc("/operator/api/observability/webhook-traces", o.wrapAPI(o.handleObservabilityWebhookTraces)) mux.HandleFunc("/operator/api/deployment", o.wrapAPI(o.handleDeployment)) mux.HandleFunc("/operator/api/release", o.wrapAPI(o.handleRelease)) + mux.HandleFunc("/operator/api/replay", o.wrapAPI(o.handleReplay)) + mux.HandleFunc("/operator/api/workflows", o.wrapAPI(o.handleWorkflows)) + mux.HandleFunc("/operator/api/logs", o.wrapAPI(o.handleDeliveryLogs)) mux.HandleFunc("/operator/", o.serveIndex) mux.HandleFunc("/operator", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/operator/", http.StatusFound) }) + if cfg.OperatorUIToken == "" { + LogInfo("Operator UI: /operator/ (set OPERATOR_UI_TOKEN to enable audit, deployment JSON, and release APIs)") + } else { + LogInfo("Operator UI: /operator/ with API authentication enabled") + } } type operatorUI struct { - cfg *configs.Config - container *ServiceContainer - version string + cfg *configs.Config + container *ServiceContainer + version string + replayInFlight sync.Map // key: "owner/repo#pr" → prevents concurrent replays } func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") + if o.cfg.OperatorUIToken == "" { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(map[string]string{ + "error": "operator APIs disabled on server: set OPERATOR_UI_TOKEN in the environment and redeploy", + }) + return + } if !operatorAuthOK(o.cfg.OperatorUIToken, bearerToken(r)) { w.WriteHeader(http.StatusUnauthorized) _ = 
json.NewEncoder(w).Encode(map[string]string{"error": "unauthorized"}) @@ -58,6 +81,31 @@ func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { } } +// handleOperatorStatus reports whether secured operator APIs are configured (no auth). +func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + w.Header().Set("Content-Type", "application/json") + out := map[string]any{ + "operator_apis_enabled": o.cfg.OperatorUIToken != "", + "metrics_enabled": o.cfg.MetricsEnabled, + "audit_enabled": o.cfg.AuditEnabled, + "version": o.version, + } + if o.container != nil && o.container.DeliveryTracker != nil { + out["webhook_dedupe_entries"] = o.container.DeliveryTracker.Len() + out["webhook_recent_observations"] = o.container.DeliveryTracker.HistoryLen() + } + if o.container != nil && o.container.WebhookTraces != nil { + out["webhook_trace_entries"] = o.container.WebhookTraces.Len() + } + _ = json.NewEncoder(w).Encode(out) +} + func bearerToken(r *http.Request) string { h := r.Header.Get("Authorization") const p = "Bearer " @@ -68,15 +116,12 @@ func bearerToken(r *http.Request) string { } func operatorAuthOK(expected, got string) bool { - if expected == "" { + if expected == "" || got == "" { return false } - e := []byte(expected) - g := []byte(got) - if len(e) != len(g) { - return false - } - return subtle.ConstantTimeCompare(e, g) == 1 + // subtle.ConstantTimeCompare returns 0 for different-length inputs without + // leaking the expected token length through timing. 
+ return subtle.ConstantTimeCompare([]byte(expected), []byte(got)) == 1 } func (o *operatorUI) serveIndex(w http.ResponseWriter, r *http.Request) { @@ -98,14 +143,11 @@ func (o *operatorUI) handleAuditEvents(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) return } - limit := 50 - if q := r.URL.Query().Get("limit"); q != "" { - if n, err := strconv.Atoi(q); err == nil && n > 0 { - limit = n - } - } - if limit > 200 { - limit = 200 + q, err := parseAuditListQuery(r) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + return } ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second) defer cancel() @@ -113,7 +155,7 @@ func (o *operatorUI) handleAuditEvents(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{"events": []any{}}) return } - events, err := o.container.AuditLogger.GetRecentEvents(ctx, limit) + events, err := o.container.AuditLogger.QueryAuditEvents(ctx, q) if err != nil { w.WriteHeader(http.StatusBadGateway) _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) @@ -122,9 +164,162 @@ func (o *operatorUI) handleAuditEvents(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{"events": events}) } +func parseAuditListQuery(r *http.Request) (AuditListQuery, error) { + q := r.URL.Query() + lim := 50 + if v := q.Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + lim = n + } + } + if lim > 200 { + lim = 200 + } + aq := AuditListQuery{Limit: lim} + if et := strings.TrimSpace(q.Get("event_type")); et != "" { + switch AuditEventType(et) { + case AuditEventCopy, AuditEventDeprecation, AuditEventError: + aq.EventType = et + default: + return AuditListQuery{}, fmt.Errorf("invalid event_type (use copy, deprecation, or error)") + } + } + switch strings.TrimSpace(strings.ToLower(q.Get("success"))) { + 
case "true": + t := true + aq.Success = &t + case "false": + f := false + aq.Success = &f + case "": + default: + return AuditListQuery{}, fmt.Errorf("invalid success (use true or false)") + } + if rn := strings.TrimSpace(q.Get("rule_name")); rn != "" { + aq.RuleName = rn + } + if prStr := strings.TrimSpace(q.Get("pr_number")); prStr != "" { + n, err := strconv.Atoi(prStr) + if err != nil || n <= 0 { + return AuditListQuery{}, fmt.Errorf("pr_number must be a positive integer") + } + aq.PRNumber = &n + } + if ps := strings.TrimSpace(q.Get("path")); ps != "" { + aq.PathSearch = ps + } + if since := strings.TrimSpace(q.Get("since")); since != "" { + t, err := time.Parse(time.RFC3339, since) + if err != nil { + return AuditListQuery{}, fmt.Errorf("since must be RFC3339: %w", err) + } + aq.Since = &t + } + return aq, nil +} + +func (o *operatorUI) handleAuditOverview(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + days := 14 + if v := r.URL.Query().Get("days"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + days = n + } + } + if days > 366 { + days = 366 + } + ctx, cancel := context.WithTimeout(r.Context(), 20*time.Second) + defer cancel() + if o.container.AuditLogger == nil { + _ = json.NewEncoder(w).Encode(map[string]any{ + "days": days, + "daily_volume": []DailyStats{}, + "stats_by_rule": map[string]RuleStats{}, + "audit_disabled": true, + }) + return + } + daily, err1 := o.container.AuditLogger.GetDailyVolume(ctx, days) + if err1 != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err1.Error()}) + return + } + byRule, err2 := o.container.AuditLogger.GetStatsByRule(ctx) + if err2 != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err2.Error()}) + return + } + _ = 
json.NewEncoder(w).Encode(map[string]any{ + "days": days, + "daily_volume": daily, + "stats_by_rule": byRule, + }) +} + +func (o *operatorUI) handleObservabilityDeliveries(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + max := 100 + if v := r.URL.Query().Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + max = n + } + } + if max > deliveryHistoryMax { + max = deliveryHistoryMax + } + if o.container.DeliveryTracker == nil { + _ = json.NewEncoder(w).Encode(map[string]any{"deliveries": []DeliverySnapshot{}}) + return + } + snap := o.container.DeliveryTracker.RecentDeliveries(max) + _ = json.NewEncoder(w).Encode(map[string]any{ + "deliveries": snap, + "dedupe_entries": o.container.DeliveryTracker.Len(), + }) +} + +func (o *operatorUI) handleObservabilityWebhookTraces(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + max := 50 + if v := r.URL.Query().Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + max = n + } + } + if max > webhookTraceMaxEntries { + max = webhookTraceMaxEntries + } + if o.container == nil || o.container.WebhookTraces == nil { + _ = json.NewEncoder(w).Encode(map[string]any{"traces": []WebhookTraceEntry{}}) + return + } + tr := o.container.WebhookTraces.Recent(max) + _ = json.NewEncoder(w).Encode(map[string]any{ + "traces": tr, + "total": o.container.WebhookTraces.Len(), + }) +} + // OperatorDeploymentInfo is non-secret runtime and platform metadata for the operator UI. 
type OperatorDeploymentInfo struct { Version string `json:"version"` + UptimeSeconds int64 `json:"uptime_seconds"` + MongoHealthy *bool `json:"mongo_healthy,omitempty"` GoogleCloudRegion string `json:"google_cloud_region,omitempty"` CloudRunService string `json:"cloud_run_service,omitempty"` CloudRunRevision string `json:"cloud_run_revision,omitempty"` @@ -155,6 +350,7 @@ func (o *operatorUI) handleDeployment(w http.ResponseWriter, r *http.Request) { } info := OperatorDeploymentInfo{ Version: o.version, + UptimeSeconds: int64(time.Since(o.container.StartTime).Seconds()), CloudRunService: os.Getenv("K_SERVICE"), CloudRunRevision: os.Getenv("K_REVISION"), CloudRunConfig: os.Getenv("K_CONFIGURATION"), @@ -176,6 +372,12 @@ func (o *operatorUI) handleDeployment(w http.ResponseWriter, r *http.Request) { if region := os.Getenv("GOOGLE_CLOUD_REGION"); region != "" { info.GoogleCloudRegion = region } + if o.cfg.AuditEnabled && o.container.AuditLogger != nil { + ctx, cancel := context.WithTimeout(r.Context(), 3*time.Second) + defer cancel() + healthy := o.container.AuditLogger.Ping(ctx) == nil + info.MongoHealthy = &healthy + } _ = json.NewEncoder(w).Encode(info) } @@ -241,6 +443,213 @@ func (o *operatorUI) handleRelease(w http.ResponseWriter, r *http.Request) { }) } +// ── Per-delivery log viewer ── + +func (o *operatorUI) handleDeliveryLogs(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + deliveryID := strings.TrimSpace(r.URL.Query().Get("delivery_id")) + if deliveryID == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "delivery_id is required"}) + return + } + if o.container.DeliveryLogs == nil { + _ = json.NewEncoder(w).Encode(map[string]any{"logs": []LogEntry{}, "delivery_id": deliveryID}) + return + } + logs := 
o.container.DeliveryLogs.Get(deliveryID) + if logs == nil { + logs = []LogEntry{} + } + _ = json.NewEncoder(w).Encode(map[string]any{"logs": logs, "delivery_id": deliveryID}) +} + +// ── Workflow config browser ── + +func (o *operatorUI) handleWorkflows(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + if o.container.ConfigLoader == nil { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "config loader not initialized"}) + return + } + ctx, cancel := context.WithTimeout(r.Context(), 15*time.Second) + defer cancel() + yamlCfg, err := o.container.ConfigLoader.LoadConfig(ctx, o.cfg) + if err != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": "failed to load config: " + err.Error(), + "workflows": []any{}, + }) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "workflows": yamlCfg.Workflows, + "defaults": yamlCfg.Defaults, + "config_file": o.cfg.EffectiveConfigFile(), + "config_repo": o.cfg.ConfigRepoOwner + "/" + o.cfg.ConfigRepoName, + }) +} + +// ── Webhook replay ── + +type operatorReplayRequest struct { + Repo string `json:"repo"` // "owner/repo" + PRNumber int `json:"pr_number"` + Branch string `json:"branch"` // base branch + CommitSHA string `json:"commit_sha"` // optional — fetched from GitHub if empty +} + +type operatorReplayResponse struct { + OK bool `json:"ok,omitempty"` + Message string `json:"message,omitempty"` + Error string `json:"error,omitempty"` +} + +func (o *operatorUI) handleReplay(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + + body, err := io.ReadAll(io.LimitReader(r.Body, 4096)) + if 
err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "read body"}) + return + } + var req operatorReplayRequest + if err := json.Unmarshal(body, &req); err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "invalid json"}) + return + } + + // Validate inputs + parts := strings.SplitN(strings.TrimSpace(req.Repo), "/", 2) + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "repo must be owner/repo"}) + return + } + owner, repoName := parts[0], parts[1] + + if req.PRNumber <= 0 { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "pr_number must be > 0"}) + return + } + if strings.TrimSpace(req.Branch) == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "branch is required"}) + return + } + + // In-flight dedup: prevent concurrent replays for the same PR + replayKey := fmt.Sprintf("%s#%d", req.Repo, req.PRNumber) + if _, loaded := o.replayInFlight.LoadOrStore(replayKey, true); loaded { + w.WriteHeader(http.StatusConflict) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "replay already in progress for this PR"}) + return + } + + // Fetch commit SHA from GitHub if not provided + commitSHA := strings.TrimSpace(req.CommitSHA) + if commitSHA == "" { + ctx, cancel := context.WithTimeout(r.Context(), 15*time.Second) + defer cancel() + client, err := GetRestClientForOrg(ctx, o.cfg, owner) + if err != nil { + o.replayInFlight.Delete(replayKey) + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "github auth: " + err.Error()}) + return + } + pr, _, err := client.PullRequests.Get(ctx, owner, repoName, req.PRNumber) + if err != nil { + o.replayInFlight.Delete(replayKey) + 
w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "fetch PR: " + err.Error()}) + return + } + if !pr.GetMerged() { + o.replayInFlight.Delete(replayKey) + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "PR is not merged — only merged PRs can be replayed"}) + return + } + commitSHA = pr.GetMergeCommitSHA() + if commitSHA == "" { + o.replayInFlight.Delete(replayKey) + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "PR has no merge commit SHA"}) + return + } + } + + // Dispatch replay in background (same path as real webhook processing) + deliveryID := fmt.Sprintf("replay-%d", time.Now().UnixMilli()) + baseBranch := strings.TrimSpace(req.Branch) + + LogInfo("operator replay requested", + "repo", req.Repo, + "pr_number", req.PRNumber, + "branch", baseBranch, + "commit_sha", commitSHA, + "delivery_id", deliveryID, + ) + + AppendWebhookTrace(o.container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: "operator_replay", + Repo: req.Repo, + BaseBranch: baseBranch, + CommitSHA: commitSHA, + PRNumber: req.PRNumber, + Outcome: "replay_started", + Detail: "initiated via operator UI", + }) + + bgCtx := context.Background() + if o.container.DeliveryLogs != nil { + bgCtx = ContextWithLogBuffer(bgCtx, deliveryID, o.container.DeliveryLogs) + } + if o.cfg.WebhookProcessingTimeoutSeconds > 0 { + var cancel context.CancelFunc + bgCtx, cancel = context.WithTimeout(bgCtx, time.Duration(o.cfg.WebhookProcessingTimeoutSeconds)*time.Second) + o.container.wg.Add(1) + go func() { + defer o.container.wg.Done() + defer cancel() + defer o.replayInFlight.Delete(replayKey) + processWebhookWithRetry(bgCtx, req.PRNumber, commitSHA, owner, repoName, baseBranch, deliveryID, o.cfg, o.container) + }() + } else { + o.container.wg.Add(1) + go func() { + defer o.container.wg.Done() + defer o.replayInFlight.Delete(replayKey) + 
processWebhookWithRetry(bgCtx, req.PRNumber, commitSHA, owner, repoName, baseBranch, deliveryID, o.cfg, o.container) + }() + } + + w.WriteHeader(http.StatusAccepted) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{ + OK: true, + Message: fmt.Sprintf("Replay started for %s PR #%d (delivery %s). Check webhook traces for progress.", req.Repo, req.PRNumber, deliveryID), + }) +} + func firstEnv(keys ...string) string { for _, k := range keys { if v := os.Getenv(k); v != "" { diff --git a/services/service_container.go b/services/service_container.go index 85c1dfb..212c15c 100644 --- a/services/service_container.go +++ b/services/service_container.go @@ -28,6 +28,12 @@ type ServiceContainer struct { // Webhook deduplication DeliveryTracker *DeliveryTracker + // Recent webhook outcomes for operator troubleshooting (in-memory) + WebhookTraces *WebhookTraceBuffer + + // Per-delivery log capture for operator diagnostics (in-memory) + DeliveryLogs *DeliveryLogBuffer + // Server state StartTime time.Time @@ -101,6 +107,8 @@ func NewServiceContainer(config *configs.Config) (*ServiceContainer, error) { MetricsCollector: metricsCollector, SlackNotifier: slackNotifier, DeliveryTracker: NewDeliveryTracker(1 * time.Hour), + WebhookTraces: NewWebhookTraceBuffer(), + DeliveryLogs: NewDeliveryLogBuffer(), StartTime: time.Now(), }, nil } diff --git a/services/web/operator/index.html b/services/web/operator/index.html index 8cb433e..847fb42 100644 --- a/services/web/operator/index.html +++ b/services/web/operator/index.html @@ -1,163 +1,1502 @@ - + + GitHub Copier — Operator -

GitHub Copier — Operator

-

Recent copy audit events, deployment metadata, and optional tag-based release (same trigger as CI on version tags).

-
-

Access

- -
- - - -
-

Stored only in sessionStorage for this browser tab. Send Authorization: Bearer … on API calls.

-
+
+
+ GitHub Copier + ... + + + + +
+
+
+ + + + +
+
+ + + +
-

Recent copy events

-
-
- - +
+ +
+
+ +
- -
- -
- - - - - - - - - - - - - -
TimeStatusRuleSourceTargetPRError
+

Stored in sessionStorage (this tab only).

-
-

Deployment

- - - -
+ +
+ + + + + +
-
-

Release (tag → deploy)

-

Production deploy runs on version tag pushes (vMAJOR.MINOR.PATCH). Changelog updates still require scripts/release.sh locally unless you only create a tag.

- -
- - + +
+
+
+

Live metrics

+ +
+
+
+ + +
+ +
+
+
+ +
+

Deployment

+
+ + + + + +
+
+
+ + +
+
+
+

Recent webhook activity

+ +
+
+
+
+
+
+ + +
+ +
+
TimeOutcomeRepoPRBaseEventActionDeliveryDetail
+
+
+
+ +
+

Webhook deliveries (dedup)

+
+

Recent X-GitHub-Delivery IDs. In-memory; resets on restart.

+
+
+ +
+ +
SeenDelivery IDDuplicate
+
+
+
+ + +
+ + - - -
- + + + - loadAudit(); - diff --git a/services/webhook_handler_new.go b/services/webhook_handler_new.go index 23513e8..871875d 100644 --- a/services/webhook_handler_new.go +++ b/services/webhook_handler_new.go @@ -88,26 +88,41 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * if err != nil { LogWebhookOperation(ctx, "read_body", "failed to read webhook body", err) container.MetricsCollector.RecordWebhookFailed() + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: r.Header.Get("X-GitHub-Delivery"), + EventType: r.Header.Get("X-GitHub-Event"), + Outcome: "read_body_failed", + Detail: err.Error(), + }) http.Error(w, "invalid body", http.StatusBadRequest) return } eventType := r.Header.Get("X-GitHub-Event") + deliveryID := r.Header.Get("X-GitHub-Delivery") if eventType == "" { LogWebhookOperation(ctx, "missing_event", "missing X-GitHub-Event header", nil) container.MetricsCollector.RecordWebhookFailed() + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + Outcome: "missing_event_header", + }) http.Error(w, "missing event type", http.StatusBadRequest) return } // Check for duplicate delivery using X-GitHub-Delivery header - deliveryID := r.Header.Get("X-GitHub-Delivery") if deliveryID != "" && container.DeliveryTracker != nil { if !container.DeliveryTracker.TryRecord(deliveryID) { LogInfoCtx(ctx, "duplicate webhook delivery, skipping", map[string]interface{}{ "delivery_id": deliveryID, "event_type": eventType, }) + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: eventType, + Outcome: "duplicate_delivery", + }) w.WriteHeader(http.StatusOK) return } @@ -125,6 +140,11 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * if !simpleVerifySignature(sigHeader, payload, []byte(config.WebhookSecret)) { LogWebhookOperation(ctx, "signature_verification", "webhook signature verification failed", nil) 
container.MetricsCollector.RecordWebhookFailed() + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: eventType, + Outcome: "signature_failed", + }) http.Error(w, "unauthorized", http.StatusUnauthorized) return } @@ -141,6 +161,12 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * LogWebhookOperation(ctx, "parse_payload", "failed to parse webhook payload", err, map[string]interface{}{"event_type": eventType}) container.MetricsCollector.RecordWebhookFailed() + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: eventType, + Outcome: "parse_failed", + Detail: err.Error(), + }) http.Error(w, "bad webhook", http.StatusBadRequest) return } @@ -156,6 +182,11 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * "event_type": eventType, "size_bytes": len(payload), }) + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: eventType, + Outcome: "ignored_non_pull_request", + }) w.WriteHeader(http.StatusNoContent) return } @@ -173,6 +204,23 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * "action": action, "merged": merged, }) + trace := WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: eventType, + Action: action, + Outcome: "skipped_not_merged_pr", + Detail: fmt.Sprintf("merged=%v", merged), + } + if r := prEvt.GetRepo(); r != nil { + trace.Repo = r.GetFullName() + } + if pr := prEvt.GetPullRequest(); pr != nil { + trace.PRNumber = pr.GetNumber() + if b := pr.GetBase(); b != nil { + trace.BaseBranch = b.GetRef() + } + } + AppendWebhookTrace(container, trace) w.WriteHeader(http.StatusNoContent) return } @@ -185,6 +233,13 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * repo := prEvt.GetRepo() if repo == nil { LogWarningCtx(ctx, "webhook missing repository info", nil) + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: 
deliveryID, + EventType: eventType, + Action: action, + PRNumber: prNumber, + Outcome: "invalid_payload_missing_repo", + }) w.WriteHeader(http.StatusBadRequest) return } @@ -230,6 +285,10 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * // Process asynchronously in background with a new context. // Don't use the request context as it will be cancelled when the request completes. bgCtx := context.Background() + // Attach log buffer for operator diagnostics + if container.DeliveryLogs != nil { + bgCtx = ContextWithLogBuffer(bgCtx, deliveryID, container.DeliveryLogs) + } // Apply a timeout to prevent stuck API calls from running indefinitely (#9). if config.WebhookProcessingTimeoutSeconds > 0 { @@ -252,6 +311,27 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * } } +// webhookResult carries completion info from a successful webhook processing run, +// surfaced in the operator UI webhook trace for at-a-glance diagnostics. +type webhookResult struct { + TargetRepos []string + FilesMatched int + FilesUploaded int + FilesFailed int +} + +func (r *webhookResult) traceDetail(attempt int) string { + if r == nil { + return fmt.Sprintf("attempt %d", attempt) + } + targets := strings.Join(r.TargetRepos, ", ") + if targets == "" { + targets = "(none)" + } + return fmt.Sprintf("attempt %d | %d matched, %d uploaded, %d failed | targets: %s", + attempt, r.FilesMatched, r.FilesUploaded, r.FilesFailed, targets) +} + // processWebhookWithRetry wraps handleMergedPRWithContainer with panic recovery // and exponential-backoff retries for transient failures (#7). 
func processWebhookWithRetry(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, baseBranch string, deliveryID string, config *configs.Config, container *ServiceContainer) { @@ -261,8 +341,20 @@ func processWebhookWithRetry(ctx context.Context, prNumber int, sourceCommitSHA var lastErr error for attempt := 1; attempt <= maxAttempts; attempt++ { - lastErr = runWithRecovery(ctx, prNumber, sourceCommitSHA, repoOwner, repoName, baseBranch, config, container) + result, err := runWithRecovery(ctx, prNumber, sourceCommitSHA, repoOwner, repoName, baseBranch, config, container) + lastErr = err if lastErr == nil { + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: "pull_request", + Action: "closed", + Repo: webhookRepo, + BaseBranch: baseBranch, + CommitSHA: sourceCommitSHA, + PRNumber: prNumber, + Outcome: "processed_ok", + Detail: result.traceDetail(attempt), + }) return // success } @@ -322,6 +414,17 @@ func processWebhookWithRetry(ctx context.Context, prNumber int, sourceCommitSHA "error", lastErr, ) container.MetricsCollector.RecordWebhookFailed() + AppendWebhookTrace(container, WebhookTraceEntry{ + DeliveryID: deliveryID, + EventType: "pull_request", + Action: "closed", + Repo: webhookRepo, + BaseBranch: baseBranch, + CommitSHA: sourceCommitSHA, + PRNumber: prNumber, + Outcome: "processing_failed", + Detail: fmt.Sprintf("after %d attempt(s): %v", maxAttempts, lastErr), + }) if notifyErr := container.SlackNotifier.NotifyError(ctx, &ErrorEvent{ Operation: operation, Error: fmt.Errorf("failed after %d attempt(s): %w", maxAttempts, lastErr), @@ -336,9 +439,10 @@ func processWebhookWithRetry(ctx context.Context, prNumber int, sourceCommitSHA // runWithRecovery calls handleMergedPRWithContainer in a panic-safe wrapper, // converting panics into errors. 
-func runWithRecovery(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, baseBranch string, config *configs.Config, container *ServiceContainer) (retErr error) { +func runWithRecovery(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, baseBranch string, config *configs.Config, container *ServiceContainer) (retResult *webhookResult, retErr error) { defer func() { if r := recover(); r != nil { + retResult = nil retErr = fmt.Errorf("panic: %v", r) LogCritical("panic in webhook handler", "pr_number", prNumber, "repo_owner", repoOwner, "repo_name", repoName, "recovered", r) } @@ -348,8 +452,9 @@ func runWithRecovery(ctx context.Context, prNumber int, sourceCommitSHA string, // handleMergedPRWithContainer orchestrates processing of a merged PR: // auth → config → match workflows → fetch changed files → process → upload → notify. -// Returns an error if a retryable failure occurred (#6 — per-workflow error tracking). -func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, baseBranch string, config *configs.Config, container *ServiceContainer) error { +// Returns a webhookResult on success (for operator trace enrichment) and an error +// if a retryable failure occurred (#6 — per-workflow error tracking). 
+func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, baseBranch string, config *configs.Config, container *ServiceContainer) (*webhookResult, error) { startTime := time.Now() webhookRepo := fmt.Sprintf("%s/%s", repoOwner, repoName) @@ -359,20 +464,20 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit LogAndReturnError(ctx, "auth", "failed to configure GitHub permissions", err) container.MetricsCollector.RecordWebhookFailed() notifySlackError(ctx, container, "auth", err, prNumber, webhookRepo) - return fmt.Errorf("auth: %w", err) + return nil, fmt.Errorf("auth: %w", err) } } // 2. Load config and find matching workflows yamlConfig, err := loadAndMatchWorkflows(ctx, config, container, webhookRepo, baseBranch, prNumber) if err != nil { - return fmt.Errorf("config: %w", err) + return nil, fmt.Errorf("config: %w", err) } // 3. Fetch changed files from the source PR changedFiles, err := fetchChangedFiles(ctx, config, container, repoOwner, repoName, prNumber, webhookRepo) if err != nil { - return fmt.Errorf("fetch_files: %w", err) + return nil, fmt.Errorf("fetch_files: %w", err) } // 4. 
Snapshot metrics before processing @@ -391,16 +496,24 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit reportCompletion(ctx, container, webhookRepo, prNumber, sourceCommitSHA, startTime, filesMatchedBefore, filesUploadedBefore, filesFailedBefore, targetRepos) + // Build result for operator trace enrichment + result := &webhookResult{ + TargetRepos: targetRepos, + FilesMatched: container.MetricsCollector.GetFilesMatched() - filesMatchedBefore, + FilesUploaded: container.MetricsCollector.GetFilesUploaded() - filesUploadedBefore, + FilesFailed: container.MetricsCollector.GetFilesUploadFailed() - filesFailedBefore, + } + // Return an aggregate error if any workflows failed (enables retry for partial failures) if len(workflowErrors) > 0 { errMsgs := make([]string, 0, len(workflowErrors)) for wfName, wfErr := range workflowErrors { errMsgs = append(errMsgs, fmt.Sprintf("%s: %v", wfName, wfErr)) } - return fmt.Errorf("%d workflow(s) failed: %s", len(workflowErrors), strings.Join(errMsgs, "; ")) + return result, fmt.Errorf("%d workflow(s) failed: %s", len(workflowErrors), strings.Join(errMsgs, "; ")) } - return nil + return result, nil } // loadAndMatchWorkflows loads the YAML config and filters to workflows matching diff --git a/services/webhook_trace_buffer.go b/services/webhook_trace_buffer.go new file mode 100644 index 0000000..8d5b16e --- /dev/null +++ b/services/webhook_trace_buffer.go @@ -0,0 +1,97 @@ +package services + +import ( + "sync" + "time" +) + +const webhookTraceMaxEntries = 120 + +// WebhookTraceEntry is one observed webhook for operator troubleshooting (in-memory only). 
+type WebhookTraceEntry struct { + At time.Time `json:"at"` + DeliveryID string `json:"delivery_id,omitempty"` + EventType string `json:"event_type,omitempty"` + Action string `json:"action,omitempty"` + Repo string `json:"repo,omitempty"` + BaseBranch string `json:"base_branch,omitempty"` + CommitSHA string `json:"commit_sha,omitempty"` + PRNumber int `json:"pr_number,omitempty"` + Outcome string `json:"outcome"` + Detail string `json:"detail,omitempty"` +} + +// WebhookTraceBuffer stores the last N webhook outcomes for the operator UI. +type WebhookTraceBuffer struct { + mu sync.Mutex + buf []WebhookTraceEntry +} + +// NewWebhookTraceBuffer creates an empty trace buffer. +func NewWebhookTraceBuffer() *WebhookTraceBuffer { + return &WebhookTraceBuffer{buf: make([]WebhookTraceEntry, 0, 32)} +} + +// Append adds an entry (timestamps default to UTC now; detail is truncated). +func (b *WebhookTraceBuffer) Append(e WebhookTraceEntry) { + if b == nil { + return + } + if e.Outcome == "" { + e.Outcome = "unknown" + } + if e.At.IsZero() { + e.At = time.Now().UTC() + } + if len(e.Detail) > 500 { + e.Detail = e.Detail[:500] + "…" + } + b.mu.Lock() + defer b.mu.Unlock() + b.buf = append(b.buf, e) + if len(b.buf) > webhookTraceMaxEntries { + b.buf = b.buf[len(b.buf)-webhookTraceMaxEntries:] + } +} + +// Len returns how many trace entries are buffered. +func (b *WebhookTraceBuffer) Len() int { + if b == nil { + return 0 + } + b.mu.Lock() + defer b.mu.Unlock() + return len(b.buf) +} + +// Recent returns the last up to max entries (oldest first within the slice). 
+func (b *WebhookTraceBuffer) Recent(max int) []WebhookTraceEntry { + if b == nil { + return nil + } + b.mu.Lock() + defer b.mu.Unlock() + if len(b.buf) == 0 { + return nil + } + if max <= 0 { + max = 50 + } + if max > webhookTraceMaxEntries { + max = webhookTraceMaxEntries + } + if len(b.buf) <= max { + out := make([]WebhookTraceEntry, len(b.buf)) + copy(out, b.buf) + return out + } + return append([]WebhookTraceEntry(nil), b.buf[len(b.buf)-max:]...) +} + +// AppendWebhookTrace records one webhook row for the operator dashboard. +func AppendWebhookTrace(c *ServiceContainer, e WebhookTraceEntry) { + if c == nil || c.WebhookTraces == nil { + return + } + c.WebhookTraces.Append(e) +} diff --git a/services/workflow_processor.go b/services/workflow_processor.go index 15a862b..8700223 100644 --- a/services/workflow_processor.go +++ b/services/workflow_processor.go @@ -459,6 +459,7 @@ func (wp *workflowProcessor) queueUpload( RuleName: workflow.Name, SourceRepo: workflow.Source.Repo, SourcePath: sourcePath, + CommitSHA: sourceCommitSHA, PRNumber: prNumber, }) diff --git a/types/types.go b/types/types.go index 7a026f0..88a3d10 100644 --- a/types/types.go +++ b/types/types.go @@ -127,6 +127,7 @@ type CopierFileMeta struct { RuleName string `json:"rule_name,omitempty"` SourceRepo string `json:"source_repo,omitempty"` SourcePath string `json:"source_path,omitempty"` + CommitSHA string `json:"commit_sha,omitempty"` PRNumber int `json:"pr_number,omitempty"` } From 58975f44b335f2587dfe92e7f93406fca9d55edd Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 09:04:48 -0400 Subject: [PATCH 03/20] feat(operator): optional GitHub PAT auth with role + per-repo access checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a second authentication mode (OPERATOR_AUTH_MODE=github) that validates the bearer token as a GitHub Personal Access Token instead of a shared secret. 
The existing shared-token mode remains the default for backward compatibility. Backend: - New config: OPERATOR_AUTH_MODE (token|github), OPERATOR_AUTH_REPO - operator_auth.go: GitHub PAT validation with GET /user + per-user permission check on OPERATOR_AUTH_REPO. Maps GitHub permissions to operator/writer/denied roles. 5-minute cache keyed by PAT to avoid hitting GitHub on every request. - operator_ui.go: wrapAPI supports both auth modes, wrapOperatorOnly enforces the operator role on write endpoints (replay, release). - Per-repo permission check on POST /operator/api/replay — the user's PAT must have at least read access to the source repo being replayed (in github mode only; token mode unchanged). Cached per token+repo pair. - New endpoints: GET /operator/api/me returns the authenticated user + role; GET /operator/api/repo-permission?repos=... batch-checks repo read access for frontend button state. - /operator/api/status now includes auth_mode so the frontend can adapt. Frontend: - User chip in top bar shows GitHub avatar + username + role suffix - Dynamic token input label: "GitHub token" in github mode - body.role-writer hides elements with .operator-only class (replay, release) - Replay buttons pre-check per-repo access via fetchRepoPermissions and disable with an explanatory tooltip for inaccessible source repos - Token clear resets user state and permission cache --- configs/environment.go | 8 +- services/operator_auth.go | 267 +++++++++++++++++++++++++++++++ services/operator_ui.go | 190 ++++++++++++++++++++-- services/web/operator/index.html | 85 +++++++++- 4 files changed, 532 insertions(+), 18 deletions(-) create mode 100644 services/operator_auth.go diff --git a/configs/environment.go b/configs/environment.go index 42e246f..ec3a8a2 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -73,6 +73,8 @@ type Config struct { // Operator web UI — off unless OPERATOR_UI_ENABLED=true (intended for local dev). 
OperatorUIEnabled bool OperatorUIToken string + OperatorAuthMode string // "token" (default) or "github" + OperatorAuthRepo string // repo to check permissions against when AuthMode=github (e.g. "org/repo") OperatorRepoSlug string // "owner/repo" for GitHub links and optional tag API OperatorReleaseGitHubToken string // PAT with contents:write to create a version tag (optional) OperatorReleaseTargetBranch string // branch SHA used when creating a tag (default main) @@ -125,7 +127,9 @@ const ( WebhookMaxRetries = "WEBHOOK_MAX_RETRIES" WebhookRetryInitialDelay = "WEBHOOK_RETRY_INITIAL_DELAY" //nolint:gosec // env var name, not a credential OperatorUIEnabled = "OPERATOR_UI_ENABLED" - OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name + OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name + OperatorAuthMode = "OPERATOR_AUTH_MODE" // "token" or "github" + OperatorAuthRepo = "OPERATOR_AUTH_REPO" // repo for permission check in github mode OperatorRepoSlug = "OPERATOR_REPO_SLUG" OperatorReleaseGitHubToken = "OPERATOR_RELEASE_GITHUB_TOKEN" // #nosec G101 -- env var name OperatorReleaseTargetBranch = "OPERATOR_RELEASE_TARGET_BRANCH" @@ -249,6 +253,8 @@ func LoadEnvironment(envFile string) (*Config, error) { config.OperatorUIEnabled = getBoolEnvWithDefault(OperatorUIEnabled, false) config.OperatorUIToken = os.Getenv(OperatorUIToken) + config.OperatorAuthMode = getEnvWithDefault(OperatorAuthMode, "token") + config.OperatorAuthRepo = os.Getenv(OperatorAuthRepo) config.OperatorRepoSlug = os.Getenv(OperatorRepoSlug) config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) config.OperatorReleaseTargetBranch = getEnvWithDefault(OperatorReleaseTargetBranch, "main") diff --git a/services/operator_auth.go b/services/operator_auth.go new file mode 100644 index 0000000..08bd1ca --- /dev/null +++ b/services/operator_auth.go @@ -0,0 +1,267 @@ +package services + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + 
"strings" + "sync" + "time" +) + +// OperatorRole represents the permission level for the operator UI. +type OperatorRole string + +const ( + // RoleOperator has full access: view, replay, release. + RoleOperator OperatorRole = "operator" + // RoleWriter has read-only access: view workflows, audit, recent copies. + RoleWriter OperatorRole = "writer" + // RoleDenied means the user has no access. + RoleDenied OperatorRole = "denied" +) + +// OperatorUser represents an authenticated operator UI user. +type OperatorUser struct { + Login string `json:"login"` + AvatarURL string `json:"avatar_url,omitempty"` + Role OperatorRole `json:"role"` +} + +// ghAuthCache caches GitHub PAT validation results to avoid hitting the API on every request. +// It also caches per-repo permission lookups (one permission level per token+repo pair). +type ghAuthCache struct { + mu sync.RWMutex + entries map[string]*ghAuthEntry + repoPerm map[string]*ghRepoPermEntry // key: token + "\x00" + repo + ttl time.Duration +} + +type ghAuthEntry struct { + user *OperatorUser + err error + expiresAt time.Time +} + +type ghRepoPermEntry struct { + permission string // "admin", "maintain", "write", "triage", "read", or "" for denied + err error + expiresAt time.Time +} + +func newGHAuthCache(ttl time.Duration) *ghAuthCache { + return &ghAuthCache{ + entries: make(map[string]*ghAuthEntry), + repoPerm: make(map[string]*ghRepoPermEntry), + ttl: ttl, + } +} + +func (c *ghAuthCache) get(token string) (*OperatorUser, error, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[token] + if !ok || time.Now().After(e.expiresAt) { + return nil, nil, false + } + return e.user, e.err, true +} + +func (c *ghAuthCache) set(token string, user *OperatorUser, err error) { + c.mu.Lock() + defer c.mu.Unlock() + c.entries[token] = &ghAuthEntry{ + user: user, + err: err, + expiresAt: time.Now().Add(c.ttl), + } + // Evict expired entries periodically (simple sweep when cache grows) + if len(c.entries) > 100 { + 
now := time.Now() + for k, v := range c.entries { + if now.After(v.expiresAt) { + delete(c.entries, k) + } + } + } +} + +func (c *ghAuthCache) getRepoPerm(token, repo string) (string, error, bool) { + key := token + "\x00" + repo + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.repoPerm[key] + if !ok || time.Now().After(e.expiresAt) { + return "", nil, false + } + return e.permission, e.err, true +} + +func (c *ghAuthCache) setRepoPerm(token, repo, permission string, err error) { + key := token + "\x00" + repo + c.mu.Lock() + defer c.mu.Unlock() + c.repoPerm[key] = &ghRepoPermEntry{ + permission: permission, + err: err, + expiresAt: time.Now().Add(c.ttl), + } + if len(c.repoPerm) > 500 { + now := time.Now() + for k, v := range c.repoPerm { + if now.After(v.expiresAt) { + delete(c.repoPerm, k) + } + } + } +} + +// CanUserReadRepo returns true if the user (identified by PAT) has at least read access to the repo. +// Uses the cache when available. Returns (hasAccess, error). +func (c *ghAuthCache) CanUserReadRepo(ctx context.Context, pat, username, repo string) (bool, error) { + if perm, err, ok := c.getRepoPerm(pat, repo); ok { + if err != nil { + return false, err + } + return permissionGrantsRead(perm), nil + } + perm, err := ghAPIGetRepoPermission(ctx, pat, repo, username) + c.setRepoPerm(pat, repo, perm, err) + if err != nil { + return false, err + } + return permissionGrantsRead(perm), nil +} + +func permissionGrantsRead(perm string) bool { + switch perm { + case "admin", "maintain", "write", "triage", "read": + return true + } + return false +} + +// validateGitHubPAT validates a GitHub PAT and returns the authenticated user with their role. +// It calls the GitHub API to get the user info, then checks their permission on the auth repo. +func validateGitHubPAT(ctx context.Context, pat string, authRepo string) (*OperatorUser, error) { + if pat == "" { + return nil, fmt.Errorf("empty token") + } + + // 1. 
Get the authenticated user + ghUser, err := ghAPIGetUser(ctx, pat) + if err != nil { + return nil, fmt.Errorf("validate token: %w", err) + } + + user := &OperatorUser{ + Login: ghUser.Login, + AvatarURL: ghUser.AvatarURL, + Role: RoleWriter, // default to read-only + } + + // 2. If no auth repo configured, grant operator access to any valid GitHub user + if authRepo == "" { + user.Role = RoleOperator + return user, nil + } + + // 3. Check the user's permission on the auth repo + perm, err := ghAPIGetRepoPermission(ctx, pat, authRepo, ghUser.Login) + if err != nil { + // If we can't check permissions (repo not found, no access), default to writer + LogWarning("GitHub permission check failed, defaulting to writer role", + "user", ghUser.Login, "repo", authRepo, "error", err) + return user, nil + } + + switch perm { + case "admin", "maintain", "write": + user.Role = RoleOperator + case "read", "triage": + user.Role = RoleWriter + default: + user.Role = RoleDenied + return user, fmt.Errorf("user %s has no access to %s", ghUser.Login, authRepo) + } + + return user, nil +} + +// ghUserResponse is the minimal response from GET /user. 
+type ghUserResponse struct { + Login string `json:"login"` + AvatarURL string `json:"avatar_url"` +} + +func ghAPIGetUser(ctx context.Context, pat string) (*ghUserResponse, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.github.com/user", nil) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "Bearer "+pat) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req) + if err != nil { + return nil, err + } + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<16)) + + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + return nil, fmt.Errorf("invalid or expired GitHub token (HTTP %d)", resp.StatusCode) + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("GitHub API error: HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + + var user ghUserResponse + if err := json.Unmarshal(body, &user); err != nil { + return nil, fmt.Errorf("parse user response: %w", err) + } + if user.Login == "" { + return nil, fmt.Errorf("empty login in GitHub response") + } + return &user, nil +} + +// ghPermissionResponse is the response from GET /repos/{owner}/{repo}/collaborators/{user}/permission. 
+type ghPermissionResponse struct { + Permission string `json:"permission"` +} + +func ghAPIGetRepoPermission(ctx context.Context, pat string, repo string, username string) (string, error) { + parts := strings.SplitN(repo, "/", 2) + if len(parts) != 2 { + return "", fmt.Errorf("invalid repo format: %s (expected owner/repo)", repo) + } + url := fmt.Sprintf("https://api.github.com/repos/%s/%s/collaborators/%s/permission", parts[0], parts[1], username) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+pat) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req) + if err != nil { + return "", err + } + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<16)) + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("permission check: HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + + var perm ghPermissionResponse + if err := json.Unmarshal(body, &perm); err != nil { + return "", fmt.Errorf("parse permission response: %w", err) + } + return perm.Permission, nil +} diff --git a/services/operator_ui.go b/services/operator_ui.go index 6816eff..9841d1f 100644 --- a/services/operator_ui.go +++ b/services/operator_ui.go @@ -33,6 +33,9 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * container: container, version: version, } + if cfg.OperatorAuthMode == "github" { + o.ghCache = newGHAuthCache(5 * time.Minute) + } // Register specific paths before the /operator/ subtree so /operator/api/* is not handled by serveIndex. 
mux.HandleFunc("/operator/api/status", o.handleOperatorStatus) mux.HandleFunc("/operator/api/audit/events", o.wrapAPI(o.handleAuditEvents)) @@ -40,18 +43,29 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * mux.HandleFunc("/operator/api/observability/deliveries", o.wrapAPI(o.handleObservabilityDeliveries)) mux.HandleFunc("/operator/api/observability/webhook-traces", o.wrapAPI(o.handleObservabilityWebhookTraces)) mux.HandleFunc("/operator/api/deployment", o.wrapAPI(o.handleDeployment)) - mux.HandleFunc("/operator/api/release", o.wrapAPI(o.handleRelease)) - mux.HandleFunc("/operator/api/replay", o.wrapAPI(o.handleReplay)) + mux.HandleFunc("/operator/api/release", o.wrapOperatorOnly(o.handleRelease)) + mux.HandleFunc("/operator/api/replay", o.wrapOperatorOnly(o.handleReplay)) mux.HandleFunc("/operator/api/workflows", o.wrapAPI(o.handleWorkflows)) mux.HandleFunc("/operator/api/logs", o.wrapAPI(o.handleDeliveryLogs)) + mux.HandleFunc("/operator/api/me", o.wrapAPI(o.handleMe)) + mux.HandleFunc("/operator/api/repo-permission", o.wrapAPI(o.handleRepoPermission)) mux.HandleFunc("/operator/", o.serveIndex) mux.HandleFunc("/operator", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/operator/", http.StatusFound) }) - if cfg.OperatorUIToken == "" { - LogInfo("Operator UI: /operator/ (set OPERATOR_UI_TOKEN to enable audit, deployment JSON, and release APIs)") - } else { - LogInfo("Operator UI: /operator/ with API authentication enabled") + switch cfg.OperatorAuthMode { + case "github": + authRepo := cfg.OperatorAuthRepo + if authRepo == "" { + authRepo = "(any valid GitHub user)" + } + LogInfo("Operator UI: /operator/ with GitHub PAT authentication", "auth_repo", authRepo) + default: + if cfg.OperatorUIToken == "" { + LogInfo("Operator UI: /operator/ (set OPERATOR_UI_TOKEN or OPERATOR_AUTH_MODE=github to enable APIs)") + } else { + LogInfo("Operator UI: /operator/ with token authentication enabled") + } } } @@ -59,20 +73,52 
@@ type operatorUI struct { cfg *configs.Config container *ServiceContainer version string - replayInFlight sync.Map // key: "owner/repo#pr" → prevents concurrent replays + replayInFlight sync.Map // key: "owner/repo#pr" → prevents concurrent replays + ghCache *ghAuthCache // GitHub PAT validation cache (nil when auth mode is "token") +} + +// operatorUserCtxKey is the context key for the authenticated operator user. +type operatorUserCtxKey struct{} + +// operatorUserFromCtx returns the authenticated user from the request context (nil if not set). +func operatorUserFromCtx(r *http.Request) *OperatorUser { + u, _ := r.Context().Value(operatorUserCtxKey{}).(*OperatorUser) + return u } func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") + token := bearerToken(r) + + if o.cfg.OperatorAuthMode == "github" { + // GitHub PAT mode: validate the token as a GitHub PAT + if token == "" { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "provide a GitHub Personal Access Token as Bearer token"}) + return + } + user, err := o.authenticateGitHub(r.Context(), token) + if err != nil { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + return + } + // Attach the user to the request context for downstream handlers + ctx := context.WithValue(r.Context(), operatorUserCtxKey{}, user) + next(w, r.WithContext(ctx)) + return + } + + // Default: simple shared-token mode if o.cfg.OperatorUIToken == "" { w.WriteHeader(http.StatusServiceUnavailable) _ = json.NewEncoder(w).Encode(map[string]string{ - "error": "operator APIs disabled on server: set OPERATOR_UI_TOKEN in the environment and redeploy", + "error": "operator APIs disabled on server: set OPERATOR_UI_TOKEN (or OPERATOR_AUTH_MODE=github) and redeploy", }) return } - if 
!operatorAuthOK(o.cfg.OperatorUIToken, bearerToken(r)) { + if !operatorAuthOK(o.cfg.OperatorUIToken, token) { w.WriteHeader(http.StatusUnauthorized) _ = json.NewEncoder(w).Encode(map[string]string{"error": "unauthorized"}) return @@ -81,6 +127,37 @@ func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { } } +// wrapOperatorOnly wraps a handler that requires the "operator" role (replay, release). +// In token mode, all authenticated users are operators. In github mode, checks the role. +func (o *operatorUI) wrapOperatorOnly(next http.HandlerFunc) http.HandlerFunc { + return o.wrapAPI(func(w http.ResponseWriter, r *http.Request) { + user := operatorUserFromCtx(r) + if user != nil && user.Role != RoleOperator { + w.WriteHeader(http.StatusForbidden) + _ = json.NewEncoder(w).Encode(map[string]string{ + "error": fmt.Sprintf("this action requires operator access (you have %s)", string(user.Role)), + }) + return + } + next(w, r) + }) +} + +func (o *operatorUI) authenticateGitHub(ctx context.Context, pat string) (*OperatorUser, error) { + if o.ghCache != nil { + if user, err, ok := o.ghCache.get(pat); ok { + return user, err + } + } + authCtx, cancel := context.WithTimeout(ctx, 15*time.Second) + defer cancel() + user, err := validateGitHubPAT(authCtx, pat, o.cfg.OperatorAuthRepo) + if o.ghCache != nil { + o.ghCache.set(pat, user, err) + } + return user, err +} + // handleOperatorStatus reports whether secured operator APIs are configured (no auth). 
func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { @@ -90,8 +167,10 @@ func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request return } w.Header().Set("Content-Type", "application/json") + apisEnabled := o.cfg.OperatorUIToken != "" || o.cfg.OperatorAuthMode == "github" out := map[string]any{ - "operator_apis_enabled": o.cfg.OperatorUIToken != "", + "operator_apis_enabled": apisEnabled, + "auth_mode": o.cfg.OperatorAuthMode, "metrics_enabled": o.cfg.MetricsEnabled, "audit_enabled": o.cfg.AuditEnabled, "version": o.version, @@ -106,6 +185,76 @@ func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request _ = json.NewEncoder(w).Encode(out) } +// handleMe returns the authenticated user info (GitHub mode) or a generic response (token mode). +func (o *operatorUI) handleMe(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + user := operatorUserFromCtx(r) + if user != nil { + _ = json.NewEncoder(w).Encode(user) + return + } + // Token mode — no user identity + _ = json.NewEncoder(w).Encode(map[string]any{ + "login": "operator", + "role": "operator", + }) +} + +// handleRepoPermission reports whether the authenticated user has read access to a given repo. +// Used by the frontend to pre-check replay eligibility. In token mode, always returns true. +// Query params: repos=owner/repo1,owner/repo2 (comma-separated). 
+func (o *operatorUI) handleRepoPermission(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + reposParam := strings.TrimSpace(r.URL.Query().Get("repos")) + if reposParam == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "repos query param required"}) + return + } + repos := strings.Split(reposParam, ",") + result := make(map[string]bool, len(repos)) + + // Token mode: no per-repo restrictions — grant all + if o.cfg.OperatorAuthMode != "github" || o.ghCache == nil { + for _, repo := range repos { + repo = strings.TrimSpace(repo) + if repo != "" { + result[repo] = true + } + } + _ = json.NewEncoder(w).Encode(map[string]any{"permissions": result}) + return + } + + user := operatorUserFromCtx(r) + userPAT := bearerToken(r) + if user == nil || userPAT == "" { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "unauthenticated"}) + return + } + + ctx, cancel := context.WithTimeout(r.Context(), 15*time.Second) + defer cancel() + for _, repo := range repos { + repo = strings.TrimSpace(repo) + if repo == "" { + continue + } + canRead, _ := o.ghCache.CanUserReadRepo(ctx, userPAT, user.Login, repo) + result[repo] = canRead + } + _ = json.NewEncoder(w).Encode(map[string]any{"permissions": result}) +} + func bearerToken(r *http.Request) string { h := r.Header.Get("Authorization") const p = "Bearer " @@ -555,6 +704,27 @@ func (o *operatorUI) handleReplay(w http.ResponseWriter, r *http.Request) { return } + // Source-repo permission check (GitHub auth mode only): the user's PAT must + // have at least read access to the source repo being replayed. 
+ if o.cfg.OperatorAuthMode == "github" && o.ghCache != nil { + user := operatorUserFromCtx(r) + userPAT := bearerToken(r) + if user != nil && userPAT != "" { + permCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second) + canRead, permErr := o.ghCache.CanUserReadRepo(permCtx, userPAT, user.Login, req.Repo) + cancel() + if !canRead { + w.WriteHeader(http.StatusForbidden) + msg := fmt.Sprintf("you do not have access to source repo %s", req.Repo) + if permErr != nil { + msg = fmt.Sprintf("%s: %s", msg, permErr.Error()) + } + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: msg}) + return + } + } + } + // In-flight dedup: prevent concurrent replays for the same PR replayKey := fmt.Sprintf("%s#%d", req.Repo, req.PRNumber) if _, loaded := o.replayInFlight.LoadOrStore(replayKey, true); loaded { diff --git a/services/web/operator/index.html b/services/web/operator/index.html index 847fb42..b2c70d5 100644 --- a/services/web/operator/index.html +++ b/services/web/operator/index.html @@ -243,6 +243,11 @@ .help-content ol,.help-content ul{padding-left:1.4rem;margin:0.3rem 0;} .help-content li{margin:0.2rem 0;} + /* User avatar */ + .tb-avatar{width:16px;height:16px;border-radius:50%;vertical-align:middle;} + /* Writer role: hide operator-only actions */ + body.role-writer .operator-only{display:none !important;} + /* Mode toggle */ .mode-toggle{display:inline-flex;border:1px solid var(--border);border-radius:6px;overflow:hidden;font-size:0.78rem;} .mode-toggle button{padding:0.18rem 0.55rem;border:none;background:var(--surface);color:var(--text-3);cursor:pointer;font-size:0.78rem;font-weight:500;transition:all 0.15s;} @@ -279,6 +284,7 @@ +
@@ -557,7 +563,7 @@

Keyboard shortcuts

Pushes a vMAJOR.MINOR.PATCH tag. Changelog requires scripts/release.sh.

-
+
@@ -750,8 +756,35 @@

Keyboard shortcuts

/* ── Token ── */ $('token').value=sessionStorage.getItem(TKEY)||''; -$('saveToken').onclick=function(){var v=$('token').value.trim();if(!v)return;sessionStorage.setItem(TKEY,v);var btn=$('saveToken');btn.textContent='Saved!';btn.style.background='var(--green)';btn.style.borderColor='var(--green)';setTimeout(function(){btn.textContent='Save in session';btn.style.background='';btn.style.borderColor='';},1200);loadAllSecured();}; -$('clearToken').onclick=function(){sessionStorage.removeItem(TKEY);$('token').value='';}; +$('saveToken').onclick=function(){var v=$('token').value.trim();if(!v)return;sessionStorage.setItem(TKEY,v);var btn=$('saveToken');btn.textContent='Saved!';btn.style.background='var(--green)';btn.style.borderColor='var(--green)';setTimeout(function(){btn.textContent='Save in session';btn.style.background='';btn.style.borderColor='';},1200);fetchMe();loadAllSecured();}; +$('clearToken').onclick=function(){ + sessionStorage.removeItem(TKEY);$('token').value=''; + $('tbUser').hidden=true; + document.body.classList.remove('role-writer'); + _repoPerm={};_currentUser=null; +}; + +/* ── Authenticated user ── */ +var _currentUser=null; +function fetchMe(){ + if(!hasToken())return; + fetch(appURL('/operator/api/me'),{headers:authHeaders()}).then(function(r){ + if(!r.ok)return null; + return r.json(); + }).then(function(user){ + if(!user)return; + _currentUser=user; + var el=$('tbUser'); + el.hidden=false; + var inner=''; + if(user.avatar_url)inner+=' '; + inner+=escapeHtml(user.login||'operator'); + if(user.role&&user.role!=='operator')inner+=' ('+escapeHtml(user.role)+')'; + el.innerHTML=inner; + // Apply role-based visibility + document.body.classList.toggle('role-writer',user.role==='writer'); + }).catch(function(){}); +} /* ── Status bar ── */ function updateTBVersion(v){$('tbVersion').textContent=v||'...';} @@ -764,7 +797,9 @@

Keyboard shortcuts

function startHeartbeat(){if(!COP_ORIGIN)return;setInterval(function(){fetch(appURL('/operator/api/status')).then(function(r){if(!_connected){_connected=true;$('tbConn').hidden=true;toast('Reconnected to server','info');}}).catch(function(){if(_connected){_connected=false;$('tbConn').hidden=false;toast('Connection lost to server','error');}});},60000);} /* Server status */ -function checkServerStatus(){if(!COP_ORIGIN){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Open this page at http://127.0.0.1:/operator/.';return;}fetch(appURL('/operator/api/status')).then(function(r){if(r.status===404){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Operator routes not enabled.';return null;}return r.json();}).then(function(d){if(!d)return;updateTBVersion(d.version);if(!d.operator_apis_enabled){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='No OPERATOR_UI_TOKEN: secured APIs return 503.';}}).catch(function(){});} +function checkServerStatus(){if(!COP_ORIGIN){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Open this page at http://127.0.0.1:/operator/.';return;}fetch(appURL('/operator/api/status')).then(function(r){if(r.status===404){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Operator routes not enabled.';return null;}return r.json();}).then(function(d){if(!d)return;updateTBVersion(d.version); + if(d.auth_mode==='github'){$('token').placeholder='GitHub Personal Access Token';$('token').parentNode.parentNode.querySelector('label').innerHTML='GitHub token (PAT with repo read access)';} + if(!d.operator_apis_enabled){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='No authentication configured: set OPERATOR_UI_TOKEN or OPERATOR_AUTH_MODE=github.';}}).catch(function(){});} /* ── Metrics with groups, deltas, sparklines, health accents ── */ var _prevMetrics={}, _metricHistory={}; @@ -877,7 +912,14 @@

Keyboard shortcuts

/* ── Webhook traces with client-side filter/search (#4, #9) ── */ var _traceData=[]; +// Map of repo -> bool (true = user has read access). Populated after loading traces. +// In token mode, every repo is true. In github mode, only repos the user has access to. +var _repoPerm={}; function traceOutcomeClass(o){if(!o)return'';if(o==='processed_ok')return'ok';if(o==='duplicate_delivery'||o==='skipped_not_merged_pr'||o==='ignored_non_pull_request')return'warn';return'fail';} +function canReplayRepo(repo){ + // If we haven't checked yet (undefined), assume yes. If checked and false, disable. + return _repoPerm[repo]!==false; +} function renderTraces(){ var outcomeF=$('traceOutcome').value,searchF=$('traceSearch').value.toLowerCase().trim(); var filtered=_traceData; @@ -888,7 +930,12 @@

Keyboard shortcuts

var tr=document.createElement('tr'),oc=row.outcome||''; if(row.event_type==='operator_replay')tr.className='trace-replay'; var replayBtn=''; - if(row.repo&&row.pr_number&&row.base_branch){replayBtn='';} + if(row.repo&&row.pr_number&&row.base_branch){ + var allowed=canReplayRepo(row.repo); + var disAttrs=allowed?'':' disabled title="You do not have GitHub access to '+escapeHtml(row.repo)+'"'; + var onClick=allowed?'event.stopPropagation();confirmReplay('+escapeHtml(JSON.stringify(row.repo))+','+row.pr_number+','+escapeHtml(JSON.stringify(row.base_branch))+','+escapeHtml(JSON.stringify(row.commit_sha||''))+')':'event.stopPropagation()'; + replayBtn=''; + } if(row.delivery_id){replayBtn+=' ';} var detailHtml=parseDetailBadges(row.detail||''); tr.innerHTML=''+fmtTime(row.at)+''+escapeHtml(oc)+''+ghLink(row.repo)+''+ghPR(row.repo,row.pr_number)+''+escapeHtml(row.base_branch||'')+''+escapeHtml(row.event_type||'')+''+escapeHtml(row.action||'')+''+escapeHtml(row.delivery_id||'')+''+detailHtml+''+replayBtn+''; @@ -897,6 +944,27 @@

Keyboard shortcuts

setBadge('tracesCount',filtered.length+(_traceData.length>filtered.length?' / '+_traceData.length:'')); emptyState('traceBody',10,'\u{1F4E1}','No webhook activity yet'); } + +// Fetch repo read-permission for each unique source repo in _traceData. +// Only relevant in github auth mode; in token mode the backend returns true for all. +async function fetchRepoPermissions(){ + if(!_currentUser||!hasToken())return; + var uniqueRepos={}; + _traceData.forEach(function(r){if(r.repo)uniqueRepos[r.repo]=true;}); + // Also include audit data source repos so replay from the drawer works + _auditData.forEach(function(e){if(e.source_repo)uniqueRepos[e.source_repo]=true;}); + // Skip repos we've already checked + var toCheck=Object.keys(uniqueRepos).filter(function(r){return _repoPerm[r]===undefined;}); + if(toCheck.length===0)return; + try{ + var res=await fetch(appURL('/operator/api/repo-permission?repos='+encodeURIComponent(toCheck.join(','))),{headers:authHeaders()}); + if(!res.ok)return; + var body=await res.json(); + var perms=body.permissions||{}; + Object.keys(perms).forEach(function(r){_repoPerm[r]=!!perms[r];}); + renderTraces(); // Re-render to update button states + }catch(e){} +} async function loadWebhookTraces(){ showErr('traceErr',''); var lim=parseInt($('traceLimit').value,10)||50; @@ -907,6 +975,7 @@

Keyboard shortcuts

renderTraces();setRefreshed('tracesRefreshed'); if(_traceData.length>0&&_traceData[_traceData.length-1].at)updateTBLastWH(_traceData[_traceData.length-1].at); updateTabBadges(); + fetchRepoPermissions(); } $('traceOutcome').addEventListener('change',renderTraces); $('traceSearch').addEventListener('input',renderTraces); @@ -972,7 +1041,9 @@

Keyboard shortcuts

rows.forEach(function(r){var v=r[1];if(v===undefined||v===null||v==='')return;html+=''+escapeHtml(r[0])+''+escapeHtml(String(v))+'';}); html+=''; if(ev.source_repo&&ev.pr_number){ - html+='
'; + var rpAllowed=canReplayRepo(ev.source_repo); + var rpDis=rpAllowed?'':' disabled title="You do not have GitHub access to '+escapeHtml(ev.source_repo)+'"'; + html+='
'; } $('drawerBody').innerHTML=html;$('drawer').hidden=false; } @@ -1485,7 +1556,7 @@

Keyboard shortcuts

initDarkMode();initMode();syncTimeBtn();initSections();initTabs();initFromURL();checkServerStatus();startHeartbeat(); if(COP_ORIGIN){ loadAllProbes();loadMetricsCards();restoreAutoRefresh(); - if(hasToken()){loadAudit();loadWebhookTraces();loadOverview();$('loadDeploy').click();} + if(hasToken()){fetchMe();loadAudit();loadWebhookTraces();loadOverview();$('loadDeploy').click();} } From ef976736b9922a5dc927e14624695a64dce65a45 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 09:29:38 -0400 Subject: [PATCH 04/20] refactor(operator): remove token auth mode, require GitHub PAT + auth repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplifies the operator UI authentication to a single path: GitHub PAT validation with role assignment from OPERATOR_AUTH_REPO permissions. Prior: two auth modes (shared secret via OPERATOR_UI_TOKEN, or GitHub PAT via OPERATOR_AUTH_MODE=github). The shared-secret mode had no user identity, no per-repo restrictions, and a single token that could be leaked. Now: OPERATOR_UI_ENABLED=true always uses GitHub PAT auth. Each user authenticates with their personal token, and OPERATOR_AUTH_REPO (required) determines their role (operator for write/admin, writer for read/triage). 
Config: - Remove OPERATOR_UI_TOKEN env var and Config.OperatorUIToken field - Remove OPERATOR_AUTH_MODE env var and Config.OperatorAuthMode field - validateOperatorAuth now requires OPERATOR_AUTH_REPO in owner/repo format when the UI is enabled — startup fails with a clear error otherwise Backend (operator_ui.go): - Remove operatorAuthOK() and crypto/subtle import (no shared secret to compare) - wrapAPI: always validate bearer token as GitHub PAT, attach user to context - wrapOperatorOnly: always enforce operator role - handleOperatorStatus: return auth_repo instead of auth_mode - handleMe: user always present in context; 500 if somehow missing - handleRepoPermission: always checks real permissions via ghCache - Replay handler: unconditionally enforces source-repo permission check - ghCache always initialized (no longer gated on mode) Frontend (index.html): - Token input label always "GitHub Personal Access Token" with auth repo shown - Remove conditional label/placeholder logic based on auth_mode - Status check reads auth_repo and displays it in the token input label Startup: - app.go root page shows "authenticate with a GitHub PAT; role from " - operator_ui.go startup log includes auth_repo --- app.go | 2 +- configs/environment.go | 37 +++++--- services/operator_auth.go | 8 +- services/operator_ui.go | 149 ++++++++++--------------------- services/web/operator/index.html | 27 ++++-- 5 files changed, 101 insertions(+), 122 deletions(-) diff --git a/app.go b/app.go index df300b1..a7eed2c 100644 --- a/app.go +++ b/app.go @@ -188,7 +188,7 @@ func startWebServer(config *configs.Config, container *services.ServiceContainer _, _ = fmt.Fprintf(w, "Metrics: /metrics\n") } if config.OperatorUIEnabled { - _, _ = fmt.Fprintf(w, "Operator UI: /operator/ (set OPERATOR_UI_TOKEN for secured APIs)\n") + _, _ = fmt.Fprintf(w, "Operator UI: /operator/ (authenticate with a GitHub PAT; role from %s)\n", config.OperatorAuthRepo) } }) diff --git a/configs/environment.go 
b/configs/environment.go index ec3a8a2..d272583 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -70,12 +70,13 @@ type Config struct { WebhookMaxRetries int // max retry attempts for failed webhook processing WebhookRetryInitialDelay int // initial delay between retries in seconds (doubles each attempt) - // Operator web UI — off unless OPERATOR_UI_ENABLED=true (intended for local dev). + // Operator web UI — off unless OPERATOR_UI_ENABLED=true. Works with any HTTP + // origin (local dev, Cloud Run, etc.). Access is gated by GitHub PATs: + // each user authenticates with their personal token, and the role + // (operator or writer) is determined by their permission on OPERATOR_AUTH_REPO. OperatorUIEnabled bool - OperatorUIToken string - OperatorAuthMode string // "token" (default) or "github" - OperatorAuthRepo string // repo to check permissions against when AuthMode=github (e.g. "org/repo") - OperatorRepoSlug string // "owner/repo" for GitHub links and optional tag API + OperatorAuthRepo string // "owner/repo" — user permissions here determine role (required when UI is enabled) + OperatorRepoSlug string // "owner/repo" for GitHub links in audit/trace rows (optional) OperatorReleaseGitHubToken string // PAT with contents:write to create a version tag (optional) OperatorReleaseTargetBranch string // branch SHA used when creating a tag (default main) } @@ -127,9 +128,7 @@ const ( WebhookMaxRetries = "WEBHOOK_MAX_RETRIES" WebhookRetryInitialDelay = "WEBHOOK_RETRY_INITIAL_DELAY" //nolint:gosec // env var name, not a credential OperatorUIEnabled = "OPERATOR_UI_ENABLED" - OperatorUIToken = "OPERATOR_UI_TOKEN" // #nosec G101 -- env var name - OperatorAuthMode = "OPERATOR_AUTH_MODE" // "token" or "github" - OperatorAuthRepo = "OPERATOR_AUTH_REPO" // repo for permission check in github mode + OperatorAuthRepo = "OPERATOR_AUTH_REPO" // repo for GitHub PAT permission check OperatorRepoSlug = "OPERATOR_REPO_SLUG" OperatorReleaseGitHubToken = 
"OPERATOR_RELEASE_GITHUB_TOKEN" // #nosec G101 -- env var name OperatorReleaseTargetBranch = "OPERATOR_RELEASE_TARGET_BRANCH" @@ -252,8 +251,6 @@ func LoadEnvironment(envFile string) (*Config, error) { config.WebhookRetryInitialDelay = getIntEnvWithDefault(WebhookRetryInitialDelay, config.WebhookRetryInitialDelay) config.OperatorUIEnabled = getBoolEnvWithDefault(OperatorUIEnabled, false) - config.OperatorUIToken = os.Getenv(OperatorUIToken) - config.OperatorAuthMode = getEnvWithDefault(OperatorAuthMode, "token") config.OperatorAuthRepo = os.Getenv(OperatorAuthRepo) config.OperatorRepoSlug = os.Getenv(OperatorRepoSlug) config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) @@ -351,6 +348,26 @@ func validateConfig(config *Config) error { return err } + if err := validateOperatorAuth(config); err != nil { + return err + } + + return nil +} + +// validateOperatorAuth enforces that OPERATOR_AUTH_REPO is set when the UI is +// enabled. Without it, any valid GitHub user could authenticate with full +// operator access since there would be no per-repo permission gate. +func validateOperatorAuth(config *Config) error { + if !config.OperatorUIEnabled { + return nil + } + if strings.TrimSpace(config.OperatorAuthRepo) == "" { + return fmt.Errorf("OPERATOR_UI_ENABLED=true requires OPERATOR_AUTH_REPO (owner/repo) to gate access — each user authenticates with their GitHub PAT and their permission on that repo determines their role") + } + if !strings.Contains(config.OperatorAuthRepo, "/") { + return fmt.Errorf("OPERATOR_AUTH_REPO must be in owner/repo format (got %q)", config.OperatorAuthRepo) + } return nil } diff --git a/services/operator_auth.go b/services/operator_auth.go index 08bd1ca..947a369 100644 --- a/services/operator_auth.go +++ b/services/operator_auth.go @@ -162,13 +162,13 @@ func validateGitHubPAT(ctx context.Context, pat string, authRepo string) (*Opera Role: RoleWriter, // default to read-only } - // 2. 
If no auth repo configured, grant operator access to any valid GitHub user + // authRepo is required in github mode (enforced at config load via + // validateOperatorAuth). This guard is defensive only. if authRepo == "" { - user.Role = RoleOperator - return user, nil + return nil, fmt.Errorf("OPERATOR_AUTH_REPO is not configured") } - // 3. Check the user's permission on the auth repo + // 2. Check the user's permission on the auth repo perm, err := ghAPIGetRepoPermission(ctx, pat, authRepo, ghUser.Login) if err != nil { // If we can't check permissions (repo not found, no access), default to writer diff --git a/services/operator_ui.go b/services/operator_ui.go index 9841d1f..13b4de3 100644 --- a/services/operator_ui.go +++ b/services/operator_ui.go @@ -3,7 +3,6 @@ package services import ( "bytes" "context" - "crypto/subtle" _ "embed" "encoding/json" "fmt" @@ -25,16 +24,16 @@ var operatorIndexHTML []byte var operatorVersionTagRe = regexp.MustCompile(`^v[0-9]+\.[0-9]+\.[0-9]+$`) // RegisterOperatorRoutes mounts the operator HTML UI and JSON APIs under /operator/. -// Call only when cfg.OperatorUIEnabled is true (local/dev). Secured APIs require -// OPERATOR_UI_TOKEN on the server plus Authorization: Bearer from the client. +// Call only when cfg.OperatorUIEnabled is true. Works with any HTTP origin (local +// dev, Cloud Run, Kubernetes, etc.). Every secured API requires an Authorization: +// Bearer header. The user's permission on cfg.OperatorAuthRepo +// determines their role (operator or writer). func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container *ServiceContainer, version string) { o := &operatorUI{ cfg: cfg, container: container, version: version, - } - if cfg.OperatorAuthMode == "github" { - o.ghCache = newGHAuthCache(5 * time.Minute) + ghCache: newGHAuthCache(5 * time.Minute), } // Register specific paths before the /operator/ subtree so /operator/api/* is not handled by serveIndex. 
mux.HandleFunc("/operator/api/status", o.handleOperatorStatus) @@ -53,20 +52,7 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * mux.HandleFunc("/operator", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/operator/", http.StatusFound) }) - switch cfg.OperatorAuthMode { - case "github": - authRepo := cfg.OperatorAuthRepo - if authRepo == "" { - authRepo = "(any valid GitHub user)" - } - LogInfo("Operator UI: /operator/ with GitHub PAT authentication", "auth_repo", authRepo) - default: - if cfg.OperatorUIToken == "" { - LogInfo("Operator UI: /operator/ (set OPERATOR_UI_TOKEN or OPERATOR_AUTH_MODE=github to enable APIs)") - } else { - LogInfo("Operator UI: /operator/ with token authentication enabled") - } - } + LogInfo("Operator UI: /operator/ with GitHub PAT authentication", "auth_repo", cfg.OperatorAuthRepo) } type operatorUI struct { @@ -74,7 +60,7 @@ type operatorUI struct { container *ServiceContainer version string replayInFlight sync.Map // key: "owner/repo#pr" → prevents concurrent replays - ghCache *ghAuthCache // GitHub PAT validation cache (nil when auth mode is "token") + ghCache *ghAuthCache // GitHub PAT validation + per-repo permission cache } // operatorUserCtxKey is the context key for the authenticated operator user. @@ -86,56 +72,39 @@ func operatorUserFromCtx(r *http.Request) *OperatorUser { return u } +// wrapAPI validates the incoming request's GitHub PAT and attaches the user to the context. 
func (o *operatorUI) wrapAPI(next http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") token := bearerToken(r) - - if o.cfg.OperatorAuthMode == "github" { - // GitHub PAT mode: validate the token as a GitHub PAT - if token == "" { - w.WriteHeader(http.StatusUnauthorized) - _ = json.NewEncoder(w).Encode(map[string]string{"error": "provide a GitHub Personal Access Token as Bearer token"}) - return - } - user, err := o.authenticateGitHub(r.Context(), token) - if err != nil { - w.WriteHeader(http.StatusUnauthorized) - _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) - return - } - // Attach the user to the request context for downstream handlers - ctx := context.WithValue(r.Context(), operatorUserCtxKey{}, user) - next(w, r.WithContext(ctx)) - return - } - - // Default: simple shared-token mode - if o.cfg.OperatorUIToken == "" { - w.WriteHeader(http.StatusServiceUnavailable) - _ = json.NewEncoder(w).Encode(map[string]string{ - "error": "operator APIs disabled on server: set OPERATOR_UI_TOKEN (or OPERATOR_AUTH_MODE=github) and redeploy", - }) + if token == "" { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "provide a GitHub Personal Access Token as Bearer token"}) return } - if !operatorAuthOK(o.cfg.OperatorUIToken, token) { + user, err := o.authenticateGitHub(r.Context(), token) + if err != nil { w.WriteHeader(http.StatusUnauthorized) - _ = json.NewEncoder(w).Encode(map[string]string{"error": "unauthorized"}) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) return } - next(w, r) + ctx := context.WithValue(r.Context(), operatorUserCtxKey{}, user) + next(w, r.WithContext(ctx)) } } // wrapOperatorOnly wraps a handler that requires the "operator" role (replay, release). -// In token mode, all authenticated users are operators. In github mode, checks the role. 
func (o *operatorUI) wrapOperatorOnly(next http.HandlerFunc) http.HandlerFunc { return o.wrapAPI(func(w http.ResponseWriter, r *http.Request) { user := operatorUserFromCtx(r) - if user != nil && user.Role != RoleOperator { + if user == nil || user.Role != RoleOperator { + role := "unknown" + if user != nil { + role = string(user.Role) + } w.WriteHeader(http.StatusForbidden) _ = json.NewEncoder(w).Encode(map[string]string{ - "error": fmt.Sprintf("this action requires operator access (you have %s)", string(user.Role)), + "error": fmt.Sprintf("this action requires operator access (you have %s)", role), }) return } @@ -167,10 +136,9 @@ func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request return } w.Header().Set("Content-Type", "application/json") - apisEnabled := o.cfg.OperatorUIToken != "" || o.cfg.OperatorAuthMode == "github" out := map[string]any{ - "operator_apis_enabled": apisEnabled, - "auth_mode": o.cfg.OperatorAuthMode, + "operator_apis_enabled": true, + "auth_repo": o.cfg.OperatorAuthRepo, "metrics_enabled": o.cfg.MetricsEnabled, "audit_enabled": o.cfg.AuditEnabled, "version": o.version, @@ -185,7 +153,7 @@ func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request _ = json.NewEncoder(w).Encode(out) } -// handleMe returns the authenticated user info (GitHub mode) or a generic response (token mode). +// handleMe returns the authenticated user's GitHub login, avatar, and role. 
func (o *operatorUI) handleMe(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { w.WriteHeader(http.StatusMethodNotAllowed) @@ -193,19 +161,16 @@ func (o *operatorUI) handleMe(w http.ResponseWriter, r *http.Request) { return } user := operatorUserFromCtx(r) - if user != nil { - _ = json.NewEncoder(w).Encode(user) + if user == nil { + w.WriteHeader(http.StatusInternalServerError) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "no authenticated user in context"}) return } - // Token mode — no user identity - _ = json.NewEncoder(w).Encode(map[string]any{ - "login": "operator", - "role": "operator", - }) + _ = json.NewEncoder(w).Encode(user) } // handleRepoPermission reports whether the authenticated user has read access to a given repo. -// Used by the frontend to pre-check replay eligibility. In token mode, always returns true. +// Used by the frontend to pre-check replay eligibility per source repo. // Query params: repos=owner/repo1,owner/repo2 (comma-separated). 
func (o *operatorUI) handleRepoPermission(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { @@ -222,18 +187,6 @@ func (o *operatorUI) handleRepoPermission(w http.ResponseWriter, r *http.Request repos := strings.Split(reposParam, ",") result := make(map[string]bool, len(repos)) - // Token mode: no per-repo restrictions — grant all - if o.cfg.OperatorAuthMode != "github" || o.ghCache == nil { - for _, repo := range repos { - repo = strings.TrimSpace(repo) - if repo != "" { - result[repo] = true - } - } - _ = json.NewEncoder(w).Encode(map[string]any{"permissions": result}) - return - } - user := operatorUserFromCtx(r) userPAT := bearerToken(r) if user == nil || userPAT == "" { @@ -264,15 +217,6 @@ func bearerToken(r *http.Request) string { return "" } -func operatorAuthOK(expected, got string) bool { - if expected == "" || got == "" { - return false - } - // subtle.ConstantTimeCompare returns 0 for different-length inputs without - // leaking the expected token length through timing. - return subtle.ConstantTimeCompare([]byte(expected), []byte(got)) == 1 -} - func (o *operatorUI) serveIndex(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/operator/" { http.NotFound(w, r) @@ -704,24 +648,27 @@ func (o *operatorUI) handleReplay(w http.ResponseWriter, r *http.Request) { return } - // Source-repo permission check (GitHub auth mode only): the user's PAT must - // have at least read access to the source repo being replayed. - if o.cfg.OperatorAuthMode == "github" && o.ghCache != nil { + // Source-repo permission check: the user's PAT must have at least read + // access to the source repo being replayed. 
+ { user := operatorUserFromCtx(r) userPAT := bearerToken(r) - if user != nil && userPAT != "" { - permCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second) - canRead, permErr := o.ghCache.CanUserReadRepo(permCtx, userPAT, user.Login, req.Repo) - cancel() - if !canRead { - w.WriteHeader(http.StatusForbidden) - msg := fmt.Sprintf("you do not have access to source repo %s", req.Repo) - if permErr != nil { - msg = fmt.Sprintf("%s: %s", msg, permErr.Error()) - } - _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: msg}) - return + if user == nil || userPAT == "" { + w.WriteHeader(http.StatusUnauthorized) + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: "unauthenticated"}) + return + } + permCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second) + canRead, permErr := o.ghCache.CanUserReadRepo(permCtx, userPAT, user.Login, req.Repo) + cancel() + if !canRead { + w.WriteHeader(http.StatusForbidden) + msg := fmt.Sprintf("you do not have access to source repo %s", req.Repo) + if permErr != nil { + msg = fmt.Sprintf("%s: %s", msg, permErr.Error()) } + _ = json.NewEncoder(w).Encode(operatorReplayResponse{Error: msg}) + return } } diff --git a/services/web/operator/index.html b/services/web/operator/index.html index b2c70d5..1e3516c 100644 --- a/services/web/operator/index.html +++ b/services/web/operator/index.html @@ -301,13 +301,13 @@
- +
-
+
-

Stored in sessionStorage (this tab only).

+

Stored in sessionStorage (this tab only). Your permission on the auth repo determines your role (operator or writer).

@@ -797,9 +797,24 @@

Keyboard shortcuts

function startHeartbeat(){if(!COP_ORIGIN)return;setInterval(function(){fetch(appURL('/operator/api/status')).then(function(r){if(!_connected){_connected=true;$('tbConn').hidden=true;toast('Reconnected to server','info');}}).catch(function(){if(_connected){_connected=false;$('tbConn').hidden=false;toast('Connection lost to server','error');}});},60000);} /* Server status */ -function checkServerStatus(){if(!COP_ORIGIN){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Open this page at http://127.0.0.1:/operator/.';return;}fetch(appURL('/operator/api/status')).then(function(r){if(r.status===404){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Operator routes not enabled.';return null;}return r.json();}).then(function(d){if(!d)return;updateTBVersion(d.version); - if(d.auth_mode==='github'){$('token').placeholder='GitHub Personal Access Token';$('token').parentNode.parentNode.querySelector('label').innerHTML='GitHub token (PAT with repo read access)';} - if(!d.operator_apis_enabled){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='No authentication configured: set OPERATOR_UI_TOKEN or OPERATOR_AUTH_MODE=github.';}}).catch(function(){});} +function checkServerStatus(){ + if(!COP_ORIGIN){ + $('serverStatusBanner').hidden=false; + $('serverStatusBanner').textContent='Open this page at http://127.0.0.1:/operator/.'; + return; + } + fetch(appURL('/operator/api/status')).then(function(r){ + if(r.status===404){$('serverStatusBanner').hidden=false;$('serverStatusBanner').textContent='Operator routes not enabled (OPERATOR_UI_ENABLED=false).';return null;} + return r.json(); + }).then(function(d){ + if(!d)return; + updateTBVersion(d.version); + if(d.auth_repo){ + var lbl=$('authRepoLabel'); + if(lbl){lbl.textContent=d.auth_repo;} + } + }).catch(function(){}); +} /* ── Metrics with groups, deltas, sparklines, health accents ── */ var _prevMetrics={}, _metricHistory={}; From 
b9ad455a603d0721af93abbd89e4e301b8f4d46e Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 11:07:37 -0400 Subject: [PATCH 05/20] feat(operator): AI rule suggester with full UI-based Ollama management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an LLM-powered rule suggester in the Workflows tab. Given a source file path and a desired target file path, the LLM proposes a copier workflow rule and the server verifies the rule actually produces the target before returning it to the user. All Ollama management (connect/disconnect, list installed models, pull/delete, switch active model, change base URL) happens from the UI. No LLM_ENABLED env var or restart required — availability is detected at runtime via an Ollama ping. Backend: - services/llm_client.go: pluggable LLMClient interface with an Ollama implementation. Mutex-protected mutable state (active model, base URL) so operators can switch models at runtime. Streams pull progress as NDJSON, exposes ListModels/Ping/DeleteModel/PullModel. - services/operator_suggest_rule.go: POST /operator/api/suggest-rule prompts the LLM with a bounded schema (move/copy/glob/regex) and verifies the generated rule against the user's source/target using the existing PatternMatcher + PathTransformer. Returns YAML + explanation + verification status + computed_path so writers see exactly what the rule would produce. - services/operator_llm_admin.go: four admin endpoints GET /operator/api/llm/status reachability, models, active model POST /operator/api/llm/settings set active model / base URL at runtime POST /operator/api/llm/pull stream NDJSON progress from Ollama DELETE /operator/api/llm/model remove a model Write operations require operator role. - operator_ui.go: always instantiate the LLM client (no env gate), expose llm_available via /operator/api/status. 
- configs/environment.go: LLM_BASE_URL and LLM_MODEL are now optional initial defaults; LLM_ENABLED is removed entirely. Frontend (index.html): - AI settings panel in the Workflows tab (operator-only): connection status chip, installed-models list with delete buttons, active-model selector, pull-model input with live streaming progress bar, editable base URL. Shows install instructions with a link to ollama.com/download when Ollama isn't reachable. - AI rule suggester panel: source + target inputs (plus optional target repo), generate button with long-running spinner, result card with verification badge (green or red), computed path on failure, YAML code block with copy-to-clipboard, and the LLM's explanation. - Pre-fills the AI suggester's source from the file match tester if set. --- configs/environment.go | 14 ++ services/llm_client.go | 296 ++++++++++++++++++++++++++ services/operator_llm_admin.go | 184 ++++++++++++++++ services/operator_suggest_rule.go | 324 +++++++++++++++++++++++++++++ services/operator_ui.go | 15 ++ services/web/operator/index.html | 335 +++++++++++++++++++++++++++++- 6 files changed, 1166 insertions(+), 2 deletions(-) create mode 100644 services/llm_client.go create mode 100644 services/operator_llm_admin.go create mode 100644 services/operator_suggest_rule.go diff --git a/configs/environment.go b/configs/environment.go index d272583..8d6ad05 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -79,6 +79,13 @@ type Config struct { OperatorRepoSlug string // "owner/repo" for GitHub links in audit/trace rows (optional) OperatorReleaseGitHubToken string // PAT with contents:write to create a version tag (optional) OperatorReleaseTargetBranch string // branch SHA used when creating a tag (default main) + + // AI rule suggestion (optional) — LLM-powered rule generation in the operator UI. 
+ // The feature is available whenever the LLM provider is reachable at runtime; + // operators can change the active model and base URL from the UI without restart. + LLMProvider string // "ollama" (default). Pluggable for future providers. + LLMBaseURL string // initial default; overridable from the UI + LLMModel string // initial default; overridable from the UI } const ( @@ -132,6 +139,9 @@ const ( OperatorRepoSlug = "OPERATOR_REPO_SLUG" OperatorReleaseGitHubToken = "OPERATOR_RELEASE_GITHUB_TOKEN" // #nosec G101 -- env var name OperatorReleaseTargetBranch = "OPERATOR_RELEASE_TARGET_BRANCH" + LLMProvider = "LLM_PROVIDER" + LLMBaseURL = "LLM_BASE_URL" + LLMModel = "LLM_MODEL" ) // NewConfig returns a new Config instance with default values @@ -256,6 +266,10 @@ func LoadEnvironment(envFile string) (*Config, error) { config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) config.OperatorReleaseTargetBranch = getEnvWithDefault(OperatorReleaseTargetBranch, "main") + config.LLMProvider = getEnvWithDefault(LLMProvider, "ollama") + config.LLMBaseURL = getEnvWithDefault(LLMBaseURL, "http://localhost:11434") + config.LLMModel = getEnvWithDefault(LLMModel, "qwen2.5-coder:7b") + if err := validateConfig(config); err != nil { return nil, err } diff --git a/services/llm_client.go b/services/llm_client.go new file mode 100644 index 0000000..e5502ae --- /dev/null +++ b/services/llm_client.go @@ -0,0 +1,296 @@ +package services + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" + "time" +) + +// LLMClient is the minimal interface used by the operator UI. It supports +// runtime reconfiguration (active model, base URL) and provider management +// operations (list/pull/delete models). +type LLMClient interface { + // GenerateJSON sends a prompt to the LLM and returns the raw response body. 
+ GenerateJSON(ctx context.Context, systemPrompt, userPrompt string) (string, error) + + // ProviderName returns a short identifier for logging. + ProviderName() string + + // Ping checks whether the LLM service is reachable. + Ping(ctx context.Context) error + + // GetBaseURL returns the current base URL. + GetBaseURL() string + // SetBaseURL updates the base URL at runtime. + SetBaseURL(url string) + + // GetActiveModel returns the model that will be used for generations. + GetActiveModel() string + // SetActiveModel updates the active model at runtime. + SetActiveModel(model string) + + // ListModels returns the models installed/available on the LLM server. + ListModels(ctx context.Context) ([]LLMModel, error) + + // PullModel asks the server to download a model. Progress updates are + // written to progress as they arrive. The function blocks until the pull + // completes or the context is cancelled. + PullModel(ctx context.Context, name string, progress func(LLMPullProgress)) error + + // DeleteModel removes a model from the server. + DeleteModel(ctx context.Context, name string) error +} + +// LLMModel describes an installed model returned by the provider. +type LLMModel struct { + Name string `json:"name"` + Size int64 `json:"size,omitempty"` + ModifiedAt string `json:"modified_at,omitempty"` +} + +// LLMPullProgress is a single progress event emitted during PullModel. +type LLMPullProgress struct { + Status string `json:"status"` + Completed int64 `json:"completed,omitempty"` + Total int64 `json:"total,omitempty"` + Digest string `json:"digest,omitempty"` + Error string `json:"error,omitempty"` +} + +// NewLLMClient returns a client for the configured provider. 
+func NewLLMClient(provider, baseURL, model string) (LLMClient, error) { + switch strings.ToLower(strings.TrimSpace(provider)) { + case "", "ollama": + if baseURL == "" { + baseURL = "http://localhost:11434" + } + if model == "" { + model = "qwen2.5-coder:7b" + } + return &ollamaClient{ + baseURL: strings.TrimSuffix(baseURL, "/"), + model: model, + http: &http.Client{Timeout: 60 * time.Second}, + pullHTTP: &http.Client{ + // No timeout for pulls — model downloads can take 10+ minutes + }, + }, nil + default: + return nil, fmt.Errorf("unsupported LLM provider: %q (only \"ollama\" is implemented)", provider) + } +} + +// ── Ollama ── + +type ollamaClient struct { + mu sync.RWMutex + baseURL string + model string + http *http.Client // short-timeout client for most calls + pullHTTP *http.Client // no-timeout client for streaming pull requests +} + +func (c *ollamaClient) ProviderName() string { return "ollama" } + +func (c *ollamaClient) GetBaseURL() string { + c.mu.RLock() + defer c.mu.RUnlock() + return c.baseURL +} + +func (c *ollamaClient) SetBaseURL(url string) { + c.mu.Lock() + defer c.mu.Unlock() + c.baseURL = strings.TrimSuffix(strings.TrimSpace(url), "/") +} + +func (c *ollamaClient) GetActiveModel() string { + c.mu.RLock() + defer c.mu.RUnlock() + return c.model +} + +func (c *ollamaClient) SetActiveModel(model string) { + c.mu.Lock() + defer c.mu.Unlock() + c.model = strings.TrimSpace(model) +} + +// Ping calls GET /api/tags as a reachability check (cheap, no model load). 
+func (c *ollamaClient) Ping(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.GetBaseURL()+"/api/tags", nil) + if err != nil { + return err + } + resp, err := c.http.Do(req) + if err != nil { + return fmt.Errorf("ollama unreachable at %s: %w", c.GetBaseURL(), err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<15)) + return fmt.Errorf("ollama returned %s: %s", resp.Status, strings.TrimSpace(string(body))) + } + return nil +} + +// ollamaTagsResponse is GET /api/tags. +type ollamaTagsResponse struct { + Models []struct { + Name string `json:"name"` + Size int64 `json:"size"` + ModifiedAt string `json:"modified_at"` + } `json:"models"` +} + +func (c *ollamaClient) ListModels(ctx context.Context) ([]LLMModel, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.GetBaseURL()+"/api/tags", nil) + if err != nil { + return nil, err + } + resp, err := c.http.Do(req) + if err != nil { + return nil, fmt.Errorf("list models: %w", err) + } + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("ollama returned %s: %s", resp.Status, strings.TrimSpace(string(body))) + } + var tags ollamaTagsResponse + if err := json.Unmarshal(body, &tags); err != nil { + return nil, fmt.Errorf("parse models: %w", err) + } + out := make([]LLMModel, 0, len(tags.Models)) + for _, m := range tags.Models { + out = append(out, LLMModel{Name: m.Name, Size: m.Size, ModifiedAt: m.ModifiedAt}) + } + return out, nil +} + +// PullModel starts a model pull and streams NDJSON progress events. 
+func (c *ollamaClient) PullModel(ctx context.Context, name string, progress func(LLMPullProgress)) error { + body, _ := json.Marshal(map[string]any{"name": name, "stream": true}) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.GetBaseURL()+"/api/pull", bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + resp, err := c.pullHTTP.Do(req) + if err != nil { + return fmt.Errorf("start pull: %w", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<15)) + return fmt.Errorf("ollama returned %s: %s", resp.Status, strings.TrimSpace(string(respBody))) + } + + // Ollama emits newline-delimited JSON progress events. Stream them through. + scanner := bufio.NewScanner(resp.Body) + scanner.Buffer(make([]byte, 64*1024), 1024*1024) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + var ev LLMPullProgress + if err := json.Unmarshal(line, &ev); err != nil { + // Skip unparseable lines rather than aborting + continue + } + if progress != nil { + progress(ev) + } + if ev.Error != "" { + return fmt.Errorf("pull error: %s", ev.Error) + } + } + return scanner.Err() +} + +// DeleteModel removes a locally installed model. 
+func (c *ollamaClient) DeleteModel(ctx context.Context, name string) error { + body, _ := json.Marshal(map[string]string{"name": name}) + req, err := http.NewRequestWithContext(ctx, http.MethodDelete, c.GetBaseURL()+"/api/delete", bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + resp, err := c.http.Do(req) + if err != nil { + return fmt.Errorf("delete model: %w", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<15)) + return fmt.Errorf("ollama returned %s: %s", resp.Status, strings.TrimSpace(string(respBody))) + } + return nil +} + +// ollamaGenerateRequest is the body of POST /api/generate. +type ollamaGenerateRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + System string `json:"system,omitempty"` + Stream bool `json:"stream"` + Format string `json:"format,omitempty"` // "json" constrains output to valid JSON +} + +type ollamaGenerateResponse struct { + Model string `json:"model"` + Response string `json:"response"` + Done bool `json:"done"` + DoneError string `json:"error,omitempty"` +} + +func (c *ollamaClient) GenerateJSON(ctx context.Context, systemPrompt, userPrompt string) (string, error) { + body, err := json.Marshal(ollamaGenerateRequest{ + Model: c.GetActiveModel(), + System: systemPrompt, + Prompt: userPrompt, + Stream: false, + Format: "json", + }) + if err != nil { + return "", fmt.Errorf("marshal ollama request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.GetBaseURL()+"/api/generate", bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err := c.http.Do(req) + if err != nil { + return "", fmt.Errorf("call ollama at %s: %w (is ollama running?)", c.GetBaseURL(), err) + } + defer func() { _ = 
resp.Body.Close() }() + + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("ollama returned %s: %s", resp.Status, strings.TrimSpace(string(respBody))) + } + + var out ollamaGenerateResponse + if err := json.Unmarshal(respBody, &out); err != nil { + return "", fmt.Errorf("parse ollama response: %w", err) + } + if out.DoneError != "" { + return "", fmt.Errorf("ollama error: %s", out.DoneError) + } + if out.Response == "" { + return "", fmt.Errorf("ollama returned empty response (check that model %q is pulled)", c.GetActiveModel()) + } + return out.Response, nil +} diff --git a/services/operator_llm_admin.go b/services/operator_llm_admin.go new file mode 100644 index 0000000..1521d9c --- /dev/null +++ b/services/operator_llm_admin.go @@ -0,0 +1,184 @@ +package services + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// handleLLMStatus returns the current LLM settings, reachability, and installed models. 
+func (o *operatorUI) handleLLMStatus(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + out := map[string]any{ + "available": o.llm != nil, + "provider": o.cfg.LLMProvider, + "base_url": "", + "active_model": "", + "reachable": false, + "models": []LLMModel{}, + } + if o.llm == nil { + out["error"] = "LLM client not initialized" + _ = json.NewEncoder(w).Encode(out) + return + } + out["base_url"] = o.llm.GetBaseURL() + out["active_model"] = o.llm.GetActiveModel() + + ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second) + defer cancel() + if err := o.llm.Ping(ctx); err != nil { + out["error"] = err.Error() + _ = json.NewEncoder(w).Encode(out) + return + } + out["reachable"] = true + + models, err := o.llm.ListModels(ctx) + if err != nil { + out["error"] = "list models: " + err.Error() + _ = json.NewEncoder(w).Encode(out) + return + } + out["models"] = models + _ = json.NewEncoder(w).Encode(out) +} + +// handleLLMSettings updates the active model and/or base URL at runtime. +// In-memory only — reverts to env-var defaults on process restart. 
+type llmSettingsRequest struct { + ActiveModel string `json:"active_model,omitempty"` + BaseURL string `json:"base_url,omitempty"` +} + +func (o *operatorUI) handleLLMSettings(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + if o.llm == nil { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "LLM client not initialized"}) + return + } + body, _ := io.ReadAll(io.LimitReader(r.Body, 4096)) + var req llmSettingsRequest + if err := json.Unmarshal(body, &req); err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "invalid json"}) + return + } + if m := strings.TrimSpace(req.ActiveModel); m != "" { + o.llm.SetActiveModel(m) + } + if u := strings.TrimSpace(req.BaseURL); u != "" { + o.llm.SetBaseURL(u) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "active_model": o.llm.GetActiveModel(), + "base_url": o.llm.GetBaseURL(), + }) +} + +// handleLLMDeleteModel deletes a model from the LLM server. 
+func (o *operatorUI) handleLLMDeleteModel(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodDelete { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + if o.llm == nil { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "LLM client not initialized"}) + return + } + name := strings.TrimSpace(r.URL.Query().Get("name")) + if name == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "name query param required"}) + return + } + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + if err := o.llm.DeleteModel(ctx, name); err != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{"ok": true, "deleted": name}) +} + +// handleLLMPullModel streams pull progress to the client as NDJSON. +// Each line is a JSON object with {status, completed, total, error}. 
+func (o *operatorUI) handleLLMPullModel(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "method not allowed"}) + return + } + if o.llm == nil { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "LLM client not initialized"}) + return + } + body, _ := io.ReadAll(io.LimitReader(r.Body, 4096)) + var req struct { + Name string `json:"name"` + } + if err := json.Unmarshal(body, &req); err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "invalid json"}) + return + } + req.Name = strings.TrimSpace(req.Name) + if req.Name == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": "name is required"}) + return + } + + // Switch to NDJSON streaming + w.Header().Set("Content-Type", "application/x-ndjson") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("X-Accel-Buffering", "no") // disable nginx buffering when behind a proxy + flusher, canFlush := w.(http.Flusher) + encoder := json.NewEncoder(w) + + // Pulls can take a long time; don't use r.Context() if the client could disconnect + // prematurely. Use a 20-minute timeout as a safety net. 
+ ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute) + defer cancel() + // Still honor client cancellation + go func() { + <-r.Context().Done() + cancel() + }() + + err := o.llm.PullModel(ctx, req.Name, func(ev LLMPullProgress) { + _ = encoder.Encode(ev) + if canFlush { + flusher.Flush() + } + }) + if err != nil { + _ = encoder.Encode(LLMPullProgress{Error: fmt.Sprintf("pull failed: %s", err.Error())}) + if canFlush { + flusher.Flush() + } + return + } + // Final event so the client knows the stream ended successfully + _ = encoder.Encode(LLMPullProgress{Status: "done"}) + if canFlush { + flusher.Flush() + } +} diff --git a/services/operator_suggest_rule.go b/services/operator_suggest_rule.go new file mode 100644 index 0000000..e6db858 --- /dev/null +++ b/services/operator_suggest_rule.go @@ -0,0 +1,324 @@ +package services + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "time" + + "github.com/grove-platform/github-copier/types" +) + +// operatorSuggestRuleRequest is what the operator UI sends when asking the LLM +// to generate a copier rule from a source→target example. +type operatorSuggestRuleRequest struct { + SourcePath string `json:"source_path"` + TargetPath string `json:"target_path"` + TargetRepo string `json:"target_repo,omitempty"` // optional + SourceRepo string `json:"source_repo,omitempty"` // optional, for context +} + +// operatorSuggestRuleResponse is what the handler returns: the generated rule, +// an explanation, and a verification check against the user's example. 
+type operatorSuggestRuleResponse struct { + RuleYAML string `json:"rule_yaml"` + Explanation string `json:"explanation,omitempty"` + Verified bool `json:"verified"` // true if the rule produces target_path from source_path + ComputedPath string `json:"computed_path,omitempty"` // actual target path the rule would produce + VerifyError string `json:"verify_error,omitempty"` // reason verification failed (if any) + Warning string `json:"warning,omitempty"` // any non-fatal concern + Error string `json:"error,omitempty"` +} + +// llmSuggestedRule is the structured JSON we ask the LLM to return. +type llmSuggestedRule struct { + Name string `json:"name"` + DestRepo string `json:"destination_repo"` + DestBranch string `json:"destination_branch,omitempty"` + TransformType string `json:"transform_type"` // "move" | "copy" | "glob" | "regex" + TransformFrom string `json:"transform_from,omitempty"` + TransformTo string `json:"transform_to,omitempty"` + Pattern string `json:"pattern,omitempty"` + TransformTempl string `json:"transform_template,omitempty"` + CommitStrategy string `json:"commit_strategy,omitempty"` // "direct" or "pull_request" + Explanation string `json:"explanation,omitempty"` + Extra map[string]string `json:"-"` +} + +// handleSuggestRule accepts a source/target pair and asks the configured LLM to +// generate a copier workflow rule that would produce that transformation. +// The generated rule is self-verified against the example before returning. 
+func (o *operatorUI) handleSuggestRule(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{Error: "method not allowed"}) + return + } + if o.llm == nil { + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{ + Error: "LLM client not initialized on server (check startup logs)", + }) + return + } + + body, err := io.ReadAll(io.LimitReader(r.Body, 4096)) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{Error: "read body"}) + return + } + var req operatorSuggestRuleRequest + if err := json.Unmarshal(body, &req); err != nil { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{Error: "invalid json"}) + return + } + req.SourcePath = strings.TrimSpace(req.SourcePath) + req.TargetPath = strings.TrimSpace(req.TargetPath) + req.TargetRepo = strings.TrimSpace(req.TargetRepo) + req.SourceRepo = strings.TrimSpace(req.SourceRepo) + if req.SourcePath == "" || req.TargetPath == "" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{Error: "source_path and target_path are required"}) + return + } + + ctx, cancel := context.WithTimeout(r.Context(), 90*time.Second) + defer cancel() + + suggestion, err := o.askLLMForRule(ctx, req) + if err != nil { + w.WriteHeader(http.StatusBadGateway) + _ = json.NewEncoder(w).Encode(operatorSuggestRuleResponse{Error: err.Error()}) + return + } + + ruleYAML := renderRuleYAML(suggestion, req) + verified, computed, vErr := verifySuggestedRule(suggestion, req.SourcePath, req.TargetPath) + + resp := operatorSuggestRuleResponse{ + RuleYAML: ruleYAML, + Explanation: suggestion.Explanation, + Verified: verified, + ComputedPath: computed, + } + if vErr != nil { + resp.VerifyError = vErr.Error() + } + if !verified { 
+ resp.Warning = "Generated rule did not produce the expected target path from your example. Review and adjust before saving." + } + _ = json.NewEncoder(w).Encode(resp) +} + +// askLLMForRule sends a structured prompt to the LLM and parses the JSON response. +func (o *operatorUI) askLLMForRule(ctx context.Context, req operatorSuggestRuleRequest) (*llmSuggestedRule, error) { + systemPrompt := `You are an expert in GitHub Copier workflow configuration. You generate concise, correct YAML rules that match a single source→target file transformation example. + +The copier supports 4 transform types (pick the simplest that works): +- move: { from: "prefix/path", to: "new/prefix" } — renames a directory prefix. Matches any file under "from" and replaces the prefix with "to". +- copy: { from: "exact/file.md", to: "new/file.md" } — renames one exact file. Use only when source is a single specific file. +- glob: { pattern: "dir/**/*.ext", transform: "new/${relative_path}" } — matches files by glob pattern. Use "${relative_path}" to preserve subdirectory structure. +- regex: { pattern: "dir/(?P.+)\\.ext", transform: "new/${name}.ext" } — uses Go regex with named capture groups. Use ONLY when move/copy/glob are insufficient. + +Prefer move > copy > glob > regex (simpler is better). + +You return JSON with these fields: +- transform_type: "move" | "copy" | "glob" | "regex" +- transform_from, transform_to: for move/copy +- pattern, transform_template: for glob/regex +- name: a kebab-case rule name (e.g., "agg-python-models") +- destination_repo: the target repository (use the one the user provided, or infer from context) +- destination_branch: optional, defaults to "main" +- commit_strategy: "direct" or "pull_request" (default "pull_request") +- explanation: 1-2 sentence plain-English justification + +IMPORTANT: The generated rule MUST produce the user's target file when applied to their source file. 
Test your logic mentally before responding.` + + userPrompt := fmt.Sprintf(`Generate a copier rule for this transformation: + +Source file: %s +Target file: %s +Target repo: %s + +Return ONLY a JSON object with the fields documented above. No prose outside the JSON.`, + req.SourcePath, req.TargetPath, defaultIfEmpty(req.TargetRepo, "(user did not specify — use a placeholder like \"org/target-repo\")")) + + raw, err := o.llm.GenerateJSON(ctx, systemPrompt, userPrompt) + if err != nil { + return nil, fmt.Errorf("LLM error: %w", err) + } + + var suggestion llmSuggestedRule + if err := json.Unmarshal([]byte(raw), &suggestion); err != nil { + return nil, fmt.Errorf("LLM returned invalid JSON: %w (response: %s)", err, truncate(raw, 200)) + } + suggestion.TransformType = strings.ToLower(strings.TrimSpace(suggestion.TransformType)) + if suggestion.DestRepo == "" && req.TargetRepo != "" { + suggestion.DestRepo = req.TargetRepo + } + if suggestion.DestBranch == "" { + suggestion.DestBranch = "main" + } + if suggestion.CommitStrategy == "" { + suggestion.CommitStrategy = "pull_request" + } + if suggestion.Name == "" { + suggestion.Name = "generated-rule" + } + return &suggestion, nil +} + +// verifySuggestedRule tests whether the suggested rule, applied to sourcePath, +// produces targetPath. Returns (matched, computedPath, error). 
+func verifySuggestedRule(s *llmSuggestedRule, sourcePath, targetPath string) (bool, string, error) {
+	transformer := NewPathTransformer()
+
+	switch s.TransformType {
+	case "move":
+		if s.TransformFrom == "" || s.TransformTo == "" {
+			return false, "", fmt.Errorf("move rule missing from/to")
+		}
+		from := strings.TrimSuffix(s.TransformFrom, "/")
+		// Require a path-segment boundary: prefix "docs" must match "docs/x"
+		// but not "docsite/x". A bare HasPrefix would accept the latter and
+		// compute a garbage relative path.
+		if sourcePath != from && !strings.HasPrefix(sourcePath, from+"/") {
+			return false, "", fmt.Errorf("source path does not start with %q", from)
+		}
+		rel := strings.TrimPrefix(strings.TrimPrefix(sourcePath, from), "/")
+		computed := strings.TrimSuffix(s.TransformTo, "/") + "/" + rel
+		// Trim again for the edge case where sourcePath equals the prefix
+		// exactly (rel == ""), which would otherwise leave a trailing slash.
+		computed = strings.TrimSuffix(computed, "/")
+		return computed == targetPath, computed, nil
+
+	case "copy":
+		if s.TransformFrom == "" || s.TransformTo == "" {
+			return false, "", fmt.Errorf("copy rule missing from/to")
+		}
+		if sourcePath != s.TransformFrom {
+			return false, "", fmt.Errorf("source path %q does not equal copy from %q", sourcePath, s.TransformFrom)
+		}
+		return s.TransformTo == targetPath, s.TransformTo, nil
+
+	case "glob":
+		if s.Pattern == "" || s.TransformTempl == "" {
+			return false, "", fmt.Errorf("glob rule missing pattern/transform")
+		}
+		matcher := NewPatternMatcher()
+		result := matcher.Match(sourcePath, types.SourcePattern{Type: types.PatternTypeGlob, Pattern: s.Pattern})
+		if !result.Matched {
+			return false, "", fmt.Errorf("glob pattern %q does not match %q", s.Pattern, sourcePath)
+		}
+		// Add relative_path (server-side glob transform convention): strip prefix before first wildcard
+		vars := result.Variables
+		if vars == nil {
+			vars = make(map[string]string)
+		}
+		vars["relative_path"] = computeGlobRelativePath(sourcePath, s.Pattern)
+		computed, err := transformer.Transform(sourcePath, s.TransformTempl, vars)
+		if err != nil {
+			return false, "", fmt.Errorf("apply transform: %w", err)
+		}
+		return computed == targetPath, computed, nil
+
+	case "regex":
+		if s.Pattern == "" || s.TransformTempl == "" {
+			return false, "", fmt.Errorf("regex rule missing pattern/transform")
+		}
+		re, err := regexp.Compile(s.Pattern)
+		if err != nil {
+			return false, "", fmt.Errorf("invalid regex: %w", err)
+		}
+		match := re.FindStringSubmatch(sourcePath)
+		if match == nil {
+			return false, "", fmt.Errorf("regex %q does not match %q", s.Pattern, sourcePath)
+		}
+		// Expose named capture groups as template variables so the transform
+		// template can reference ${name} — mirrors the server-side behavior.
+		vars := map[string]string{"matched_pattern": s.Pattern}
+		for i, name := range re.SubexpNames() {
+			if i > 0 && name != "" {
+				vars[name] = match[i]
+			}
+		}
+		computed, err := transformer.Transform(sourcePath, s.TransformTempl, vars)
+		if err != nil {
+			return false, "", fmt.Errorf("apply transform: %w", err)
+		}
+		return computed == targetPath, computed, nil
+
+	default:
+		return false, "", fmt.Errorf("unknown transform type: %q", s.TransformType)
+	}
+}
+
+// computeGlobRelativePath mirrors the server-side convention: strip the
+// longest literal prefix (before the first wildcard) from the source path.
+// Returns "" when the pattern contains no wildcard at all.
+func computeGlobRelativePath(sourcePath, pattern string) string {
+	// Find the first wildcard character in the pattern
+	idx := strings.IndexAny(pattern, "*?[")
+	if idx < 0 {
+		return ""
+	}
+	prefix := pattern[:idx]
+	// Trim to the last '/' before the wildcard to get a clean directory prefix
+	if slash := strings.LastIndex(prefix, "/"); slash >= 0 {
+		prefix = prefix[:slash+1]
+	}
+	return strings.TrimPrefix(sourcePath, prefix)
+}
+
+// renderRuleYAML produces a YAML snippet for the operator UI to display.
+func renderRuleYAML(s *llmSuggestedRule, req operatorSuggestRuleRequest) string { + var sb strings.Builder + sb.WriteString("- name: \"") + sb.WriteString(s.Name) + sb.WriteString("\"\n") + if req.SourceRepo != "" { + sb.WriteString(" source:\n") + sb.WriteString(" repo: \"") + sb.WriteString(req.SourceRepo) + sb.WriteString("\"\n") + } + sb.WriteString(" destination:\n") + sb.WriteString(" repo: \"") + sb.WriteString(s.DestRepo) + sb.WriteString("\"\n") + sb.WriteString(" branch: \"") + sb.WriteString(s.DestBranch) + sb.WriteString("\"\n") + sb.WriteString(" transformations:\n") + switch s.TransformType { + case "move": + fmt.Fprintf(&sb, " - move: { from: %q, to: %q }\n", s.TransformFrom, s.TransformTo) + case "copy": + fmt.Fprintf(&sb, " - copy: { from: %q, to: %q }\n", s.TransformFrom, s.TransformTo) + case "glob": + sb.WriteString(" - glob:\n") + fmt.Fprintf(&sb, " pattern: %q\n", s.Pattern) + fmt.Fprintf(&sb, " transform: %q\n", s.TransformTempl) + case "regex": + sb.WriteString(" - regex:\n") + fmt.Fprintf(&sb, " pattern: %q\n", s.Pattern) + fmt.Fprintf(&sb, " transform: %q\n", s.TransformTempl) + } + sb.WriteString(" commit_strategy:\n") + sb.WriteString(" type: \"") + sb.WriteString(s.CommitStrategy) + sb.WriteString("\"\n") + return sb.String() +} + +func defaultIfEmpty(s, def string) string { + if s == "" { + return def + } + return s +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} diff --git a/services/operator_ui.go b/services/operator_ui.go index 13b4de3..2b9b97d 100644 --- a/services/operator_ui.go +++ b/services/operator_ui.go @@ -35,6 +35,14 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * version: version, ghCache: newGHAuthCache(5 * time.Minute), } + // Always create the LLM client; availability is checked dynamically via Ping. + // Operators can change the active model and base URL from the UI without restart. 
+ if client, err := NewLLMClient(cfg.LLMProvider, cfg.LLMBaseURL, cfg.LLMModel); err != nil { + LogWarning("LLM client init failed", "error", err.Error()) + } else { + o.llm = client + LogInfo("LLM rule suggester ready", "provider", client.ProviderName(), "base_url", cfg.LLMBaseURL, "model", cfg.LLMModel, "note", "availability checked at request time") + } // Register specific paths before the /operator/ subtree so /operator/api/* is not handled by serveIndex. mux.HandleFunc("/operator/api/status", o.handleOperatorStatus) mux.HandleFunc("/operator/api/audit/events", o.wrapAPI(o.handleAuditEvents)) @@ -48,6 +56,11 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * mux.HandleFunc("/operator/api/logs", o.wrapAPI(o.handleDeliveryLogs)) mux.HandleFunc("/operator/api/me", o.wrapAPI(o.handleMe)) mux.HandleFunc("/operator/api/repo-permission", o.wrapAPI(o.handleRepoPermission)) + mux.HandleFunc("/operator/api/suggest-rule", o.wrapAPI(o.handleSuggestRule)) + mux.HandleFunc("/operator/api/llm/status", o.wrapAPI(o.handleLLMStatus)) + mux.HandleFunc("/operator/api/llm/settings", o.wrapOperatorOnly(o.handleLLMSettings)) + mux.HandleFunc("/operator/api/llm/model", o.wrapOperatorOnly(o.handleLLMDeleteModel)) + mux.HandleFunc("/operator/api/llm/pull", o.wrapOperatorOnly(o.handleLLMPullModel)) mux.HandleFunc("/operator/", o.serveIndex) mux.HandleFunc("/operator", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/operator/", http.StatusFound) @@ -61,6 +74,7 @@ type operatorUI struct { version string replayInFlight sync.Map // key: "owner/repo#pr" → prevents concurrent replays ghCache *ghAuthCache // GitHub PAT validation + per-repo permission cache + llm LLMClient // optional: enabled when cfg.LLMEnabled is true } // operatorUserCtxKey is the context key for the authenticated operator user. 
@@ -139,6 +153,7 @@ func (o *operatorUI) handleOperatorStatus(w http.ResponseWriter, r *http.Request out := map[string]any{ "operator_apis_enabled": true, "auth_repo": o.cfg.OperatorAuthRepo, + "llm_available": o.llm != nil, // client exists; reachability checked via /operator/api/llm/status "metrics_enabled": o.cfg.MetricsEnabled, "audit_enabled": o.cfg.AuditEnabled, "version": o.version, diff --git a/services/web/operator/index.html b/services/web/operator/index.html index 1e3516c..28bf425 100644 --- a/services/web/operator/index.html +++ b/services/web/operator/index.html @@ -219,6 +219,34 @@ .pr-search{display:flex;gap:0.5rem;align-items:flex-end;margin-bottom:0.65rem;padding:0.55rem 0.75rem;background:var(--surface-alt);border:1px solid var(--border);border-radius:8px;} .pr-search input{flex:1;max-width:14rem;} + /* AI settings panel */ + .ai-status-line{display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0.7rem;border-radius:6px;background:var(--surface-alt);border:1px solid var(--border);margin-bottom:0.6rem;} + .ai-status-line.ai-ok{background:var(--green-light);border-color:var(--green);} + .ai-status-line.ai-fail{background:var(--red-light);border-color:var(--red);} + .ai-status-line .dot{flex-shrink:0;} + .ai-model-list{display:flex;flex-direction:column;gap:0.35rem;margin-top:0.5rem;} + .ai-model-row{display:flex;align-items:center;gap:0.5rem;padding:0.4rem 0.6rem;background:var(--surface-alt);border:1px solid var(--border);border-radius:6px;font-size:0.85rem;} + .ai-model-row.active{border-color:var(--blue);background:var(--blue-light);} + .ai-model-name{flex:1;font-family:ui-monospace,monospace;font-size:0.8rem;word-break:break-all;} + .ai-model-size{font-size:0.72rem;color:var(--text-3);white-space:nowrap;} + .ai-progress{margin-top:0.5rem;padding:0.5rem 0.7rem;border-radius:6px;background:var(--surface-alt);border:1px solid var(--border);font-size:0.82rem;} + 
.ai-progress-bar{height:8px;background:var(--border);border-radius:4px;overflow:hidden;margin-top:0.3rem;} + .ai-progress-fill{height:100%;background:var(--blue);transition:width 0.2s;} + .ai-progress-status{display:flex;justify-content:space-between;font-size:0.75rem;color:var(--text-3);margin-top:0.25rem;} + .ai-install-steps{font-size:0.82rem;color:var(--text-2);line-height:1.6;margin-top:0.4rem;} + .ai-install-steps ol{margin:0.3rem 0;padding-left:1.4rem;} + .ai-install-steps code{background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:0.08rem 0.3rem;font-size:0.78rem;} + + /* AI rule suggester */ + .ai-badge{display:inline-block;font-size:0.65rem;font-weight:700;padding:0.1rem 0.4rem;border-radius:3px;background:linear-gradient(135deg,#8b5cf6,#6366f1);color:#fff;letter-spacing:.05em;margin-left:0.4rem;vertical-align:middle;} + .ai-result{margin-top:0.65rem;padding:0.75rem;border:1px solid var(--border);border-radius:8px;background:var(--surface-alt);} + .ai-verify{display:inline-flex;align-items:center;gap:4px;padding:0.15rem 0.5rem;border-radius:9999px;font-size:0.75rem;font-weight:600;margin-bottom:0.5rem;} + .ai-verify-ok{background:var(--green-light);color:var(--green);border:1px solid var(--green);} + .ai-verify-fail{background:var(--red-light);color:var(--red);border:1px solid var(--red);} + .ai-yaml{background:var(--pre-bg);border:1px solid var(--border-light);border-radius:6px;padding:0.6rem;font-family:ui-monospace,monospace;font-size:0.78rem;color:var(--text);white-space:pre;overflow:auto;max-height:260px;margin:0.3rem 0;} + .ai-explain{font-size:0.84rem;color:var(--text-2);margin-top:0.4rem;line-height:1.5;} + .ai-disabled{padding:0.75rem;color:var(--text-3);font-size:0.85rem;background:var(--surface-alt);border:1px dashed var(--border);border-radius:6px;} + /* File match tester */ .fmt-result{margin-top:0.5rem;padding:0.5rem 0.7rem;border-radius:8px;font-size:0.84rem;} .fmt-match{background:var(--green-light);border:1px 
solid var(--green);color:var(--green);} @@ -484,6 +512,85 @@

By rule

+ +
+
+

AI settingsAI

+ +
+
+

Configure the local LLM provider (Ollama) for the AI rule suggester. Settings are in-memory and reset on server restart.

+
+ +
+
+ + + +
+
+ + +
+
+

AI rule suggesterAI

+
+
+ + +
+
+
@@ -813,6 +920,7 @@

Keyboard shortcuts

var lbl=$('authRepoLabel'); if(lbl){lbl.textContent=d.auth_repo;} } + setLLMAvailable(!!d.llm_available); }).catch(function(){}); } @@ -1343,6 +1451,229 @@

Keyboard shortcuts

document.querySelectorAll('.wf-card').forEach(function(c){c.classList.remove('wf-match','wf-nomatch');}); }; +/* ── AI rule suggester ── */ +var _llmAvailable=false; +function setLLMAvailable(enabled){ + _llmAvailable=enabled; + $('aiEnabledUI').hidden=!enabled; + $('aiDisabledNote').hidden=enabled; +} +async function generateRuleSuggestion(){ + showErr('aiErr',''); + var src=$('aiSource').value.trim(); + var tgt=$('aiTarget').value.trim(); + if(!src||!tgt){showErr('aiErr','Both source and target paths are required');return;} + var btn=$('aiGenerate'); + var orig=btn.textContent; + btn.disabled=true;btn.textContent='Generating\u2026 (can take 10-60s)'; + try{ + var res=await fetch(appURL('/operator/api/suggest-rule'),{ + method:'POST', + headers:Object.assign({'Content-Type':'application/json'},authHeaders()), + body:JSON.stringify({ + source_path:src, + target_path:tgt, + target_repo:$('aiTargetRepo').value.trim(), + source_repo:$('aiSourceRepo').value.trim() + }) + }); + var body=await res.json().catch(function(){return{};}); + if(!res.ok){showErr('aiErr',body.error||res.statusText);$('aiResult').innerHTML='';return;} + renderAIResult(body); + }catch(e){showErr('aiErr','Request failed: '+e);} + finally{btn.disabled=false;btn.textContent=orig;} +} +function renderAIResult(body){ + var html=''; + var verify=body.verified + ?'
\u2713 Verified — rule produces the expected target path
' + :'
\u2717 Not verified — rule does not produce the expected target path
'; + html+=verify; + if(!body.verified&&body.computed_path){ + html+='
Rule would produce: '+escapeHtml(body.computed_path)+'
'; + } + if(!body.verified&&body.verify_error){ + html+='
Reason: '+escapeHtml(body.verify_error)+'
'; + } + if(body.warning){ + html+='
\u26A0 '+escapeHtml(body.warning)+'
'; + } + html+='
'; + html+='Generated rule'; + html+=''; + html+='
'; + html+='
'+escapeHtml(body.rule_yaml||'')+'
'; + if(body.explanation){ + html+='
Why: '+escapeHtml(body.explanation)+'
'; + } + $('aiResult').innerHTML='
'+html+'
'; +} +function copyAIRule(){ + var yaml=$('aiYAML').textContent; + navigator.clipboard.writeText(yaml).then(function(){toast('Rule YAML copied to clipboard','info');}).catch(function(){toast('Copy failed','error');}); +} +$('aiGenerate').onclick=generateRuleSuggestion; +$('aiClear').onclick=function(){ + $('aiSource').value='';$('aiTarget').value='';$('aiTargetRepo').value='';$('aiSourceRepo').value=''; + $('aiResult').innerHTML='';showErr('aiErr',''); +}; +// Pre-fill from the file match tester if the user has already typed a source path +$('aiSource').addEventListener('focus',function(){ + if(!this.value){var fmt=$('fmtPath').value.trim();if(fmt)this.value=fmt;} +}); + +/* ── AI settings panel ── */ +var _aiLastStatus=null; +function fmtBytes(n){ + if(!n||n<=0)return''; + var units=['B','KB','MB','GB','TB'];var i=0;var v=n; + while(v>=1024&&i'+(s.error?' — '+escapeHtml(s.error):''); + $('aiConnectedUI').hidden=true;$('aiNotConnectedUI').hidden=false; + $('aiNotConnectedURL').textContent=s.base_url||''; + if(!$('aiBaseURLDisc').value) $('aiBaseURLDisc').value=s.base_url||''; + return; + } + line.className='ai-status-line ai-ok'; + line.innerHTML=' Ollama connected at '+escapeHtml(s.base_url||'')+''; + $('aiConnectedUI').hidden=false;$('aiNotConnectedUI').hidden=true; + + // Populate model dropdown and list + var sel=$('aiActiveModelSel');sel.innerHTML=''; + var list=$('aiModelList');list.innerHTML=''; + var models=s.models||[]; + if(models.length===0){ + list.innerHTML='
\u{1F4E6}
No models installed. Pull one below to get started.
'; + }else{ + models.forEach(function(m){ + var opt=document.createElement('option');opt.value=m.name;opt.textContent=m.name; + if(m.name===s.active_model)opt.selected=true; + sel.appendChild(opt); + var row=document.createElement('div'); + row.className='ai-model-row'+(m.name===s.active_model?' active':''); + var badge=m.name===s.active_model?' \u2713 active':''; + row.innerHTML=''+escapeHtml(m.name)+badge+'' + +''+escapeHtml(fmtBytes(m.size))+'' + +''; + list.appendChild(row); + }); + } + if(!$('aiBaseURL').value) $('aiBaseURL').value=s.base_url||''; +} +$('aiRefreshStatus').onclick=function(){withLoading('aiRefreshStatus',loadAISettingsStatus);}; +$('aiSetActive').onclick=async function(){ + var m=$('aiActiveModelSel').value; + if(!m){toast('Pick a model first','info');return;} + await updateLLMSettings({active_model:m}); +}; +$('aiSaveBaseURL').onclick=async function(){ + var u=$('aiBaseURL').value.trim(); + if(!u){toast('Enter a base URL','info');return;} + await updateLLMSettings({base_url:u}); +}; +$('aiSaveBaseURLDisc').onclick=async function(){ + var u=$('aiBaseURLDisc').value.trim(); + if(!u){toast('Enter a base URL','info');return;} + await updateLLMSettings({base_url:u}); +}; +async function updateLLMSettings(settings){ + try{ + var res=await fetch(appURL('/operator/api/llm/settings'),{ + method:'POST', + headers:Object.assign({'Content-Type':'application/json'},authHeaders()), + body:JSON.stringify(settings) + }); + var body=await res.json().catch(function(){return{};}); + if(!res.ok){toast('Update failed: '+(body.error||res.statusText),'error');return;} + toast('Settings updated','info'); + loadAISettingsStatus(); + }catch(e){toast('Request failed: '+e,'error');} +} +async function aiDeleteModel(name){ + if(!confirm('Delete model "'+name+'"? 
This frees disk space on the Ollama server.'))return; + try{ + var res=await fetch(appURL('/operator/api/llm/model?name='+encodeURIComponent(name)),{method:'DELETE',headers:authHeaders()}); + var body=await res.json().catch(function(){return{};}); + if(!res.ok){toast('Delete failed: '+(body.error||res.statusText),'error');return;} + toast('Deleted '+name,'info'); + loadAISettingsStatus(); + }catch(e){toast('Request failed: '+e,'error');} +} +// Pull with streaming NDJSON progress +$('aiPullStart').onclick=async function(){ + var name=$('aiPullName').value.trim(); + if(!name){toast('Enter a model name (e.g. qwen2.5-coder:7b)','info');return;} + var btn=$('aiPullStart'); + var orig=btn.textContent; + btn.disabled=true;btn.textContent='Pulling\u2026'; + var prog=$('aiPullProgress');prog.hidden=false; + prog.innerHTML='
Pulling '+escapeHtml(name)+'
starting\u20260%
'; + try{ + var res=await fetch(appURL('/operator/api/llm/pull'),{ + method:'POST', + headers:Object.assign({'Content-Type':'application/json'},authHeaders()), + body:JSON.stringify({name:name}) + }); + if(!res.ok){ + var errBody=await res.json().catch(function(){return{};}); + prog.querySelector('.ai-status-text').textContent='failed: '+(errBody.error||res.statusText); + return; + } + var reader=res.body.getReader(); + var decoder=new TextDecoder(); + var buf=''; + while(true){ + var chunk=await reader.read(); + if(chunk.done)break; + buf+=decoder.decode(chunk.value,{stream:true}); + var lines=buf.split('\n'); + buf=lines.pop(); + lines.forEach(function(line){ + line=line.trim();if(!line)return; + try{ + var ev=JSON.parse(line); + var pct=0; + if(ev.total&&ev.completed)pct=Math.round(ev.completed/ev.total*100); + prog.querySelector('.ai-status-text').textContent=(ev.status||'')+(ev.error?' ('+ev.error+')':''); + if(pct>0){ + prog.querySelector('.ai-progress-fill').style.width=pct+'%'; + prog.querySelector('.ai-progress-pct').textContent=pct+'%'+(ev.total?' · '+fmtBytes(ev.completed)+' / '+fmtBytes(ev.total):''); + } + }catch(e){} + }); + } + toast('Pull complete: '+name,'info'); + $('aiPullName').value=''; + loadAISettingsStatus(); + }catch(e){toast('Pull failed: '+e,'error');} + finally{btn.disabled=false;btn.textContent=orig;} +}; + /* ── Workflow browser ── */ var _wfData=[]; async function loadWorkflows(){ @@ -1527,7 +1858,7 @@

Keyboard shortcuts

}; /* ── Global refresh ── */ -function loadAllSecured(){if(!hasToken())return;loadAudit();loadWebhookTraces();loadDeliveries();loadOverview();loadWorkflows();$('loadDeploy').click();} +function loadAllSecured(){if(!hasToken())return;loadAudit();loadWebhookTraces();loadDeliveries();loadOverview();loadWorkflows();loadAISettingsStatus();$('loadDeploy').click();} function refreshAll(){if(COP_ORIGIN){loadMetricsCards();loadAllProbes();}loadAllSecured();} $('refreshAll').onclick=refreshAll; @@ -1571,7 +1902,7 @@

Keyboard shortcuts

initDarkMode();initMode();syncTimeBtn();initSections();initTabs();initFromURL();checkServerStatus();startHeartbeat(); if(COP_ORIGIN){ loadAllProbes();loadMetricsCards();restoreAutoRefresh(); - if(hasToken()){fetchMe();loadAudit();loadWebhookTraces();loadOverview();$('loadDeploy').click();} + if(hasToken()){fetchMe();loadAudit();loadWebhookTraces();loadOverview();loadAISettingsStatus();$('loadDeploy').click();} } From 35ae635346bb3955e0fcb7327aab62ed7995f483 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 11:10:19 -0400 Subject: [PATCH 06/20] security: fix gosec G107/G704 SSRF findings on PR branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gosec CI was failing on 3 SSRF findings via taint analysis: operator_auth.go (2 findings): ghAPIGetRepoPermission constructs a GitHub API URL using the repo owner/name and username. While the host is hardcoded, gosec flags the user-derived path components. - Add strict input validation: ghUsernameRe and ghRepoNameRe whitelists enforce GitHub's username and repo name rules before URL construction (using RE2-compatible syntax — no lookahead) - Use url.PathEscape on path components as defense in depth - Add #nosec G107 G704 with justification on the Request and Do calls slack_notifier.go (1 finding): sendPayload had an existing nosec on the Do call but gosec now also flags NewRequestWithContext on the line above. - Extend the nosec annotation to cover the Request call No behavioral change — only input validation tightening and nosec coverage on calls where the URL is either hardcoded (GitHub API) or comes from trusted server config (Slack webhook). 
--- services/operator_auth.go | 32 +++++++++++++++++++++++++++++--- services/slack_notifier.go | 4 ++-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/services/operator_auth.go b/services/operator_auth.go index 947a369..f624757 100644 --- a/services/operator_auth.go +++ b/services/operator_auth.go @@ -6,11 +6,23 @@ import ( "fmt" "io" "net/http" + "net/url" + "regexp" "strings" "sync" "time" ) +// ghUsernameRe matches valid GitHub usernames: alphanumeric + hyphens, +// cannot start or end with a hyphen, max 39 chars. Used to reject hostile +// input before it reaches URL construction for the GitHub API. (RE2 has no +// lookahead, so this doesn't reject consecutive hyphens — that's a GitHub +// policy issue, not a security one; such requests simply fail downstream.) +var ghUsernameRe = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$`) + +// ghRepoNameRe matches valid GitHub repo names. +var ghRepoNameRe = regexp.MustCompile(`^[a-zA-Z0-9_.-]{1,100}$`) + // OperatorRole represents the permission level for the operator UI. type OperatorRole string @@ -239,8 +251,22 @@ func ghAPIGetRepoPermission(ctx context.Context, pat string, repo string, userna if len(parts) != 2 { return "", fmt.Errorf("invalid repo format: %s (expected owner/repo)", repo) } - url := fmt.Sprintf("https://api.github.com/repos/%s/%s/collaborators/%s/permission", parts[0], parts[1], username) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + // Validate path components against strict whitelists before URL construction. + // Host is hardcoded to api.github.com — not user-controlled. 
+ if !ghUsernameRe.MatchString(parts[0]) { + return "", fmt.Errorf("invalid owner in repo %q", repo) + } + if !ghRepoNameRe.MatchString(parts[1]) { + return "", fmt.Errorf("invalid repo name in %q", repo) + } + if !ghUsernameRe.MatchString(username) { + return "", fmt.Errorf("invalid username %q", username) + } + apiURL := fmt.Sprintf( + "https://api.github.com/repos/%s/%s/collaborators/%s/permission", + url.PathEscape(parts[0]), url.PathEscape(parts[1]), url.PathEscape(username), + ) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) // #nosec G107 G704 -- host is hardcoded to api.github.com; path components validated above if err != nil { return "", err } @@ -248,7 +274,7 @@ func ghAPIGetRepoPermission(ctx context.Context, pat string, repo string, userna req.Header.Set("Accept", "application/vnd.github+json") req.Header.Set("X-GitHub-Api-Version", "2022-11-28") - resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req) + resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req) // #nosec G107 G704 -- host is hardcoded to api.github.com; path components validated above if err != nil { return "", err } diff --git a/services/slack_notifier.go b/services/slack_notifier.go index 95a0bfb..b9ac25d 100644 --- a/services/slack_notifier.go +++ b/services/slack_notifier.go @@ -413,14 +413,14 @@ func (sn *DefaultSlackNotifier) sendMessageWithFallback(ctx context.Context, mes // sendPayload sends the raw JSON payload to Slack func (sn *DefaultSlackNotifier) sendPayload(ctx context.Context, payload []byte) error { - req, err := http.NewRequestWithContext(ctx, "POST", sn.webhookURL, bytes.NewBuffer(payload)) + req, err := http.NewRequestWithContext(ctx, "POST", sn.webhookURL, bytes.NewBuffer(payload)) // #nosec G107 G704 -- URL is the Slack webhook URL from trusted server config, not user input if err != nil { return fmt.Errorf("failed to create slack request: %w", err) } req.Header.Set("Content-Type", "application/json") - resp, err := 
sn.httpClient.Do(req) // #nosec G704 -- URL is the Slack webhook URL from trusted config + resp, err := sn.httpClient.Do(req) // #nosec G107 G704 -- URL is the Slack webhook URL from trusted server config, not user input if err != nil { return fmt.Errorf("failed to send slack message: %w", err) } From 82b1c52979ceaf0130d77cd14af7aef5c649c81e Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 12:02:51 -0400 Subject: [PATCH 07/20] chore(startup): show operator UI and AI settings in banner Surface OPERATOR_UI_ENABLED, auth repo, AI model, and AI base URL in the startup banner so local dev runs make the active operator/AI configuration visible at a glance. Adds a truncMiddle helper (ASCII ellipsis) to keep long paths and URLs from breaking the banner's byte-count-aligned padding. --- app.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/app.go b/app.go index a7eed2c..03b3b3e 100644 --- a/app.go +++ b/app.go @@ -136,15 +136,34 @@ func printBanner(config *configs.Config, container *services.ServiceContainer) { fmt.Printf("║ Version: %-48s║\n", version) fmt.Printf("║ Port: %-48s║\n", config.Port) fmt.Printf("║ Webhook Path: %-48s║\n", config.WebserverPath) - fmt.Printf("║ Config File: %-48s║\n", config.EffectiveConfigFile()) + fmt.Printf("║ Config File: %-48s║\n", truncMiddle(config.EffectiveConfigFile(), 48)) fmt.Printf("║ Dry Run: %-48v║\n", config.DryRun) fmt.Printf("║ Audit Log: %-48v║\n", config.AuditEnabled) fmt.Printf("║ Metrics: %-48v║\n", config.MetricsEnabled) fmt.Printf("║ Slack: %-48v║\n", config.SlackEnabled) + fmt.Printf("║ Operator UI: %-48v║\n", config.OperatorUIEnabled) + if config.OperatorUIEnabled { + fmt.Printf("║ Auth Repo: %-48s║\n", truncMiddle(config.OperatorAuthRepo, 48)) + fmt.Printf("║ AI Model: %-48s║\n", truncMiddle(config.LLMModel, 48)) + fmt.Printf("║ AI URL: %-48s║\n", truncMiddle(config.LLMBaseURL, 48)) + } 
fmt.Println("╚════════════════════════════════════════════════════════════════╝") fmt.Println() } +// truncMiddle shortens s to max bytes, replacing the middle with "..." when +// too long. Uses ASCII so Go's byte-count-based %-Ns padding stays aligned. +func truncMiddle(s string, max int) string { + if len(s) <= max { + return s + } + if max < 6 { + return s[:max] + } + keep := (max - 3) / 2 + return s[:keep] + "..." + s[len(s)-(max-3-keep):] +} + func validateConfiguration(container *services.ServiceContainer) error { ctx := context.Background() _, err := container.ConfigLoader.LoadConfig(ctx, container.Config) From e4536da655dfa2174a54c3f2710d939102140d60 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 15:16:38 -0400 Subject: [PATCH 08/20] chore(deploy): wire operator UI, enable audit, populate changelog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prep the Cloud Run deploy for the operator UI + AI rule suggester work: - CI deploy env vars (ci.yml): - OPERATOR_UI_ENABLED=true - OPERATOR_AUTH_REPO=grove-platform/github-copier (required when UI is on) - OPERATOR_REPO_SLUG=grove-platform/github-copier (for audit-row deep links) - AUDIT_ENABLED flipped false → true, aligning with the v0.3.0 "enabled by default" CHANGELOG entry; the operator UI's Audit tab is inert without it and MONGO_URI_SECRET_NAME is already wired. - env-cloudrun.yaml: drop the stale OPERATOR_UI_TOKEN comment (token auth mode was removed earlier on this branch) and document the PAT/auth-repo model plus optional LLM_* overrides. - CHANGELOG.md: populate [Unreleased] so release.sh has content to extract when the next tag is cut. Covers the operator UI, PAT auth, AI rule suggester, per-delivery logs, empty-commit fix, audit-decode fix, and the gosec SSRF hardening. 
LLM_* deliberately left unset in the deploy — defaults point at localhost, which is unreachable from Cloud Run; operators can configure a real endpoint from the UI at runtime. --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 27 ++++++++++++++++++++++++++- env-cloudrun.yaml | 19 ++++++++++++++++--- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89165be..0a78c33 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -128,7 +128,7 @@ jobs: --region $REGION \ --project $PROJECT_ID \ --allow-unauthenticated \ - --set-env-vars="^|^CONFIG_REPO_OWNER=grove-platform|CONFIG_REPO_NAME=github-copier|CONFIG_REPO_BRANCH=main|PEM_NAME=CODE_COPIER_PEM|WEBHOOK_SECRET_NAME=webhook-secret|MONGO_URI_SECRET_NAME=mongo-uri|WEBSERVER_PATH=/events|MAIN_CONFIG_FILE=.copier/main.yaml|USE_MAIN_CONFIG=true|DEPRECATION_FILE=deprecated_examples.json|COMMITTER_NAME=GitHub Copier App|COMMITTER_EMAIL=bot@mongodb.com|GOOGLE_CLOUD_PROJECT_ID=github-copy-code-examples|COPIER_LOG_NAME=code-copier-log|AUDIT_ENABLED=false|METRICS_ENABLED=true|GITHUB_APP_ID=${{ secrets.APP_ID }}|INSTALLATION_ID=${{ secrets.INSTALLATION_ID }}" \ + --set-env-vars="^|^CONFIG_REPO_OWNER=grove-platform|CONFIG_REPO_NAME=github-copier|CONFIG_REPO_BRANCH=main|PEM_NAME=CODE_COPIER_PEM|WEBHOOK_SECRET_NAME=webhook-secret|MONGO_URI_SECRET_NAME=mongo-uri|WEBSERVER_PATH=/events|MAIN_CONFIG_FILE=.copier/main.yaml|USE_MAIN_CONFIG=true|DEPRECATION_FILE=deprecated_examples.json|COMMITTER_NAME=GitHub Copier App|COMMITTER_EMAIL=bot@mongodb.com|GOOGLE_CLOUD_PROJECT_ID=github-copy-code-examples|COPIER_LOG_NAME=code-copier-log|AUDIT_ENABLED=true|METRICS_ENABLED=true|OPERATOR_UI_ENABLED=true|OPERATOR_AUTH_REPO=grove-platform/github-copier|OPERATOR_REPO_SLUG=grove-platform/github-copier|GITHUB_APP_ID=${{ secrets.APP_ID }}|INSTALLATION_ID=${{ secrets.INSTALLATION_ID }}" \ --set-build-env-vars="VERSION=${{ steps.version.outputs.tag }}" \ --tag="${{ 
steps.version.outputs.traffic_tag }}" \ --max-instances=10 \ diff --git a/CHANGELOG.md b/CHANGELOG.md index bb44838..4830b4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,32 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] -## [v0.3.0] - 2026-04-14 +### Added + +- **Operator UI — comprehensive writer + operator dashboard** at `/operator/` (`OPERATOR_UI_ENABLED=true`). Five tabs (Overview, Webhooks, Audit, Workflows, System), sticky status bar, dark mode, keyboard shortcuts, shareable URLs, and a writer/operator mode toggle persisted to localStorage. +- **GitHub PAT authentication** — users sign in with their personal access token; role is derived from their permission on `OPERATOR_AUTH_REPO` (admin/maintain/write → operator, read/triage → writer). Replay and other write actions additionally enforce read access on the source repo for that specific delivery. +- **AI rule suggester (Ollama)** — paste a source path and desired target state, receive a suggested workflow rule with self-verification via the in-process `PatternMatcher`. Fully UI-managed: connect to an Ollama endpoint, pull/delete models, switch the active model, all without a redeploy. +- **Writer-facing features** — workflow browser with per-rule coverage, PR lookup by URL, recent copies feed, file match tester (with clear button and Python-style `(?P<name>)` regex translation for in-browser use), PR timeline, and in-app help overlay. +- **Per-delivery log viewer** — context-tagged ring buffer captures logs per webhook delivery, surfaced in an audit drawer alongside the trace and outcome summary. +- **Audit event enrichment** — `processed_ok` traces now include destination repo(s), files matched / uploaded / failed, and commit SHA. +- **Startup banner** — Operator UI, auth repo, AI model, and AI base URL are now surfaced when the app boots (local and Cloud Run).
+ +### Changed + +- **MongoDB audit logging enabled in production** — the Cloud Run deploy previously forced `AUDIT_ENABLED=false`; it is now `true`, aligning with the v0.3.0 "enabled by default" change. +- **Operator auth hardened** — token-based auth (`OPERATOR_UI_TOKEN`) removed entirely; GitHub PAT is the only supported mechanism. `OPERATOR_UI_ENABLED=true` now requires `OPERATOR_AUTH_REPO` at config load (validated in `validateOperatorAuth`). +- **`createPullRequest` skipped for empty commits** — `commitFilesToBranch` now returns an `errTreeUnchanged` sentinel so `addFilesViaPR` no longer calls the GitHub PR API with an unchanged tree (previously 422'd). +- **MongoDB driver v2 ObjectID decoding** — audit reads set `ObjectIDAsHexString: true` to avoid "error decoding key `_id`" on queries. + +### Fixed + +- **gosec G107 / G704 SSRF findings** — GitHub API URL construction in `services/operator_auth.go` now validates path components against strict RE2-compatible whitelists (`ghUsernameRe`, `ghRepoNameRe`) and escapes them with `url.PathEscape` before request construction; `slack_notifier.go` `#nosec` annotation extended to cover `NewRequestWithContext`. +- **Keyboard-shortcut overlay wouldn't close** — `.help-bg[hidden]` now wins over the base `display:flex`. +- **File match tester returned no matches for Java files** — JavaScript `RegExp` does not support Python-style `(?P<name>)` named groups; the tester now rewrites `(?P<` → `(?<` before compilation. + +### Security + +- **Token auth removed** — the operator UI no longer accepts a shared bearer token; all access is per-user via GitHub PAT with repo-scoped permission checks. ### Changed diff --git a/env-cloudrun.yaml b/env-cloudrun.yaml index b61726a..9f70d77 100644 --- a/env-cloudrun.yaml +++ b/env-cloudrun.yaml @@ -34,6 +34,19 @@ COPIER_LOG_NAME: "code-copier-log" AUDIT_ENABLED: "true" METRICS_ENABLED: "true" -# Operator UI is disabled by default (OPERATOR_UI_ENABLED unset/false).
Intended for -# local runs only — do not set OPERATOR_UI_ENABLED in Cloud Run unless you explicitly -# want this surface on the internet-facing service. +# Operator dashboard at https://<service-url>/operator/ +# Access is gated by each user's GitHub PAT: they authenticate with their +# personal token, and their permission on OPERATOR_AUTH_REPO determines role +# (admin/maintain/write → operator, read/triage → writer). +OPERATOR_UI_ENABLED: "true" +OPERATOR_AUTH_REPO: "grove-platform/github-copier" +OPERATOR_REPO_SLUG: "grove-platform/github-copier" +# +# Optional: OPERATOR_RELEASE_GITHUB_TOKEN, OPERATOR_RELEASE_TARGET_BRANCH +# +# AI rule suggester (optional) — if set, points the operator UI at an +# Ollama-compatible endpoint. Operators can also configure base URL and +# active model from the UI at runtime without a redeploy. +# LLM_PROVIDER: "ollama" +# LLM_BASE_URL: "http://ollama.internal:11434" +# LLM_MODEL: "qwen2.5-coder:7b" From f3818044b2205864344392f4ca147d7139c75537 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 15:20:54 -0400 Subject: [PATCH 09/20] docs: restore v0.3.0 heading in CHANGELOG The previous commit's edit accidentally dropped the '## [v0.3.0] - 2026-04-14' heading, leaving its Changed/Fixed/Security sections orphaned under [Unreleased]. Re-add the heading so the release history stays intact. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4830b4c..41c57b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Token auth removed** — the operator UI no longer accepts a shared bearer token; all access is per-user via GitHub PAT with repo-scoped permission checks. +## [v0.3.0] - 2026-04-14 + ### Changed - **Audit logging enabled** — MongoDB audit logging is now enabled by default.
From 0e863eedee60deeb95e4d7f33a5de19bbe249fc5 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Tue, 21 Apr 2026 15:44:55 -0400 Subject: [PATCH 10/20] feat(operator): Anthropic LLM provider for AI rule suggester MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an Anthropic Messages API client alongside the existing Ollama client so the AI rule suggester works on Cloud Run without standing up a model-serving VM. Ollama stays as the local/dev option. Backend - New services/llm_anthropic.go implementing LLMClient against /v1/messages and /v1/models. Pins anthropic-version=2023-06-01 and sends a JSON-only guardrail in the system prompt (Anthropic has no native JSON mode); strips ``` code fences from responses as a defensive post-process. - llm_client.go: add ErrModelManagementNotSupported sentinel; change NewLLMClient to take LLMClientOptions (provider, base URL, model, API key) and dispatch anthropic/ollama. - operator_llm_admin.go: include provider + supports_model_mgmt in the /llm/status response so the UI can branch; map the sentinel to 400 on pull/delete. - configs/environment.go: add AnthropicAPIKey and AnthropicAPIKeySecretName fields + env vars; per-provider LLM_BASE_URL / LLM_MODEL defaults (Anthropic defaults to api.anthropic.com + claude-haiku-4-5-20251001). - github_auth.go: LoadAnthropicAPIKey follows the existing Secret Manager pattern used for the webhook secret and Mongo URI. - app.go: call LoadAnthropicAPIKey on boot (non-fatal — UI shows "not configured" if missing); banner now prints AI Provider. UI - web/operator/index.html: renderAISettings branches on provider/ supports_model_mgmt to hide Ollama-only pull/delete sections for hosted providers; separate onboarding text for each. Status line labels the provider ("Anthropic connected at…" vs "Ollama connected at…"). 
Deploy config - ci.yml + env-cloudrun.yaml: select anthropic, default to claude-haiku-4-5-20251001, load the API key from Secret Manager via ANTHROPIC_API_KEY_SECRET_NAME=anthropic-api-key. Operators can still switch the active model from the UI at runtime. Before deploy, a one-time step is needed on the prod project: gcloud secrets create anthropic-api-key --data-file=<key-file> gcloud secrets add-iam-policy-binding anthropic-api-key \ --member=serviceAccount:<runtime-service-account> --role=roles/secretmanager.secretAccessor --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 4 +- app.go | 10 ++ configs/environment.go | 24 ++- env-cloudrun.yaml | 15 +- services/github_auth.go | 20 +++ services/llm_anthropic.go | 267 +++++++++++++++++++++++++++++++ services/llm_client.go | 29 +++- services/operator_llm_admin.go | 17 +- services/operator_ui.go | 7 +- services/web/operator/index.html | 87 ++++++---- 11 files changed, 439 insertions(+), 43 deletions(-) create mode 100644 services/llm_anthropic.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0a78c33..842d3da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -128,7 +128,7 @@ jobs: --region $REGION \ --project $PROJECT_ID \ --allow-unauthenticated \ - --set-env-vars="^|^CONFIG_REPO_OWNER=grove-platform|CONFIG_REPO_NAME=github-copier|CONFIG_REPO_BRANCH=main|PEM_NAME=CODE_COPIER_PEM|WEBHOOK_SECRET_NAME=webhook-secret|MONGO_URI_SECRET_NAME=mongo-uri|WEBSERVER_PATH=/events|MAIN_CONFIG_FILE=.copier/main.yaml|USE_MAIN_CONFIG=true|DEPRECATION_FILE=deprecated_examples.json|COMMITTER_NAME=GitHub Copier App|COMMITTER_EMAIL=bot@mongodb.com|GOOGLE_CLOUD_PROJECT_ID=github-copy-code-examples|COPIER_LOG_NAME=code-copier-log|AUDIT_ENABLED=true|METRICS_ENABLED=true|OPERATOR_UI_ENABLED=true|OPERATOR_AUTH_REPO=grove-platform/github-copier|OPERATOR_REPO_SLUG=grove-platform/github-copier|GITHUB_APP_ID=${{ secrets.APP_ID }}|INSTALLATION_ID=${{ secrets.INSTALLATION_ID }}" \ +
--set-env-vars="^|^CONFIG_REPO_OWNER=grove-platform|CONFIG_REPO_NAME=github-copier|CONFIG_REPO_BRANCH=main|PEM_NAME=CODE_COPIER_PEM|WEBHOOK_SECRET_NAME=webhook-secret|MONGO_URI_SECRET_NAME=mongo-uri|WEBSERVER_PATH=/events|MAIN_CONFIG_FILE=.copier/main.yaml|USE_MAIN_CONFIG=true|DEPRECATION_FILE=deprecated_examples.json|COMMITTER_NAME=GitHub Copier App|COMMITTER_EMAIL=bot@mongodb.com|GOOGLE_CLOUD_PROJECT_ID=github-copy-code-examples|COPIER_LOG_NAME=code-copier-log|AUDIT_ENABLED=true|METRICS_ENABLED=true|OPERATOR_UI_ENABLED=true|OPERATOR_AUTH_REPO=grove-platform/github-copier|OPERATOR_REPO_SLUG=grove-platform/github-copier|LLM_PROVIDER=anthropic|LLM_MODEL=claude-haiku-4-5-20251001|ANTHROPIC_API_KEY_SECRET_NAME=anthropic-api-key|GITHUB_APP_ID=${{ secrets.APP_ID }}|INSTALLATION_ID=${{ secrets.INSTALLATION_ID }}" \ --set-build-env-vars="VERSION=${{ steps.version.outputs.tag }}" \ --tag="${{ steps.version.outputs.traffic_tag }}" \ --max-instances=10 \ diff --git a/CHANGELOG.md b/CHANGELOG.md index 41c57b3..bd9e165 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Operator UI — comprehensive writer + operator dashboard** at `/operator/` (`OPERATOR_UI_ENABLED=true`). Five tabs (Overview, Webhooks, Audit, Workflows, System), sticky status bar, dark mode, keyboard shortcuts, shareable URLs, and a writer/operator mode toggle persisted to localStorage. - **GitHub PAT authentication** — users sign in with their personal access token; role is derived from their permission on `OPERATOR_AUTH_REPO` (admin/maintain/write → operator, read/triage → writer). Replay and other write actions additionally enforce read access on the source repo for that specific delivery. -- **AI rule suggester (Ollama)** — paste a source path and desired target state, receive a suggested workflow rule with self-verification via the in-process `PatternMatcher`. 
Fully UI-managed: connect to an Ollama endpoint, pull/delete models, switch the active model, all without a redeploy. +- **AI rule suggester** — paste a source path and desired target state, receive a suggested workflow rule with self-verification via the in-process `PatternMatcher`. Two providers supported: + - **Anthropic (hosted)** — default for Cloud Run. API key loaded from Secret Manager via `ANTHROPIC_API_KEY_SECRET_NAME`. No infra required; operators switch between Haiku / Sonnet / Opus from the UI. + - **Ollama (local)** — for dev or self-hosted deployments. UI manages connection, model pulls, deletes, and active-model switching without a redeploy. - **Writer-facing features** — workflow browser with per-rule coverage, PR lookup by URL, recent copies feed, file match tester (with clear button and Python-style `(?P)` regex translation for in-browser use), PR timeline, and in-app help overlay. - **Per-delivery log viewer** — context-tagged ring buffer captures logs per webhook delivery, surfaced in an audit drawer alongside the trace and outcome summary. - **Audit event enrichment** — `processed_ok` traces now include destination repo(s), files matched / uploaded / failed, and commit SHA. diff --git a/app.go b/app.go index 03b3b3e..ac4d804 100644 --- a/app.go +++ b/app.go @@ -63,6 +63,15 @@ func main() { os.Exit(1) } + // Anthropic API key is only needed when the operator UI's AI suggester uses + // the anthropic provider. Failure to load is non-fatal — the UI will show + // "not configured" and writers can still use every other feature. 
+ if config.OperatorUIEnabled && config.LLMProvider == "anthropic" { + if err := services.LoadAnthropicAPIKey(ctx, config); err != nil { + fmt.Printf("⚠️ Anthropic API key not loaded: %v (AI suggester will be disabled)\n", err) + } + } + // Override dry-run from command line if dryRun { config.DryRun = true @@ -144,6 +153,7 @@ func printBanner(config *configs.Config, container *services.ServiceContainer) { fmt.Printf("║ Operator UI: %-48v║\n", config.OperatorUIEnabled) if config.OperatorUIEnabled { fmt.Printf("║ Auth Repo: %-48s║\n", truncMiddle(config.OperatorAuthRepo, 48)) + fmt.Printf("║ AI Provider:%-48s║\n", truncMiddle(config.LLMProvider, 48)) fmt.Printf("║ AI Model: %-48s║\n", truncMiddle(config.LLMModel, 48)) fmt.Printf("║ AI URL: %-48s║\n", truncMiddle(config.LLMBaseURL, 48)) } diff --git a/configs/environment.go b/configs/environment.go index 8d6ad05..14c7329 100644 --- a/configs/environment.go +++ b/configs/environment.go @@ -83,9 +83,15 @@ type Config struct { // AI rule suggestion (optional) — LLM-powered rule generation in the operator UI. // The feature is available whenever the LLM provider is reachable at runtime; // operators can change the active model and base URL from the UI without restart. - LLMProvider string // "ollama" (default). Pluggable for future providers. + LLMProvider string // "ollama" (local) or "anthropic" (hosted) LLMBaseURL string // initial default; overridable from the UI LLMModel string // initial default; overridable from the UI + + // Anthropic API key — required when LLMProvider="anthropic". Loaded from + // Secret Manager via AnthropicAPIKeySecretName, or directly via + // ANTHROPIC_API_KEY for local dev. 
+ AnthropicAPIKey string + AnthropicAPIKeySecretName string } const ( @@ -142,6 +148,8 @@ const ( LLMProvider = "LLM_PROVIDER" LLMBaseURL = "LLM_BASE_URL" LLMModel = "LLM_MODEL" + AnthropicAPIKey = "ANTHROPIC_API_KEY" // #nosec G101 -- env var name, not a credential + AnthropicAPIKeySecretName = "ANTHROPIC_API_KEY_SECRET_NAME" // #nosec G101 -- env var name, not a credential ) // NewConfig returns a new Config instance with default values @@ -266,9 +274,17 @@ func LoadEnvironment(envFile string) (*Config, error) { config.OperatorReleaseGitHubToken = os.Getenv(OperatorReleaseGitHubToken) config.OperatorReleaseTargetBranch = getEnvWithDefault(OperatorReleaseTargetBranch, "main") - config.LLMProvider = getEnvWithDefault(LLMProvider, "ollama") - config.LLMBaseURL = getEnvWithDefault(LLMBaseURL, "http://localhost:11434") - config.LLMModel = getEnvWithDefault(LLMModel, "qwen2.5-coder:7b") + config.LLMProvider = strings.ToLower(getEnvWithDefault(LLMProvider, "ollama")) + // Per-provider defaults: Ollama runs locally, Anthropic is hosted. + if config.LLMProvider == "anthropic" { + config.LLMBaseURL = getEnvWithDefault(LLMBaseURL, "https://api.anthropic.com") + config.LLMModel = getEnvWithDefault(LLMModel, "claude-haiku-4-5-20251001") + } else { + config.LLMBaseURL = getEnvWithDefault(LLMBaseURL, "http://localhost:11434") + config.LLMModel = getEnvWithDefault(LLMModel, "qwen2.5-coder:7b") + } + config.AnthropicAPIKey = os.Getenv(AnthropicAPIKey) + config.AnthropicAPIKeySecretName = os.Getenv(AnthropicAPIKeySecretName) if err := validateConfig(config); err != nil { return nil, err diff --git a/env-cloudrun.yaml b/env-cloudrun.yaml index 9f70d77..8fa9e29 100644 --- a/env-cloudrun.yaml +++ b/env-cloudrun.yaml @@ -44,9 +44,18 @@ OPERATOR_REPO_SLUG: "grove-platform/github-copier" # # Optional: OPERATOR_RELEASE_GITHUB_TOKEN, OPERATOR_RELEASE_TARGET_BRANCH # -# AI rule suggester (optional) — if set, points the operator UI at an -# Ollama-compatible endpoint. 
Operators can also configure base URL and -# active model from the UI at runtime without a redeploy. +# AI rule suggester — uses the hosted Anthropic API so Cloud Run can call it +# directly without standing up a model-serving VM. The API key is loaded from +# Secret Manager via ANTHROPIC_API_KEY_SECRET_NAME (create the secret once +# with `gcloud secrets create anthropic-api-key --data-file=...`). Operators +# can still switch the active model (haiku / sonnet / opus) from the UI +# without a redeploy. +LLM_PROVIDER: "anthropic" +LLM_MODEL: "claude-haiku-4-5-20251001" +ANTHROPIC_API_KEY_SECRET_NAME: "anthropic-api-key" +# +# To use a local Ollama instance instead (e.g. in a dev environment with a +# reachable Ollama VM), comment out the three lines above and use: # LLM_PROVIDER: "ollama" # LLM_BASE_URL: "http://ollama.internal:11434" # LLM_MODEL: "qwen2.5-coder:7b" diff --git a/services/github_auth.go b/services/github_auth.go index 8317d0a..25d80e4 100644 --- a/services/github_auth.go +++ b/services/github_auth.go @@ -158,6 +158,26 @@ func LoadMongoURI(ctx context.Context, config *configs.Config) error { return nil } +// LoadAnthropicAPIKey loads the Anthropic API key from Secret Manager or +// environment variable. Only called when the LLM provider is "anthropic". +// Missing value is non-fatal: NewLLMClient will refuse to construct a client, +// the operator UI will show "not configured", and the rest of the app runs. 
+func LoadAnthropicAPIKey(ctx context.Context, config *configs.Config) error { + if config.AnthropicAPIKey != "" { + return nil + } + if config.AnthropicAPIKeySecretName == "" { + return nil + } + resolvedName := config.SecretPath(config.AnthropicAPIKeySecretName) + key, err := getSecretFromSecretManager(ctx, resolvedName, "ANTHROPIC_API_KEY") + if err != nil { + return fmt.Errorf("failed to load Anthropic API key: %w", err) + } + config.AnthropicAPIKey = key + return nil +} + // getSecretFromSecretManager is a generic function to retrieve any secret from Secret Manager func getSecretFromSecretManager(ctx context.Context, secretName, envVarName string) (string, error) { if os.Getenv("SKIP_SECRET_MANAGER") == "true" { diff --git a/services/llm_anthropic.go b/services/llm_anthropic.go new file mode 100644 index 0000000..ea130b0 --- /dev/null +++ b/services/llm_anthropic.go @@ -0,0 +1,267 @@ +package services + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" + "time" +) + +// anthropicAPIVersion is pinned to the stable Messages API version. Bump this +// only when we intentionally adopt a new API contract. +const anthropicAPIVersion = "2023-06-01" + +// defaultAnthropicBaseURL is the hosted Anthropic API. Override (via SetBaseURL +// or LLM_BASE_URL) only to route through a gateway or proxy that speaks the +// same wire format. +const defaultAnthropicBaseURL = "https://api.anthropic.com" + +// anthropicFallbackModels is used when /v1/models returns an empty list or +// errors; keeps the UI usable with a known-good default set. 
+var anthropicFallbackModels = []LLMModel{ + {Name: "claude-opus-4-7"}, + {Name: "claude-sonnet-4-6"}, + {Name: "claude-haiku-4-5-20251001"}, +} + +type anthropicClient struct { + mu sync.RWMutex + baseURL string + model string + apiKey string + http *http.Client +} + +func newAnthropicClient(baseURL, model, apiKey string) *anthropicClient { + if strings.TrimSpace(baseURL) == "" { + baseURL = defaultAnthropicBaseURL + } + if strings.TrimSpace(model) == "" { + model = "claude-haiku-4-5-20251001" + } + return &anthropicClient{ + baseURL: strings.TrimSuffix(baseURL, "/"), + model: model, + apiKey: apiKey, + http: &http.Client{Timeout: 60 * time.Second}, + } +} + +func (c *anthropicClient) ProviderName() string { return "anthropic" } + +func (c *anthropicClient) GetBaseURL() string { + c.mu.RLock() + defer c.mu.RUnlock() + return c.baseURL +} + +func (c *anthropicClient) SetBaseURL(url string) { + c.mu.Lock() + defer c.mu.Unlock() + c.baseURL = strings.TrimSuffix(strings.TrimSpace(url), "/") +} + +func (c *anthropicClient) GetActiveModel() string { + c.mu.RLock() + defer c.mu.RUnlock() + return c.model +} + +func (c *anthropicClient) SetActiveModel(model string) { + c.mu.Lock() + defer c.mu.Unlock() + c.model = strings.TrimSpace(model) +} + +// newAuthedRequest builds a request with the Anthropic auth + version headers. +// Callers must have already validated that URL components are not user-supplied; +// the base URL is derived from a pinned default or operator-set value. 
+func (c *anthropicClient) newAuthedRequest(ctx context.Context, method, path string, body io.Reader) (*http.Request, error) { + req, err := http.NewRequestWithContext(ctx, method, c.GetBaseURL()+path, body) // #nosec G107 -- base URL is pinned default or operator-set; path is a literal constant + if err != nil { + return nil, err + } + req.Header.Set("x-api-key", c.apiKey) + req.Header.Set("anthropic-version", anthropicAPIVersion) + req.Header.Set("content-type", "application/json") + return req, nil +} + +// Ping calls GET /v1/models as an auth + reachability check. +func (c *anthropicClient) Ping(ctx context.Context) error { + if strings.TrimSpace(c.apiKey) == "" { + return fmt.Errorf("ANTHROPIC_API_KEY is not configured") + } + req, err := c.newAuthedRequest(ctx, http.MethodGet, "/v1/models", nil) + if err != nil { + return err + } + resp, err := c.http.Do(req) // #nosec G107 -- see newAuthedRequest + if err != nil { + return fmt.Errorf("anthropic unreachable at %s: %w", c.GetBaseURL(), err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + return fmt.Errorf("anthropic auth failed (HTTP %d) — check ANTHROPIC_API_KEY", resp.StatusCode) + } + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<15)) + return fmt.Errorf("anthropic returned %s: %s", resp.Status, strings.TrimSpace(string(body))) + } + return nil +} + +type anthropicModelsResponse struct { + Data []struct { + ID string `json:"id"` + DisplayName string `json:"display_name"` + CreatedAt string `json:"created_at"` + } `json:"data"` +} + +// ListModels returns models available to the account. Falls back to a static +// list if the API call fails so the UI stays usable. 
+func (c *anthropicClient) ListModels(ctx context.Context) ([]LLMModel, error) { + req, err := c.newAuthedRequest(ctx, http.MethodGet, "/v1/models", nil) + if err != nil { + return nil, err + } + resp, err := c.http.Do(req) // #nosec G107 -- see newAuthedRequest + if err != nil { + return anthropicFallbackModels, nil + } + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if resp.StatusCode != http.StatusOK { + return anthropicFallbackModels, nil + } + var out anthropicModelsResponse + if err := json.Unmarshal(body, &out); err != nil { + return anthropicFallbackModels, nil + } + if len(out.Data) == 0 { + return anthropicFallbackModels, nil + } + models := make([]LLMModel, 0, len(out.Data)) + for _, m := range out.Data { + models = append(models, LLMModel{Name: m.ID, ModifiedAt: m.CreatedAt}) + } + return models, nil +} + +// PullModel / DeleteModel are not supported for hosted providers. The UI +// hides the relevant sections when provider != "ollama", so these should not +// normally be reached; returning a sentinel lets the HTTP layer map cleanly. +func (c *anthropicClient) PullModel(_ context.Context, _ string, _ func(LLMPullProgress)) error { + return ErrModelManagementNotSupported +} + +func (c *anthropicClient) DeleteModel(_ context.Context, _ string) error { + return ErrModelManagementNotSupported +} + +// anthropicMessagesRequest is the body of POST /v1/messages. 
+type anthropicMessagesRequest struct { + Model string `json:"model"` + MaxTokens int `json:"max_tokens"` + System string `json:"system,omitempty"` + Messages []anthropicMessage `json:"messages"` +} + +type anthropicMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type anthropicMessagesResponse struct { + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + StopReason string `json:"stop_reason"` + Error *struct { + Type string `json:"type"` + Message string `json:"message"` + } `json:"error,omitempty"` +} + +// jsonGuardrail is appended to the system prompt to nudge the model toward +// raw JSON output. Anthropic has no native JSON mode on /v1/messages, so we +// rely on prompting + a post-processing fence strip. +const jsonGuardrail = "\n\nRespond with ONLY valid JSON — no prose, no explanations outside the JSON, no code fences, no backticks. Just the JSON object." + +func (c *anthropicClient) GenerateJSON(ctx context.Context, systemPrompt, userPrompt string) (string, error) { + if strings.TrimSpace(c.apiKey) == "" { + return "", fmt.Errorf("ANTHROPIC_API_KEY is not configured") + } + reqBody, err := json.Marshal(anthropicMessagesRequest{ + Model: c.GetActiveModel(), + MaxTokens: 4096, + System: systemPrompt + jsonGuardrail, + Messages: []anthropicMessage{{Role: "user", Content: userPrompt}}, + }) + if err != nil { + return "", fmt.Errorf("marshal anthropic request: %w", err) + } + + req, err := c.newAuthedRequest(ctx, http.MethodPost, "/v1/messages", bytes.NewReader(reqBody)) + if err != nil { + return "", err + } + resp, err := c.http.Do(req) // #nosec G107 -- see newAuthedRequest + if err != nil { + return "", fmt.Errorf("call anthropic at %s: %w", c.GetBaseURL(), err) + } + defer func() { _ = resp.Body.Close() }() + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("anthropic returned %s: %s", resp.Status, 
strings.TrimSpace(string(body))) + } + + var out anthropicMessagesResponse + if err := json.Unmarshal(body, &out); err != nil { + return "", fmt.Errorf("parse anthropic response: %w", err) + } + if out.Error != nil { + return "", fmt.Errorf("anthropic error: %s: %s", out.Error.Type, out.Error.Message) + } + // Concatenate all text blocks (usually one). + var sb strings.Builder + for _, block := range out.Content { + if block.Type == "text" { + sb.WriteString(block.Text) + } + } + raw := strings.TrimSpace(sb.String()) + if raw == "" { + return "", fmt.Errorf("anthropic returned empty response (model %q)", c.GetActiveModel()) + } + return stripJSONFences(raw), nil +} + +// stripJSONFences removes ```json ... ``` or ``` ... ``` wrappers that models +// sometimes add despite being asked for raw JSON. If the input doesn't look +// fenced, it's returned unchanged. +func stripJSONFences(s string) string { + t := strings.TrimSpace(s) + if !strings.HasPrefix(t, "```") { + return t + } + // Drop the opening fence (```json or ```) + if nl := strings.IndexByte(t, '\n'); nl >= 0 { + t = t[nl+1:] + } else { + return strings.TrimSpace(strings.TrimPrefix(t, "```")) + } + // Drop the trailing fence + if idx := strings.LastIndex(t, "```"); idx >= 0 { + t = t[:idx] + } + return strings.TrimSpace(t) +} diff --git a/services/llm_client.go b/services/llm_client.go index e5502ae..b928f6f 100644 --- a/services/llm_client.go +++ b/services/llm_client.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "io" "net/http" @@ -13,6 +14,12 @@ import ( "time" ) +// ErrModelManagementNotSupported is returned by providers (e.g. Anthropic) where +// model pulls/deletes don't apply. Handlers should map this to a 400-class +// response rather than a 502, since it's a client-intent error, not a backend +// failure. 
+var ErrModelManagementNotSupported = errors.New("model management not supported for this provider") + // LLMClient is the minimal interface used by the operator UI. It supports // runtime reconfiguration (active model, base URL) and provider management // operations (list/pull/delete models). @@ -64,13 +71,24 @@ type LLMPullProgress struct { Error string `json:"error,omitempty"` } +// LLMClientOptions carries the per-provider settings NewLLMClient needs. +// APIKey is required for hosted providers (anthropic); ignored by ollama. +type LLMClientOptions struct { + Provider string + BaseURL string + Model string + APIKey string +} + // NewLLMClient returns a client for the configured provider. -func NewLLMClient(provider, baseURL, model string) (LLMClient, error) { - switch strings.ToLower(strings.TrimSpace(provider)) { +func NewLLMClient(opts LLMClientOptions) (LLMClient, error) { + switch strings.ToLower(strings.TrimSpace(opts.Provider)) { case "", "ollama": + baseURL := opts.BaseURL if baseURL == "" { baseURL = "http://localhost:11434" } + model := opts.Model if model == "" { model = "qwen2.5-coder:7b" } @@ -82,8 +100,13 @@ func NewLLMClient(provider, baseURL, model string) (LLMClient, error) { // No timeout for pulls — model downloads can take 10+ minutes }, }, nil + case "anthropic": + if strings.TrimSpace(opts.APIKey) == "" { + return nil, fmt.Errorf("anthropic provider requires ANTHROPIC_API_KEY") + } + return newAnthropicClient(opts.BaseURL, opts.Model, opts.APIKey), nil default: - return nil, fmt.Errorf("unsupported LLM provider: %q (only \"ollama\" is implemented)", provider) + return nil, fmt.Errorf("unsupported LLM provider: %q (expected \"ollama\" or \"anthropic\")", opts.Provider) } } diff --git a/services/operator_llm_admin.go b/services/operator_llm_admin.go index 1521d9c..3cb7937 100644 --- a/services/operator_llm_admin.go +++ b/services/operator_llm_admin.go @@ -3,6 +3,7 @@ package services import ( "context" "encoding/json" + "errors" "fmt" "io" 
"net/http" @@ -24,6 +25,9 @@ func (o *operatorUI) handleLLMStatus(w http.ResponseWriter, r *http.Request) { "active_model": "", "reachable": false, "models": []LLMModel{}, + // supports_model_mgmt tells the UI whether to show pull/delete sections. + // Hosted providers (anthropic) don't expose those operations. + "supports_model_mgmt": strings.ToLower(strings.TrimSpace(o.cfg.LLMProvider)) != "anthropic", } if o.llm == nil { out["error"] = "LLM client not initialized" @@ -110,7 +114,11 @@ func (o *operatorUI) handleLLMDeleteModel(w http.ResponseWriter, r *http.Request ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) defer cancel() if err := o.llm.DeleteModel(ctx, name); err != nil { - w.WriteHeader(http.StatusBadGateway) + status := http.StatusBadGateway + if errors.Is(err, ErrModelManagementNotSupported) { + status = http.StatusBadRequest + } + w.WriteHeader(status) _ = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) return } @@ -145,6 +153,13 @@ func (o *operatorUI) handleLLMPullModel(w http.ResponseWriter, r *http.Request) _ = json.NewEncoder(w).Encode(map[string]string{"error": "name is required"}) return } + // Reject up-front for hosted providers so the client doesn't have to interpret + // an NDJSON error event. + if strings.ToLower(strings.TrimSpace(o.cfg.LLMProvider)) == "anthropic" { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{"error": ErrModelManagementNotSupported.Error()}) + return + } // Switch to NDJSON streaming w.Header().Set("Content-Type", "application/x-ndjson") diff --git a/services/operator_ui.go b/services/operator_ui.go index 2b9b97d..36abf22 100644 --- a/services/operator_ui.go +++ b/services/operator_ui.go @@ -37,7 +37,12 @@ func RegisterOperatorRoutes(mux *http.ServeMux, cfg *configs.Config, container * } // Always create the LLM client; availability is checked dynamically via Ping. 
// Operators can change the active model and base URL from the UI without restart. - if client, err := NewLLMClient(cfg.LLMProvider, cfg.LLMBaseURL, cfg.LLMModel); err != nil { + if client, err := NewLLMClient(LLMClientOptions{ + Provider: cfg.LLMProvider, + BaseURL: cfg.LLMBaseURL, + Model: cfg.LLMModel, + APIKey: cfg.AnthropicAPIKey, + }); err != nil { LogWarning("LLM client init failed", "error", err.Error()) } else { o.llm = client diff --git a/services/web/operator/index.html b/services/web/operator/index.html index 28bf425..b490b64 100644 --- a/services/web/operator/index.html +++ b/services/web/operator/index.html @@ -519,7 +519,7 @@

By rule

-

Configure the local LLM provider (Ollama) for the AI rule suggester. Settings are in-memory and reset on server restart.

+

Configure the LLM provider for the AI rule suggester. Settings are in-memory and reset on server restart.

@@ -529,14 +529,16 @@

By rule

-

Installed models

-
-

Pull a new model

-
-
- +
+

Installed models

+
+

Pull a new model

+
+
+ +
+
-

Base URL

@@ -544,8 +546,8 @@

Base URL