diff --git a/pkg/integrations/gitlab/glrunner.go b/pkg/integrations/gitlab/glrunner.go index f50ba64fa..69546703b 100644 --- a/pkg/integrations/gitlab/glrunner.go +++ b/pkg/integrations/gitlab/glrunner.go @@ -44,12 +44,13 @@ func (args *GitLabRunnerArgs) GetUserDataValues() *integrations.UserDataValues { return nil } return &integrations.UserDataValues{ - Name: args.Name, - Token: args.AuthToken, // Use auth token (set by Pulumi during deployment) - CliURL: downloadURL(), - RepoURL: args.URL, - Unsecure: args.Unsecure, - Concurrent: args.Concurrent, + Name: args.Name, + Token: args.AuthToken, // Use auth token (set by Pulumi during deployment) + CliURL: downloadURL(), + RepoURL: args.URL, + Unsecure: args.Unsecure, + Concurrent: args.Concurrent, + LogToJournald: args.LogToJournald, } } diff --git a/pkg/integrations/gitlab/snippet-darwin.sh b/pkg/integrations/gitlab/snippet-darwin.sh index ff64d9f05..da43b5f57 100644 --- a/pkg/integrations/gitlab/snippet-darwin.sh +++ b/pkg/integrations/gitlab/snippet-darwin.sh @@ -8,6 +8,7 @@ sudo gitlab-runner register \ --non-interactive \ --url "{{ .RepoURL }}" \ --token "{{ .Token }}" \ + --name "{{ .Name }}" \ --executor "shell" # Install and start as LaunchDaemon diff --git a/pkg/integrations/gitlab/snippet-linux.sh b/pkg/integrations/gitlab/snippet-linux.sh index 70f4aa621..a4f855611 100644 --- a/pkg/integrations/gitlab/snippet-linux.sh +++ b/pkg/integrations/gitlab/snippet-linux.sh @@ -14,16 +14,168 @@ sudo restorecon -v /usr/bin/gitlab-runner 2>/dev/null || true # Enable Podman socket so the docker executor can reach it sudo systemctl enable --now podman.socket +# Detect the host's upstream DNS servers and propagate them into every Podman +# container (including nested build containers created by `podman build`). +# Without this, inner build containers inherit a loopback stub address +# (127.0.0.53 / systemd-resolved) that is unreachable from inside a container, +# causing DNS resolution failures like "Could not resolve host: github.com". +_dns_servers="" +if command -v resolvectl &>/dev/null; then + _dns_servers=$(resolvectl dns 2>/dev/null \ + | awk '{for(i=2;i<=NF;i++) print $i}' \ + | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' \ + | sort -u | tr '\n' ' ' | xargs) +fi +if [ -z "$_dns_servers" ] && command -v nmcli &>/dev/null; then + _dns_servers=$(nmcli dev show 2>/dev/null \ + | awk '/IP4\.DNS/ {print $2}' \ + | tr '\n' ' ' | xargs) +fi +# On systemd-resolved systems (Ubuntu), /run/systemd/resolve/resolv.conf holds +# the real upstream DNS servers (not the 127.0.0.53 stub in /etc/resolv.conf). +if [ -z "$_dns_servers" ]; then + _dns_servers=$(awk '/^nameserver/ && $2 !~ /^127\./ && $2 != "::1" {print $2}' \ + /run/systemd/resolve/resolv.conf 2>/dev/null \ + | tr '\n' ' ' | xargs) +fi +if [ -z "$_dns_servers" ]; then + _dns_servers=$(awk '/^nameserver/ && $2 !~ /^127\./ && $2 != "::1" {print $2}' /etc/resolv.conf \ + | tr '\n' ' ' | xargs) +fi +# Last-resort fallback: if no local DNS could be detected, use public resolvers. +# The machine must have internet access (it talks to GitLab), so these will work. +if [ -z "$_dns_servers" ]; then + _dns_servers="8.8.8.8 8.8.4.4" +fi +# Build --docker-dns flags for runner registration so every job container gets +# working DNS servers even when Podman's Docker socket API does not honour +# containers.conf dns_servers (which affects executor-container resolution). +_docker_dns_args=() +for _ip in $_dns_servers; do + _docker_dns_args+=(--docker-dns "$_ip") +done + +if [ -n "$_dns_servers" ]; then + _toml_list="" + for _ip in $_dns_servers; do + [ -n "$_toml_list" ] && _toml_list="${_toml_list}, " + _toml_list="${_toml_list}\"${_ip}\"" + done + sudo mkdir -p /etc/containers + if [ ! -f /etc/containers/containers.conf ]; then + printf '[containers]\ndns_servers = [%s]\ndns_options = ["timeout:2", "attempts:5", "single-request"]\n' \ + "$_toml_list" | sudo tee /etc/containers/containers.conf > /dev/null + elif grep -q '^\[containers\]' /etc/containers/containers.conf; then + # Scope the dns_servers check to the [containers] section only + if awk '/^\[containers\]/{f=1;next} /^\[/{f=0} f && /^dns_servers/{found=1} END{exit !found}' \ + /etc/containers/containers.conf; then + # Replace dns_servers only within [containers] + awk -v "val=dns_servers = [${_toml_list}]" \ + '/^\[containers\]/{s=1} /^\[/ && !/^\[containers\]/{s=0} + s && /^dns_servers/{$0=val} 1' \ + /etc/containers/containers.conf \ + | sudo tee /etc/containers/containers.conf.tmp > /dev/null \ + && sudo mv /etc/containers/containers.conf.tmp /etc/containers/containers.conf + else + sudo sed -i "/^\[containers\]/a dns_servers = [${_toml_list}]" \ + /etc/containers/containers.conf + fi + # Add or update dns_options within [containers] + if grep -q '^dns_options' /etc/containers/containers.conf; then + sudo sed -i 's|^dns_options.*|dns_options = ["timeout:2", "attempts:5", "single-request"]|' \ + /etc/containers/containers.conf + else + sudo sed -i '/^\[containers\]/a dns_options = ["timeout:2", "attempts:5", "single-request"]' \ + /etc/containers/containers.conf + fi + else + printf '\n[containers]\ndns_servers = [%s]\ndns_options = ["timeout:2", "attempts:5", "single-request"]\n' \ + "$_toml_list" | sudo tee -a /etc/containers/containers.conf > /dev/null + fi + # Ensure the file is world-readable so rootless Podman can also load it + sudo chmod 644 /etc/containers/containers.conf +fi + +# Guarantee the file exists even when DNS detection found nothing, so that the +# volume mount added to the runner below always has a real file to bind. +sudo mkdir -p /etc/containers +if [ ! -f /etc/containers/containers.conf ]; then + printf '[containers]\n' | sudo tee /etc/containers/containers.conf > /dev/null + sudo chmod 644 /etc/containers/containers.conf +fi + +{{- if .LogToJournald}} +# Set journald as the container log driver so CI job output is captured by the +# systemd journal and can be correlated with runner daemon logs via job_id. +sudo mkdir -p /etc/containers +if [ ! -f /etc/containers/containers.conf ]; then + printf '[containers]\nlog_driver = "journald"\n' \ + | sudo tee /etc/containers/containers.conf > /dev/null +elif grep -q '^\[containers\]' /etc/containers/containers.conf; then + if awk '/^\[containers\]/{f=1;next} /^\[/{f=0} f && /^log_driver/{found=1} END{exit !found}' \ + /etc/containers/containers.conf; then + # Replace existing log_driver within [containers] + awk '/^\[containers\]/{s=1} /^\[/ && !/^\[containers\]/{s=0} + s && /^log_driver/{$0="log_driver = \"journald\""} 1' \ + /etc/containers/containers.conf \ + | sudo tee /etc/containers/containers.conf.tmp > /dev/null \ + && sudo mv /etc/containers/containers.conf.tmp /etc/containers/containers.conf + else + sudo sed -i '/^\[containers\]/a log_driver = "journald"' \ + /etc/containers/containers.conf + fi +else + printf '\n[containers]\nlog_driver = "journald"\n' \ + | sudo tee -a /etc/containers/containers.conf > /dev/null +fi +sudo chmod 644 /etc/containers/containers.conf +{{- end}} + +# Create an executor-specific containers.conf that adds a non-conflicting inner +# subnet for nested Netavark networks. The host containers.conf intentionally +# omits [network] so the host Podman bridge keeps its default 10.88.0.0/16. +# The executor copy adds default_subnet = 192.168.100.0/24 so that Netavark +# inside a privileged executor container creates a bridge in a different subnet, +# eliminating the duplicate-route conflict that breaks DNS in nested containers +# on Netavark-based hosts (RHEL 9 / ppc64le). +sudo cp /etc/containers/containers.conf /etc/containers/executor-containers.conf +printf '\n[network]\ndefault_subnet = "192.168.100.0/24"\n' \ + | sudo tee -a /etc/containers/executor-containers.conf > /dev/null +sudo chmod 644 /etc/containers/executor-containers.conf + +# Enable IP forwarding so Netavark can NAT containers through the host's +# network interface. Persist via sysctl.d so the setting survives reboots. +printf 'net.ipv4.ip_forward = 1\nnet.ipv4.conf.all.forwarding = 1\n' \ + | sudo tee /etc/sysctl.d/99-podman-ip-forward.conf > /dev/null +sudo sysctl -w net.ipv4.ip_forward=1 +sudo sysctl -w net.ipv4.conf.all.forwarding=1 + +# Ensure NAT masquerade is active for the Podman bridge subnet. +# On RHEL/firewalld systems, Netavark normally configures this, but +# 'podman system reset' can leave firewalld without the masquerade rule +# until the first container is actually created — too late for the runner +# to resolve DNS at job startup. We add the rule explicitly so it is in +# place before any job container tries to reach an external DNS server. +sudo iptables -t nat -A POSTROUTING \ + -s 10.88.0.0/16 ! -d 10.88.0.0/16 -j MASQUERADE 2>/dev/null || true +# On firewalld systems (RHEL/Fedora), enable masquerade permanently so it +# survives firewalld restarts and reboots, then reload to activate immediately. +sudo firewall-cmd --permanent --add-masquerade 2>/dev/null || true +sudo firewall-cmd --reload 2>/dev/null || true + # Register runner using docker executor backed by Podman # --docker-privileged is required for Podman: containers need CAP_SYS_ADMIN to mount /proc sudo gitlab-runner register \ --non-interactive \ --url "{{ .RepoURL }}" \ --token "{{ .Token }}" \ + --name "{{ .Name }}" \ --executor "docker" \ --docker-image "fedora:latest" \ --docker-host "unix:///run/podman/podman.sock" \ - --docker-privileged + --docker-privileged \ + "${_docker_dns_args[@]}" \ + --docker-volumes "/etc/containers/executor-containers.conf:/etc/containers/containers.conf:ro" {{- if not .Unsecure}} # Create a dedicated system user for running CI jobs diff --git a/pkg/integrations/gitlab/snippet-windows.ps1 b/pkg/integrations/gitlab/snippet-windows.ps1 index 2013c1bee..a83f3414a 100644 --- a/pkg/integrations/gitlab/snippet-windows.ps1 +++ b/pkg/integrations/gitlab/snippet-windows.ps1 @@ -7,6 +7,7 @@ C:\GitLab-Runner\gitlab-runner.exe register ` --non-interactive ` --url "{{ .RepoURL }}" ` --token "{{ .Token }}" ` + --name "{{ .Name }}" ` --executor "shell" # Install and start as Windows service diff --git a/pkg/integrations/gitlab/types.go b/pkg/integrations/gitlab/types.go index 1c8087b57..5cd5c0923 100644 --- a/pkg/integrations/gitlab/types.go +++ b/pkg/integrations/gitlab/types.go @@ -26,6 +26,7 @@ type GitLabRunnerArgs struct { Arch *Arch // Target architecture User string // OS user to run as (only used when Unsecure is true) AuthToken string // Runner authentication token (set by Pulumi during deployment) - Unsecure bool // When false (default) a dedicated gitlab-runner system user is created; when true the runner service runs as User - Concurrent int // Maximum number of concurrent jobs (written to config.toml; 0 means leave at default of 1) + Unsecure bool // When false (default) a dedicated gitlab-runner system user is created; when true the runner service runs as User + Concurrent int // Maximum number of concurrent jobs (written to config.toml; 0 means leave at default of 1) + LogToJournald bool // When true, sets Podman log_driver=journald so CI job output is captured by systemd journal for OTel correlation } diff --git a/pkg/integrations/integrations.go b/pkg/integrations/integrations.go index 7b59ceef7..7ec6153de 100644 --- a/pkg/integrations/integrations.go +++ b/pkg/integrations/integrations.go @@ -6,16 +6,17 @@ import ( ) type UserDataValues struct { - CliURL string - User string - Name string - Token string - Labels string - Port string - RepoURL string - Executor string - Unsecure bool - Concurrent int + CliURL string + User string + Name string + Token string + Labels string + Port string + RepoURL string + Executor string + Unsecure bool + Concurrent int + LogToJournald bool } type IntegrationConfig interface { diff --git a/pkg/integrations/otelcol/snippet-linux.sh b/pkg/integrations/otelcol/snippet-linux.sh index ec7038edc..4f308b567 100644 --- a/pkg/integrations/otelcol/snippet-linux.sh +++ b/pkg/integrations/otelcol/snippet-linux.sh @@ -143,10 +143,31 @@ receivers: - type: remove id: remove_file_name field: attributes["log.file.name"] + - type: regex_parser + id: parse_job_id + parse_from: body + regex: '\bjob=(?P\d+)' + on_error: send + - type: regex_parser + id: parse_runner_token + parse_from: body + regex: '\brunner=(?P\w+)' + on_error: send attributes: index: "{{.Index}}" _sourceCategory: gitlab-runner _sourceHost: ${env:HOSTNAME} + journald/gitlab-jobs: + operators: + - type: regex_parser + id: parse_container_name + parse_from: attributes["CONTAINER_NAME"] + regex: '^runner-(?P.+?)-project-(?P\d+)-concurrent-(?P\d+)-(?P\d+)$' + on_error: send + attributes: + index: "{{.Index}}" + _sourceCategory: gitlab-runner-jobs + _sourceHost: ${env:HOSTNAME} {{- end}} processors: filter/drop_null_bytes: @@ -185,7 +206,7 @@ service: level: "basic" pipelines: logs: - receivers: [filelog/syslog, filelog/secure, filelog/audit{{if .MonitorGitLabRunner}}, filelog/gitlab-runner{{end}}] + receivers: [filelog/syslog, filelog/secure, filelog/audit{{if .MonitorGitLabRunner}}, filelog/gitlab-runner, journald/gitlab-jobs{{end}}] processors: [filter/drop_null_bytes, resource, batch] exporters: [otlphttp] OTELEOF diff --git a/pkg/manager/context/context.go b/pkg/manager/context/context.go index bf21c5e33..4454b578e 100644 --- a/pkg/manager/context/context.go +++ b/pkg/manager/context/context.go @@ -191,7 +191,7 @@ func manageIntegration(c *Context, ca *ContextArgs) error { cirrus.Init(ca.CirrusPWArgs) } if ca.GLRunnerArgs != nil { - ca.GLRunnerArgs.Name = c.RunID() + ca.GLRunnerArgs.Name = c.ProjectName() gitlab.Init(ca.GLRunnerArgs) } return nil diff --git a/pkg/provider/ibmcloud/action/ibm-power/cloud-config b/pkg/provider/ibmcloud/action/ibm-power/cloud-config index 7088603ee..2053274ed 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/cloud-config +++ b/pkg/provider/ibmcloud/action/ibm-power/cloud-config @@ -10,25 +10,31 @@ write_files: if mountpoint -q /home; then mkdir -p /home/containers/storage else + systemctl start multipathd 2>/dev/null || true DATA_DEV="" + _attempts=0 while true; do DATA_DEV=$(lsblk -rnpo NAME,TYPE | awk '$2=="mpath"{mpath[$1]=1} $2=="part"{p=$1; sub(/p?[0-9]+$/,"",p); has_part[p]=1} END{for(d in mpath) if(!has_part[d]&&d!~/control/) print d}' | head -1) [ -n "$DATA_DEV" ] && break + _attempts=$((_attempts + 1)) + [ "$_attempts" -ge 60 ] && break udevadm trigger --subsystem-match=block 2>/dev/null || true udevadm settle 2>/dev/null || true multipathd reconfigure 2>/dev/null || true sleep 10 done - udevadm settle 2>/dev/null || true - sleep 10 - mkfs.xfs -f -K "$DATA_DEV" - UUID=$(blkid -s UUID -o value "$DATA_DEV") - mkdir -p /mnt/home-tmp - cp -a /home/. /mnt/home-tmp/ - mount "$DATA_DEV" /home - cp -a /mnt/home-tmp/. /home/ - rm -rf /mnt/home-tmp - echo "UUID=$UUID /home xfs defaults 0 2" >> /etc/fstab + if [ -n "$DATA_DEV" ]; then + udevadm settle 2>/dev/null || true + sleep 10 + mkfs.xfs -f -K "$DATA_DEV" + UUID=$(blkid -s UUID -o value "$DATA_DEV") + mkdir -p /mnt/home-tmp + cp -a /home/. /mnt/home-tmp/ + mount "$DATA_DEV" /home + cp -a /mnt/home-tmp/. /home/ + rm -rf /mnt/home-tmp + echo "UUID=$UUID /home xfs defaults 0 2" >> /etc/fstab + fi mkdir -p /home/containers/storage fi if ! mountpoint -q /var/lib/containers/storage; then diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index 0adde3b7c..c0c65c1c5 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -192,6 +192,7 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { } gateway := subnetInfo.Gateway localArgs := *glRunnerArgs + localArgs.LogToJournald = hasOtel piUserDataInput = authToken.ApplyT(func(token string) (*string, error) { localArgs.AuthToken = token glSnippet, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(&localArgs, defaultUser) @@ -229,6 +230,38 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { return err } + // piv.ID() returns "cloudInstanceId/resourceId" — extract just the resource ID + splitID := func(id string) (string, error) { + if parts := strings.SplitN(id, "/", 2); len(parts) == 2 { + return parts[1], nil + } + return id, nil + } + // Create the data volume before the instance so we can pass it via + // PiVolumeIds at instance-creation time. This bypasses the separate + // PiVolumeAttach resource whose Terraform provider polls + // GET /volumes/{id} without retrying on HTTP 500, causing consistent + // failures on IBM Cloud PowerVS. Attaching via PiVolumeIds lets IBM + // Cloud handle the attachment internally during instance creation, which + // also gives the correct destroy order: instance is destroyed before + // volume, so IBM Cloud auto-detaches the volume before we delete it. + userTags := ibmcloudp.TagsAsStringArray(r.mCtx.GetTags()) + + piv, err := ibmcloud.NewPiVolume(ctx, + resourcesUtil.GetResourceName(*r.prefix, stackIBMPowerVS, "piv"), + &ibmcloud.PiVolumeArgs{ + PiCloudInstanceId: pulumi.String(r.workspaceID), + PiVolumeName: pulumi.String(r.mCtx.ProjectName()), + PiVolumeSize: pulumi.Float64(float64(r.diskSize)), + PiVolumeType: pulumi.String(r.storageType), + PiVolumeShareable: pulumi.Bool(false), + PiUserTags: userTags, + }) + if err != nil { + return err + } + pivVolumeId := piv.ID().ApplyT(splitID).(pulumi.StringOutput) + i, err := ibmcloud.NewPiInstance(ctx, resourcesUtil.GetResourceName(*r.prefix, stackIBMPowerVS, "pii"), &ibmcloud.PiInstanceArgs{ @@ -243,6 +276,8 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { PiStorageType: pulumi.String(r.storageType), PiKeyPairName: pki.PiKeyName, PiUserData: piUserDataInput, + PiVolumeIds: pulumi.StringArray{pivVolumeId}, + PiUserTags: userTags, PiNetworks: ibmcloud.PiInstancePiNetworkArray{ &ibmcloud.PiInstancePiNetworkArgs{ NetworkId: pulumi.String(r.piPrivateSubnetID), @@ -253,40 +288,6 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { return err } - // Both i.ID() and piv.ID() return "cloudInstanceId/resourceId" — extract just the resource ID - splitID := func(id string) (string, error) { - if parts := strings.SplitN(id, "/", 2); len(parts) == 2 { - return parts[1], nil - } - return id, nil - } - piInstanceId := i.ID().ApplyT(splitID).(pulumi.StringOutput) - piv, err := ibmcloud.NewPiVolume(ctx, - resourcesUtil.GetResourceName(*r.prefix, stackIBMPowerVS, "piv"), - &ibmcloud.PiVolumeArgs{ - PiCloudInstanceId: pulumi.String(r.workspaceID), - PiVolumeName: pulumi.String(r.mCtx.ProjectName()), - PiVolumeSize: pulumi.Float64(float64(r.diskSize)), - PiVolumeType: pulumi.String(r.storageType), - PiVolumeShareable: pulumi.Bool(false), - PiAffinityPolicy: pulumi.String("affinity"), - PiAffinityInstance: piInstanceId.ToStringPtrOutput(), - }) - if err != nil { - return err - } - pivVolumeId := piv.ID().ApplyT(splitID).(pulumi.StringOutput) - _, err = ibmcloud.NewPiVolumeAttach(ctx, - resourcesUtil.GetResourceName(*r.prefix, stackIBMPowerVS, "piva"), - &ibmcloud.PiVolumeAttachArgs{ - PiCloudInstanceId: pulumi.String(r.workspaceID), - PiInstanceId: piInstanceId, - PiVolumeId: pivVolumeId, - }) - if err != nil { - return err - } - ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUsername), pulumi.String(defaultUser)) // Use ExternalIp when available (pub-vlan network); fall back to IpAddress for private networks. ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputHost), @@ -321,12 +322,14 @@ func (r *pwRequest) deployBastion(ctx *pulumi.Context) error { } name := fmt.Sprintf("%s-%s-bastion", *r.prefix, r.mCtx.ProjectName()) + userTags := ibmcloudp.TagsAsStringArray(r.mCtx.GetTags()) sg, err := network.NewSecurityGroupWithSSH(ctx, &network.SecurityGroupArgs{ Prefix: *r.prefix, ComponentID: bastionComponentID, Name: name, VPC: pulumi.String(subnetInfo.Vpc), + Tags: userTags, }) if err != nil { return err @@ -347,6 +350,7 @@ func (r *pwRequest) deployBastion(ctx *pulumi.Context) error { &ibmcloud.IsSshKeyArgs{ Name: pulumi.String(name), PublicKey: bpk.PublicKeyOpenssh, + Tags: userTags, }) if err != nil { return err @@ -369,6 +373,7 @@ func (r *pwRequest) deployBastion(ctx *pulumi.Context) error { Vpc: pulumi.String(subnetInfo.Vpc), Zone: pulumi.String(subnetInfo.Zone), Keys: pulumi.StringArray{bsshKey.ID()}, + Tags: userTags, PrimaryNetworkInterface: &ibmcloud.IsInstancePrimaryNetworkInterfaceArgs{ Subnet: pulumi.String(r.vpcPublicSubnetID), SecurityGroups: pulumi.StringArray{sg.ID()}, @@ -383,6 +388,7 @@ func (r *pwRequest) deployBastion(ctx *pulumi.Context) error { ComponentID: bastionComponentID, Name: name, Zone: pulumi.String(subnetInfo.Zone), + Tags: userTags, }) if err != nil { return err diff --git a/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go b/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go index 432baf915..c2719d94d 100644 --- a/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go +++ b/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go @@ -160,11 +160,14 @@ func (r *zRequest) deploy(ctx *pulumi.Context) error { return r.deployWithExistingSubnet(ctx) } zone := *r.zone + userTags := ibmcloudp.TagsAsStringArray(r.mCtx.GetTags()) + rg, err := ibmcloud.NewResourceGroup( ctx, resourcesUtil.GetResourceName(*r.prefix, stackIBMS390, "rg"), &ibmcloud.ResourceGroupArgs{ Name: pulumi.String(r.mCtx.ProjectName()), + Tags: userTags, }) if err != nil { return err @@ -176,11 +179,12 @@ func (r *zRequest) deploy(ctx *pulumi.Context) error { RG: rg, ComponentID: stackIBMS390, Name: fmt.Sprintf("%s-%s", *r.prefix, r.mCtx.ProjectName()), + Tags: userTags, }) if err != nil { return err } - pk, pik, err := isKey(ctx, r.mCtx, *r.prefix, stackIBMS390, rg) + pk, pik, err := isKey(ctx, r.mCtx, *r.prefix, stackIBMS390, rg, userTags) if err != nil { return err } @@ -203,6 +207,7 @@ func (r *zRequest) deploy(ctx *pulumi.Context) error { }, ResourceGroup: rg.ID(), Keys: pulumi.StringArray{pik.ID()}, + Tags: userTags, PrimaryNetworkInterface: &ibmcloud.IsInstancePrimaryNetworkInterfaceArgs{ Subnet: n.Subnet.ID(), SecurityGroups: pulumi.StringArray{ @@ -253,11 +258,14 @@ func (r *zRequest) deployWithExistingSubnet(ctx *pulumi.Context) error { return err } name := fmt.Sprintf("%s-%s", *r.prefix, r.mCtx.ProjectName()) + userTags := ibmcloudp.TagsAsStringArray(r.mCtx.GetTags()) + sg, err := network.NewSecurityGroupWithSSH(ctx, &network.SecurityGroupArgs{ Prefix: *r.prefix, ComponentID: stackIBMS390, Name: name, VPC: pulumi.String(subnetInfo.Vpc), + Tags: userTags, }) if err != nil { return err @@ -267,12 +275,13 @@ func (r *zRequest) deployWithExistingSubnet(ctx *pulumi.Context) error { ComponentID: stackIBMS390, Name: name, Zone: pulumi.String(subnetInfo.Zone), + Tags: userTags, }) if err != nil { return err } // rg is nil: the SSH key is placed in the account default resource group. - pk, pik, err := isKey(ctx, r.mCtx, *r.prefix, stackIBMS390, nil) + pk, pik, err := isKey(ctx, r.mCtx, *r.prefix, stackIBMS390, nil, userTags) if err != nil { return err } @@ -294,6 +303,7 @@ func (r *zRequest) deployWithExistingSubnet(ctx *pulumi.Context) error { Size: pulumi.Int(r.diskSize), }, Keys: pulumi.StringArray{pik.ID()}, + Tags: userTags, PrimaryNetworkInterface: &ibmcloud.IsInstancePrimaryNetworkInterfaceArgs{ Subnet: pulumi.String(*r.subnetID), SecurityGroups: pulumi.StringArray{sg.ID()}, @@ -362,6 +372,7 @@ func (r *zRequest) buildUserDataInput() (pulumi.StringPtrInput, error) { hasOtel := otelSet == 3 if r.glAuthToken != nil { localArgs := *r.glRunnerArgsCopy + localArgs.LogToJournald = hasOtel return r.glAuthToken.ApplyT(func(token string) (*string, error) { localArgs.AuthToken = token glSnippet, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(&localArgs, defaultUser) @@ -452,7 +463,7 @@ func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix string) e // isKey creates a 4096-bit RSA TLS key pair and registers the public key as // an IBM Cloud VPC SSH key. Pass rg=nil to place the key in the account // default resource group. -func isKey(ctx *pulumi.Context, mCtx *mc.Context, prefix, cId string, rg *ibmcloud.ResourceGroup) (*tls.PrivateKey, *ibmcloud.IsSshKey, error) { +func isKey(ctx *pulumi.Context, mCtx *mc.Context, prefix, cId string, rg *ibmcloud.ResourceGroup, tags pulumi.StringArray) (*tls.PrivateKey, *ibmcloud.IsSshKey, error) { pk, err := tls.NewPrivateKey( ctx, resourcesUtil.GetResourceName(prefix, cId, "pk"), @@ -473,6 +484,7 @@ func isKey(ctx *pulumi.Context, mCtx *mc.Context, prefix, cId string, rg *ibmclo sshKeyArgs := &ibmcloud.IsSshKeyArgs{ Name: pulumi.String(mCtx.ProjectName()), PublicKey: pk.PublicKeyOpenssh, + Tags: tags, } if rg != nil { sshKeyArgs.ResourceGroup = rg.ID() diff --git a/pkg/provider/ibmcloud/ibmcloud.go b/pkg/provider/ibmcloud/ibmcloud.go index 7250453f7..ad2891345 100644 --- a/pkg/provider/ibmcloud/ibmcloud.go +++ b/pkg/provider/ibmcloud/ibmcloud.go @@ -7,6 +7,7 @@ import ( "os" "strings" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" "github.com/redhat-developer/mapt/pkg/manager" mc "github.com/redhat-developer/mapt/pkg/manager/context" "github.com/redhat-developer/mapt/pkg/manager/credentials" @@ -85,22 +86,26 @@ func requireEnv(name string) (string, error) { return v, nil } -func extractBucket(backedURL string) (string, error) { - u, err := url.Parse(backedURL) - if err != nil { - return "", fmt.Errorf("failed to parse backed URL %q: %w", backedURL, err) +func extractBucketAndPath(backedURL string) (bucket, path string, err error) { + u, parseErr := url.Parse(backedURL) + if parseErr != nil { + return "", "", fmt.Errorf("failed to parse backed URL %q: %w", backedURL, parseErr) } if strings.HasPrefix(backedURL, "s3://") { if u.Host == "" { - return "", fmt.Errorf("backed URL %q missing bucket name (expected s3://bucket-name)", backedURL) + return "", "", fmt.Errorf("backed URL %q missing bucket name (expected s3://bucket-name)", backedURL) } - return u.Host, nil + return u.Host, strings.TrimPrefix(u.Path, "/"), nil } - bucket := strings.TrimPrefix(u.Path, "/") - if bucket == "" { - return "", fmt.Errorf("backed URL %q missing bucket name in path (expected https:///)", backedURL) + rest := strings.TrimPrefix(u.Path, "/") + if rest == "" { + return "", "", fmt.Errorf("backed URL %q missing bucket name in path (expected https:///)", backedURL) } - return strings.SplitN(bucket, "/", 2)[0], nil + parts := strings.SplitN(rest, "/", 2) + if len(parts) == 2 { + return parts[0], parts[1], nil + } + return parts[0], "", nil } func initCOSBackend(backedURL string) (string, error) { @@ -122,13 +127,17 @@ func initCOSBackend(backedURL string) (string, error) { endpoint = fmt.Sprintf("s3.%s.cloud-object-storage.appdomain.cloud", region) } - bucket, err := extractBucket(backedURL) + bucket, path, err := extractBucketAndPath(backedURL) if err != nil { return "", err } + resolvedBase := bucket + if path != "" { + resolvedBase = bucket + "/" + path + } resolvedURL := fmt.Sprintf("s3://%s?endpoint=%s&s3ForcePathStyle=true", - bucket, endpoint) + resolvedBase, ensureHTTPS(endpoint)) for k, v := range map[string]string{ "AWS_ACCESS_KEY_ID": accessKey, @@ -212,6 +221,17 @@ func CleanupState(mCtx *mc.Context) error { return nil } +// TagsAsStringArray converts the map[string]string tags supplied via the +// --tags CLI flag to the key:value string format expected by IBM Cloud +// resource user tags (PiUserTags / Tags fields on Pulumi resources). +func TagsAsStringArray(tags map[string]string) pulumi.StringArray { + result := make(pulumi.StringArray, 0, len(tags)) + for k, v := range tags { + result = append(result, pulumi.String(fmt.Sprintf("%s:%s", k, v))) + } + return result +} + func Destroy(mCtx *mc.Context, stackName string) error { stack := manager.Stack{ StackName: mCtx.StackNameByProject(stackName), diff --git a/pkg/provider/ibmcloud/modules/network/network.go b/pkg/provider/ibmcloud/modules/network/network.go index 03c2dd6f9..3f1e0f67c 100644 --- a/pkg/provider/ibmcloud/modules/network/network.go +++ b/pkg/provider/ibmcloud/modules/network/network.go @@ -17,6 +17,7 @@ type NetworkArgs struct { Name string RG *ibmcloud.ResourceGroup Zone *string + Tags pulumi.StringArray } type Network struct { @@ -33,6 +34,7 @@ type SecurityGroupArgs struct { Name string VPC pulumi.StringInput RG *ibmcloud.ResourceGroup + Tags pulumi.StringArray } // NewSecurityGroupWithSSH creates a security group with inbound SSH (port 22) @@ -41,6 +43,7 @@ func NewSecurityGroupWithSSH(ctx *pulumi.Context, args *SecurityGroupArgs) (*ibm sgArgs := &ibmcloud.IsSecurityGroupArgs{ Name: pulumi.String(args.Name), Vpc: args.VPC, + Tags: args.Tags, } if args.RG != nil { sgArgs.ResourceGroup = args.RG.ID() @@ -85,6 +88,7 @@ type FloatingIPArgs struct { Name string Zone pulumi.StringInput RG *ibmcloud.ResourceGroup + Tags pulumi.StringArray } // NewFloatingIP creates an IBM Cloud VPC floating IP. @@ -93,6 +97,7 @@ func NewFloatingIP(ctx *pulumi.Context, args *FloatingIPArgs) (*ibmcloud.IsFloat fipArgs := &ibmcloud.IsFloatingIpArgs{ Name: pulumi.String(args.Name), Zone: args.Zone, + Tags: args.Tags, } if args.RG != nil { fipArgs.ResourceGroup = args.RG.ID() @@ -108,6 +113,7 @@ func New(ctx *pulumi.Context, args *NetworkArgs) (*Network, error) { &ibmcloud.IsVpcArgs{ Name: pulumi.String(args.Name), ResourceGroup: args.RG.ID(), + Tags: args.Tags, }) if err != nil { return nil, err @@ -131,6 +137,7 @@ func New(ctx *pulumi.Context, args *NetworkArgs) (*Network, error) { Zone: pulumi.String(*args.Zone), Ipv4CidrBlock: pulumi.String(cidrSN), ResourceGroup: args.RG.ID(), + Tags: args.Tags, }, pulumi.DependsOn([]pulumi.Resource{vpcap})) if err != nil { return nil, err @@ -142,6 +149,7 @@ func New(ctx *pulumi.Context, args *NetworkArgs) (*Network, error) { Vpc: vpc.ID(), Zone: pulumi.String(*args.Zone), ResourceGroup: args.RG.ID(), + Tags: args.Tags, }) if err != nil { return nil, err @@ -161,6 +169,7 @@ func New(ctx *pulumi.Context, args *NetworkArgs) (*Network, error) { Name: args.Name, VPC: vpc.ID(), RG: args.RG, + Tags: args.Tags, }) if err != nil { return nil, err @@ -171,6 +180,7 @@ func New(ctx *pulumi.Context, args *NetworkArgs) (*Network, error) { Name: args.Name, Zone: pulumi.String(*args.Zone), RG: args.RG, + Tags: args.Tags, }) if err != nil { return nil, err diff --git a/pkg/provider/ibmcloud/services/power/power.go b/pkg/provider/ibmcloud/services/power/power.go index 487b39ca0..b79161c94 100644 --- a/pkg/provider/ibmcloud/services/power/power.go +++ b/pkg/provider/ibmcloud/services/power/power.go @@ -68,6 +68,41 @@ func waitForInstance(mCtx *mc.Context, pc *v.IBMPIInstanceClient, instanceId str return fmt.Errorf("timed out waiting for instance %s to become ready", instanceId) } +// WaitForVolumeAvailable polls the IBM Cloud PowerVS API until the volume +// reaches "available" (or "in-use") state, retrying on 500 errors. This is +// necessary because the Terraform IBM Cloud provider's ibm_pi_volume_attach +// resource polls volume state via GET but does not retry on HTTP 500, causing +// spurious failures when the IBM Cloud backend returns a transient 500 shortly +// after volume creation. Running this wait before the Pulumi attachment resource +// is registered ensures the volume is in a stable state before the provider's +// own polling begins. +func WaitForVolumeAvailable(mCtx *mc.Context, cloudInstanceId, volumeId string) (string, error) { + vc, err := volumeClient(mCtx, cloudInstanceId) + if err != nil { + return "", err + } + for i := 0; i < 60; i++ { // up to ~10 minutes + vol, err := vc.Get(volumeId) + if err == nil { + switch vol.State { + case "available", "in-use": + logging.Infof("volume %s is in state %q, proceeding with attachment", volumeId, vol.State) + return volumeId, nil + default: + logging.Infof("volume %s state: %q, retrying in 10s...", volumeId, vol.State) + } + } else { + logging.Infof("volume %s GET returned error (retrying in 10s): %v", volumeId, err) + } + select { + case <-mCtx.Context().Done(): + return "", mCtx.Context().Err() + case <-time.After(10 * time.Second): + } + } + return "", fmt.Errorf("timed out waiting for volume %s to become available", volumeId) +} + func client(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPIInstanceClient, error) { options := &ps.IBMPIOptions{ Authenticator: &core.IamAuthenticator{ @@ -85,6 +120,23 @@ func client(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPIInstanceClient, e return v.NewIBMPIInstanceClient(mCtx.Context(), session, cloudInstanceId), nil } +func volumeClient(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPIVolumeClient, error) { + options := &ps.IBMPIOptions{ + Authenticator: &core.IamAuthenticator{ + ApiKey: os.Getenv(icConstants.EnvIBMCloudAPIKey), + }, + UserAccount: os.Getenv(icConstants.EnvIBMCloudAccount), + Zone: os.Getenv("IC_ZONE"), + URL: powerURL(os.Getenv("IC_REGION")), + Debug: mCtx.Debug(), + } + session, err := ps.NewIBMPISession(options) + if err != nil { + return nil, err + } + return v.NewIBMPIVolumeClient(mCtx.Context(), session, cloudInstanceId), nil +} + func convertToPVMInstanceCreate(s *PowerArgs) *models.PVMInstanceCreate { return &models.PVMInstanceCreate{ ServerName: s.InstanceArgs.ServerName,