Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions .pipelines/azure-pipeline-aks-extension-managed-ev2-sdp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#################################################################################
# OneBranch Pipelines #
# Documentation: https://aka.ms/obpipelines #
# Yaml Schema: https://aka.ms/obpipelines/yaml/schema #
# Reference doc: https://msazure.visualstudio.com/One/_wiki/wikis/One.wiki/ #
# 671961/Automating-release-of-new-versions-using-ev2 #
# #
# Purpose: Roll out new versions of the extension type #
# microsoft.azuremonitor.containers (Container Insights logs / ama-logs) #
# to AKS (Managed) clusters via the central Ev2 extension rollout artifacts. #
# #
# This is the ama-logs analogue of prometheus-collector's #
# .pipelines/azure-pipeline-aks-extension-managed-ev2-sdp.yml. It delivers the #
# unified extension Helm chart from MCR and lets the cluster-extension platform #
# (central SDP policy) auto-install/upgrade it on AKS managed clusters. #
# #
# ACTIVATION DEPENDENCY (P0, cross-team): #
# The ClusterConfig / Cluster-Extensions partner team must first register #
# `microsoft.azuremonitor.containers` as an AKS managed-cluster extension #
# type and create the AKS packageConfig + serviceGroup. Until then this #
# pipeline authors the rollout but cannot successfully run. All values marked #
# TODO(P0) below must be confirmed with that team before first run. DevSkim: ignore DS176209
#################################################################################

# No CI trigger is configured for this pipeline.
trigger: none

# Queue-time inputs.
parameters:
  # Explicit extension/chart version. When left empty, the version selection in
  # `variables` falls back to the upstream build's runName.
  - name: overrideExtensionVersion
    displayName: "Override Extension Version (leave blank to use upstream build runName)"
    type: string
    default: ""

  # Written into the "releaseTrain" setting of the configuration overrides and
  # into the custom build version string.
  - name: releaseTrain
    displayName: "Release Train"
    type: string
    default: preview
    values:
      - preview
      - stable

  # The remaining parameters are forwarded to ev2ManagedSdpRolloutConfig under
  # `extends`.
  - name: rolloutType
    displayName: "SDP rollout type"
    type: string
    default: normal
    values:
      - normal
      - emergency
      - globaloutage

  - name: overrideManagedValidationDuration
    displayName: "Override standard SDP duration?"
    type: boolean
    default: false

  - name: managedValidationDurationInHours
    displayName: "Override standard SDP duration (in hours)"
    type: number
    default: 0

  # Nothing in this file enforces a non-zero value for globaloutage rollouts.
  - name: icmIncidentId
    displayName: "IcM Incident Id (required when rollout type is globaloutage)"
    type: number
    default: 0

resources:
  pipelines:
    # Upstream build that publishes the unified extension helm chart to MCR.
    # Its runName becomes the extension/chart version unless a version override
    # is supplied at queue time.
    - pipeline: _ci-build
      project: microsoft
      source: 'CDPX\docker-provider\ContainerInsights-MultiArch-MergedBranches'
  repositories:
    # Repository alias used by `extends.template` (…@templates).
    - repository: templates
      type: git
      name: OneBranch.Pipelines/GovernedTemplates
      ref: refs/heads/main

variables:
  # ---- Identifiers ---------------------------------------------------------
  # Service tree id of Container Insights; the same value appears in the
  # existing deployment/**/ServiceGroupRoot/ServiceModel.json and in the
  # ci-*-release pipelines.
  - name: serviceIdentifier
    value: "3170cdd2-19f0-4027-912b-1027311691a2"

  # Ev2 ServiceGroup that the extension platform team registers for the AKS
  # Managed central rollout of microsoft.azuremonitor.containers.
  # TODO(P0): confirm exact name with ClusterConfig/Extensions partner team DevSkim: ignore DS176209
  # (modelled on the metrics example:
  #  Microsoft.Azure.InfrastructureInsights.ContainerInsights.AzureMonitorMetrics).
  - name: serviceGroup
    value: "Microsoft.Azure.InfrastructureInsights.ContainerInsights.AzureMonitorLogs"

  # ---- Version selection ---------------------------------------------------
  # Exactly one of the two branches below is inserted at template-expansion time.
  # NOTE(review): the runName fallback has to equal the version the chart was
  # packaged with by the upstream build - confirm that this is the case.
  - ${{ if eq(parameters.overrideExtensionVersion, '') }}:
    - name: extensionVersionValue
      value: "$(resources.pipeline._ci-build.runName)"
  - ${{ if ne(parameters.overrideExtensionVersion, '') }}:
    - name: extensionVersionValue
      value: "${{ parameters.overrideExtensionVersion }}"

  # ---- ConfigurationOverrides (per cloud) ---------------------------------
  # TODO(P0): packageConfig must be the AKS package config name the partner team DevSkim: ignore DS176209
  # creates for microsoft.azuremonitor.containers (logs equivalent of
  # Microsoft.AzureMonitor.Containers.Metrics-PromAks052926).
  # userAssignedIdentity is the existing ci-prod ev2-agent-release MSI used by
  # the current agent Ev2 release (deployment/.../Configurations.Public.Prod.json);
  # it must be the MSI listed in `msiClientIds` of the extension type registration.
  # NOTE(review): helmChartUrl uses the ciprod path - confirm the chart is
  # published there before the first run.
  - name: ProdConfigurationOverrides
    value: |
      {
        "ConfigurationSpecification": {
          "settings": {
            "extensionTypeName": "microsoft.azuremonitor.containers",
            "packageConfig": "TODO_P0_AKS_PACKAGE_CONFIG_NAME",
            "helmChartUrl": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod/ama-logs",
            "userAssignedIdentity": "/subscriptions/30c56c3a-54da-46ea-b004-06eb33432687/resourceGroups/containerinsightsprod/providers/Microsoft.ManagedIdentity/userAssignedIdentities/ev2-agent-release",
            "version": "$(extensionVersionValue)",
            "releaseTrain": "${{ parameters.releaseTrain }}"
          }
        }
      }

  - name: LinuxContainerImage
    value: "mcr.microsoft.com/oss/go/microsoft/golang:1.24.2-fips-azurelinux3.0"
  # Counter keyed on the constant prefix '0', seeded at 1.
  - name: RevisionCounter
    value: $[counter('0', 1)]
  # Consumed by the "Setup BuildNumber" step as the custom run version.
  - name: CustomBuildVersion
    value: 'Build - $(extensionVersionValue) Release - $(RevisionCounter) ReleaseTrain - ${{ parameters.releaseTrain }}'

# Keep the last commit message out of the run name.
appendCommitMessageToRunName: false

extends:
  template: v2/OneBranch.Official.CrossPlat.yml@templates
  parameters:
    # Managed-SDP settings, passed through unchanged from the queue-time parameters.
    ev2ManagedSdpRolloutConfig:
      rolloutType: ${{ parameters.rolloutType }}
      overrideManagedValidationDuration: ${{ parameters.overrideManagedValidationDuration }}
      managedValidationOverrideDurationInHours: ${{ parameters.managedValidationDurationInHours }}
      icmIncidentId: ${{ parameters.icmIncidentId }}

    stages:
      - stage: PROD_Managed_SDP
        displayName: 'PROD: Managed SDP'
        variables:
          ob_release_environment: Production
        jobs:
          - job: PROD_Managed_SDP
            displayName: 'PROD: Managed SDP'
            pool:
              type: release
            steps:
              # Stamp the run with the custom version string built in `variables`.
              - task: onebranch.pipeline.version@1
                displayName: Setup BuildNumber
                condition: ne(variables['CustomBuildVersion'], '')
                inputs:
                  system: Custom
                  customVersion: $(CustomBuildVersion)

              # Central-artifacts rollout of the selected extension version.
              - task: vsrm-ev2.ev2-rollout.ev2-rollout-task.Ev2RARollout@2
                displayName: 'Ev2 Managed SDP Rollout (Prod)'
                inputs:
                  EndpointProviderType: ApprovalService
                  ApprovalServiceEnvironment: Production
                  TaskAction: CentralArtifactsRollout
                  CentralArtifactsServiceIdentifier: 2bbbdccc-fa4c-4f81-b9fd-12cd0234701c
                  CentralArtifactsPolicyName: CentralArtifactsPolicy
                  ServiceIdentifier: $(serviceIdentifier)
                  ServiceGroup: $(serviceGroup)
                  ConfigurationOverrides: $(ProdConfigurationOverrides)
                  # First-run / canary testing of the pipeline itself: Select is
                  # currently pinned to the two EUAP regions only. Once the
                  # pipeline is validated, delete the pinned line and uncomment
                  # `regions(*)` so the central SDP policy controls the wave
                  # order (Canary -> Pilot -> ... -> HighAvailability).
                  Select: regions(centraluseuap,eastus2euap)
                  #Select: regions(*)
                  ArtifactsVersionOverride: $(extensionVersionValue)
77 changes: 77 additions & 0 deletions .pipelines/azure_pipeline_mergedbranches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,83 @@ extends:
FileDirPath: '$(Build.ArtifactStagingDirectory)'
DisableRemediation: false
AcceptableOutdatedSignatureInHours: 72
# ---------------------------------------------------------------------------
# Package + push the AKS extension Helm chart (ama-logs) to MCR.
#
# Consumed by the central-artifacts Ev2 Managed-SDP rollout pipeline:
# .pipelines/azure-pipeline-aks-extension-managed-ev2-sdp.yml (AKS Managed)
#
# INCUBATION: gated on BUILD_EXTENSION_CHART=true so it does NOT affect normal
# builds until charts/azuremonitor-containers-extension is production-ready
# (see that chart's EXTENSION-MIGRATION.md). Set the pipeline variable to enable.
# ---------------------------------------------------------------------------
- job: package_extension_chart
  displayName: "Package and push ama-logs extension helm chart"
  # Opt-in during incubation: runs only when BUILD_EXTENSION_CHART is 'true'.
  # NOTE(review): no `pool:` is declared on this job in the visible hunk - confirm
  # it gets the intended Linux pool (the script below relies on bash and, when
  # envsubst is missing, on sudo + apt-get).
  condition: and(succeeded(), eq(variables['BUILD_EXTENSION_CHART'], 'true'))
  dependsOn:
    - common
    - build_linux
    - build_windows_multi_arc
  variables:
    # Image tags produced by the `setup` step of the `common` job.
    linuxTelemetryTag: $[ dependencies.common.outputs['setup.linuxTelemetryTag'] ]
    windowsTelemetryTag: $[ dependencies.common.outputs['setup.windowsTelemetryTag'] ]
    Codeql.SkipTaskAutoInjection: true
    # Extension chart repo path in ACR/MCR (mirrors the agent image namespace).
    extensionChartAcrPath: 'public/azuremonitor/containerinsights/cidev/ama-logs'
  templateContext:
    outputs:
      # Publishes the packaged chart (.tgz) as a pipeline artifact.
      - output: pipelineArtifact
        targetPath: '$(Build.ArtifactStagingDirectory)/extension-chart'
        artifactName: extension-chart-drop
  steps:
    - task: HelmInstaller@1
      displayName: Install Helm
      inputs:
        helmVersionToInstall: '3.19.0'
    - task: AzureCLI@2
      displayName: "Package + push extension chart (AKS)"
      inputs:
        azureSubscription: ${{ variables.armServiceConnectionName }}
        scriptType: bash
        scriptLocation: inlineScript
        inlineScript: |
          set -euo pipefail

          # Resolve pipeline macros once into shell variables so that every later
          # use is an ordinary, quoted shell expansion.
          STAGING_DIR="$(Build.ArtifactStagingDirectory)/extension-chart"
          LINUX_TAG="$(linuxTelemetryTag)"
          WINDOWS_TAG="$(windowsTelemetryTag)"
          CHART_NAME="azuremonitor-containers-extension"

          # An empty tag would produce an invalid chart version; fail early with
          # a clear message instead of a confusing helm error later on.
          if [ -z "$LINUX_TAG" ]; then
            echo "linuxTelemetryTag is empty - cannot derive the chart version." >&2
            exit 1
          fi

          # envsubst comes from gettext-base; install it only when it is missing.
          if ! command -v envsubst >/dev/null 2>&1; then
            sudo apt-get update && sudo apt-get install -y gettext-base
          fi
          mkdir -p "$STAGING_DIR"

          # Chart version must be SemVer2. Use the telemetry tag for release builds,
          # otherwise a dev prerelease derived from the image tag. Every character
          # that is not valid inside a single prerelease identifier becomes '-'.
          CHART_VERSION="$LINUX_TAG"
          if [ "$(IS_RELEASE)" != "True" ]; then
            CHART_VERSION="0.0.0-$(printf '%s' "$LINUX_TAG" | tr -c 'A-Za-z0-9-' '-')"
          fi
          echo "Extension chart version: $CHART_VERSION"

          # Render Chart.yaml / values.yaml from their templates.
          cd "charts/$CHART_NAME"
          export HELM_SEMVER="$CHART_VERSION"
          export IMAGE_TAG="$LINUX_TAG"
          export IMAGE_TAG_WINDOWS="$WINDOWS_TAG"
          envsubst < Chart-template.yaml > Chart.yaml
          envsubst < values-template.yaml > values.yaml

          echo "=== helm lint ==="
          helm lint .

          # helm package writes <chart name>-<version>.tgz into the current
          # directory. Address that file by name instead of globbing so a stale
          # archive left in a reused workspace can never be picked up.
          helm package . --version "$CHART_VERSION"
          CHART_TGZ="./${CHART_NAME}-${CHART_VERSION}.tgz"
          if [ ! -f "$CHART_TGZ" ]; then
            echo "Expected chart archive $CHART_TGZ was not produced." >&2
            exit 1
          fi
          cp "$CHART_TGZ" "$STAGING_DIR/"

          # Only push on non-PR builds.
          if [ "$(IS_PR)" = "True" ]; then
            echo "PR build - skipping chart push to ACR."
            exit 0
          fi

          export HELM_EXPERIMENTAL_OCI=1
          az acr login -n ${{ variables.containerRegistry }}
          DEST="oci://${{ variables.containerRegistry }}.azurecr.io/$(extensionChartAcrPath)"
          echo "Pushing $CHART_TGZ -> $DEST (version $CHART_VERSION)"
          helm push "$CHART_TGZ" "$DEST"

- stage: Deploy_and_Test_Images_In_Dev_Clusters
displayName: Deploy and Test Images in Dev Clusters
Expand Down
63 changes: 63 additions & 0 deletions Documentation/AgentRelease/extension-release.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# ama-logs independent AKS extension-based release (WIP)

This document describes the in-progress migration of the Container Insights logs agent
(`ama-logs`) to an **independent, extension-based release on AKS** — owning the Safe
Deployment Process (SDP) to customer clusters ourselves, instead of releasing through the
AKS team. It mirrors prometheus-collector's `aks/extension-charts` work.

> Scope is **AKS only** for now. Arc continues to use the existing rollout
> (`deployment/arc-k8s-extension*`). Modernizing the Arc rollout to the same
> central-artifacts model is a possible later step, out of scope here.

## Why
Today the AKS logs agent ships via **AgentBaker** (image baked into the AKS node VHD) +
a version bump in **aks-rp**, then rolls out on **AKS's** release train. We do not control
the cadence or region waves. The target model delivers `ama-logs` as a **cluster extension
chart published to MCR**; the AKS cluster-extension platform auto-installs/upgrades it, and
**our** pipeline owns the version, cadence and SDP waves — the same model the metrics
(ama-metrics) agent already uses.

See the session migration analysis for the full background (two delivery models:
AgentBaker+RP vs MCR-chart+extension).

## Components added in this repo
| Artifact | Purpose |
|---|---|
| `charts/azuremonitor-containers-extension/` | New parallel Helm chart delivering `ama-logs` as an **AKS managed-cluster extension**. Existing `charts/azuremonitor-containers` is untouched. |
| `.pipelines/azure-pipeline-aks-extension-managed-ev2-sdp.yml` | Central-artifacts Ev2 Managed-SDP rollout of `microsoft.azuremonitor.containers` to **AKS Managed** clusters. |
| `package_extension_chart` job in `azure_pipeline_mergedbranches.yaml` | Packages + pushes the extension chart to MCR. Gated on `BUILD_EXTENSION_CHART=true` during incubation. |

## End-to-end flow (target)
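1. Build (`azure_pipeline_mergedbranches.yaml`) builds multi-arch images and, with
   `BUILD_EXTENSION_CHART=true`, packages the chart and pushes it to the `cidev` chart
   path in ACR/MCR (`public/azuremonitor/containerinsights/cidev/ama-logs`).
2. Images are promoted to prod MCR by the existing agent Ev2 release. The rollout
   pipeline's `helmChartUrl` points at the `ciprod/ama-logs` path, so the chart must be
   available under that prod path as well before a rollout can succeed (the promotion
   mechanism for the chart is still to be confirmed).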
3. The AKS extension rollout pipeline registers the new chart version with the
cluster-extension platform via **central artifacts** (`Ev2RARollout@2` /
`CentralArtifactsRollout`), starting in canary (`centraluseuap,eastus2euap`).
4. The platform's **central SDP policy** advances the version region by region
(Canary -> Pilot -> ... -> HighAvailability) with bake times.
5. The in-cluster extension-manager pulls the chart from MCR and installs/upgrades the
agent automatically — no AgentBaker, no aks-rp version bump, no manual helm install.

## Per-release version handling
The rollout pipeline defaults the extension version to the upstream build `runName`.
Set `overrideExtensionVersion` to pin a specific validated build instead (e.g. for
rollback). For incubation off a feature branch you may pin a known-good `ci-prod` image
tag, mirroring prometheus-collector's `update-extension-dev` skill.

## Cross-team dependency (P0 — gates ACTIVATION, not authoring)
The **ClusterConfig / Cluster-Extensions partner team** must:
- register `microsoft.azuremonitor.containers` as an **AKS managed-cluster extension type**
(with our `ev2-agent-release` MSI in `msiClientIds`),
- create the AKS **`packageConfig`** name,
- confirm the **`serviceGroup`** name for central rollout.

All such values are marked `TODO(P0)` in the rollout pipeline. Note prometheus-collector
has the same open TODO for AKS — coordinate with the same partner-team contacts.

## Remaining repo work
- Complete the chart template rework (token adapter via values, prune base-chart Arc-only
resources, schedulability). See
`charts/azuremonitor-containers-extension/EXTENSION-MIGRATION.md`.
- After validation + P0: cut over, drop the AgentBaker PR + aks-rp version bump, and retire
the legacy CDPX build files (`.pipelines/pipeline.user.*.yml`, `pull-from-cdpx-*.sh`).
4 changes: 4 additions & 0 deletions charts/azuremonitor-containers-extension/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by envsubst from *-template.yaml at build time
Chart.yaml
values.yaml
*.tgz
21 changes: 21 additions & 0 deletions charts/azuremonitor-containers-extension/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
29 changes: 29 additions & 0 deletions charts/azuremonitor-containers-extension/Chart-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Template for Chart.yaml. The ${...} placeholders are filled in by envsubst at
# package time (see the package_extension_chart job in
# azure_pipeline_mergedbranches.yaml).
apiVersion: v2
type: application
# appVersion is the agent image tag this chart deploys; injected at package time.
appVersion: "${IMAGE_TAG}"
description: Helm chart for deploying the Azure Monitor Container Insights (ama-logs) agent as a managed cluster extension on AKS (Managed) clusters
name: azuremonitor-containers-extension
# Chart version is decoupled from appVersion so the extension/chart version can be
# stamped independently at package time (envsubst). See azure_pipeline_mergedbranches.yaml.
# Quoted so the substituted value always stays a YAML string, even if it is
# empty or looks like a number.
version: "${HELM_SEMVER}"
kubeVersion: "^1.10.0-0"
keywords:
  - monitoring
  - azuremonitor
  - azure
  - ama
  - containerinsights
  - logs
  - containerhealth
  - kubernetesmonitoring
  - kubernetes
home: https://docs.microsoft.com/en-us/azure/monitoring/monitoring-container-health
icon: https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/img/azuremonitor-containers.svg
sources:
  - https://github.com/microsoft/Docker-Provider/tree/ci_prod
maintainers:
  - name: vishiy
    email: visnara@microsoft.com
  - name: ganga1980
    email: gangams@microsoft.com
Loading
Loading