From 6eb60969d0cb6604db4919b29304b8db5be4c5a7 Mon Sep 17 00:00:00 2001 From: Koen Date: Thu, 19 Feb 2026 13:48:23 +0200 Subject: [PATCH 1/9] =?UTF-8?q?=F0=9F=92=9A=20automate=20indexer=20deploym?= =?UTF-8?q?ent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 227 ++++++++++++++++++ .../workflows/deploy-indexer-bluegreen.yaml | 176 ++++++++++++++ atp-indexer/src/api/handlers/sync-status.ts | 82 +++++++ atp-indexer/src/api/index.ts | 2 + .../src/api/routes/sync-status.routes.ts | 11 + scripts/init-deployment-state.sh | 114 +++++++++ staking-dashboard/bootstrap.sh | 21 +- staking-dashboard/terraform/data.tf | 1 + staking-dashboard/terraform/main.tf | 118 ++++++++- 9 files changed, 729 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/check-indexer-sync.yaml create mode 100644 .github/workflows/deploy-indexer-bluegreen.yaml create mode 100644 atp-indexer/src/api/handlers/sync-status.ts create mode 100644 atp-indexer/src/api/routes/sync-status.routes.ts create mode 100755 scripts/init-deployment-state.sh diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml new file mode 100644 index 000000000..50b85f897 --- /dev/null +++ b/.github/workflows/check-indexer-sync.yaml @@ -0,0 +1,227 @@ +name: Check Indexer Sync & Switchover + +# Cron workflow that checks if a pending blue-green switchover is ready. +# Runs every 30 minutes. When no switchover is pending, exits in <10s. +# When a pending switchover's slave indexer reports synced: +# 1. Updates the frontend CloudFront's indexer origin to point to the new live backend +# 2. Updates the S3 deployment state +# 3. 
Triggers a deploy of the old live backend (so both end up updated) + +on: + schedule: + - cron: '*/30 * * * *' + workflow_dispatch: + inputs: + environment: + description: "Check specific environment only (leave empty for all)" + required: false + type: choice + options: + - "" + - testnet + - prod + +permissions: + id-token: write + contents: read + actions: write + +jobs: + check-and-switch: + runs-on: ubuntu-latest + strategy: + matrix: + environment: [testnet, prod] + # For manual runs targeting a specific env, skip others + if: >- + github.event_name == 'schedule' || + inputs.environment == '' || + inputs.environment == matrix.environment + environment: ${{ matrix.environment }} + env: + STATE_BUCKET: aztec-token-sale-terraform-state + AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials with GitHub OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} + role-session-name: ${{ github.run_id }}-${{ matrix.environment }} + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + + - name: Read deployment state + id: state + run: | + STATE_KEY="deployment-state/${{ matrix.environment }}.json" + + if ! 
aws s3 cp "s3://${STATE_BUCKET}/${STATE_KEY}" /tmp/deploy-state.json 2>/dev/null; then + echo "No deployment state for ${{ matrix.environment }}, skipping" + echo "has_pending=false" >> $GITHUB_OUTPUT + exit 0 + fi + + PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) + if [ "$PENDING" = "null" ]; then + echo "No pending switchover for ${{ matrix.environment }}" + echo "has_pending=false" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "has_pending=true" >> $GITHUB_OUTPUT + + TARGET_COLOR=$(jq -r '.pending_switchover.target_color' /tmp/deploy-state.json) + STARTED_AT=$(jq -r '.pending_switchover.started_at' /tmp/deploy-state.json) + LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) + TARGET_CF_DOMAIN=$(jq -r ".colors.${TARGET_COLOR}.cf_domain" /tmp/deploy-state.json) + FRONTEND_DIST_ID=$(jq -r '.frontend_distribution_id' /tmp/deploy-state.json) + + echo "target_color=$TARGET_COLOR" >> $GITHUB_OUTPUT + echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT + echo "target_cf_domain=$TARGET_CF_DOMAIN" >> $GITHUB_OUTPUT + echo "frontend_dist_id=$FRONTEND_DIST_ID" >> $GITHUB_OUTPUT + echo "started_at=$STARTED_AT" >> $GITHUB_OUTPUT + + echo "Pending switchover: $LIVE_COLOR → $TARGET_COLOR (since $STARTED_AT)" + + - name: Check timeout (2 hours) + if: steps.state.outputs.has_pending == 'true' + id: timeout + run: | + STARTED_AT="${{ steps.state.outputs.started_at }}" + STARTED_EPOCH=$(date -d "$STARTED_AT" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$STARTED_AT" +%s) + NOW_EPOCH=$(date +%s) + ELAPSED=$(( NOW_EPOCH - STARTED_EPOCH )) + TIMEOUT=7200 # 2 hours + + if [ "$ELAPSED" -gt "$TIMEOUT" ]; then + echo "::error::Switchover timed out after $(( ELAPSED / 60 )) minutes" + echo "timed_out=true" >> $GITHUB_OUTPUT + + # Clear the pending switchover + jq '.pending_switchover = null' /tmp/deploy-state.json > /tmp/deploy-state-updated.json + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ + 
--content-type "application/json" + else + echo "Elapsed: $(( ELAPSED / 60 )) minutes (timeout: $(( TIMEOUT / 60 )) minutes)" + echo "timed_out=false" >> $GITHUB_OUTPUT + fi + + - name: Check slave sync status + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' + id: sync + run: | + TARGET_CF_DOMAIN="${{ steps.state.outputs.target_cf_domain }}" + SYNC_URL="https://${TARGET_CF_DOMAIN}/api/sync-status" + + echo "Checking sync status at: $SYNC_URL" + + HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 30 "$SYNC_URL" 2>/dev/null || echo -e "\n000") + HTTP_BODY=$(echo "$HTTP_RESPONSE" | head -n -1) + HTTP_CODE=$(echo "$HTTP_RESPONSE" | tail -n 1) + + echo "HTTP Status: $HTTP_CODE" + echo "Response: $HTTP_BODY" + + if [ "$HTTP_CODE" != "200" ]; then + echo "Sync endpoint not ready (HTTP $HTTP_CODE)" + echo "is_synced=false" >> $GITHUB_OUTPUT + exit 0 + fi + + IS_SYNCED=$(echo "$HTTP_BODY" | jq -r '.synced') + BEHIND=$(echo "$HTTP_BODY" | jq -r '.behindBlocks') + HAS_DATA=$(echo "$HTTP_BODY" | jq -r '.hasData') + + echo "Synced: $IS_SYNCED | Behind: $BEHIND blocks | Has data: $HAS_DATA" + + if [ "$IS_SYNCED" = "true" ]; then + echo "is_synced=true" >> $GITHUB_OUTPUT + else + echo "is_synced=false" >> $GITHUB_OUTPUT + fi + + - name: Switch frontend CloudFront indexer origin + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + FRONTEND_DIST_ID="${{ steps.state.outputs.frontend_dist_id }}" + NEW_ORIGIN="${{ steps.state.outputs.target_cf_domain }}" + + echo "Switching indexer origin on frontend CloudFront $FRONTEND_DIST_ID to: $NEW_ORIGIN" + + # Get current distribution config + aws cloudfront get-distribution-config --id "$FRONTEND_DIST_ID" --output json > /tmp/cf-config.json + ETAG=$(jq -r '.ETag' /tmp/cf-config.json) + + # Update the "indexerOrigin" origin's domain name (not the S3 origin) + jq --arg domain "$NEW_ORIGIN" \ + 
'(.DistributionConfig.Origins.Items[] | select(.Id == "indexerOrigin")).DomainName = $domain' \ + /tmp/cf-config.json | jq '.DistributionConfig' > /tmp/cf-config-updated.json + + # Apply the update + aws cloudfront update-distribution \ + --id "$FRONTEND_DIST_ID" \ + --distribution-config file:///tmp/cf-config-updated.json \ + --if-match "$ETAG" \ + --no-cli-pager + + # Invalidate /api/* cache + aws cloudfront create-invalidation \ + --distribution-id "$FRONTEND_DIST_ID" \ + --paths "/api/*" \ + --no-cli-pager + + echo "Frontend CloudFront indexer origin updated and /api/* cache invalidated" + + - name: Update deployment state + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + NEW_LIVE="${{ steps.state.outputs.target_color }}" + + jq --arg live "$NEW_LIVE" \ + '.live_color = $live | .pending_switchover = null' \ + /tmp/deploy-state.json > /tmp/deploy-state-updated.json + + echo "Updated state:" + cat /tmp/deploy-state-updated.json + + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ + --content-type "application/json" + + - name: Trigger deploy to old live (now slave) + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + uses: actions/github-script@v7 + with: + script: | + const oldLive = '${{ steps.state.outputs.live_color }}'; + const env = '${{ matrix.environment }}'; + + console.log(`Triggering deploy to old live (${oldLive}) for ${env}`); + + await github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'deploy-indexer.yaml', + ref: 'main', + inputs: { + environment: env, + green: oldLive === 'green' ? 
'true' : 'false', + dry_run: 'false' + } + }); + + console.log(`Deploy triggered for ${env} ${oldLive}`); + + - name: Write summary + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + echo "### Switchover Complete" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** ${{ matrix.environment }}" >> $GITHUB_STEP_SUMMARY + echo "- **New live:** ${{ steps.state.outputs.target_color }}" >> $GITHUB_STEP_SUMMARY + echo "- **Old live (${{ steps.state.outputs.live_color }}):** deploy triggered to update" >> $GITHUB_STEP_SUMMARY + echo "- **Frontend CloudFront:** ${{ steps.state.outputs.frontend_dist_id }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml new file mode 100644 index 000000000..c73180954 --- /dev/null +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -0,0 +1,176 @@ +name: Deploy Indexer (Blue-Green) + +# Automated blue-green deployment for the ATP indexer. +# Reads deployment state from S3 to determine which color is the slave, +# deploys the indexer to the slave, and marks a switchover as pending. +# The check-indexer-sync.yaml cron workflow handles the actual switchover +# once the slave finishes re-indexing. 
+ +on: + workflow_dispatch: + inputs: + environment: + description: "Environment to deploy to" + required: true + default: "testnet" + type: choice + options: + - testnet + - prod + dry_run: + description: "Whether to run a dry run (plan only)" + required: false + default: false + type: boolean + force: + description: "Force deploy even if a switchover is already pending" + required: false + default: false + type: boolean + +permissions: + id-token: write + contents: read + actions: read + +jobs: + deploy-to-slave: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + ENV: ${{ inputs.environment }} + DRY_RUN: ${{ inputs.dry_run }} + FORCE: ${{ inputs.force }} + + # AWS Configuration + AWS_ACCOUNT: ${{ secrets.AWS_ACCOUNT }} + AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + + # Network Configuration + RPC_URL: ${{ secrets.RPC_URL }} + CHAIN_ID: ${{ vars.CHAIN_ID }} + SEPOLIA_RPC_URL: ${{ secrets.RPC_URL }} + TESTNET_RPC_URL: ${{ secrets.RPC_URL }} + + # Contract Addresses (from GitHub environment variables) + ATP_FACTORY_ADDRESS: ${{ vars.ATP_FACTORY_ADDRESS }} + ATP_FACTORY_AUCTION_ADDRESS: ${{ vars.ATP_FACTORY_AUCTION_ADDRESS }} + ATP_REGISTRY_ADDRESS: ${{ vars.ATP_REGISTRY_ADDRESS }} + ATP_REGISTRY_AUCTION_ADDRESS: ${{ vars.ATP_REGISTRY_AUCTION_ADDRESS }} + STAKING_REGISTRY_ADDRESS: ${{ vars.STAKING_REGISTRY_ADDRESS }} + ROLLUP_ADDRESS: ${{ vars.ROLLUP_ADDRESS }} + START_BLOCK: ${{ vars.ATP_FACTORY_DEPLOYMENT_BLOCK }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'yarn' + cache-dependency-path: atp-indexer/yarn.lock + + - name: Install Foundry + uses: foundry-rs/foundry-toolchain@v1 + + - name: Install Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Mark repo as safe + run: git config --global --add safe.directory $GITHUB_WORKSPACE + + - name: Configure AWS credentials with GitHub OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + 
role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} + role-session-name: ${{ github.run_id }} + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + + - name: Verify AWS identity + run: aws sts get-caller-identity + + - name: Read deployment state from S3 + id: state + run: | + STATE_KEY="deployment-state/${{ inputs.environment }}.json" + STATE_BUCKET="aztec-token-sale-terraform-state" + + if aws s3 cp "s3://${STATE_BUCKET}/${STATE_KEY}" /tmp/deploy-state.json 2>/dev/null; then + echo "Found existing deployment state" + cat /tmp/deploy-state.json + + LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) + PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) + + # Determine slave color + if [ "$LIVE_COLOR" = "red" ]; then + SLAVE_COLOR="green" + else + SLAVE_COLOR="red" + fi + + echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT + echo "slave_color=$SLAVE_COLOR" >> $GITHUB_OUTPUT + echo "has_pending=$([ "$PENDING" != "null" ] && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT + else + echo "::error::No deployment state found. Run scripts/init-deployment-state.sh first." + exit 1 + fi + + - name: Check for pending switchover + if: steps.state.outputs.has_pending == 'true' && inputs.force == false + run: | + echo "::error::A switchover is already pending. Use force=true to override." 
+ echo "Current state:" + cat /tmp/deploy-state.json + exit 1 + + - name: Deploy indexer to slave (${{ steps.state.outputs.slave_color }}) + working-directory: atp-indexer + run: | + SLAVE="${{ steps.state.outputs.slave_color }}" + echo "Deploying to ${{ inputs.environment }} ($SLAVE)" + + if [ "${{ inputs.environment }}" = "testnet" ]; then + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh deploy-testnet-green + else + ./bootstrap.sh deploy-testnet + fi + elif [ "${{ inputs.environment }}" = "prod" ]; then + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh deploy-prod-green + else + ./bootstrap.sh deploy-prod + fi + fi + + - name: Update deployment state with pending switchover + if: inputs.dry_run == false + run: | + SLAVE="${{ steps.state.outputs.slave_color }}" + NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + + # Update state with pending switchover + jq --arg target "$SLAVE" \ + --arg started "$NOW" \ + --arg sha "${{ github.sha }}" \ + '.pending_switchover = { target_color: $target, started_at: $started, commit_sha: $sha }' \ + /tmp/deploy-state.json > /tmp/deploy-state-updated.json + + echo "Updated deployment state:" + cat /tmp/deploy-state-updated.json + + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://aztec-token-sale-terraform-state/deployment-state/${{ inputs.environment }}.json" \ + --content-type "application/json" + + echo "### Deployment Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY + echo "- **Deployed to:** $SLAVE (slave)" >> $GITHUB_STEP_SUMMARY + echo "- **Live:** ${{ steps.state.outputs.live_color }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "The check-indexer-sync cron will monitor and switch over once indexing completes." 
>> $GITHUB_STEP_SUMMARY diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts new file mode 100644 index 000000000..ae072ab72 --- /dev/null +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -0,0 +1,82 @@ +import type { Context } from 'hono'; +import { db } from 'ponder:api'; +import { max, count } from 'drizzle-orm'; +import { deposit, provider, atpPosition } from 'ponder:schema'; +import { getPublicClient } from '../../utils/viem-client'; + +interface SyncStatusResponse { + synced: boolean; + indexedBlock: number; + chainHead: number; + behindBlocks: number; + hasData: boolean; + timestamp: string; +} + +const SYNC_THRESHOLD_BLOCKS = 50; + +/** + * Handle GET /api/sync-status + * Returns the indexer's sync status by comparing the latest indexed block to the chain head. + * Used by the blue-green deployment cron to determine when a slave indexer has caught up. + */ +export async function handleSyncStatus(c: Context): Promise { + try { + const client = getPublicClient(); + + const [ + chainHeadBlock, + depositMaxBlock, + providerMaxBlock, + atpMaxBlock, + providerCountResult, + ] = await Promise.all([ + client.getBlockNumber(), + db.select({ maxBlock: max(deposit.blockNumber) }).from(deposit), + db.select({ maxBlock: max(provider.blockNumber) }).from(provider), + db.select({ maxBlock: max(atpPosition.blockNumber) }).from(atpPosition), + db.select({ count: count() }).from(provider), + ]); + + const chainHead = Number(chainHeadBlock); + + // Take the highest block number across all tables + const maxBlocks = [ + depositMaxBlock[0]?.maxBlock, + providerMaxBlock[0]?.maxBlock, + atpMaxBlock[0]?.maxBlock, + ] + .filter((b): b is bigint => b !== null && b !== undefined) + .map(Number); + + const indexedBlock = maxBlocks.length > 0 ? 
Math.max(...maxBlocks) : 0; + const hasData = Number(providerCountResult[0].count) > 0; + const behindBlocks = chainHead - indexedBlock; + const synced = behindBlocks < SYNC_THRESHOLD_BLOCKS && hasData; + + const response: SyncStatusResponse = { + synced, + indexedBlock, + chainHead, + behindBlocks, + hasData, + timestamp: new Date().toISOString(), + }; + + return c.json(response); + } catch (error) { + console.error('Sync status check failed:', error); + return c.json( + { + synced: false, + indexedBlock: 0, + chainHead: 0, + behindBlocks: -1, + hasData: false, + timestamp: new Date().toISOString(), + error: 'Failed to check sync status', + }, + 500 + ); + } +} diff --git a/atp-indexer/src/api/index.ts b/atp-indexer/src/api/index.ts index 40a61972f..8e5dd11dc 100644 --- a/atp-indexer/src/api/index.ts +++ b/atp-indexer/src/api/index.ts @@ -6,6 +6,7 @@ import { healthRoutes } from './routes/health.routes'; import { providerRoutes } from './routes/provider.routes'; import { stakingRoutes } from './routes/staking.routes'; import { atpRoutes } from './routes/atp.routes'; +import { syncStatusRoutes } from './routes/sync-status.routes'; import { config } from '../config'; /** @@ -32,6 +33,7 @@ app.route('/api/health', healthRoutes); app.route('/api/providers', providerRoutes); app.route('/api/staking', stakingRoutes); app.route('/api/atp', atpRoutes); +app.route('/api/sync-status', syncStatusRoutes); app.notFound((c) => { return c.json({ error: 'Not found' }, 404); diff --git a/atp-indexer/src/api/routes/sync-status.routes.ts b/atp-indexer/src/api/routes/sync-status.routes.ts new file mode 100644 index 000000000..c1b461fff --- /dev/null +++ b/atp-indexer/src/api/routes/sync-status.routes.ts @@ -0,0 +1,11 @@ +import { Hono } from 'hono'; +import { handleSyncStatus } from '../handlers/sync-status'; +import { healthCheckLimiter } from '../middleware/rate-limit'; + +export const syncStatusRoutes = new Hono(); + +/** + * GET /api/sync-status + * Returns indexer sync status 
for blue-green deployment automation + */ +syncStatusRoutes.get('/', healthCheckLimiter, handleSyncStatus); diff --git a/scripts/init-deployment-state.sh b/scripts/init-deployment-state.sh new file mode 100755 index 000000000..b8da331de --- /dev/null +++ b/scripts/init-deployment-state.sh @@ -0,0 +1,114 @@ +#!/bin/bash +set -eu + +# Initialize the S3 deployment state files for blue-green indexer deployments. +# Run this once per environment before using the deploy-indexer-bluegreen workflow. +# +# Prerequisites: +# - AWS CLI configured with access to the terraform state bucket +# - Frontend staking-dashboard Terraform already applied +# - Both red and green indexer deployments exist +# +# Usage: +# ./scripts/init-deployment-state.sh +# +# Examples: +# ./scripts/init-deployment-state.sh testnet red +# ./scripts/init-deployment-state.sh prod red + +ROOT=$(git rev-parse --show-toplevel) +source "$ROOT/scripts/logging.sh" + +ENVIRONMENT=${1:-""} +LIVE_COLOR=${2:-""} +STATE_BUCKET="aztec-token-sale-terraform-state" + +if [ -z "$ENVIRONMENT" ] || [ -z "$LIVE_COLOR" ]; then + echo "Usage: $0 " + echo "" + echo " environment: testnet or prod" + echo " live_color: red or green (which color is currently serving traffic)" + echo "" + echo "Examples:" + echo " $0 testnet red" + echo " $0 prod red" + exit 1 +fi + +if [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then + echo "Error: Environment must be 'testnet' or 'prod'" + exit 1 +fi + +if [ "$LIVE_COLOR" != "red" ] && [ "$LIVE_COLOR" != "green" ]; then + echo "Error: Live color must be 'red' or 'green'" + exit 1 +fi + +log_step "Initializing deployment state for $ENVIRONMENT (live: $LIVE_COLOR)" + +# Get CloudFront domain names from indexer terraform states +log_step "Reading red indexer CloudFront domain from terraform state..." 
+RED_STATE_KEY="${ENVIRONMENT}/backends/atp-indexer/terraform.tfstate" +RED_CF_DOMAIN=$(aws s3 cp "s3://${STATE_BUCKET}/${RED_STATE_KEY}" - | \ + jq -r '.outputs.cf_domain_name.value // empty') + +if [ -z "$RED_CF_DOMAIN" ]; then + echo "Error: Could not read red indexer CloudFront domain from state" + echo "Make sure the red indexer has been deployed for $ENVIRONMENT" + exit 1 +fi +echo " Red CF domain: $RED_CF_DOMAIN" + +log_step "Reading green indexer CloudFront domain from terraform state..." +GREEN_STATE_KEY="${ENVIRONMENT}-green/backends/atp-indexer/terraform.tfstate" +GREEN_CF_DOMAIN=$(aws s3 cp "s3://${STATE_BUCKET}/${GREEN_STATE_KEY}" - | \ + jq -r '.outputs.cf_domain_name.value // empty') + +if [ -z "$GREEN_CF_DOMAIN" ]; then + echo "Error: Could not read green indexer CloudFront domain from state" + echo "Make sure the green indexer has been deployed for $ENVIRONMENT" + exit 1 +fi +echo " Green CF domain: $GREEN_CF_DOMAIN" + +# Get frontend CloudFront distribution ID +log_step "Reading frontend CloudFront distribution ID from terraform state..." +FRONTEND_STATE_KEY="${ENVIRONMENT}-aztec-staking-dashboard/terraform.tfstate" +FRONTEND_DIST_ID=$(aws s3 cp "s3://${STATE_BUCKET}/${FRONTEND_STATE_KEY}" - | \ + jq -r '.outputs.staking_dashboard_distribution_id.value // empty') + +if [ -z "$FRONTEND_DIST_ID" ]; then + echo "Error: Could not read frontend distribution ID from state." 
+ echo "Make sure the staking-dashboard Terraform has been applied for $ENVIRONMENT" + exit 1 +fi +echo " Frontend distribution ID: $FRONTEND_DIST_ID" + +# Create state file +STATE_FILE="/tmp/deploy-state-${ENVIRONMENT}.json" +cat > "$STATE_FILE" << EOF +{ + "live_color": "$LIVE_COLOR", + "frontend_distribution_id": "$FRONTEND_DIST_ID", + "colors": { + "red": { "cf_domain": "$RED_CF_DOMAIN" }, + "green": { "cf_domain": "$GREEN_CF_DOMAIN" } + }, + "pending_switchover": null +} +EOF + +echo "" +log_step "State file contents:" +cat "$STATE_FILE" +echo "" + +# Upload to S3 +STATE_KEY="deployment-state/${ENVIRONMENT}.json" +log_step "Uploading to s3://${STATE_BUCKET}/${STATE_KEY}..." +aws s3 cp "$STATE_FILE" "s3://${STATE_BUCKET}/${STATE_KEY}" --content-type "application/json" + +log_success "Deployment state initialized for $ENVIRONMENT (live: $LIVE_COLOR)" +echo "" +echo "You can now use the 'Deploy Indexer (Blue-Green)' workflow in GitHub Actions." diff --git a/staking-dashboard/bootstrap.sh b/staking-dashboard/bootstrap.sh index bf300d340..fb99e0c08 100755 --- a/staking-dashboard/bootstrap.sh +++ b/staking-dashboard/bootstrap.sh @@ -93,23 +93,20 @@ function update_env_file() { if [ -z "${VITE_API_HOST:-}" ]; then log_step "Updating VITE_API_HOST" - if [ "$environment" = "staging" ]; then + if [ "$environment" = "prod" ]; then + # Same-domain API — /api/* is routed to the live indexer by CloudFront. + # No need to distinguish red/green; the blue-green cron handles origin switching. 
+ VITE_API_HOST="https://stake.aztec.network" + elif [ "$environment" = "testnet" ]; then + # Same-domain API for testnet + VITE_API_HOST="https://testnet.stake.aztec.network" + elif [ "$environment" = "staging" ]; then if [ "$green" = "green" ]; then - # staging green deployment VITE_API_HOST="https://d1lzkj24db7400.cloudfront.net" else - # staging red deployment VITE_API_HOST="https://d24imfdgeak2db.cloudfront.net" fi - elif [ "$environment" = "prod" ]; then - if [ "$green" = "green" ]; then - # prod green deployment - VITE_API_HOST="https://dgk9duhuxabbq.cloudfront.net" - else - # prod red deployment - VITE_API_HOST="https://d10cun7h2qqnvc.cloudfront.net" - fi - else + else VITE_API_HOST="http://localhost:42068" fi fi diff --git a/staking-dashboard/terraform/data.tf b/staking-dashboard/terraform/data.tf index 0e18ed5b5..e99b5dfc7 100644 --- a/staking-dashboard/terraform/data.tf +++ b/staking-dashboard/terraform/data.tf @@ -20,6 +20,7 @@ data "terraform_remote_state" "shared" { # Local references to backend service URLs locals { atp_indexer_url = "https://${data.terraform_remote_state.atp-indexer.outputs.cf_domain_name}" + atp_indexer_cf_domain = data.terraform_remote_state.atp-indexer.outputs.cf_domain_name cloudfront_logs_bucket = try(data.terraform_remote_state.shared.outputs.cloudfront_logs_bucket_domain_name, "") } diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 7bfab8206..183a08967 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -180,20 +180,107 @@ resource "aws_cloudfront_function" "basic_auth_staking_dashboard" { }) } +# CloudFront function for SPA routing — rewrites non-file URIs to /index.html. +# This replaces the 404 custom_error_response so that API 404s pass through correctly +# (custom_error_response is distribution-wide and would swallow API errors). 
+resource "aws_cloudfront_function" "spa_routing" { + name = "${var.env}-aztec-staking-dashboard-spa-routing" + runtime = "cloudfront-js-2.0" + comment = "SPA routing: rewrite non-file paths to /index.html" + + code = <<-EOF + function handler(event) { + var request = event.request; + var uri = request.uri; + + // If the URI has a file extension (e.g. .js, .css, .png), serve it as-is. + // Otherwise rewrite to /index.html for SPA client-side routing. + if (!uri.includes('.')) { + request.uri = '/index.html'; + } + + return request; + } + EOF +} + +# CORS response headers policy for the /api/* behavior +resource "aws_cloudfront_response_headers_policy" "api_cors" { + name = "${var.env}-staking-dashboard-api-cors" + + cors_config { + access_control_allow_credentials = false + + access_control_allow_headers { + items = ["Content-Type", "Origin", "Accept", "X-Requested-With"] + } + + access_control_allow_methods { + items = ["GET", "OPTIONS", "HEAD"] + } + + access_control_allow_origins { + items = ["*"] + } + + access_control_expose_headers { + items = ["Content-Type"] + } + + access_control_max_age_sec = 86400 + origin_override = true + } +} + resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { enabled = true default_root_object = "index.html" web_acl_id = module.website_waf.web_acl_arn - + # Use custom domain with certificate aliases = var.env == "prod" ? ["stake.aztec.network"] : var.env == "testnet" ? ["testnet.stake.aztec.network"] : [] + # Origin 1: S3 bucket for static frontend assets origin { domain_name = aws_s3_bucket.staking_dashboard_bucket.bucket_regional_domain_name origin_id = "stakingDashboardS3Origin" origin_access_control_id = aws_cloudfront_origin_access_control.oac-staking-dashboard.id } + # Origin 2: Live indexer CloudFront (proxied for /api/* requests). + # The blue-green cron workflow updates this origin's domain via AWS CLI + # when switching between red/green indexers. 
+ origin { + domain_name = local.atp_indexer_cf_domain + origin_id = "indexerOrigin" + + custom_origin_config { + http_port = 80 + https_port = 443 + origin_protocol_policy = "https-only" + origin_ssl_protocols = ["TLSv1.2"] + } + } + + # /api/* requests → indexer origin + ordered_cache_behavior { + path_pattern = "/api/*" + allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] + cached_methods = ["GET", "HEAD", "OPTIONS"] + target_origin_id = "indexerOrigin" + + viewer_protocol_policy = "redirect-to-https" + + # CachingDisabled — the per-color indexer CloudFront handles caching + cache_policy_id = "4135ea2d-6df8-44a3-9df3-4b5a84be39ad" + + # AllViewer — forward all headers to origin + origin_request_policy_id = "216adef6-5c7f-47e4-b989-5492eafa07d3" + + response_headers_policy_id = aws_cloudfront_response_headers_policy.api_cors.id + } + + # Default: S3 static frontend assets default_cache_behavior { allowed_methods = ["GET", "HEAD", "OPTIONS"] cached_methods = ["GET", "HEAD"] @@ -208,10 +295,17 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { forward = "none" } } - + + # SPA routing: rewrite non-file paths to /index.html so client-side + # routing works on page refresh. This replaces the old 404 custom_error_response + # which was distribution-wide and would have swallowed API 404s. 
+ function_association { + event_type = "viewer-request" + function_arn = aws_cloudfront_function.spa_routing.arn + } } - # Redirect to blocked.html for 403 errors + # Redirect to blocked.html for 403 errors (geo-blocking) custom_error_response { error_code = 403 response_code = 403 @@ -219,14 +313,9 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { error_caching_min_ttl = 0 } - # Redirect to index.html for 404 errors - # This is to handle the case where the user is on a route and refreshes the page - custom_error_response { - error_code = 404 - response_code = 200 - response_page_path = "/index.html" - error_caching_min_ttl = 0 - } + # NOTE: The 404 custom_error_response was removed because it's distribution-wide + # and would intercept API 404s (returning index.html instead of JSON errors). + # SPA routing is now handled by the spa_routing CloudFront Function above. restrictions { geo_restriction { @@ -250,6 +339,13 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { prefix = "frontend/staking-dashboard/" } } + + # The indexer origin domain is updated by the blue-green cron via AWS CLI. + # Ignore origin changes so Terraform doesn't revert the switchover. + # The S3 origin never changes so this is safe. 
+ lifecycle { + ignore_changes = [origin] + } } # From 63f7e8c2de8db3a1fe214f7ad8bdac7aa8c10f2d Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 12:09:33 +0200 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=92=9A=20add=20dev=20&=20staging=20to?= =?UTF-8?q?=20CI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 4 ++- .../workflows/deploy-indexer-bluegreen.yaml | 21 +++++------- .github/workflows/deploy-indexer.yaml | 19 +++-------- atp-indexer/bootstrap.sh | 33 ++++++++++++++++--- scripts/init-deployment-state.sh | 10 ++++-- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 50b85f897..6e2d87885 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -18,6 +18,8 @@ on: type: choice options: - "" + - dev + - staging - testnet - prod @@ -31,7 +33,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - environment: [testnet, prod] + environment: [dev, staging, testnet, prod] # For manual runs targeting a specific env, skip others if: >- github.event_name == 'schedule' || diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index c73180954..54f21c725 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -15,6 +15,8 @@ on: default: "testnet" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -130,20 +132,13 @@ jobs: working-directory: atp-indexer run: | SLAVE="${{ steps.state.outputs.slave_color }}" - echo "Deploying to ${{ inputs.environment }} ($SLAVE)" + ENV="${{ inputs.environment }}" + echo "Deploying to ${ENV} ($SLAVE)" - if [ "${{ inputs.environment }}" = "testnet" ]; then - if [ "$SLAVE" = "green" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - 
fi - elif [ "${{ inputs.environment }}" = "prod" ]; then - if [ "$SLAVE" = "green" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh "deploy-${ENV}-green" + else + ./bootstrap.sh "deploy-${ENV}" fi - name: Update deployment state with pending switchover diff --git a/.github/workflows/deploy-indexer.yaml b/.github/workflows/deploy-indexer.yaml index 0ab6d1dac..f492c559d 100644 --- a/.github/workflows/deploy-indexer.yaml +++ b/.github/workflows/deploy-indexer.yaml @@ -9,6 +9,8 @@ on: default: "testnet" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -94,19 +96,8 @@ jobs: - name: Deploy working-directory: atp-indexer run: | - if [ "$ENV" = "testnet" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - fi - elif [ "$ENV" = "prod" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi + if [ "$GREEN" = "true" ]; then + ./bootstrap.sh "deploy-${ENV}-green" else - echo "Unknown environment: $ENV" - exit 1 + ./bootstrap.sh "deploy-${ENV}" fi diff --git a/atp-indexer/bootstrap.sh b/atp-indexer/bootstrap.sh index 5d906d5fa..cb93a5b03 100755 --- a/atp-indexer/bootstrap.sh +++ b/atp-indexer/bootstrap.sh @@ -317,9 +317,16 @@ function deploy() { CHAIN_ID=11155111 chain_environment="sepolia_testnet" infra_parent_environment="dev" - fi - - if [ "$infra_environment" = "prod" ]; then + elif [ "$infra_environment" = "dev" ] || [ "$infra_environment" = "staging" ]; then + if [ -z "${RPC_URL:-}" ]; then + echo "Error: RPC_URL must be set" + exit 1 + fi + RPC_URL=$RPC_URL + CHAIN_ID=1 + chain_environment="prod" + infra_parent_environment="dev" + elif [ "$infra_environment" = "prod" ]; then if [ -z "${RPC_URL:-}" ]; then echo "Error: RPC_URL must be set" exit 1 @@ -421,6 +428,18 @@ case $ACTION in build) build ;; + deploy-dev) + deploy "dev" + ;; + 
deploy-dev-green) + deploy "dev" "-g" + ;; + deploy-staging) + deploy "staging" + ;; + deploy-staging-green) + deploy "staging" "-g" + ;; deploy-testnet) deploy "testnet" ;; @@ -439,8 +458,14 @@ case $ACTION in echo "Actions:" echo " dev Start development server" echo " build Install deps, generate providers & types" + echo " deploy-dev Deploy to dev" + echo " deploy-dev-green Deploy to dev (green)" + echo " deploy-staging Deploy to staging" + echo " deploy-staging-green Deploy to staging (green)" echo " deploy-testnet Deploy to testnet" - echo " deploy-prod Deploy to prod" + echo " deploy-testnet-green Deploy to testnet (green)" + echo " deploy-prod Deploy to prod" + echo " deploy-prod-green Deploy to prod (green)" echo " help Show this help" echo "" echo "Environments:" diff --git a/scripts/init-deployment-state.sh b/scripts/init-deployment-state.sh index b8da331de..2a00947f7 100755 --- a/scripts/init-deployment-state.sh +++ b/scripts/init-deployment-state.sh @@ -13,6 +13,8 @@ set -eu # ./scripts/init-deployment-state.sh # # Examples: +# ./scripts/init-deployment-state.sh dev red +# ./scripts/init-deployment-state.sh staging red # ./scripts/init-deployment-state.sh testnet red # ./scripts/init-deployment-state.sh prod red @@ -26,17 +28,19 @@ STATE_BUCKET="aztec-token-sale-terraform-state" if [ -z "$ENVIRONMENT" ] || [ -z "$LIVE_COLOR" ]; then echo "Usage: $0 " echo "" - echo " environment: testnet or prod" + echo " environment: dev, staging, testnet, or prod" echo " live_color: red or green (which color is currently serving traffic)" echo "" echo "Examples:" + echo " $0 dev red" + echo " $0 staging red" echo " $0 testnet red" echo " $0 prod red" exit 1 fi -if [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then - echo "Error: Environment must be 'testnet' or 'prod'" +if [ "$ENVIRONMENT" != "dev" ] && [ "$ENVIRONMENT" != "staging" ] && [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then + echo "Error: Environment must be 'dev', 
'staging', 'testnet', or 'prod'" exit 1 fi From 0647aa400eb488ad3201fb62d993d07d2307c803 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 12:14:52 +0200 Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=92=9A=20fix=20if=20condition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 6e2d87885..2963a4cba 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -34,20 +34,32 @@ jobs: strategy: matrix: environment: [dev, staging, testnet, prod] - # For manual runs targeting a specific env, skip others - if: >- - github.event_name == 'schedule' || - inputs.environment == '' || - inputs.environment == matrix.environment environment: ${{ matrix.environment }} env: STATE_BUCKET: aztec-token-sale-terraform-state AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} steps: + # For manual runs targeting a specific env, skip others + - name: Check if this environment is targeted + id: should-run + run: | + EVENT="${{ github.event_name }}" + TARGET="${{ inputs.environment }}" + CURRENT="${{ matrix.environment }}" + + if [ "$EVENT" = "schedule" ] || [ -z "$TARGET" ] || [ "$TARGET" = "$CURRENT" ]; then + echo "run=true" >> $GITHUB_OUTPUT + else + echo "Skipping $CURRENT (targeted: $TARGET)" + echo "run=false" >> $GITHUB_OUTPUT + fi + - uses: actions/checkout@v4 + if: steps.should-run.outputs.run == 'true' - name: Configure AWS credentials with GitHub OIDC + if: steps.should-run.outputs.run == 'true' uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} @@ -55,6 +67,7 @@ jobs: aws-region: ${{ secrets.AWS_DEFAULT_REGION }} - name: Read deployment state + if: steps.should-run.outputs.run == 'true' id: state run: | 
STATE_KEY="deployment-state/${{ matrix.environment }}.json" From 27a7df861e34180c76d215cdb496448886e7e882 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 13:39:27 +0200 Subject: [PATCH 4/9] =?UTF-8?q?=F0=9F=93=9D=20=20add=20deployment=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 6 +- .../workflows/deploy-indexer-bluegreen.yaml | 30 +-- DEPLOYMENT.md | 192 ++++++++++++++++++ atp-indexer/src/api/handlers/sync-status.ts | 2 +- 4 files changed, 211 insertions(+), 19 deletions(-) create mode 100644 DEPLOYMENT.md diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 2963a4cba..57b2f7c46 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -2,7 +2,7 @@ name: Check Indexer Sync & Switchover # Cron workflow that checks if a pending blue-green switchover is ready. # Runs every 30 minutes. When no switchover is pending, exits in <10s. -# When a pending switchover's slave indexer reports synced: +# When a pending switchover's backup indexer reports synced: # 1. Updates the frontend CloudFront's indexer origin to point to the new live backend # 2. Updates the S3 deployment state # 3. 
Triggers a deploy of the old live backend (so both end up updated) @@ -125,7 +125,7 @@ jobs: echo "timed_out=false" >> $GITHUB_OUTPUT fi - - name: Check slave sync status + - name: Check backup sync status if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' id: sync run: | @@ -207,7 +207,7 @@ jobs: "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ --content-type "application/json" - - name: Trigger deploy to old live (now slave) + - name: Trigger deploy to old live (now backup) if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' uses: actions/github-script@v7 with: diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index 54f21c725..b657671ec 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -1,10 +1,10 @@ name: Deploy Indexer (Blue-Green) # Automated blue-green deployment for the ATP indexer. -# Reads deployment state from S3 to determine which color is the slave, -# deploys the indexer to the slave, and marks a switchover as pending. +# Reads deployment state from S3 to determine which color is the backup, +# deploys the indexer to the backup, and marks a switchover as pending. # The check-indexer-sync.yaml cron workflow handles the actual switchover -# once the slave finishes re-indexing. +# once the backup finishes re-indexing. 
on: workflow_dispatch: @@ -36,7 +36,7 @@ permissions: actions: read jobs: - deploy-to-slave: + deploy-to-backup: runs-on: ubuntu-latest environment: ${{ inputs.environment }} env: @@ -105,15 +105,15 @@ jobs: LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) - # Determine slave color + # Determine backup color if [ "$LIVE_COLOR" = "red" ]; then - SLAVE_COLOR="green" + BACKUP_COLOR="green" else - SLAVE_COLOR="red" + BACKUP_COLOR="red" fi echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT - echo "slave_color=$SLAVE_COLOR" >> $GITHUB_OUTPUT + echo "backup_color=$BACKUP_COLOR" >> $GITHUB_OUTPUT echo "has_pending=$([ "$PENDING" != "null" ] && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT else echo "::error::No deployment state found. Run scripts/init-deployment-state.sh first." @@ -128,14 +128,14 @@ jobs: cat /tmp/deploy-state.json exit 1 - - name: Deploy indexer to slave (${{ steps.state.outputs.slave_color }}) + - name: Deploy indexer to backup (${{ steps.state.outputs.backup_color }}) working-directory: atp-indexer run: | - SLAVE="${{ steps.state.outputs.slave_color }}" + BACKUP="${{ steps.state.outputs.backup_color }}" ENV="${{ inputs.environment }}" - echo "Deploying to ${ENV} ($SLAVE)" + echo "Deploying to ${ENV} ($BACKUP)" - if [ "$SLAVE" = "green" ]; then + if [ "$BACKUP" = "green" ]; then ./bootstrap.sh "deploy-${ENV}-green" else ./bootstrap.sh "deploy-${ENV}" @@ -144,11 +144,11 @@ jobs: - name: Update deployment state with pending switchover if: inputs.dry_run == false run: | - SLAVE="${{ steps.state.outputs.slave_color }}" + BACKUP="${{ steps.state.outputs.backup_color }}" NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # Update state with pending switchover - jq --arg target "$SLAVE" \ + jq --arg target "$BACKUP" \ --arg started "$NOW" \ --arg sha "${{ github.sha }}" \ '.pending_switchover = { target_color: $target, started_at: $started, commit_sha: $sha }' \ @@ -164,7 +164,7 @@ jobs: echo "### 
Deployment Summary" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "- **Environment:** ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY - echo "- **Deployed to:** $SLAVE (slave)" >> $GITHUB_STEP_SUMMARY + echo "- **Deployed to:** $BACKUP (backup)" >> $GITHUB_STEP_SUMMARY echo "- **Live:** ${{ steps.state.outputs.live_color }}" >> $GITHUB_STEP_SUMMARY echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 000000000..316edcced --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,192 @@ +# Deployment + +This repo contains two deployable components: + +1. **Staking Dashboard** (`staking-dashboard/`) — React frontend served from S3 via CloudFront +2. **ATP Indexer** (`atp-indexer/`) — Ponder blockchain indexer running on ECS Fargate + +Both are deployed per-environment (`dev`, `staging`, `testnet`, `prod`). The indexer has two instances per environment — **red** and **green** — to enable zero-downtime deployments. + +## Architecture + +``` + stake.aztec.network + │ + ┌──────┴──────┐ + │ CloudFront │ + │ Distribution│ + └──┬───────┬──┘ + │ │ + /static │ │ /api/* + │ │ + ┌──────┴──┐ ┌─┴─────────────┐ + │ S3 │ │ indexerOrigin │ ← points to live color + │ Bucket │ │ (CF domain) │ + └─────────┘ └──────┬────────┘ + │ + ┌────────────┴────────────┐ + │ │ + ┌──────┴──────┐ ┌───────┴─────┐ + │ Red CF │ │ Green CF │ + └──────┬──────┘ └──────┬──────┘ + ┌──────┴──────┐ ┌──────┴──────┐ + │ Red ALB │ │ Green ALB │ + └──────┬──────┘ └──────┴──────┘ + │ │ + Red ECS Green ECS + (indexer+server) (indexer+server) +``` + +The frontend CloudFront distribution has two origins: +- **S3** for static assets (default behavior) +- **indexerOrigin** for `/api/*` requests, pointing to whichever indexer color is live + +This means the frontend always uses its own domain for API calls (`/api/*`). Indexer switchovers only update the CloudFront origin — no frontend redeploy needed. 
+ +## Environments + +| Environment | Chain | AWS Cluster | Domain | Branch restriction | +|-------------|----------|-------------|---------------------------------|--------------------| +| `dev` | Mainnet | dev | — | None (any PR) | +| `staging` | Mainnet | dev | — | None | +| `testnet` | Sepolia | dev | `testnet.stake.aztec.network` | None | +| `prod` | Mainnet | prod | `stake.aztec.network` | `main` only | + +Each environment requires a matching [GitHub environment](https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment) with the relevant secrets and variables (AWS credentials, RPC URL, contract addresses, etc.). + +To allow deploying `dev` from any PR branch, set its GitHub environment's **Deployment branches** to "All branches". + +## Deploying the Frontend + +**Workflow:** `Deploy Staking Dashboard` (`deploy-staking-dashboard.yaml`) + +Trigger manually from the Actions tab or push a tag: +``` +v1.0.0-testnet-dashboard +v1.0.0-prod-dashboard +``` + +This builds the React app, uploads to S3, and invalidates the CloudFront cache. No interaction with red/green — the frontend is a single static deployment. + +## Deploying the Indexer (Blue-Green) + +The indexer uses a two-phase blue-green deployment. When indexer code changes, the new version re-indexes from scratch (~30 minutes). Rather than having a GitHub Actions runner sit idle waiting, the deploy exits immediately and a cron job handles the switchover. + +### Phase 1: Deploy to Backup + +**Workflow:** `Deploy Indexer (Blue-Green)` (`deploy-indexer-bluegreen.yaml`) + +1. Reads deployment state from S3 to determine which color is **live** and which is **backup** +2. Deploys the indexer to the backup (Terraform + Docker + ECS) +3. Writes a `pending_switchover` to the S3 state file +4. 
Exits (~5–10 min total) + +Trigger manually from the Actions tab: +- **environment**: `dev` / `staging` / `testnet` / `prod` +- **dry_run**: Plan only, don't apply +- **force**: Override an existing pending switchover + +### Phase 2: Automatic Switchover + +**Workflow:** `Check Indexer Sync & Switchover` (`check-indexer-sync.yaml`) + +Runs on a cron every 30 minutes. For each environment with a `pending_switchover`: + +1. Hits `GET /api/sync-status` on the backup's CloudFront domain +2. If not synced yet → exits, retries next cron run +3. If synced → performs the switchover: + - Updates the frontend CloudFront's `indexerOrigin` to point to the new live color (via AWS CLI) + - Invalidates `/api/*` cache + - Updates the S3 deployment state (`live_color` = new color, `pending_switchover` = null) + - Triggers `Deploy ATP Indexer` for the old live (so both colors end up on the latest code) +4. If timed out (>2 hours) → clears the pending switchover and logs an error + +Can also be triggered manually to check a specific environment immediately. + +### Sync Status Endpoint + +`GET /api/sync-status` returns: + +```json +{ + "synced": true, + "indexedBlock": 21345678, + "chainHead": 21345680, + "behindBlocks": 2, + "hasData": true, + "timestamp": "2024-02-19T12:00:00Z" +} +``` + +The indexer is considered synced when `behindBlocks < 50` and `hasData` is true (at least one provider exists in the database). + +### Deploying to a Single Color (Manual) + +**Workflow:** `Deploy ATP Indexer` (`deploy-indexer.yaml`) + +Deploys to a specific color without blue-green orchestration. 
Used by the cron to update the old live, or for manual overrides: +- Set **green** = true to deploy the green instance, false for red + +Can also be triggered via tags: +``` +v1.0.0-testnet-indexer +v1.0.0-testnet-indexer-green +v1.0.0-prod-indexer +v1.0.0-prod-indexer-green +``` + +## S3 Deployment State + +Path: `s3://aztec-token-sale-terraform-state/deployment-state/{env}.json` + +```json +{ + "live_color": "red", + "frontend_distribution_id": "E1234567890", + "colors": { + "red": { "cf_domain": "d10cun7h2qqnvc.cloudfront.net" }, + "green": { "cf_domain": "dgk9duhuxabbq.cloudfront.net" } + }, + "pending_switchover": null +} +``` + +When a switchover is pending: +```json +{ + "pending_switchover": { + "target_color": "green", + "started_at": "2024-02-19T12:00:00Z", + "commit_sha": "abc123" + } +} +``` + +## Initial Setup + +Before using the blue-green workflow for an environment, run the init script once: + +```bash +./scripts/init-deployment-state.sh <environment> <live_color> +``` + +This reads the CloudFront domains from Terraform state and creates the S3 deployment state file. Prerequisites: +- AWS CLI configured with access to the state bucket +- Both red and green indexer Terraform applied +- Frontend staking-dashboard Terraform applied + +## Terraform + +The frontend CloudFront distribution's `indexerOrigin` is managed with `lifecycle { ignore_changes = [origin] }` so that Terraform doesn't revert origin changes made by the blue-green cron via AWS CLI. + +SPA routing is handled by a CloudFront Function (`spa_routing`) on the default behavior's viewer-request event instead of a 404 `custom_error_response`, because `custom_error_response` is distribution-wide and would intercept API 404s. + +## Troubleshooting + +**Switchover stuck / timed out:** The cron clears pending switchovers after 2 hours. Check the backup's `/api/sync-status` endpoint directly. If the indexer is erroring, check ECS logs. + +**Switchover never triggers:** Verify the S3 state file has a `pending_switchover` set.
The cron only runs every 30 minutes — trigger `Check Indexer Sync & Switchover` manually for faster feedback. + +**Wrong color is live:** Manually run `Deploy ATP Indexer` targeting the correct color, then update the S3 state file's `live_color` field directly. + +**Terraform wants to revert the origin:** The `lifecycle { ignore_changes = [origin] }` block should prevent this. If it's happening, check that the block is still present in `staking-dashboard/terraform/main.tf`. diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts index ae072ab72..9921d81f7 100644 --- a/atp-indexer/src/api/handlers/sync-status.ts +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -18,7 +18,7 @@ const SYNC_THRESHOLD_BLOCKS = 50; /** * Handle GET /api/sync-status * Returns the indexer's sync status by comparing the latest indexed block to the chain head. - * Used by the blue-green deployment cron to determine when a slave indexer has caught up. + * Used by the blue-green deployment cron to determine when a backup indexer has caught up. 
*/ export async function handleSyncStatus(c: Context): Promise { try { From 10c64bc885eedd61dcb779f0d72cc8ad089e55a1 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 14:21:58 +0200 Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=92=9A=20add=20domains=20for=20dev=20?= =?UTF-8?q?&=20staging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../workflows/deploy-staking-dashboard.yaml | 36 +++--------- DEPLOYMENT.md | 4 +- staking-dashboard/bootstrap.sh | 55 +++++++++---------- staking-dashboard/terraform/main.tf | 4 +- 4 files changed, 39 insertions(+), 60 deletions(-) diff --git a/.github/workflows/deploy-staking-dashboard.yaml b/.github/workflows/deploy-staking-dashboard.yaml index 538827576..348c056f0 100644 --- a/.github/workflows/deploy-staking-dashboard.yaml +++ b/.github/workflows/deploy-staking-dashboard.yaml @@ -6,9 +6,11 @@ on: environment: description: "Environment to deploy to" required: true - default: "staging" + default: "dev" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -16,18 +18,13 @@ on: required: false default: false type: boolean - green: - description: "Whether to use the green indexer" - required: false - default: false - type: boolean push: tags: + - 'v*-dev-dashboard' + - 'v*-staging-dashboard' - 'v*-testnet-dashboard' - - 'v*-testnet-dashboard-green' - 'v*-prod-dashboard' - - 'v*-prod-dashboard-green' permissions: id-token: write @@ -37,10 +34,9 @@ permissions: jobs: deploy: runs-on: ubuntu-latest - environment: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || 'staging' }} + environment: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || (contains(github.ref, '-testnet') && 'testnet') || (contains(github.ref, '-staging') && 'staging') || 'dev' }} env: - ENV: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || 'staging' }} - GREEN: ${{ inputs.green || contains(github.ref, '-green') }} + ENV: ${{ 
inputs.environment || (contains(github.ref, '-prod') && 'prod') || (contains(github.ref, '-testnet') && 'testnet') || (contains(github.ref, '-staging') && 'staging') || 'dev' }} DRY_RUN: ${{ inputs.dry_run }} # AWS Configuration @@ -98,20 +94,4 @@ jobs: - name: Deploy working-directory: staking-dashboard - run: | - if [ "$ENV" = "testnet" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - fi - elif [ "$ENV" = "prod" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi - else - echo "Unknown environment: $ENV" - exit 1 - fi + run: ./bootstrap.sh "deploy-${ENV}" diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 316edcced..3cea53c42 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -47,8 +47,8 @@ This means the frontend always uses its own domain for API calls (`/api/*`). Ind | Environment | Chain | AWS Cluster | Domain | Branch restriction | |-------------|----------|-------------|---------------------------------|--------------------| -| `dev` | Mainnet | dev | — | None (any PR) | -| `staging` | Mainnet | dev | — | None | +| `dev` | Mainnet | dev | `dev.stake.aztec.network` | None (any PR) | +| `staging` | Mainnet | dev | `staging.stake.aztec.network` | None | | `testnet` | Sepolia | dev | `testnet.stake.aztec.network` | None | | `prod` | Mainnet | prod | `stake.aztec.network` | `main` only | diff --git a/staking-dashboard/bootstrap.sh b/staking-dashboard/bootstrap.sh index fb99e0c08..30d6641d9 100755 --- a/staking-dashboard/bootstrap.sh +++ b/staking-dashboard/bootstrap.sh @@ -97,15 +97,9 @@ function update_env_file() { # Same-domain API — /api/* is routed to the live indexer by CloudFront. # No need to distinguish red/green; the blue-green cron handles origin switching. 
VITE_API_HOST="https://stake.aztec.network" - elif [ "$environment" = "testnet" ]; then - # Same-domain API for testnet - VITE_API_HOST="https://testnet.stake.aztec.network" - elif [ "$environment" = "staging" ]; then - if [ "$green" = "green" ]; then - VITE_API_HOST="https://d1lzkj24db7400.cloudfront.net" - else - VITE_API_HOST="https://d24imfdgeak2db.cloudfront.net" - fi + elif [ "$environment" = "testnet" ] || [ "$environment" = "staging" ]; then + # Same-domain API for testnet/staging + VITE_API_HOST="https://${environment}.stake.aztec.network" else VITE_API_HOST="http://localhost:42068" fi @@ -377,9 +371,7 @@ function deploy() { CHAIN_ID=11155111 chain_environment="sepolia_testnet" VITE_EXPLORER_URL="https://sepolia.etherscan.io" - fi - - if [ "$environment" = "prod" ]; then + elif [ "$environment" = "dev" ] || [ "$environment" = "staging" ] || [ "$environment" = "prod" ]; then if [ -z "${RPC_URL:-}" ]; then echo "Error: RPC_URL environment variable must be set" exit 1 @@ -412,11 +404,11 @@ function deploy() { export TF_VAR_basic_auth_pass="${BASIC_AUTH_PASSWORD:-}" # Set parent environment for shared infrastructure - # testnet uses dev shared infrastructure - if [ "$environment" = "testnet" ]; then - export TF_VAR_env_parent="dev" + # Only prod uses the prod cluster; dev, staging, and testnet use dev + if [ "$environment" = "prod" ]; then + export TF_VAR_env_parent="prod" else - export TF_VAR_env_parent="$environment" + export TF_VAR_env_parent="dev" fi if [ "${DRY_RUN:-false}" = "true" ]; then @@ -436,19 +428,14 @@ function deploy() { # Apply the terraform configuration (cd terraform && terraform apply -auto-approve -var="indexer_deployment_suffix=$indexer_deployment_suffix") - # Get ATP indexer URL from terraform output, fallback to localhost if not available - ATP_INDEXER_URL=$(cd $WEBSITE_ROOT/terraform && terraform output -raw atp_indexer_url) - - # TODO: Remove this. - # Use dev-tn indexer URL for dev environment. 
- # This is because website for dev-tn cannot be deployed due to broken tfstate. - if [ "$environment" = "dev" ]; then - echo "WARNING:Using hardcoded dev-tn indexer URL" - ATP_INDEXER_URL="https://d1ibwybv6l4hzw.cloudfront.net" + # Same-domain API — /api/* is routed to the live indexer by CloudFront. + # No need to reference the indexer directly; the blue-green cron handles origin switching. + if [ "$environment" = "prod" ]; then + export VITE_API_HOST="https://stake.aztec.network" + else + export VITE_API_HOST="https://${environment}.stake.aztec.network" fi - - echo "ATP_INDEXER_URL: $ATP_INDEXER_URL" - export VITE_API_HOST="$ATP_INDEXER_URL" + echo "VITE_API_HOST: $VITE_API_HOST" export VITE_CHAIN_ID=$CHAIN_ID export VITE_RPC_URL=$RPC_URL @@ -493,6 +480,18 @@ case $ACTION in build) build ;; + deploy-dev) + deploy "dev" + ;; + deploy-dev-green) + deploy "dev" "-green" + ;; + deploy-staging) + deploy "staging" + ;; + deploy-staging-green) + deploy "staging" "-green" + ;; deploy-testnet) deploy "testnet" ;; diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 183a08967..d7519fdfd 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -1,5 +1,5 @@ locals { - create_dns_record = var.env == "prod" || var.env == "testnet" ? true : false + create_dns_record = true } terraform { required_version = ">= 1.5.0" @@ -238,7 +238,7 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { web_acl_id = module.website_waf.web_acl_arn # Use custom domain with certificate - aliases = var.env == "prod" ? ["stake.aztec.network"] : var.env == "testnet" ? ["testnet.stake.aztec.network"] : [] + aliases = var.env == "prod" ? 
["stake.aztec.network"] : ["${var.env}.stake.aztec.network"] # Origin 1: S3 bucket for static frontend assets origin { From 51ad74650a39f2a39f1abf91a7df70efd94d834d Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 14:58:17 +0200 Subject: [PATCH 6/9] :green_heart: Fix db schema --- db-schemas.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db-schemas.json b/db-schemas.json index 38bb99d1d..d39311d04 100644 --- a/db-schemas.json +++ b/db-schemas.json @@ -1,5 +1,7 @@ { "atp-indexer": { + "dev": "atp-indexer-dev-v01", + "staging": "atp-indexer-staging-v01", "testnet": "atp-indexer-testnet-v03", "prod": "atp-indexer-prod-v14" } From 5d8b685ce95db9d4166a459b9d01d82b9a33f5ff Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 15:56:46 +0200 Subject: [PATCH 7/9] =?UTF-8?q?=F0=9F=92=9A=20bump=20dev=20db=20schema=20t?= =?UTF-8?q?o=202?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db-schemas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db-schemas.json b/db-schemas.json index d39311d04..717518f41 100644 --- a/db-schemas.json +++ b/db-schemas.json @@ -1,6 +1,6 @@ { "atp-indexer": { - "dev": "atp-indexer-dev-v01", + "dev": "atp-indexer-dev-v02", "staging": "atp-indexer-staging-v01", "testnet": "atp-indexer-testnet-v03", "prod": "atp-indexer-prod-v14" From ccd92bebdefe14115ced6eb193e14ef493d77299 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 16:16:54 +0200 Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=93=9D=20add=20comment=20to=20cloudfr?= =?UTF-8?q?ont?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- atp-indexer/terraform/app.tf | 1 + staking-dashboard/terraform/main.tf | 1 + 2 files changed, 2 insertions(+) diff --git a/atp-indexer/terraform/app.tf b/atp-indexer/terraform/app.tf index 54f6d13bf..55364eb26 100644 --- a/atp-indexer/terraform/app.tf +++ b/atp-indexer/terraform/app.tf @@ -528,6 +528,7 @@ resource 
"aws_cloudfront_response_headers_policy" "cors_policy" { resource "aws_cloudfront_distribution" "cf" { + comment = "ATP Indexer (${var.env}${var.deployment_suffix})" enabled = true default_root_object = "" web_acl_id = local.backend_waf_arn diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index d7519fdfd..437562d29 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -233,6 +233,7 @@ resource "aws_cloudfront_response_headers_policy" "api_cors" { } resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { + comment = "Staking Dashboard (${var.env}) — frontend + /api/* proxy to indexer" enabled = true default_root_object = "index.html" web_acl_id = module.website_waf.web_acl_arn From 07105c41e9f6de6625f6dfef4480e6a771614439 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 16:47:12 +0200 Subject: [PATCH 9/9] =?UTF-8?q?=F0=9F=92=9A=20=20comment=20for=20first=20d?= =?UTF-8?q?eploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- staking-dashboard/terraform/main.tf | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 437562d29..ba2207c78 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -344,9 +344,13 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # The indexer origin domain is updated by the blue-green cron via AWS CLI. # Ignore origin changes so Terraform doesn't revert the switchover. # The S3 origin never changes so this is safe. - lifecycle { - ignore_changes = [origin] - } + # + # IMPORTANT: For the first deploy to a new environment, comment out the + # lifecycle block below so Terraform can create the indexerOrigin. + # After the first successful apply, uncomment it. + # lifecycle { + # ignore_changes = [origin] + # } } #