feat: initial Claude Code configuration scaffold

Comprehensive Claude Code guidance system with:

- 5 agents: tdd-guardian, code-reviewer, security-scanner, refactor-scan, dependency-audit
- 18 skills covering languages (Python, TypeScript, Rust, Go, Java, C#), infrastructure (AWS, Azure, GCP, Terraform, Ansible, Docker/K8s, Database, CI/CD), testing (TDD, UI, Browser), and patterns (Monorepo, API Design, Observability)
- 3 hooks: secret detection, auto-formatting, TDD git pre-commit
- Strict TDD enforcement with 80%+ coverage requirements
- Multi-model strategy: Opus for planning, Sonnet for execution (opusplan)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
632  .claude/skills/infrastructure/ansible/SKILL.md  Normal file
@@ -0,0 +1,632 @@
---
name: ansible-automation
description: Ansible configuration management with playbook patterns, roles, and best practices. Use when writing Ansible playbooks, roles, or inventory configurations.
---

# Ansible Automation Skill

## Project Structure

```
ansible/
├── ansible.cfg
├── inventory/
│   ├── dev/
│   │   ├── hosts.yml
│   │   └── group_vars/
│   │       ├── all.yml
│   │       └── webservers.yml
│   ├── staging/
│   └── prod/
├── playbooks/
│   ├── site.yml          # Main playbook
│   ├── webservers.yml
│   ├── databases.yml
│   └── deploy.yml
├── roles/
│   ├── common/
│   ├── nginx/
│   ├── postgresql/
│   └── app/
├── group_vars/
│   └── all.yml
├── host_vars/
└── files/
```

## Configuration (ansible.cfg)

```ini
[defaults]
inventory = inventory/dev/hosts.yml
roles_path = roles
remote_user = ec2-user
host_key_checking = False
retry_files_enabled = False
gathering = smart
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
fact_caching_timeout = 86400

# Security
no_log = False
display_skipped_hosts = False

[privilege_escalation]
become = True
become_method = sudo
become_user = root
become_ask_pass = False

[ssh_connection]
pipelining = True
control_path = /tmp/ansible-ssh-%%h-%%p-%%r
```

## Inventory Patterns

### YAML Inventory (recommended)
```yaml
# inventory/dev/hosts.yml
all:
  children:
    webservers:
      hosts:
        web1:
          ansible_host: 10.0.1.10
        web2:
          ansible_host: 10.0.1.11
      vars:
        nginx_port: 80
        app_port: 8000

    databases:
      hosts:
        db1:
          ansible_host: 10.0.2.10
          postgresql_version: "15"

    workers:
      hosts:
        # Range pattern expands to worker1, worker2, worker3.
        # Per-host addresses belong in host_vars; Jinja such as
        # "10.0.3.{{ item }}" is not valid in a static inventory.
        worker[1:3]:

  vars:
    ansible_user: ec2-user
    ansible_python_interpreter: /usr/bin/python3
```

### Dynamic Inventory (AWS)
```yaml
# inventory/aws_ec2.yml
plugin: amazon.aws.aws_ec2
regions:
  - eu-west-2
filters:
  tag:Environment: dev
  instance-state-name: running
keyed_groups:
  - key: tags.Role
    prefix: role
  - key: placement.availability_zone
    prefix: az
hostnames:
  - private-ip-address
compose:
  ansible_host: private_ip_address
```

## Playbook Patterns

### Main Site Playbook
```yaml
# playbooks/site.yml
---
- name: Configure all hosts
  hosts: all
  become: true
  roles:
    - common

- name: Configure web servers
  hosts: webservers
  become: true
  roles:
    - nginx
    - app

- name: Configure databases
  hosts: databases
  become: true
  roles:
    - postgresql
```

### Application Deployment
```yaml
# playbooks/deploy.yml
---
- name: Deploy application
  hosts: webservers
  become: true
  serial: "25%"  # Rolling deployment
  max_fail_percentage: 25

  vars:
    # The second default() argument makes the fallback apply to empty
    # strings too (an unset env var looks up as "").
    app_version: "{{ lookup('env', 'APP_VERSION') | default('latest', true) }}"

  pre_tasks:
    - name: Verify deployment prerequisites
      ansible.builtin.assert:
        that:
          - app_version is defined
          - app_version != ''
        fail_msg: "APP_VERSION must be set"

    - name: Remove from load balancer
      ansible.builtin.uri:
        url: "{{ lb_api_url }}/deregister"
        method: POST
        body:
          instance_id: "{{ ansible_hostname }}"
        body_format: json
      delegate_to: localhost
      when: lb_api_url is defined

  roles:
    - role: app
      vars:
        app_state: present

  post_tasks:
    - name: Wait for application health check
      ansible.builtin.uri:
        url: "http://localhost:{{ app_port }}/health"
        status_code: 200
      register: health_check
      until: health_check.status == 200
      retries: 30
      delay: 5

    - name: Add back to load balancer
      ansible.builtin.uri:
        url: "{{ lb_api_url }}/register"
        method: POST
        body:
          instance_id: "{{ ansible_hostname }}"
        body_format: json
      delegate_to: localhost
      when: lb_api_url is defined

  handlers:
    - name: Restart application
      ansible.builtin.systemd:
        name: myapp
        state: restarted
        daemon_reload: true
```

## Role Structure

### Role Layout
```
roles/app/
├── defaults/
│   └── main.yml          # Default variables (lowest priority)
├── vars/
│   └── main.yml          # Role variables (higher priority)
├── tasks/
│   ├── main.yml          # Main task entry point
│   ├── install.yml
│   ├── configure.yml
│   └── service.yml
├── handlers/
│   └── main.yml          # Handlers for notifications
├── templates/
│   ├── app.conf.j2
│   └── systemd.service.j2
├── files/
│   └── scripts/
├── meta/
│   └── main.yml          # Role metadata and dependencies
└── README.md
```

### Role Tasks
```yaml
# roles/app/tasks/main.yml
---
- name: Include installation tasks
  ansible.builtin.include_tasks: install.yml
  tags:
    - install

- name: Include configuration tasks
  ansible.builtin.include_tasks: configure.yml
  tags:
    - configure

- name: Include service tasks
  ansible.builtin.include_tasks: service.yml
  tags:
    - service
```

```yaml
# roles/app/tasks/install.yml
---
- name: Create application user
  ansible.builtin.user:
    name: "{{ app_user }}"
    system: true
    shell: /bin/false
    home: "{{ app_home }}"
    create_home: true

- name: Create application directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0755"
  loop:
    - "{{ app_home }}"
    - "{{ app_home }}/releases"
    - "{{ app_home }}/releases/{{ app_version }}"  # unarchive requires the dest to exist
    - "{{ app_home }}/shared"
    - "{{ app_log_dir }}"

- name: Download application artifact
  ansible.builtin.get_url:
    url: "{{ app_artifact_url }}/{{ app_version }}/app.tar.gz"
    dest: "{{ app_home }}/releases/{{ app_version }}.tar.gz"
    checksum: "sha256:{{ app_checksum }}"
  register: download_result

- name: Extract application
  ansible.builtin.unarchive:
    src: "{{ app_home }}/releases/{{ app_version }}.tar.gz"
    dest: "{{ app_home }}/releases/{{ app_version }}"
    remote_src: true
  when: download_result.changed

- name: Link current release
  ansible.builtin.file:
    src: "{{ app_home }}/releases/{{ app_version }}"
    dest: "{{ app_home }}/current"
    state: link
  notify: Restart application
```

### Role Handlers
```yaml
# roles/app/handlers/main.yml
---
- name: Restart application
  ansible.builtin.systemd:
    name: "{{ app_service_name }}"
    state: restarted
    daemon_reload: true

- name: Reload nginx
  ansible.builtin.systemd:
    name: nginx
    state: reloaded
```

### Role Defaults
```yaml
# roles/app/defaults/main.yml
---
app_user: myapp
app_group: myapp
app_home: /opt/myapp
app_port: 8000
app_log_dir: /var/log/myapp
app_service_name: myapp

# These should be overridden
app_version: ""
app_artifact_url: ""
app_checksum: ""
```

## Templates (Jinja2)

### Application Config
```jinja2
{# roles/app/templates/app.conf.j2 #}
# Application Configuration
# Managed by Ansible - DO NOT EDIT

[server]
host = {{ app_bind_host | default('0.0.0.0') }}
port = {{ app_port }}
workers = {{ app_workers | default(ansible_processor_vcpus * 2) }}

[database]
host = {{ db_host }}
port = {{ db_port | default(5432) }}
name = {{ db_name }}
user = {{ db_user }}
# Password from environment variable
password_env = DB_PASSWORD

[logging]
level = {{ app_log_level | default('INFO') }}
file = {{ app_log_dir }}/app.log

{% if app_features is defined %}
[features]
{% for feature, enabled in app_features.items() %}
{{ feature }} = {{ enabled | lower }}
{% endfor %}
{% endif %}
```

### Systemd Service
```jinja2
{# roles/app/templates/systemd.service.j2 #}
[Unit]
Description={{ app_description | default('Application Service') }}
After=network.target
Wants=network-online.target

[Service]
Type=simple
User={{ app_user }}
Group={{ app_group }}
WorkingDirectory={{ app_home }}/current
ExecStart={{ app_home }}/current/bin/app serve
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
RestartSec=5

# Environment
Environment="PORT={{ app_port }}"
Environment="LOG_LEVEL={{ app_log_level | default('INFO') }}"
EnvironmentFile=-{{ app_home }}/shared/.env

# Security
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ReadWritePaths={{ app_log_dir }} {{ app_home }}/shared

[Install]
WantedBy=multi-user.target
```

## Secrets Management with Vault

### Encrypting Variables
```bash
# Create encrypted file
ansible-vault create group_vars/prod/vault.yml

# Edit encrypted file
ansible-vault edit group_vars/prod/vault.yml

# Encrypt existing file
ansible-vault encrypt group_vars/prod/secrets.yml

# Encrypt string for inline use
ansible-vault encrypt_string 'mysecret' --name 'db_password'
```

### Vault Variables Pattern
```yaml
# group_vars/prod/vault.yml (encrypted)
vault_db_password: "supersecretpassword"
vault_api_key: "api-key-here"

# group_vars/prod/vars.yml (plain, references vault)
db_password: "{{ vault_db_password }}"
api_key: "{{ vault_api_key }}"
```

### Using Vault in Playbooks
```bash
# Run with vault password file
ansible-playbook playbooks/site.yml --vault-password-file ~/.vault_pass

# Run with vault password prompt
ansible-playbook playbooks/site.yml --ask-vault-pass

# Multiple vault IDs
ansible-playbook playbooks/site.yml \
  --vault-id dev@~/.vault_pass_dev \
  --vault-id prod@~/.vault_pass_prod
```

## Idempotency Best Practices

```yaml
# GOOD: Idempotent - can run multiple times safely
- name: Ensure package is installed
  ansible.builtin.apt:
    name: nginx
    state: present

- name: Ensure service is running
  ansible.builtin.systemd:
    name: nginx
    state: started
    enabled: true

- name: Ensure configuration file exists
  ansible.builtin.template:
    src: nginx.conf.j2
    dest: /etc/nginx/nginx.conf
    mode: "0644"
  notify: Reload nginx

# BAD: Not idempotent - appends a duplicate line on every run
- name: Add line to file
  ansible.builtin.shell: echo "export PATH=/app/bin:$PATH" >> /etc/profile
  # Use lineinfile instead!

# GOOD: Idempotent alternative
- name: Add application to PATH
  ansible.builtin.lineinfile:
    path: /etc/profile.d/app.sh
    line: 'export PATH=/app/bin:$PATH'
    create: true
    mode: "0644"
```

## Error Handling

```yaml
- name: Deploy with error handling
  block:
    - name: Download artifact
      ansible.builtin.get_url:
        url: "{{ artifact_url }}"
        dest: /tmp/artifact.tar.gz

    - name: Extract artifact
      ansible.builtin.unarchive:
        src: /tmp/artifact.tar.gz
        dest: /opt/app
        remote_src: true

  rescue:
    - name: Log deployment failure
      ansible.builtin.debug:
        msg: "Deployment failed on {{ inventory_hostname }}"

    - name: Send alert
      ansible.builtin.uri:
        url: "{{ slack_webhook }}"
        method: POST
        body:
          text: "Deployment failed on {{ inventory_hostname }}"
        body_format: json
      delegate_to: localhost

  always:
    - name: Clean up temporary files
      ansible.builtin.file:
        path: /tmp/artifact.tar.gz
        state: absent
```

## Conditionals and Loops

```yaml
# Conditional execution
- name: Install package (Debian)
  ansible.builtin.apt:
    name: nginx
    state: present
  when: ansible_os_family == "Debian"

- name: Install package (RedHat)
  ansible.builtin.yum:
    name: nginx
    state: present
  when: ansible_os_family == "RedHat"

# Loops
- name: Create users
  ansible.builtin.user:
    name: "{{ item.name }}"
    groups: "{{ item.groups }}"
    state: present
  loop:
    - { name: deploy, groups: [wheel, docker] }
    - { name: monitoring, groups: [wheel] }

# Loop with dict
- name: Configure services
  ansible.builtin.systemd:
    name: "{{ item.key }}"
    state: "{{ item.value.state }}"
    enabled: "{{ item.value.enabled }}"
  loop: "{{ services | dict2items }}"
  vars:
    services:
      nginx:
        state: started
        enabled: true
      postgresql:
        state: started
        enabled: true
```

## Commands

```bash
# Syntax check
ansible-playbook playbooks/site.yml --syntax-check

# Dry run (check mode)
ansible-playbook playbooks/site.yml --check

# Dry run with diff
ansible-playbook playbooks/site.yml --check --diff

# Run playbook
ansible-playbook playbooks/site.yml

# Run with specific inventory
ansible-playbook -i inventory/prod/hosts.yml playbooks/site.yml

# Limit to specific hosts
ansible-playbook playbooks/site.yml --limit webservers

# Run specific tags
ansible-playbook playbooks/site.yml --tags "configure,service"

# Skip tags
ansible-playbook playbooks/site.yml --skip-tags "install"

# Extra variables
ansible-playbook playbooks/deploy.yml -e "app_version=1.2.3"

# Ad-hoc commands
ansible webservers -m ping
ansible all -m shell -a "uptime"
ansible databases -m service -a "name=postgresql state=restarted" --become
```

## Anti-Patterns to Avoid

```yaml
# BAD: Using shell when a module exists
- name: Install package
  ansible.builtin.shell: apt-get install -y nginx

# GOOD: Use the appropriate module
- name: Install package
  ansible.builtin.apt:
    name: nginx
    state: present


# BAD: Hardcoded values
- name: Create user
  ansible.builtin.user:
    name: deploy
    uid: 1001

# GOOD: Use variables
- name: Create user
  ansible.builtin.user:
    name: "{{ deploy_user }}"
    uid: "{{ deploy_uid | default(omit) }}"


# BAD: Secrets in plain text
- name: Set database password
  ansible.builtin.lineinfile:
    path: /etc/app/config
    line: "DB_PASSWORD=mysecret"  # NEVER!

# GOOD: Use vault
- name: Set database password
  ansible.builtin.lineinfile:
    path: /etc/app/config
    line: "DB_PASSWORD={{ vault_db_password }}"
```

423  .claude/skills/infrastructure/aws/SKILL.md  Normal file
@@ -0,0 +1,423 @@
---
name: aws-services
description: AWS service patterns, IAM best practices, and common architectures. Use when designing or implementing AWS infrastructure.
---

# AWS Services Skill

## Common Architecture Patterns

### Web Application (ECS + RDS)
```
┌─────────────────────────────────────────────────────────────┐
│                             VPC                             │
│  ┌─────────────────────────────────────────────────────────┐│
│  │                     Public Subnets                      ││
│  │   ┌─────────────┐               ┌─────────────┐         ││
│  │   │     ALB     │               │   NAT GW    │         ││
│  │   └──────┬──────┘               └──────┬──────┘         ││
│  └──────────┼─────────────────────────────┼────────────────┘│
│             │                             │                 │
│  ┌──────────┼─────────────────────────────┼────────────────┐│
│  │          │       Private Subnets       │                ││
│  │   ┌──────▼──────┐               ┌──────▼──────┐         ││
│  │   │ ECS Fargate │               │     RDS     │         ││
│  │   │   (Tasks)   │──────────────▶│ PostgreSQL  │         ││
│  │   └─────────────┘               └─────────────┘         ││
│  └─────────────────────────────────────────────────────────┘│
└─────────────────────────────────────────────────────────────┘
```

### Serverless (Lambda + API Gateway)
```
┌────────────┐     ┌─────────────┐     ┌─────────────┐
│  Route53   │────▶│ API Gateway │────▶│   Lambda    │
└────────────┘     └─────────────┘     └──────┬──────┘
                                              │
                           ┌──────────────────┼──────────────┐
                           │                  │              │
                    ┌──────▼─────┐      ┌─────────┐     ┌────▼────┐
                    │  DynamoDB  │      │   S3    │     │ Secrets │
                    └────────────┘      └─────────┘     │ Manager │
                                                        └─────────┘
```

## IAM Best Practices

### Least Privilege Policy
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "AllowS3ReadSpecificBucket",
      "Effect": "Allow",
      "Action": [
        "s3:GetObject",
        "s3:ListBucket"
      ],
      "Resource": [
        "arn:aws:s3:::my-app-data-bucket",
        "arn:aws:s3:::my-app-data-bucket/*"
      ]
    },
    {
      "Sid": "AllowSecretsAccess",
      "Effect": "Allow",
      "Action": [
        "secretsmanager:GetSecretValue"
      ],
      "Resource": [
        "arn:aws:secretsmanager:eu-west-2:123456789:secret:my-app/*"
      ]
    }
  ]
}
```

### Trust Policy (for ECS Tasks)
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ecs-tasks.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
```

### Cross-Account Access
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::ACCOUNT_ID:role/CrossAccountRole"
      },
      "Action": "sts:AssumeRole",
      "Condition": {
        "StringEquals": {
          "sts:ExternalId": "unique-external-id"
        }
      }
    }
  ]
}
```

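A caller in the trusted account then assumes the role and supplies the external ID. A minimal boto3 sketch (role ARN and external ID are the placeholders from the policy above):

```python
import boto3

sts = boto3.client("sts")
response = sts.assume_role(
    RoleArn="arn:aws:iam::ACCOUNT_ID:role/CrossAccountRole",
    RoleSessionName="cross-account-session",
    ExternalId="unique-external-id",
)

# Temporary credentials for the assumed role
creds = response["Credentials"]
s3 = boto3.client(
    "s3",
    aws_access_key_id=creds["AccessKeyId"],
    aws_secret_access_key=creds["SecretAccessKey"],
    aws_session_token=creds["SessionToken"],
)
```
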
## Secrets Management

### Using Secrets Manager
```python
# Python - boto3
import boto3
import json


def get_secret(secret_name: str, region: str = "eu-west-2") -> dict:
    client = boto3.client("secretsmanager", region_name=region)
    response = client.get_secret_value(SecretId=secret_name)
    return json.loads(response["SecretString"])


# Usage
db_creds = get_secret("myapp/prod/database")
connection_string = f"postgresql://{db_creds['username']}:{db_creds['password']}@{db_creds['host']}/{db_creds['database']}"
```

```typescript
// TypeScript - AWS SDK v3
import { SecretsManagerClient, GetSecretValueCommand } from "@aws-sdk/client-secrets-manager";

async function getSecret(secretName: string): Promise<Record<string, string>> {
  const client = new SecretsManagerClient({ region: "eu-west-2" });
  const command = new GetSecretValueCommand({ SecretId: secretName });
  const response = await client.send(command);

  if (!response.SecretString) {
    throw new Error("Secret not found");
  }

  return JSON.parse(response.SecretString);
}
```

### ECS Task with Secrets
```json
// Task definition
{
  "containerDefinitions": [
    {
      "name": "app",
      "secrets": [
        {
          "name": "DATABASE_PASSWORD",
          "valueFrom": "arn:aws:secretsmanager:eu-west-2:123456789:secret:myapp/database:password::"
        },
        {
          "name": "API_KEY",
          "valueFrom": "arn:aws:secretsmanager:eu-west-2:123456789:secret:myapp/api-key"
        }
      ]
    }
  ]
}
```

## S3 Patterns

### Bucket Policy (Least Privilege)
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "AllowECSTaskAccess",
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::123456789:role/ecs-task-role"
      },
      "Action": [
        "s3:GetObject",
        "s3:PutObject"
      ],
      "Resource": "arn:aws:s3:::my-bucket/uploads/*"
    },
    {
      "Sid": "DenyUnencryptedUploads",
      "Effect": "Deny",
      "Principal": "*",
      "Action": "s3:PutObject",
      "Resource": "arn:aws:s3:::my-bucket/*",
      "Condition": {
        "StringNotEquals": {
          "s3:x-amz-server-side-encryption": "AES256"
        }
      }
    }
  ]
}
```

### Presigned URLs
```python
import boto3
from botocore.config import Config


def generate_presigned_url(bucket: str, key: str, expiration: int = 3600) -> str:
    """Generate a presigned URL for S3 object access."""
    s3_client = boto3.client(
        "s3",
        config=Config(signature_version="s3v4"),
        region_name="eu-west-2"
    )

    return s3_client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": key},
        ExpiresIn=expiration
    )
```

## DynamoDB Patterns

### Single Table Design
```python
import boto3

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("app-table")  # single table holding all entity types

# Entity types in same table
ENTITY_TYPES = {
    "USER": {"PK": "USER#", "SK": "PROFILE"},
    "ORDER": {"PK": "USER#", "SK": "ORDER#"},
    "PRODUCT": {"PK": "PRODUCT#", "SK": "DETAILS"},
}


# Access patterns
def get_user(user_id: str) -> dict:
    return table.get_item(
        Key={"PK": f"USER#{user_id}", "SK": "PROFILE"}
    )["Item"]


def get_user_orders(user_id: str) -> list:
    response = table.query(
        KeyConditionExpression="PK = :pk AND begins_with(SK, :sk)",
        ExpressionAttributeValues={
            ":pk": f"USER#{user_id}",
            ":sk": "ORDER#"
        }
    )
    return response["Items"]
```

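Writes follow the same key convention. A small sketch of storing an order under its owner's partition (the attribute layout is illustrative, matching the access patterns above):

```python
def put_order(user_id: str, order_id: str, order: dict) -> None:
    """Store an order item under the owning user's partition."""
    table.put_item(
        Item={
            "PK": f"USER#{user_id}",
            "SK": f"ORDER#{order_id}",
            **order,
        }
    )
```
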
## Lambda Patterns

### Handler with Error Handling
```python
import json
import logging
from typing import Any

logger = logging.getLogger()
logger.setLevel(logging.INFO)


class ValidationError(Exception):
    """Raised when the request body fails validation."""


def handler(event: dict, context: Any) -> dict:
    """Lambda handler with proper error handling."""
    try:
        logger.info("Processing event", extra={"event": event})

        # Process request (process_request is the application's business logic)
        body = json.loads(event.get("body", "{}"))
        result = process_request(body)

        return {
            "statusCode": 200,
            "headers": {"Content-Type": "application/json"},
            "body": json.dumps(result)
        }

    except ValidationError as e:
        logger.warning("Validation error", extra={"error": str(e)})
        return {
            "statusCode": 400,
            "body": json.dumps({"error": str(e)})
        }

    except Exception:
        logger.exception("Unexpected error")
        return {
            "statusCode": 500,
            "body": json.dumps({"error": "Internal server error"})
        }
```

### Cold Start Optimization
```python
# Initialize outside handler (runs once per container)
import json

import boto3

# These persist across invocations
dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("my-table")
secrets_client = boto3.client("secretsmanager")

# Cache secrets
_cached_secrets = {}


def get_cached_secret(name: str) -> dict:
    if name not in _cached_secrets:
        response = secrets_client.get_secret_value(SecretId=name)
        _cached_secrets[name] = json.loads(response["SecretString"])
    return _cached_secrets[name]


def handler(event, context):
    # Use cached resources
    secret = get_cached_secret("my-secret")
    # ...
```

## CloudWatch Patterns

### Structured Logging
```python
import json
import logging

# Attributes present on every LogRecord; anything else came in via `extra=`
_STANDARD_ATTRS = set(vars(logging.LogRecord("", 0, "", 0, "", (), None)))


class JsonFormatter(logging.Formatter):
    def format(self, record):
        log_record = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "message": record.getMessage(),
            "logger": record.name,
        }

        # Fields passed via `extra=` are set directly on the record,
        # so copy any non-standard attributes into the JSON payload.
        for key, value in vars(record).items():
            if key not in _STANDARD_ATTRS and key not in log_record:
                log_record[key] = value

        return json.dumps(log_record, default=str)


# Setup
logger = logging.getLogger()
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger.addHandler(handler)

# Usage
logger.info("User created", extra={"user_id": "123", "email": "user@example.com"})
```

### Custom Metrics
```python
import boto3

cloudwatch = boto3.client("cloudwatch")


def publish_metric(name: str, value: float, unit: str = "Count"):
    cloudwatch.put_metric_data(
        Namespace="MyApp",
        MetricData=[
            {
                "MetricName": name,
                "Value": value,
                "Unit": unit,
                "Dimensions": [
                    {"Name": "Environment", "Value": "prod"},
                    {"Name": "Service", "Value": "api"},
                ]
            }
        ]
    )


# Usage
publish_metric("OrdersProcessed", 1)
publish_metric("ProcessingTime", 150, "Milliseconds")
```

## CLI Commands

```bash
# IAM
aws iam get-role --role-name MyRole
aws iam list-attached-role-policies --role-name MyRole
aws sts get-caller-identity

# S3
aws s3 ls s3://my-bucket/
aws s3 cp file.txt s3://my-bucket/
aws s3 presign s3://my-bucket/file.txt --expires-in 3600

# Secrets Manager
aws secretsmanager get-secret-value --secret-id my-secret
aws secretsmanager list-secrets

# ECS
aws ecs list-clusters
aws ecs describe-services --cluster my-cluster --services my-service
aws ecs update-service --cluster my-cluster --service my-service --force-new-deployment

# Lambda
aws lambda invoke --function-name my-function output.json
aws lambda list-functions
aws logs tail /aws/lambda/my-function --follow

# CloudWatch
aws logs filter-log-events --log-group-name /aws/lambda/my-function --filter-pattern "ERROR"
```

## Security Checklist

- [ ] All S3 buckets have versioning enabled
- [ ] All S3 buckets block public access (unless explicitly needed)
- [ ] Encryption at rest enabled for all data stores
- [ ] Encryption in transit (TLS) for all connections
- [ ] IAM roles use least privilege
- [ ] No long-term credentials (use IAM roles/instance profiles)
- [ ] Secrets in Secrets Manager (not env vars or code)
- [ ] VPC endpoints for AWS services (avoid public internet)
- [ ] Security groups follow principle of least privilege
- [ ] CloudTrail enabled for auditing
- [ ] GuardDuty enabled for threat detection

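The first two items can be enforced in code as well as reviewed. A boto3 sketch (bucket name is illustrative):

```python
import boto3

s3 = boto3.client("s3")
bucket = "my-app-data-bucket"  # illustrative name

# Enable versioning
s3.put_bucket_versioning(
    Bucket=bucket,
    VersioningConfiguration={"Status": "Enabled"},
)

# Block all forms of public access
s3.put_public_access_block(
    Bucket=bucket,
    PublicAccessBlockConfiguration={
        "BlockPublicAcls": True,
        "IgnorePublicAcls": True,
        "BlockPublicPolicy": True,
        "RestrictPublicBuckets": True,
    },
)
```
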
442  .claude/skills/infrastructure/azure/SKILL.md  Normal file
@@ -0,0 +1,442 @@
---
name: azure-services
description: Azure service patterns, RBAC best practices, and common architectures. Use when designing or implementing Azure infrastructure.
---

# Azure Services Skill

## Common Architecture Patterns

### Web Application (App Service + Azure SQL)
```
┌─────────────────────────────────────────────────────────────┐
│                            VNet                             │
│  ┌─────────────────────────────────────────────────────────┐│
│  │                      Public Subnet                      ││
│  │   ┌─────────────┐               ┌─────────────┐         ││
│  │   │ App Gateway │               │   NAT GW    │         ││
│  │   └──────┬──────┘               └──────┬──────┘         ││
│  └──────────┼─────────────────────────────┼────────────────┘│
│             │                             │                 │
│  ┌──────────┼─────────────────────────────┼────────────────┐│
│  │          │       Private Subnet        │                ││
│  │   ┌──────▼──────┐               ┌──────▼──────┐         ││
│  │   │ App Service │               │  Azure SQL  │         ││
│  │   │  (Web App)  │──────────────▶│  Database   │         ││
│  │   └─────────────┘               └─────────────┘         ││
│  └─────────────────────────────────────────────────────────┘│
└─────────────────────────────────────────────────────────────┘
```

### Serverless (Azure Functions + API Management)
```
┌────────────┐     ┌─────────────┐     ┌─────────────┐
│   Front    │────▶│    APIM     │────▶│  Functions  │
│    Door    │     │             │     └──────┬──────┘
└────────────┘     └─────────────┘            │
                           ┌──────────────────┼──────────────┐
                           │                  │              │
                    ┌──────▼─────┐      ┌─────────┐     ┌────▼────┐
                    │ Cosmos DB  │      │  Blob   │     │   Key   │
                    └────────────┘      │ Storage │     │  Vault  │
                                        └─────────┘     └─────────┘
```

## RBAC Best Practices

### Custom Role Definition
```json
{
  "Name": "App Data Reader",
  "Description": "Read access to application data in storage",
  "Actions": [
    "Microsoft.Storage/storageAccounts/blobServices/containers/read",
    "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read"
  ],
  "NotActions": [],
  "DataActions": [
    "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read"
  ],
  "NotDataActions": [],
  "AssignableScopes": [
    "/subscriptions/{subscription-id}/resourceGroups/{resource-group}"
  ]
}
```

### Managed Identity Usage
```python
# Python - azure-identity
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
from azure.storage.blob import BlobServiceClient

# Uses managed identity when deployed to Azure
credential = DefaultAzureCredential()

# Key Vault access
secret_client = SecretClient(
    vault_url="https://my-vault.vault.azure.net/",
    credential=credential
)
secret = secret_client.get_secret("database-password")

# Blob Storage access
blob_service = BlobServiceClient(
    account_url="https://mystorageaccount.blob.core.windows.net/",
    credential=credential
)
```

```typescript
// TypeScript - @azure/identity
import { DefaultAzureCredential } from "@azure/identity";
import { SecretClient } from "@azure/keyvault-secrets";
import { BlobServiceClient } from "@azure/storage-blob";

const credential = new DefaultAzureCredential();

// Key Vault access
const secretClient = new SecretClient(
  "https://my-vault.vault.azure.net/",
  credential
);
const secret = await secretClient.getSecret("database-password");

// Blob Storage access
const blobService = new BlobServiceClient(
  "https://mystorageaccount.blob.core.windows.net/",
  credential
);
```

## Key Vault Patterns

### Secrets Management
```python
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient


def get_secret(vault_url: str, secret_name: str) -> str:
    """Retrieve secret from Key Vault using managed identity."""
    credential = DefaultAzureCredential()
    client = SecretClient(vault_url=vault_url, credential=credential)
    return client.get_secret(secret_name).value


# Usage
db_password = get_secret(
    "https://my-vault.vault.azure.net/",
    "database-password"
)
```

### App Service with Key Vault References
```json
// App Service configuration
{
  "name": "DatabasePassword",
  "value": "@Microsoft.KeyVault(SecretUri=https://my-vault.vault.azure.net/secrets/db-password/)",
  "slotSetting": false
}
```

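App Service resolves the reference before the process starts, so the application reads the secret like any other app setting:

```python
import os

# The resolved Key Vault reference arrives as a plain environment variable
db_password = os.environ["DatabasePassword"]
```
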
## Blob Storage Patterns

### SAS Token Generation
```python
from datetime import datetime, timedelta

from azure.storage.blob import (
    BlobSasPermissions,
    generate_blob_sas,
)


def generate_read_sas(
    account_name: str,
    account_key: str,
    container: str,
    blob_name: str,
    expiry_hours: int = 1
) -> str:
    """Generate a read-only SAS URL for a blob."""
    sas_token = generate_blob_sas(
        account_name=account_name,
        container_name=container,
        blob_name=blob_name,
        account_key=account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=expiry_hours),
    )

    return f"https://{account_name}.blob.core.windows.net/{container}/{blob_name}?{sas_token}"
```

### User Delegation SAS (More Secure)
```python
from datetime import datetime, timedelta

from azure.identity import DefaultAzureCredential
from azure.storage.blob import (
    BlobSasPermissions,
    BlobServiceClient,
    generate_blob_sas,
)


def generate_user_delegation_sas(
    account_url: str,
    container: str,
    blob_name: str,
) -> str:
    """Generate SAS using user delegation key (no storage key needed)."""
    credential = DefaultAzureCredential()
    blob_service = BlobServiceClient(account_url, credential=credential)

    # Get user delegation key
    delegation_key = blob_service.get_user_delegation_key(
        key_start_time=datetime.utcnow(),
        key_expiry_time=datetime.utcnow() + timedelta(hours=1)
    )

    sas_token = generate_blob_sas(
        account_name=blob_service.account_name,
        container_name=container,
        blob_name=blob_name,
        user_delegation_key=delegation_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    return f"{account_url}/{container}/{blob_name}?{sas_token}"
```

## Cosmos DB Patterns

### Async Client Usage
```python
from azure.cosmos.aio import CosmosClient
from azure.identity.aio import DefaultAzureCredential


async def get_cosmos_client() -> CosmosClient:
    """Create async Cosmos client with managed identity."""
    credential = DefaultAzureCredential()
    return CosmosClient(
        url="https://my-cosmos.documents.azure.com:443/",
        credential=credential
    )


async def query_items(container_name: str, query: str) -> list:
    """Query items from Cosmos DB container."""
    async with await get_cosmos_client() as client:
        database = client.get_database_client("my-database")
        container = database.get_container_client(container_name)

        items = []
        async for item in container.query_items(
            query=query,
            enable_cross_partition_query=True
        ):
            items.append(item)

        return items
```

### Partition Key Design
```python
# Good partition key choices:
# - tenant_id for multi-tenant apps
# - user_id for user-specific data
# - category for catalog data

# Document structure
{
    "id": "order-12345",
    "partitionKey": "customer-789",  # Use customer ID for orders
    "orderDate": "2024-01-15",
    "items": [...],
    "total": 150.00
}
```

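Creating a container keyed on that property might look like this (a sketch; the sync client, database, and container names are illustrative):

```python
from azure.cosmos import CosmosClient, PartitionKey
from azure.identity import DefaultAzureCredential

client = CosmosClient(
    url="https://my-cosmos.documents.azure.com:443/",
    credential=DefaultAzureCredential(),
)
database = client.get_database_client("my-database")
database.create_container_if_not_exists(
    id="orders",
    partition_key=PartitionKey(path="/partitionKey"),
)
```
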
## Azure Functions Patterns

### HTTP Trigger with Input Validation
```python
import logging

import azure.functions as func
from pydantic import BaseModel, ValidationError


class CreateOrderRequest(BaseModel):
    customer_id: str
    items: list[dict]


app = func.FunctionApp()


@app.route(route="orders", methods=["POST"])
async def create_order(req: func.HttpRequest) -> func.HttpResponse:
    """Create a new order with validation."""
    try:
        body = req.get_json()
        request = CreateOrderRequest(**body)

        # process_order is the application's business logic (defined elsewhere)
        result = await process_order(request)

        return func.HttpResponse(
            body=result.model_dump_json(),
            status_code=201,
            mimetype="application/json"
        )

    except ValidationError as e:
        return func.HttpResponse(
            body=e.json(),
            status_code=400,
            mimetype="application/json"
        )
    except Exception:
        logging.exception("Error processing order")
        return func.HttpResponse(
            body='{"error": "Internal server error"}',
            status_code=500,
            mimetype="application/json"
        )
```

### Durable Functions Orchestration
```python
import azure.durable_functions as df

# Durable triggers hang off a DFApp rather than a plain FunctionApp
app = df.DFApp()


@app.orchestration_trigger(context_name="context")
def order_orchestrator(context: df.DurableOrchestrationContext):
    """Orchestrate multi-step order processing."""
    order = context.get_input()

    # Step 1: Validate inventory
    inventory_result = yield context.call_activity(
        "validate_inventory", order["items"]
    )

    if not inventory_result["available"]:
        return {"status": "failed", "reason": "insufficient_inventory"}

    # Step 2: Process payment
    payment_result = yield context.call_activity(
        "process_payment", order["payment"]
    )

    if not payment_result["success"]:
        return {"status": "failed", "reason": "payment_failed"}

    # Step 3: Create shipment
    shipment = yield context.call_activity(
        "create_shipment", order
    )

    return {"status": "completed", "shipment_id": shipment["id"]}
```

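Each activity named in the orchestrator is its own function. Assuming the same v2 decorator model, a stub for the inventory step might look like this (the stock check itself is a placeholder):

```python
@app.activity_trigger(input_name="items")
def validate_inventory(items: list) -> dict:
    # Illustrative stub; real stock lookups go here
    return {"available": all(item.get("qty", 0) > 0 for item in items)}
```
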
## Application Insights

### Structured Logging
```python
import logging

from opencensus.ext.azure.log_exporter import AzureLogHandler

# Configure logging with Application Insights
logger = logging.getLogger(__name__)
logger.addHandler(AzureLogHandler(
    connection_string="InstrumentationKey=xxx;IngestionEndpoint=xxx"
))

# Log with custom dimensions
logger.info(
    "Order processed",
    extra={
        "custom_dimensions": {
            "order_id": "12345",
            "customer_id": "cust-789",
            "total": 150.00
        }
    }
)
```

### Custom Metrics
```python
from opencensus.ext.azure import metrics_exporter
from opencensus.stats import aggregation, measure, stats, view

# Create measure
orders_measure = measure.MeasureInt(
    "orders_processed",
    "Number of orders processed",
    "orders"
)

# Create view
orders_view = view.View(
    "orders_processed_total",
    "Total orders processed",
    [],
    orders_measure,
    aggregation.CountAggregation()
)

# Register and export
view_manager = stats.stats.view_manager
view_manager.register_view(orders_view)

exporter = metrics_exporter.new_metrics_exporter(
    connection_string="InstrumentationKey=xxx"
)
view_manager.register_exporter(exporter)

# Record metric
mmap = stats.stats.stats_recorder.new_measurement_map()
mmap.measure_int_put(orders_measure, 1)
mmap.record()
```

## CLI Commands

```bash
# Authentication
az login
az account set --subscription "My Subscription"
az account show

# Resource Groups
az group list --output table
az group create --name my-rg --location uksouth

# Key Vault
az keyvault secret show --vault-name my-vault --name my-secret
az keyvault secret set --vault-name my-vault --name my-secret --value "secret-value"

# Storage
az storage blob list --account-name mystorageaccount --container-name mycontainer
az storage blob upload --account-name mystorageaccount --container-name mycontainer --file local.txt --name remote.txt

# App Service
az webapp list --output table
az webapp restart --name my-app --resource-group my-rg
az webapp log tail --name my-app --resource-group my-rg

# Functions
az functionapp list --output table
az functionapp restart --name my-func --resource-group my-rg

# Cosmos DB
az cosmosdb list --output table
az cosmosdb sql database list --account-name my-cosmos --resource-group my-rg
```

## Security Checklist

- [ ] Use Managed Identities instead of connection strings
- [ ] Store secrets in Key Vault, not app settings
- [ ] Enable Azure Defender for all resources
- [ ] Use Private Endpoints for PaaS services
- [ ] Enable diagnostic logging to Log Analytics
- [ ] Configure Network Security Groups
- [ ] Use User Delegation SAS instead of account keys
- [ ] Enable soft delete on Key Vault and Storage
- [ ] Configure Azure Policy for compliance
- [ ] Enable Microsoft Defender for Cloud

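Some of these can be verified programmatically. A sketch that flags storage accounts still allowing public blob access, using the azure-mgmt-storage management client (the subscription ID is a placeholder):

```python
from azure.identity import DefaultAzureCredential
from azure.mgmt.storage import StorageManagementClient

client = StorageManagementClient(DefaultAzureCredential(), "<subscription-id>")

for account in client.storage_accounts.list():
    if account.allow_blob_public_access:
        print(f"WARNING: {account.name} allows public blob access")
```
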
599  .claude/skills/infrastructure/cicd/SKILL.md  Normal file
@@ -0,0 +1,599 @@
---
name: cicd-pipelines
description: CI/CD pipeline patterns for Jenkins, GitHub Actions, and GitLab CI. Use when setting up continuous integration or deployment pipelines.
---

# CI/CD Pipelines Skill

## Jenkins

### Declarative Pipeline
```groovy
// Jenkinsfile
pipeline {
    agent any

    environment {
        REGISTRY = 'myregistry.azurecr.io'
        IMAGE_NAME = 'myapp'
        COVERAGE_THRESHOLD = '80'
    }

    options {
        timeout(time: 30, unit: 'MINUTES')
        disableConcurrentBuilds()
        buildDiscarder(logRotator(numToKeepStr: '10'))
    }

    stages {
        stage('Checkout') {
            steps {
                checkout scm
            }
        }

        stage('Install Dependencies') {
            parallel {
                stage('Python') {
                    when {
                        changeset "apps/backend/**"
                    }
                    steps {
                        sh 'uv sync'
                    }
                }
                stage('Node') {
                    when {
                        changeset "apps/frontend/**"
                    }
                    steps {
                        sh 'npm ci'
                    }
                }
            }
        }

        stage('Lint & Type Check') {
            parallel {
                stage('Python Lint') {
                    when {
                        changeset "apps/backend/**"
                    }
                    steps {
                        sh 'uv run ruff check apps/backend/'
                        sh 'uv run mypy apps/backend/'
                    }
                }
                stage('TypeScript Lint') {
                    when {
                        changeset "apps/frontend/**"
                    }
                    steps {
                        sh 'npm run lint --workspace=frontend'
                        sh 'npm run typecheck --workspace=frontend'
                    }
                }
            }
        }

        stage('Test') {
            parallel {
                stage('Backend Tests') {
                    when {
                        changeset "apps/backend/**"
                    }
                    steps {
                        sh """
                            uv run pytest apps/backend/ \
                                --cov=apps/backend/src \
                                --cov-report=xml \
                                --cov-fail-under=${COVERAGE_THRESHOLD} \
                                --junitxml=test-results/backend.xml
                        """
                    }
                    post {
                        always {
                            junit 'test-results/backend.xml'
                            publishCoverage adapters: [coberturaAdapter('coverage.xml')]
                        }
                    }
                }
                stage('Frontend Tests') {
                    when {
                        changeset "apps/frontend/**"
                    }
                    steps {
                        sh """
                            npm run test --workspace=frontend -- \
                                --coverage \
                                --coverageThreshold='{"global":{"branches":${COVERAGE_THRESHOLD},"functions":${COVERAGE_THRESHOLD},"lines":${COVERAGE_THRESHOLD}}}' \
                                --reporter=junit \
                                --outputFile=test-results/frontend.xml
                        """
                    }
                    post {
                        always {
                            junit 'test-results/frontend.xml'
                        }
                    }
                }
            }
        }

        stage('Security Scan') {
            steps {
                sh 'trivy fs --severity HIGH,CRITICAL --exit-code 1 .'
            }
        }

        stage('Build') {
            when {
                anyOf {
                    branch 'main'
                    branch 'release/*'
                }
            }
            steps {
                script {
                    // Stored in env so later stages (Push) can reference it
                    env.VERSION = sh(script: 'git describe --tags --always', returnStdout: true).trim()
                    sh """
                        docker build -t ${REGISTRY}/${IMAGE_NAME}:${env.VERSION} .
                        docker tag ${REGISTRY}/${IMAGE_NAME}:${env.VERSION} ${REGISTRY}/${IMAGE_NAME}:latest
                    """
                }
            }
        }

        stage('Push') {
            when {
                branch 'main'
            }
            steps {
                withCredentials([usernamePassword(
                    credentialsId: 'registry-credentials',
                    usernameVariable: 'REGISTRY_USER',
                    passwordVariable: 'REGISTRY_PASS'
                )]) {
                    sh """
                        echo \$REGISTRY_PASS | docker login ${REGISTRY} -u \$REGISTRY_USER --password-stdin
                        docker push ${REGISTRY}/${IMAGE_NAME}:${env.VERSION}
                        docker push ${REGISTRY}/${IMAGE_NAME}:latest
                    """
                }
            }
        }

        stage('Deploy to Staging') {
            when {
                branch 'main'
            }
            steps {
                sh 'kubectl apply -f k8s/staging/'
                sh 'kubectl rollout status deployment/myapp -n staging'
            }
        }

        stage('Deploy to Production') {
            when {
                branch 'release/*'
            }
            input {
                message "Deploy to production?"
                ok "Deploy"
            }
            steps {
                sh 'kubectl apply -f k8s/production/'
                sh 'kubectl rollout status deployment/myapp -n production'
            }
        }
    }

    post {
        always {
            cleanWs()
        }
        success {
            slackSend(
                channel: '#deployments',
                color: 'good',
                message: "Build ${env.BUILD_NUMBER} succeeded: ${env.BUILD_URL}"
            )
        }
        failure {
            slackSend(
                channel: '#deployments',
                color: 'danger',
                message: "Build ${env.BUILD_NUMBER} failed: ${env.BUILD_URL}"
            )
        }
    }
}
```

### Shared Library
```groovy
// vars/pythonPipeline.groovy
def call(Map config = [:]) {
    pipeline {
        agent any

        stages {
            stage('Test') {
                steps {
                    sh "uv run pytest ${config.testPath ?: 'tests/'} --cov --cov-fail-under=${config.coverage ?: 80}"
                }
            }
            stage('Lint') {
                steps {
                    sh "uv run ruff check ${config.srcPath ?: 'src/'}"
                }
            }
        }
    }
}

// Usage in Jenkinsfile
@Library('my-shared-library') _

pythonPipeline(
    testPath: 'apps/backend/tests/',
    srcPath: 'apps/backend/src/',
    coverage: 85
)
```

## GitHub Actions

### Complete Workflow
```yaml
# .github/workflows/ci.yml
name: CI/CD

on:
  push:
    branches: [main, 'release/*']
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  detect-changes:
    runs-on: ubuntu-latest
    outputs:
      backend: ${{ steps.changes.outputs.backend }}
      frontend: ${{ steps.changes.outputs.frontend }}
      infrastructure: ${{ steps.changes.outputs.infrastructure }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: |
            backend:
              - 'apps/backend/**'
              - 'packages/shared/**'
            frontend:
              - 'apps/frontend/**'
              - 'packages/shared/**'
            infrastructure:
              - 'infrastructure/**'

  backend-test:
    needs: detect-changes
    if: needs.detect-changes.outputs.backend == 'true'
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:16
        env:
          POSTGRES_USER: test
          POSTGRES_PASSWORD: test
          POSTGRES_DB: test
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: uv sync

      - name: Lint
        run: |
          uv run ruff check apps/backend/
          uv run mypy apps/backend/

      - name: Test
        env:
          DATABASE_URL: postgresql://test:test@localhost:5432/test
        run: |
          uv run pytest apps/backend/ \
            --cov=apps/backend/src \
            --cov-report=xml \
            --cov-fail-under=80

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          flags: backend

  frontend-test:
    needs: detect-changes
    if: needs.detect-changes.outputs.frontend == 'true'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Lint & Type Check
        run: |
          npm run lint --workspace=frontend
          npm run typecheck --workspace=frontend

      - name: Test
        run: npm run test --workspace=frontend -- --coverage

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: apps/frontend/coverage/lcov.info
          flags: frontend

  security-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          severity: 'CRITICAL,HIGH'
          exit-code: '1'

      - name: Run Gitleaks
        uses: gitleaks/gitleaks-action@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  build-and-push:
    needs: [backend-test, frontend-test, security-scan]
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - uses: actions/checkout@v4

      - name: Log in to Container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=sha
            type=ref,event=branch

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy-staging:
    needs: build-and-push
    runs-on: ubuntu-latest
    environment: staging

    steps:
      - uses: actions/checkout@v4

      - name: Deploy to staging
        run: |
          kubectl apply -f k8s/staging/
          kubectl rollout status deployment/myapp -n staging

  deploy-production:
    needs: deploy-staging
    runs-on: ubuntu-latest
    environment: production

    steps:
      - uses: actions/checkout@v4

      - name: Deploy to production
        run: |
          kubectl apply -f k8s/production/
          kubectl rollout status deployment/myapp -n production
```

### Reusable Workflow
```yaml
# .github/workflows/python-ci.yml
name: Python CI

on:
  workflow_call:
    inputs:
      python-version:
        required: false
        type: string
        default: '3.12'
      working-directory:
        required: true
        type: string
      coverage-threshold:
        required: false
        type: number
        default: 80

jobs:
  test:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}

    steps:
      - uses: actions/checkout@v4

      - uses: astral-sh/setup-uv@v4

      - run: uv sync

      - run: uv run ruff check .

      - run: uv run pytest --cov --cov-fail-under=${{ inputs.coverage-threshold }}
```

## GitLab CI

```yaml
# .gitlab-ci.yml
stages:
  - test
  - build
  - deploy

variables:
  REGISTRY: registry.gitlab.com
  IMAGE_NAME: $CI_PROJECT_PATH

.python-base:
  image: python:3.12
  before_script:
    - pip install uv
    - uv sync

.node-base:
  image: node:22
  before_script:
    - npm ci

test:backend:
  extends: .python-base
  stage: test
  script:
    - uv run ruff check apps/backend/
    - uv run pytest apps/backend/ --cov --cov-fail-under=80
  rules:
    - changes:
        - apps/backend/**

test:frontend:
  extends: .node-base
  stage: test
  script:
    - npm run lint --workspace=frontend
    - npm run test --workspace=frontend -- --coverage
  rules:
    - changes:
        - apps/frontend/**

build:
  stage: build
  image: docker:24
  services:
    - docker:24-dind
  script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker build -t $REGISTRY/$IMAGE_NAME:$CI_COMMIT_SHA .
    - docker push $REGISTRY/$IMAGE_NAME:$CI_COMMIT_SHA
  rules:
    - if: $CI_COMMIT_BRANCH == "main"

deploy:staging:
  stage: deploy
  script:
    - kubectl apply -f k8s/staging/
  environment:
    name: staging
  rules:
    - if: $CI_COMMIT_BRANCH == "main"

deploy:production:
  stage: deploy
  script:
    - kubectl apply -f k8s/production/
  environment:
    name: production
  rules:
    - if: $CI_COMMIT_BRANCH == "main"
      when: manual
```

## Best Practices

### Pipeline Design Principles

1. **Fail Fast** - Run quick checks (lint, type check) before slow ones (tests)
2. **Parallelize** - Run independent jobs concurrently
3. **Cache** - Cache dependencies between runs
4. **Change Detection** - Only run what's affected (see the sketch after this list)
5. **Immutable Artifacts** - Tag images with commit SHA
6. **Environment Parity** - Same process for all environments
7. **Secrets Management** - Never hardcode, use CI/CD secrets

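On GitHub Actions, the simplest form of change detection is a `paths` filter on the trigger itself; a minimal sketch (paths illustrative). GitLab CI achieves the same with the `rules: changes:` clauses shown earlier.

```yaml
on:
  pull_request:
    paths:
      - 'apps/backend/**'
      - 'pyproject.toml'
```
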
### Quality Gates

```yaml
# Minimum checks before merge
- Lint passes
- Type check passes
- Unit tests pass
- Coverage threshold met (80%+)
- Security scan passes
- No secrets detected
```

### Deployment Strategies

```yaml
# Rolling update (default)
strategy:
  type: RollingUpdate
  rollingUpdate:
    maxSurge: 1
    maxUnavailable: 0

# Blue-green (via service switch)
# Deploy new version alongside old
# Switch service selector when ready

# Canary (gradual rollout)
# Route percentage of traffic to new version
# Monitor metrics before full rollout
```
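
For the blue-green switch, repointing the Service selector is a single command once the new Deployment is healthy; a hedged sketch (labels and names illustrative):

```bash
# Route traffic to the pods labelled version=green
kubectl patch service myapp -n production \
  -p '{"spec":{"selector":{"app":"myapp","version":"green"}}}'
```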
510
.claude/skills/infrastructure/database/SKILL.md
Normal file
@@ -0,0 +1,510 @@
---
name: database-patterns
description: Database design patterns, migrations with Alembic/Prisma, and query optimization. Use when working with SQL/NoSQL databases or schema migrations.
---

# Database Patterns Skill

## Schema Migrations

### Alembic (Python/SQLAlchemy)

#### Setup
```bash
# Initialize Alembic
alembic init alembic

# Configure alembic.ini
sqlalchemy.url = postgresql://user:pass@localhost/myapp
```

#### alembic/env.py Configuration
```python
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
import os

# Import your models
from app.models import Base

config = context.config

# Read logging configuration from alembic.ini
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Override with environment variable
config.set_main_option(
    "sqlalchemy.url",
    os.environ.get("DATABASE_URL", config.get_main_option("sqlalchemy.url"))
)

target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
```

#### Migration Commands
```bash
# Create migration from model changes
alembic revision --autogenerate -m "add users table"

# Create empty migration
alembic revision -m "add custom index"

# Apply migrations
alembic upgrade head

# Rollback one migration
alembic downgrade -1

# Rollback to specific revision
alembic downgrade abc123

# Show current revision
alembic current

# Show migration history
alembic history --verbose
```

#### Migration Best Practices
```python
# alembic/versions/001_add_users_table.py
"""Add users table

Revision ID: abc123
Revises:
Create Date: 2024-01-15 10:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa

revision: str = 'abc123'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    op.create_table(
        'users',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('email', sa.String(255), nullable=False),
        sa.Column('name', sa.String(100), nullable=False),
        sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(timezone=True), onupdate=sa.func.now()),
        sa.PrimaryKeyConstraint('id'),
    )
    # Create index separately for clarity
    op.create_index('ix_users_email', 'users', ['email'], unique=True)


def downgrade() -> None:
    op.drop_index('ix_users_email', table_name='users')
    op.drop_table('users')
```

#### Data Migrations
```python
"""Backfill user full names

Revision ID: def456
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column


def upgrade() -> None:
    # Define a lightweight table construct for the data migration
    users = table('users',
        column('id', sa.UUID),
        column('first_name', sa.String),
        column('last_name', sa.String),
        column('full_name', sa.String),
    )

    # Single UPDATE; for very large tables, batch by primary-key range instead
    connection = op.get_bind()
    connection.execute(
        users.update().values(
            full_name=users.c.first_name + ' ' + users.c.last_name
        )
    )


def downgrade() -> None:
    # Data migrations typically aren't reversible
    pass
```

### Prisma (TypeScript)

#### Schema Definition
```prisma
// prisma/schema.prisma
datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

generator client {
  provider = "prisma-client-js"
}

model User {
  id        String   @id @default(uuid())
  email     String   @unique
  name      String
  role      Role     @default(USER)
  posts     Post[]
  createdAt DateTime @default(now()) @map("created_at")
  updatedAt DateTime @updatedAt @map("updated_at")

  @@map("users")
  @@index([email])
}

model Post {
  id        String   @id @default(uuid())
  title     String
  content   String?
  published Boolean  @default(false)
  author    User     @relation(fields: [authorId], references: [id])
  authorId  String   @map("author_id")
  createdAt DateTime @default(now()) @map("created_at")

  @@map("posts")
  @@index([authorId])
}

enum Role {
  USER
  ADMIN
}
```

#### Migration Commands
```bash
# Create migration from schema changes
npx prisma migrate dev --name add_users_table

# Apply migrations in production
npx prisma migrate deploy

# Reset database (development only)
npx prisma migrate reset

# Generate client
npx prisma generate

# View database
npx prisma studio
```

## SQLAlchemy 2.0 Patterns

### Model Definition
```python
from datetime import datetime
from typing import Optional
from uuid import UUID, uuid4
from sqlalchemy import String, ForeignKey, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship


class Base(DeclarativeBase):
    pass


class User(Base):
    __tablename__ = "users"

    id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
    email: Mapped[str] = mapped_column(String(255), unique=True, index=True)
    name: Mapped[str] = mapped_column(String(100))
    role: Mapped[str] = mapped_column(String(20), default="user")
    created_at: Mapped[datetime] = mapped_column(server_default=func.now())
    updated_at: Mapped[Optional[datetime]] = mapped_column(onupdate=func.now())

    # Relationships
    orders: Mapped[list["Order"]] = relationship(back_populates="user")


class Order(Base):
    __tablename__ = "orders"

    id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
    user_id: Mapped[UUID] = mapped_column(ForeignKey("users.id"))
    total: Mapped[int]  # Store as cents
    status: Mapped[str] = mapped_column(String(20), default="pending")
    created_at: Mapped[datetime] = mapped_column(server_default=func.now())

    # Relationships
    user: Mapped["User"] = relationship(back_populates="orders")
    items: Mapped[list["OrderItem"]] = relationship(back_populates="order")
```

### Async Repository Pattern
```python
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload


class UserRepository:
    def __init__(self, session: AsyncSession):
        self.session = session

    async def get_by_id(self, user_id: UUID) -> User | None:
        result = await self.session.execute(
            select(User).where(User.id == user_id)
        )
        return result.scalar_one_or_none()

    async def get_by_email(self, email: str) -> User | None:
        result = await self.session.execute(
            select(User).where(User.email == email)
        )
        return result.scalar_one_or_none()

    async def list_with_orders(
        self,
        limit: int = 20,
        offset: int = 0
    ) -> list[User]:
        result = await self.session.execute(
            select(User)
            .options(selectinload(User.orders))
            .limit(limit)
            .offset(offset)
        )
        return list(result.scalars().all())

    async def create(self, user: User) -> User:
        self.session.add(user)
        await self.session.flush()
        return user

    async def update(self, user: User) -> User:
        await self.session.flush()
        return user

    async def delete(self, user: User) -> None:
        await self.session.delete(user)
        await self.session.flush()
```

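Wiring the repository into an application needs an async engine and session factory; a minimal sketch using SQLAlchemy's `create_async_engine` and `async_sessionmaker` (connection string illustrative):

```python
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/myapp")
SessionFactory = async_sessionmaker(engine, expire_on_commit=False)


async def example() -> None:
    async with SessionFactory() as session:
        async with session.begin():  # commit on success, rollback on error
            repo = UserRepository(session)
            user = await repo.get_by_email("alice@example.com")
```
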
## Query Optimization

### Indexing Strategies
```sql
-- Primary lookup patterns
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- Composite indexes (order matters!)
CREATE INDEX idx_orders_user_status ON orders(user_id, status);

-- Partial indexes
CREATE INDEX idx_orders_pending ON orders(user_id) WHERE status = 'pending';

-- Covering indexes
CREATE INDEX idx_users_email_name ON users(email) INCLUDE (name);
```

### N+1 Query Prevention
```python
# BAD - N+1 queries
users = await session.execute(select(User))
for user in users.scalars():
    print(user.orders)  # Each access triggers a query!

# GOOD - Eager loading
from sqlalchemy.orm import selectinload, joinedload

# Use selectinload for collections
users = await session.execute(
    select(User).options(selectinload(User.orders))
)

# Use joinedload for single relations
orders = await session.execute(
    select(Order).options(joinedload(Order.user))
)
```

### Pagination
```python
from sqlalchemy import select, func


async def paginate_users(
    session: AsyncSession,
    page: int = 1,
    page_size: int = 20,
) -> dict:
    # Count total
    count_query = select(func.count()).select_from(User)
    total = (await session.execute(count_query)).scalar_one()

    # Fetch page
    offset = (page - 1) * page_size
    query = select(User).limit(page_size).offset(offset).order_by(User.created_at.desc())
    result = await session.execute(query)
    users = list(result.scalars().all())

    return {
        "items": users,
        "total": total,
        "page": page,
        "page_size": page_size,
        "has_more": offset + len(users) < total,
    }
```

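OFFSET pagination rescans all skipped rows, so it degrades on deep pages; keyset (cursor) pagination filters on the last value seen instead. A hedged sketch reusing the imports above (`after_created_at` is the cursor the client sends back):

```python
async def users_after(
    session: AsyncSession,
    after_created_at: datetime | None = None,
    page_size: int = 20,
) -> list[User]:
    query = select(User).order_by(User.created_at.desc()).limit(page_size)
    if after_created_at is not None:
        # Resume strictly after the last row the client saw
        query = query.where(User.created_at < after_created_at)
    result = await session.execute(query)
    return list(result.scalars().all())
```
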
## NoSQL Patterns (MongoDB)

### Document Design
```python
from pydantic import BaseModel, Field
from datetime import datetime
from bson import ObjectId


class PyObjectId(ObjectId):
    @classmethod
    def __get_validators__(cls):
        yield cls.validate

    @classmethod
    def validate(cls, v):
        if not ObjectId.is_valid(v):
            raise ValueError("Invalid ObjectId")
        return ObjectId(v)


class UserDocument(BaseModel):
    id: PyObjectId = Field(default_factory=PyObjectId, alias="_id")
    email: str
    name: str
    # Embed frequently accessed data
    profile: dict = {}
    # Reference for large/changing data
    order_ids: list[str] = []
    created_at: datetime = Field(default_factory=datetime.utcnow)

    class Config:
        allow_population_by_field_name = True
        json_encoders = {ObjectId: str}
```

### MongoDB with Motor (Async)
```python
from motor.motor_asyncio import AsyncIOMotorClient


class MongoUserRepository:
    def __init__(self, client: AsyncIOMotorClient, db_name: str):
        self.collection = client[db_name].users

    async def get_by_id(self, user_id: str) -> dict | None:
        return await self.collection.find_one({"_id": ObjectId(user_id)})

    async def create(self, user: UserDocument) -> str:
        result = await self.collection.insert_one(user.dict(by_alias=True))
        return str(result.inserted_id)

    async def find_by_email_domain(self, domain: str) -> list[dict]:
        cursor = self.collection.find(
            {"email": {"$regex": f"@{domain}$"}},
            {"email": 1, "name": 1}  # Projection
        ).limit(100)
        return await cursor.to_list(length=100)
```

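These lookups only stay fast with supporting indexes, which Motor can create directly; a small sketch (note the suffix-anchored regex in `find_by_email_domain` still scans, since B-tree indexes only serve prefix-anchored patterns):

```python
async def ensure_indexes(self) -> None:
    # Unique constraint + fast lookups for get-by-email style queries
    await self.collection.create_index("email", unique=True)
```
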
## Migration Safety

### Zero-Downtime Migration Pattern

```python
# Step 1: Add new column (nullable)
def upgrade_step1():
    op.add_column('users', sa.Column('full_name', sa.String(200), nullable=True))

# Step 2: Backfill data (separate deployment)
def upgrade_step2():
    # Run as background job, not in migration
    pass

# Step 3: Make column required (after backfill complete)
def upgrade_step3():
    op.alter_column('users', 'full_name', nullable=False)

# Step 4: Remove old columns (after app updated)
def upgrade_step4():
    op.drop_column('users', 'first_name')
    op.drop_column('users', 'last_name')
```

### Pre-Migration Checklist

- [ ] Backup database before migration
- [ ] Test migration on copy of production data
- [ ] Check migration doesn't lock tables for too long (see the snippet after this list)
- [ ] Ensure rollback script works
- [ ] Plan for zero-downtime if needed
- [ ] Coordinate with application deployments

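For the lock-time item, one cheap guard is to set a lock timeout in the migration session so DDL fails fast instead of queueing behind long-running transactions; PostgreSQL sketch:

```sql
-- Abort if the ALTER cannot take its lock within 2 seconds
SET lock_timeout = '2s';
ALTER TABLE users ADD COLUMN full_name varchar(200);
```
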
## Commands

```bash
# Alembic
alembic upgrade head               # Apply all migrations
alembic downgrade -1               # Rollback one
alembic history                    # Show history
alembic current                    # Show current version

# Prisma
npx prisma migrate dev             # Development migration
npx prisma migrate deploy          # Production migration
npx prisma db push                 # Push schema without migration

# PostgreSQL
pg_dump -Fc mydb > backup.dump     # Backup
pg_restore -d mydb backup.dump     # Restore
psql -d mydb -f migration.sql      # Run SQL file
```
459
.claude/skills/infrastructure/docker-kubernetes/SKILL.md
Normal file
@@ -0,0 +1,459 @@
---
name: docker-kubernetes
description: Docker containerization and Kubernetes orchestration patterns. Use when building containers, writing Dockerfiles, or deploying to Kubernetes.
---

# Docker & Kubernetes Skill

## Dockerfile Best Practices

### Multi-Stage Build (Python)
```dockerfile
# Build stage
FROM python:3.12-slim as builder

WORKDIR /app

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY pyproject.toml uv.lock ./
RUN pip install uv && uv sync --frozen --no-dev

# Production stage
FROM python:3.12-slim as production

WORKDIR /app

# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy only necessary files from builder
COPY --from=builder /app/.venv /app/.venv
COPY src/ ./src/

# Set environment
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# Switch to non-root user
USER appuser

EXPOSE 8000

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"

CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
```

### Multi-Stage Build (Node.js)
```dockerfile
# Build stage
FROM node:22-alpine as builder

WORKDIR /app

# Install dependencies first (layer caching)
COPY package*.json ./
RUN npm ci

# Build application
COPY . .
RUN npm run build

# Production stage
FROM node:22-alpine as production

WORKDIR /app

# Create non-root user
RUN addgroup -S appuser && adduser -S appuser -G appuser

# Copy only production dependencies and built files
COPY --from=builder /app/package*.json ./
RUN npm ci --only=production && npm cache clean --force

COPY --from=builder /app/dist ./dist

USER appuser

EXPOSE 3000

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:3000/health || exit 1

CMD ["node", "dist/main.js"]
```

### Multi-Stage Build (Rust)
```dockerfile
# Build stage
FROM rust:1.75-slim as builder

WORKDIR /app

# Create a dummy project for dependency caching
RUN cargo new --bin app
WORKDIR /app/app

# Copy manifests and build dependencies
COPY Cargo.toml Cargo.lock ./
RUN cargo build --release && rm -rf src

# Copy source and build
COPY src ./src
RUN touch src/main.rs && cargo build --release

# Production stage
FROM debian:bookworm-slim as production

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser

COPY --from=builder /app/app/target/release/app /usr/local/bin/app

USER appuser

EXPOSE 8080

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD ["/usr/local/bin/app", "health"]

CMD ["/usr/local/bin/app"]
```

## Docker Compose

### Development Setup
```yaml
# docker-compose.yml
services:
  app:
    build:
      context: .
      target: development
    ports:
      - "8000:8000"
    volumes:
      - .:/app
      - /app/.venv # Exclude venv from mount
    environment:
      - DATABASE_URL=postgresql://user:pass@db:5432/myapp
      - REDIS_URL=redis://redis:6379/0
      - DEBUG=true
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_started

  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: user
      POSTGRES_PASSWORD: pass
      POSTGRES_DB: myapp
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U user -d myapp"]
      interval: 5s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    volumes:
      - redis_data:/data

volumes:
  postgres_data:
  redis_data:
```

### Production Setup
```yaml
# docker-compose.prod.yml
services:
  app:
    image: ${REGISTRY}/myapp:${VERSION}
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '0.5'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 256M
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    environment:
      - DATABASE_URL_FILE=/run/secrets/db_url
    secrets:
      - db_url
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

secrets:
  db_url:
    external: true
```

## Kubernetes Manifests

### Deployment
```yaml
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp
  labels:
    app: myapp
spec:
  replicas: 3
  selector:
    matchLabels:
      app: myapp
  template:
    metadata:
      labels:
        app: myapp
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      containers:
        - name: myapp
          image: myregistry/myapp:v1.0.0
          ports:
            - containerPort: 8000
          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "512Mi"
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: myapp-secrets
                  key: database-url
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 10
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
```

### Service
```yaml
# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: myapp
spec:
  selector:
    app: myapp
  ports:
    - port: 80
      targetPort: 8000
  type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: myapp
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - myapp.example.com
      secretName: myapp-tls
  rules:
    - host: myapp.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: myapp
                port:
                  number: 80
```

### ConfigMap and Secrets
```yaml
# k8s/config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: myapp-config
data:
  LOG_LEVEL: "info"
  CACHE_TTL: "3600"
---
apiVersion: v1
kind: Secret
metadata:
  name: myapp-secrets
type: Opaque
stringData:
  database-url: postgresql://user:pass@db:5432/myapp # Use sealed-secrets in production
```

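As the comment above says, plaintext `stringData` belongs only in dev. With sealed-secrets, the manifest is encrypted before it ever reaches git; a hedged sketch of the workflow, assuming the controller is installed in-cluster and the `kubeseal` CLI locally:

```bash
# Encrypt the Secret manifest; only the in-cluster controller can decrypt it
kubeseal --format yaml < k8s/secret.yaml > k8s/sealed-secret.yaml
kubectl apply -f k8s/sealed-secret.yaml
```
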
### Horizontal Pod Autoscaler
```yaml
# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
```

## Helm Chart Structure

```
myapp-chart/
├── Chart.yaml
├── values.yaml
├── values-prod.yaml
├── templates/
│   ├── _helpers.tpl
│   ├── deployment.yaml
│   ├── service.yaml
│   ├── ingress.yaml
│   ├── configmap.yaml
│   ├── secret.yaml
│   └── hpa.yaml
└── charts/
```

### values.yaml
```yaml
replicaCount: 2

image:
  repository: myregistry/myapp
  tag: latest
  pullPolicy: IfNotPresent

service:
  type: ClusterIP
  port: 80

ingress:
  enabled: true
  host: myapp.example.com
  tls: true

resources:
  requests:
    cpu: 100m
    memory: 128Mi
  limits:
    cpu: 500m
    memory: 512Mi

autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 10
  targetCPUUtilization: 70
```

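Templates consume these values through `.Values`; a minimal excerpt of what `templates/deployment.yaml` might contain (a sketch, not the full manifest):

```yaml
# templates/deployment.yaml (excerpt)
spec:
  replicas: {{ .Values.replicaCount }}
  template:
    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
```
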
## Commands

```bash
# Docker
docker build -t myapp:latest .
docker build --target development -t myapp:dev .
docker run -p 8000:8000 myapp:latest
docker compose up -d
docker compose logs -f app
docker compose down -v

# Kubernetes
kubectl apply -f k8s/
kubectl get pods -l app=myapp
kubectl logs -f deployment/myapp
kubectl rollout status deployment/myapp
kubectl rollout undo deployment/myapp
kubectl port-forward svc/myapp 8000:80

# Helm
helm install myapp ./myapp-chart
helm upgrade myapp ./myapp-chart -f values-prod.yaml
helm rollback myapp 1
helm uninstall myapp
```

## Security Checklist

- [ ] Run as non-root user
- [ ] Use multi-stage builds (minimal final image)
- [ ] Pin base image versions
- [ ] Scan images for vulnerabilities (Trivy, Snyk — see the command after this list)
- [ ] No secrets in images or environment variables
- [ ] Read-only root filesystem where possible
- [ ] Drop all capabilities, add only needed ones
- [ ] Set resource limits
- [ ] Use network policies
- [ ] Enable Pod Security Standards
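
For the image-scanning item, Trivy covers the common case from the CLI; a quick sketch that fails CI on serious findings:

```bash
# Non-zero exit code if HIGH/CRITICAL vulnerabilities are found
trivy image --severity HIGH,CRITICAL --exit-code 1 myapp:latest
```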
464
.claude/skills/infrastructure/gcp/SKILL.md
Normal file
@@ -0,0 +1,464 @@
---
name: gcp-services
description: Google Cloud Platform service patterns, IAM best practices, and common architectures. Use when designing or implementing GCP infrastructure.
---

# GCP Services Skill

## Common Architecture Patterns

### Web Application (Cloud Run + Cloud SQL)
```
┌──────────────────────────────────────────────────────────────┐
│                             VPC                              │
│  ┌────────────────────────────────────────────────────────┐  │
│  │                     Public Subnet                      │  │
│  │   ┌─────────────┐              ┌─────────────┐         │  │
│  │   │   Cloud     │              │   Cloud     │         │  │
│  │   │   Load      │              │   NAT       │         │  │
│  │   │  Balancing  │              │             │         │  │
│  │   └──────┬──────┘              └──────┬──────┘         │  │
│  └──────────┼────────────────────────────┼────────────────┘  │
│             │                            │                   │
│  ┌──────────┼────────────────────────────┼────────────────┐  │
│  │          │      Private Subnet        │                │  │
│  │   ┌──────▼──────┐             ┌───────▼─────┐          │  │
│  │   │  Cloud Run  │             │  Cloud SQL  │          │  │
│  │   │  (Service)  │────────────▶│ PostgreSQL  │          │  │
│  │   └─────────────┘             └─────────────┘          │  │
│  └────────────────────────────────────────────────────────┘  │
└──────────────────────────────────────────────────────────────┘
```

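Deployment-wise, Cloud Run reaches Cloud SQL through the built-in connector and the private network through a serverless VPC connector; a hedged gcloud sketch (instance and connector names illustrative):

```bash
gcloud run deploy my-service \
  --image gcr.io/my-project/my-app \
  --add-cloudsql-instances my-project:europe-west2:my-db \
  --vpc-connector my-connector
```
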
### Serverless (Cloud Functions + API Gateway)
```
┌────────────┐     ┌─────────────┐     ┌─────────────┐
│   Cloud    │────▶│    API      │────▶│   Cloud     │
│   CDN      │     │   Gateway   │     │  Functions  │
└────────────┘     └─────────────┘     └──────┬──────┘
                                              │
                    ┌─────────────────────────┼──────────────┐
                    │                         │              │
             ┌──────▼─────┐             ┌─────▼─────┐   ┌────▼────┐
             │  Firestore │             │   Cloud   │   │ Secret  │
             └────────────┘             │  Storage  │   │ Manager │
                                        └───────────┘   └─────────┘
```

## IAM Best Practices

### Service Account with Least Privilege
```hcl
# Service account for Cloud Run service
resource "google_service_account" "app_sa" {
  account_id   = "my-app-service"
  display_name = "My App Service Account"
}

# Grant specific permissions
resource "google_project_iam_member" "app_storage" {
  project = var.project_id
  role    = "roles/storage.objectViewer"
  member  = "serviceAccount:${google_service_account.app_sa.email}"
}

resource "google_project_iam_member" "app_secrets" {
  project = var.project_id
  role    = "roles/secretmanager.secretAccessor"
  member  = "serviceAccount:${google_service_account.app_sa.email}"
}
```

### Workload Identity Federation
```python
# Python - google-auth with workload identity
from google.auth import default
from google.cloud import storage

# Automatically uses workload identity when on GKE/Cloud Run
credentials, project = default()

# Access Cloud Storage
storage_client = storage.Client(credentials=credentials, project=project)
bucket = storage_client.bucket("my-bucket")
```

## Secret Manager Patterns

### Accessing Secrets
```python
from google.cloud import secretmanager


def get_secret(project_id: str, secret_id: str, version: str = "latest") -> str:
    """Access a secret from Secret Manager."""
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{project_id}/secrets/{secret_id}/versions/{version}"
    response = client.access_secret_version(request={"name": name})
    return response.payload.data.decode("UTF-8")


# Usage
db_password = get_secret("my-project", "database-password")
```

```typescript
// TypeScript - @google-cloud/secret-manager
import { SecretManagerServiceClient } from "@google-cloud/secret-manager";

async function getSecret(
  projectId: string,
  secretId: string,
  version: string = "latest"
): Promise<string> {
  const client = new SecretManagerServiceClient();
  const name = `projects/${projectId}/secrets/${secretId}/versions/${version}`;

  const [response] = await client.accessSecretVersion({ name });
  return response.payload?.data?.toString() || "";
}
```

### Cloud Run with Secret References
```yaml
# Cloud Run service with secret environment variables
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: my-service
spec:
  template:
    spec:
      containers:
        - image: gcr.io/my-project/my-app
          env:
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: database-password
                  key: latest
```

## Cloud Storage Patterns

### Signed URLs
```python
from datetime import timedelta
from google.cloud import storage


def generate_signed_url(
    bucket_name: str,
    blob_name: str,
    expiration_minutes: int = 60
) -> str:
    """Generate a signed URL for downloading a blob."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    url = blob.generate_signed_url(
        version="v4",
        expiration=timedelta(minutes=expiration_minutes),
        method="GET",
    )

    return url
```

### Upload with Resumable Upload
```python
from google.cloud import storage


def upload_large_file(
    bucket_name: str,
    source_file: str,
    destination_blob: str
) -> str:
    """Upload a large file using resumable upload."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob)

    # Large files automatically use a resumable upload session
    # once they exceed the client's multipart-upload threshold
    blob.upload_from_filename(source_file)

    return f"gs://{bucket_name}/{destination_blob}"
```

## Firestore Patterns

### Async Operations
```python
from google.cloud import firestore
from google.cloud.firestore_v1.async_client import AsyncClient


async def get_firestore_client() -> AsyncClient:
    """Create async Firestore client."""
    return AsyncClient()


async def get_user(user_id: str) -> dict | None:
    """Get a user document from Firestore."""
    db = await get_firestore_client()
    doc_ref = db.collection("users").document(user_id)
    doc = await doc_ref.get()

    if doc.exists:
        return doc.to_dict()
    return None


async def query_users_by_status(status: str) -> list[dict]:
    """Query users by status."""
    db = await get_firestore_client()
    query = db.collection("users").where("status", "==", status)

    docs = await query.get()
    return [doc.to_dict() for doc in docs]
```

### Transaction Pattern
```python
from google.cloud import firestore


def transfer_credits(
    from_user_id: str,
    to_user_id: str,
    amount: int
) -> bool:
    """Transfer credits between users atomically."""
    db = firestore.Client()

    @firestore.transactional
    def update_in_transaction(transaction):
        from_ref = db.collection("users").document(from_user_id)
        to_ref = db.collection("users").document(to_user_id)

        from_snapshot = from_ref.get(transaction=transaction)
        to_snapshot = to_ref.get(transaction=transaction)

        from_credits = from_snapshot.get("credits")

        if from_credits < amount:
            raise ValueError("Insufficient credits")

        transaction.update(from_ref, {"credits": from_credits - amount})
        transaction.update(to_ref, {"credits": to_snapshot.get("credits") + amount})

        return True

    transaction = db.transaction()
    return update_in_transaction(transaction)
```

## Cloud Functions Patterns

### HTTP Function with Validation
```python
import functions_framework
from flask import jsonify, Request
from pydantic import BaseModel, ValidationError


class CreateOrderRequest(BaseModel):
    customer_id: str
    items: list[dict]
    total: float


@functions_framework.http
def create_order(request: Request):
    """HTTP Cloud Function for creating orders."""
    try:
        body = request.get_json(silent=True)

        if not body:
            return jsonify({"error": "Request body required"}), 400

        order_request = CreateOrderRequest(**body)

        # Process order...
        result = process_order(order_request)

        return jsonify(result.dict()), 201

    except ValidationError as e:
        return jsonify({"error": e.errors()}), 400
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({"error": "Internal server error"}), 500
```

### Pub/Sub Triggered Function
```python
import base64
import functions_framework
from cloudevents.http import CloudEvent


@functions_framework.cloud_event
def process_message(cloud_event: CloudEvent):
    """Process Pub/Sub message."""
    # Decode the Pub/Sub message
    data = base64.b64decode(cloud_event.data["message"]["data"]).decode()

    print(f"Received message: {data}")

    # Process the message
    process_event(data)
```

## Cloud Run Patterns

### Service with Health Checks
```python
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class HealthResponse(BaseModel):
    status: str
    version: str


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint for Cloud Run."""
    return HealthResponse(status="healthy", version="1.0.0")


@app.get("/ready")
async def readiness_check():
    """Readiness check - verify dependencies."""
    # Check database connection, etc.
    await check_database_connection()
    return {"status": "ready"}
```

### Cloud Run Job
```python
# main.py for Cloud Run Job
import os
from google.cloud import bigquery


def main():
    """Main entry point for Cloud Run Job."""
    task_index = int(os.environ.get("CLOUD_RUN_TASK_INDEX", 0))
    task_count = int(os.environ.get("CLOUD_RUN_TASK_COUNT", 1))

    print(f"Processing task {task_index} of {task_count}")

    # Process batch based on task index
    process_batch(task_index, task_count)


if __name__ == "__main__":
    main()
```

## Cloud Logging

### Structured Logging
```python
import json
import logging
from google.cloud import logging as cloud_logging

# Setup Cloud Logging
client = cloud_logging.Client()
client.setup_logging()

logger = logging.getLogger(__name__)


def log_with_trace(message: str, trace_id: str, project_id: str, **kwargs):
    """Log with trace ID for request correlation."""
    log_entry = {
        "message": message,
        "logging.googleapis.com/trace": f"projects/{project_id}/traces/{trace_id}",
        **kwargs
    }
    logger.info(json.dumps(log_entry))


# Usage
log_with_trace(
    "Order processed",
    trace_id="abc123",
    project_id="my-project",
    order_id="order-456",
    customer_id="cust-789"
)
```

### Custom Metrics
```python
import time

from google.cloud import monitoring_v3


def write_metric(project_id: str, metric_type: str, value: float):
    """Write a custom metric to Cloud Monitoring."""
    client = monitoring_v3.MetricServiceClient()
    project_name = f"projects/{project_id}"

    series = monitoring_v3.TimeSeries()
    series.metric.type = f"custom.googleapis.com/{metric_type}"
    series.resource.type = "global"

    now = time.time()
    seconds = int(now)
    nanos = int((now - seconds) * 10**9)

    interval = monitoring_v3.TimeInterval(
        {"end_time": {"seconds": seconds, "nanos": nanos}}
    )
    point = monitoring_v3.Point(
        {"interval": interval, "value": {"double_value": value}}
    )
    series.points = [point]

    client.create_time_series(name=project_name, time_series=[series])


# Usage
write_metric("my-project", "orders/processed", 1.0)
```

## CLI Commands

```bash
# Authentication
gcloud auth login
gcloud config set project my-project
gcloud config list

# Compute
gcloud compute instances list
gcloud compute ssh my-instance

# Cloud Run
gcloud run services list
gcloud run deploy my-service --image gcr.io/my-project/my-app
gcloud run services describe my-service

# Cloud Functions
gcloud functions list
gcloud functions deploy my-function --runtime python311 --trigger-http
gcloud functions logs read my-function

# Secret Manager
gcloud secrets list
gcloud secrets create my-secret --data-file=secret.txt
gcloud secrets versions access latest --secret=my-secret

# Cloud Storage
gsutil ls gs://my-bucket/
gsutil cp local-file.txt gs://my-bucket/
gsutil signurl -d 1h key.json gs://my-bucket/file.txt

# Firestore
gcloud firestore databases list
gcloud firestore export gs://my-bucket/firestore-backup

# Logging
gcloud logging read "resource.type=cloud_run_revision" --limit=10
```

## Security Checklist

- [ ] Use Service Accounts with least privilege
- [ ] Enable VPC Service Controls for sensitive data
- [ ] Use Secret Manager for all secrets
- [ ] Enable Cloud Audit Logs
- [ ] Configure Identity-Aware Proxy for internal apps
- [ ] Use Private Google Access for GCE instances
- [ ] Enable Binary Authorization for GKE
- [ ] Configure Cloud Armor for DDoS protection
- [ ] Use Customer-Managed Encryption Keys (CMEK) where required
- [ ] Enable Security Command Center
556
.claude/skills/infrastructure/terraform/SKILL.md
Normal file
@@ -0,0 +1,556 @@
---
name: terraform-aws
description: Terraform Infrastructure as Code for AWS with module patterns, state management, and best practices. Use when writing Terraform configurations or planning AWS infrastructure.
---

# Terraform AWS Skill

## Project Structure

```
infrastructure/
├── environments/
│   ├── dev/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   ├── outputs.tf
│   │   ├── terraform.tfvars
│   │   └── backend.tf
│   ├── staging/
│   └── prod/
├── modules/
│   ├── vpc/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   ├── outputs.tf
│   │   └── README.md
│   ├── ecs-service/
│   ├── rds/
│   ├── lambda/
│   └── s3-bucket/
└── shared/
    └── providers.tf
```

## Backend Configuration

### S3 Backend (recommended for AWS)
```hcl
# backend.tf
terraform {
  backend "s3" {
    bucket         = "mycompany-terraform-state"
    key            = "environments/dev/terraform.tfstate"
    region         = "eu-west-2"
    encrypt        = true
    dynamodb_table = "terraform-state-lock"
  }
}
```

### State Lock Table
```hcl
# One-time setup for state locking
resource "aws_dynamodb_table" "terraform_lock" {
  name         = "terraform-state-lock"
  billing_mode = "PAY_PER_REQUEST"
  hash_key     = "LockID"

  attribute {
    name = "LockID"
    type = "S"
  }

  tags = {
    Name        = "Terraform State Lock"
    Environment = "shared"
  }
}
```

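The state bucket itself is also a one-time setup; a hedged sketch with versioning and server-side encryption enabled (bucket name illustrative):

```hcl
resource "aws_s3_bucket" "terraform_state" {
  bucket = "mycompany-terraform-state"
}

resource "aws_s3_bucket_versioning" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  versioning_configuration {
    status = "Enabled"
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "aws:kms"
    }
  }
}
```
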
## Provider Configuration

```hcl
# providers.tf
terraform {
  required_version = ">= 1.6.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}

provider "aws" {
  region = var.aws_region

  default_tags {
    tags = {
      Environment = var.environment
      Project     = var.project_name
      ManagedBy   = "terraform"
    }
  }
}
```

## Module Patterns

### VPC Module
```hcl
# modules/vpc/variables.tf
variable "name" {
  description = "Name prefix for VPC resources"
  type        = string
}

variable "cidr_block" {
  description = "CIDR block for the VPC"
  type        = string
  default     = "10.0.0.0/16"
}

variable "availability_zones" {
  description = "List of availability zones"
  type        = list(string)
}

variable "enable_nat_gateway" {
  description = "Enable NAT Gateway for private subnets"
  type        = bool
  default     = true
}

variable "single_nat_gateway" {
  description = "Use single NAT Gateway (cost saving for non-prod)"
  type        = bool
  default     = false
}

# modules/vpc/main.tf
locals {
  az_count = length(var.availability_zones)

  public_subnets = [
    for i, az in var.availability_zones :
    cidrsubnet(var.cidr_block, 8, i)
  ]

  private_subnets = [
    for i, az in var.availability_zones :
    cidrsubnet(var.cidr_block, 8, i + local.az_count)
  ]
}

resource "aws_vpc" "main" {
  cidr_block           = var.cidr_block
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = var.name
  }
}

resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "${var.name}-igw"
  }
}

resource "aws_subnet" "public" {
  count                   = local.az_count
  vpc_id                  = aws_vpc.main.id
  cidr_block              = local.public_subnets[count.index]
  availability_zone       = var.availability_zones[count.index]
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.name}-public-${var.availability_zones[count.index]}"
    Type = "public"
  }
}

resource "aws_subnet" "private" {
  count             = local.az_count
  vpc_id            = aws_vpc.main.id
  cidr_block        = local.private_subnets[count.index]
  availability_zone = var.availability_zones[count.index]

  tags = {
    Name = "${var.name}-private-${var.availability_zones[count.index]}"
    Type = "private"
  }
}

resource "aws_eip" "nat" {
  count  = var.enable_nat_gateway ? (var.single_nat_gateway ? 1 : local.az_count) : 0
  domain = "vpc"

  tags = {
    Name = "${var.name}-nat-eip-${count.index}"
  }

  depends_on = [aws_internet_gateway.main]
}

resource "aws_nat_gateway" "main" {
  count         = var.enable_nat_gateway ? (var.single_nat_gateway ? 1 : local.az_count) : 0
  allocation_id = aws_eip.nat[count.index].id
  subnet_id     = aws_subnet.public[count.index].id

  tags = {
    Name = "${var.name}-nat-${count.index}"
  }
}

# modules/vpc/outputs.tf
output "vpc_id" {
  description = "ID of the VPC"
  value       = aws_vpc.main.id
}

output "public_subnet_ids" {
  description = "IDs of public subnets"
  value       = aws_subnet.public[*].id
}

output "private_subnet_ids" {
  description = "IDs of private subnets"
  value       = aws_subnet.private[*].id
}
```

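The module as excerpted stops at subnets and NAT gateways; for traffic to actually flow, route tables must associate the subnets with the IGW and NAT. A minimal sketch for the public side (the private side mirrors it, routing `0.0.0.0/0` to `aws_nat_gateway.main`):

```hcl
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = {
    Name = "${var.name}-public-rt"
  }
}

resource "aws_route_table_association" "public" {
  count          = local.az_count
  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}
```
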
### Module Usage
```hcl
# environments/dev/main.tf
module "vpc" {
  source = "../../modules/vpc"

  name               = "${var.project_name}-${var.environment}"
  cidr_block         = "10.0.0.0/16"
  availability_zones = ["eu-west-2a", "eu-west-2b", "eu-west-2c"]
  enable_nat_gateway = true
  single_nat_gateway = true # Cost saving for dev
}

module "ecs_cluster" {
  source = "../../modules/ecs-cluster"

  name               = "${var.project_name}-${var.environment}"
  vpc_id             = module.vpc.vpc_id
  private_subnet_ids = module.vpc.private_subnet_ids

  depends_on = [module.vpc]
}
```

## Security Best Practices

### Security Groups
```hcl
resource "aws_security_group" "app" {
  name        = "${var.name}-app-sg"
  description = "Security group for application servers"
  vpc_id      = var.vpc_id

  # Only allow inbound from load balancer
  ingress {
    description     = "HTTP from ALB"
    from_port       = var.app_port
    to_port         = var.app_port
    protocol        = "tcp"
    security_groups = [aws_security_group.alb.id]
  }

  # Allow all outbound
  egress {
    description = "Allow all outbound"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.name}-app-sg"
  }
}

# NEVER do this - open to world
# ingress {
#   from_port   = 22
#   to_port     = 22
#   protocol    = "tcp"
#   cidr_blocks = ["0.0.0.0/0"]  # BAD!
# }
```

### IAM Roles with Least Privilege
```hcl
data "aws_iam_policy_document" "ecs_task_assume_role" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}

resource "aws_iam_role" "ecs_task_role" {
  name               = "${var.name}-ecs-task-role"
  assume_role_policy = data.aws_iam_policy_document.ecs_task_assume_role.json
}

# Specific permissions only
data "aws_iam_policy_document" "ecs_task_policy" {
  statement {
    sid    = "AllowS3Access"
    effect = "Allow"
    actions = [
      "s3:GetObject",
      "s3:PutObject",
    ]
    resources = [
      "${aws_s3_bucket.app_data.arn}/*"
    ]
  }

  statement {
    sid    = "AllowSecretsAccess"
    effect = "Allow"
    actions = [
      "secretsmanager:GetSecretValue"
    ]
    resources = [
      aws_secretsmanager_secret.app_secrets.arn
    ]
  }
}
```

### Secrets Management
```hcl
# NEVER hardcode secrets
# BAD:
# resource "aws_db_instance" "main" {
#   password = "mysecretpassword"  # NEVER!
# }

# GOOD: Use AWS Secrets Manager
resource "aws_secretsmanager_secret" "db_password" {
  name                    = "${var.name}/db-password"
  recovery_window_in_days = 7
}

resource "aws_secretsmanager_secret_version" "db_password" {
  secret_id = aws_secretsmanager_secret.db_password.id
  secret_string = jsonencode({
    password = random_password.db.result
  })
}

resource "random_password" "db" {
  length           = 32
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"
}

# Reference in RDS
resource "aws_db_instance" "main" {
  # ...
  password = random_password.db.result

  lifecycle {
    ignore_changes = [password]
  }
}
```

## Variables and Validation

```hcl
variable "environment" {
  description = "Environment name"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be dev, staging, or prod."
  }
}

variable "instance_type" {
  description = "EC2 instance type"
  type        = string
  default     = "t3.micro"

  validation {
    condition     = can(regex("^t[23]\\.", var.instance_type))
    error_message = "Only t2 and t3 instance types are allowed."
  }
}

variable "tags" {
  description = "Additional tags for resources"
  type        = map(string)
  default     = {}
}
```

## Data Sources

```hcl
# Get latest Amazon Linux 2 AMI
data "aws_ami" "amazon_linux_2" {
  most_recent = true
  owners      = ["amazon"]

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-*-x86_64-gp2"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# Get current AWS account ID
data "aws_caller_identity" "current" {}

# Get current region
data "aws_region" "current" {}

# Reference in resources
resource "aws_instance" "example" {
  ami           = data.aws_ami.amazon_linux_2.id
  instance_type = var.instance_type

  tags = {
    Name = "example-${data.aws_region.current.name}"
  }
}
```

## Outputs

```hcl
output "vpc_id" {
  description = "ID of the VPC"
  value       = module.vpc.vpc_id
}

output "alb_dns_name" {
  description = "DNS name of the Application Load Balancer"
  value       = aws_lb.main.dns_name
}

output "db_endpoint" {
  description = "Endpoint of the RDS instance"
  value       = aws_db_instance.main.endpoint
  sensitive   = false
}

output "db_password_secret_arn" {
  description = "ARN of the database password secret"
  value       = aws_secretsmanager_secret.db_password.arn
  sensitive   = true # Hide in logs
}
```

## Lifecycle Rules

```hcl
resource "aws_instance" "example" {
  ami           = data.aws_ami.amazon_linux_2.id
  instance_type = var.instance_type

  lifecycle {
    # Prevent accidental destruction
    prevent_destroy = true

    # Create new before destroying old
    create_before_destroy = true

    # Ignore changes to tags made outside Terraform
    ignore_changes = [
      tags["LastModified"],
    ]
  }
}
```

## Commands

```bash
# Initialize
terraform init
terraform init -upgrade               # Upgrade providers

# Planning
terraform plan                        # Preview changes
terraform plan -out=tfplan            # Save plan
terraform plan -target=module.vpc     # Plan specific resource

# Applying
terraform apply                       # Apply changes
terraform apply tfplan                # Apply saved plan
terraform apply -auto-approve         # Skip confirmation (CI/CD only!)

# State management
terraform state list                  # List resources
terraform state show aws_vpc.main     # Show specific resource
terraform import aws_vpc.main vpc-123 # Import existing resource

# Workspace (for multiple environments)
terraform workspace list
terraform workspace new staging
terraform workspace select dev

# Formatting and validation
terraform fmt -check                  # Check formatting
terraform fmt -recursive              # Format all files
terraform validate                    # Validate configuration
```

## Anti-Patterns to Avoid

```hcl
# BAD: Hardcoded values
resource "aws_instance" "web" {
  ami           = "ami-12345678"  # Will break across regions
  instance_type = "t2.micro"      # No flexibility
}

# GOOD: Use data sources and variables
resource "aws_instance" "web" {
  ami           = data.aws_ami.amazon_linux_2.id
  instance_type = var.instance_type
}


# BAD: No state locking
terraform {
  backend "s3" {
    bucket = "my-state"
    key    = "terraform.tfstate"
    # Missing dynamodb_table!
  }
}


# BAD: Secrets in tfvars
# terraform.tfvars
db_password = "mysecret" # NEVER!

# GOOD: Use secrets manager or environment variables
# export TF_VAR_db_password=$(aws secretsmanager get-secret-value ...)
```