feat: initial Claude Code configuration scaffold

Comprehensive Claude Code guidance system with: - 5 agents: tdd-guardian, code-reviewer, security-scanner, refactor-scan, dependency-audit - 18 skills covering languages (Python, TypeScript, Rust, Go, Java, C#), infrastructure (AWS, Azure, GCP, Terraform, Ansible, Docker/K8s, Database, CI/CD), testing (TDD, UI, Browser), and patterns (Monorepo, API Design, Observability) - 3 hooks: secret detection, auto-formatting, TDD git pre-commit - Strict TDD enforcement with 80%+ coverage requirements - Multi-model strategy: Opus for planning, Sonnet for execution (opusplan) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 15:47:34 -05:00
commit befb8fbaeb
34 changed files with 12233 additions and 0 deletions
--- a/.claude/skills/patterns/api-design/SKILL.md
+++ b/.claude/skills/patterns/api-design/SKILL.md
@@ -0,0 +1,462 @@
+---
+name: api-design
+description: REST API design patterns with Pydantic/Zod schemas, error handling, and OpenAPI documentation. Use when designing or implementing API endpoints.
+---
+
+# API Design Skill
+
+## Schema-First Development
+
+Always define schemas before implementation. Schemas serve as:
+- Runtime validation
+- Type definitions
+- API documentation
+- Test data factories
+
+### Python (Pydantic)
+
+```python
+# schemas/user.py
+from datetime import datetime
+from pydantic import BaseModel, EmailStr, Field, field_validator
+
+
+class UserBase(BaseModel):
+    """Shared fields for user schemas."""
+    email: EmailStr
+    name: str = Field(..., min_length=1, max_length=100)
+
+
+class UserCreate(UserBase):
+    """Request schema for creating a user."""
+    password: str = Field(..., min_length=8)
+
+    @field_validator("password")
+    @classmethod
+    def password_strength(cls, v: str) -> str:
+        if not any(c.isupper() for c in v):
+            raise ValueError("Password must contain uppercase")
+        if not any(c.isdigit() for c in v):
+            raise ValueError("Password must contain digit")
+        return v
+
+
+class UserUpdate(BaseModel):
+    """Request schema for updating a user (all optional)."""
+    email: EmailStr | None = None
+    name: str | None = Field(None, min_length=1, max_length=100)
+
+
+class UserResponse(UserBase):
+    """Response schema (no password)."""
+    id: str
+    is_active: bool
+    created_at: datetime
+
+    model_config = {"from_attributes": True}
+
+
+class UserListResponse(BaseModel):
+    """Paginated list response."""
+    items: list[UserResponse]
+    total: int
+    page: int
+    page_size: int
+    has_more: bool
+```
+
+### TypeScript (Zod)
+
+```typescript
+// schemas/user.schema.ts
+import { z } from 'zod';
+
+export const userBaseSchema = z.object({
+  email: z.string().email(),
+  name: z.string().min(1).max(100),
+});
+
+export const userCreateSchema = userBaseSchema.extend({
+  password: z
+    .string()
+    .min(8)
+    .refine((p) => /[A-Z]/.test(p), 'Must contain uppercase')
+    .refine((p) => /\d/.test(p), 'Must contain digit'),
+});
+
+export const userUpdateSchema = userBaseSchema.partial();
+
+export const userResponseSchema = userBaseSchema.extend({
+  id: z.string().uuid(),
+  isActive: z.boolean(),
+  createdAt: z.string().datetime(),
+});
+
+export const userListResponseSchema = z.object({
+  items: z.array(userResponseSchema),
+  total: z.number().int().nonnegative(),
+  page: z.number().int().positive(),
+  pageSize: z.number().int().positive(),
+  hasMore: z.boolean(),
+});
+
+// Derived types
+export type UserCreate = z.infer<typeof userCreateSchema>;
+export type UserUpdate = z.infer<typeof userUpdateSchema>;
+export type UserResponse = z.infer<typeof userResponseSchema>;
+export type UserListResponse = z.infer<typeof userListResponseSchema>;
+
+// Validation functions for API boundaries
+export const parseUserCreate = (data: unknown) => userCreateSchema.parse(data);
+export const parseUserResponse = (data: unknown) => userResponseSchema.parse(data);
+```
+
+## REST Endpoint Patterns
+
+### Resource Naming
+```
+GET    /users              # List users
+POST   /users              # Create user
+GET    /users/{id}         # Get single user
+PUT    /users/{id}         # Full update
+PATCH  /users/{id}         # Partial update
+DELETE /users/{id}         # Delete user
+
+# Nested resources
+GET    /users/{id}/orders  # User's orders
+POST   /users/{id}/orders  # Create order for user
+
+# Actions (when CRUD doesn't fit)
+POST   /users/{id}/activate
+POST   /orders/{id}/cancel
+```
+
+### FastAPI Implementation
+
+```python
+# routers/users.py
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.schemas.user import (
+    UserCreate,
+    UserUpdate,
+    UserResponse,
+    UserListResponse,
+)
+from app.services.user import UserService
+from app.dependencies import get_db, get_current_user
+
+router = APIRouter(prefix="/users", tags=["users"])
+
+
+@router.get("", response_model=UserListResponse)
+async def list_users(
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    db: AsyncSession = Depends(get_db),
+) -> UserListResponse:
+    """List users with pagination."""
+    service = UserService(db)
+    return await service.list_users(page=page, page_size=page_size)
+
+
+@router.post("", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
+async def create_user(
+    data: UserCreate,
+    db: AsyncSession = Depends(get_db),
+) -> UserResponse:
+    """Create a new user."""
+    service = UserService(db)
+    try:
+        return await service.create_user(data)
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
+
+
+@router.get("/{user_id}", response_model=UserResponse)
+async def get_user(
+    user_id: str,
+    db: AsyncSession = Depends(get_db),
+) -> UserResponse:
+    """Get a user by ID."""
+    service = UserService(db)
+    user = await service.get_user(user_id)
+    if not user:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+    return user
+
+
+@router.patch("/{user_id}", response_model=UserResponse)
+async def update_user(
+    user_id: str,
+    data: UserUpdate,
+    db: AsyncSession = Depends(get_db),
+    current_user: UserResponse = Depends(get_current_user),
+) -> UserResponse:
+    """Partially update a user."""
+    service = UserService(db)
+    user = await service.update_user(user_id, data)
+    if not user:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+    return user
+
+
+@router.delete("/{user_id}", status_code=status.HTTP_204_NO_CONTENT)
+async def delete_user(
+    user_id: str,
+    db: AsyncSession = Depends(get_db),
+    current_user: UserResponse = Depends(get_current_user),
+) -> None:
+    """Delete a user."""
+    service = UserService(db)
+    deleted = await service.delete_user(user_id)
+    if not deleted:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+```
+
+## Error Handling
+
+### Standard Error Response (RFC 9457, formerly RFC 7807)
+
+```python
+# schemas/error.py
+from pydantic import BaseModel
+
+
+class ErrorDetail(BaseModel):
+    """Standard error response following RFC 7807."""
+    type: str = "about:blank"
+    title: str
+    status: int
+    detail: str
+    instance: str | None = None
+
+
+# Exception handler
+from fastapi import Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    return JSONResponse(
+        status_code=422,
+        content=ErrorDetail(
+            type="validation_error",
+            title="Validation Error",
+            status=422,
+            detail=str(exc.errors()),
+            instance=str(request.url),
+        ).model_dump(),
+    )
+```
+
+### TypeScript Error Handling
+
+```typescript
+// lib/api-client.ts
+import axios, { AxiosError } from 'axios';
+import { z } from 'zod';
+
+const errorSchema = z.object({
+  type: z.string(),
+  title: z.string(),
+  status: z.number(),
+  detail: z.string(),
+  instance: z.string().optional(),
+});
+
+export class ApiError extends Error {
+  constructor(
+    public status: number,
+    public title: string,
+    public detail: string,
+  ) {
+    super(detail);
+    this.name = 'ApiError';
+  }
+}
+
+export const apiClient = axios.create({
+  baseURL: '/api',
+  headers: { 'Content-Type': 'application/json' },
+});
+
+apiClient.interceptors.response.use(
+  (response) => response,
+  (error: AxiosError) => {
+    if (error.response?.data) {
+      const parsed = errorSchema.safeParse(error.response.data);
+      if (parsed.success) {
+        throw new ApiError(
+          parsed.data.status,
+          parsed.data.title,
+          parsed.data.detail,
+        );
+      }
+    }
+    throw new ApiError(500, 'Server Error', 'An unexpected error occurred');
+  },
+);
+```
+
+## Pagination Pattern
+
+```python
+# schemas/pagination.py
+from typing import Generic, TypeVar
+from pydantic import BaseModel, Field, computed_field
+
+T = TypeVar("T")
+
+
+class PaginatedResponse(BaseModel, Generic[T]):
+    """Generic paginated response."""
+    items: list[T]
+    total: int
+    page: int = Field(ge=1)
+    page_size: int = Field(ge=1, le=100)
+
+    # computed_field makes has_more part of the serialized response and the
+    # OpenAPI schema; a plain @property is silently dropped by Pydantic v2.
+    @computed_field
+    @property
+    def has_more(self) -> bool:
+        return self.page * self.page_size < self.total
+
+    @property
+    def total_pages(self) -> int:
+        return (self.total + self.page_size - 1) // self.page_size
+
+
+# Usage
+class UserListResponse(PaginatedResponse[UserResponse]):
+    pass
+```
+
+## Query Parameters
+
+```python
+# dependencies/pagination.py
+from datetime import datetime
+
+from fastapi import Query
+from pydantic import BaseModel
+
+
+class PaginationParams(BaseModel):
+    page: int = Query(1, ge=1, description="Page number")
+    page_size: int = Query(20, ge=1, le=100, description="Items per page")
+
+    @property
+    def offset(self) -> int:
+        return (self.page - 1) * self.page_size
+
+
+class SortParams(BaseModel):
+    sort_by: str = Query("created_at", description="Field to sort by")
+    sort_order: str = Query("desc", pattern="^(asc|desc)$")
+
+
+class FilterParams(BaseModel):
+    search: str | None = Query(None, min_length=1, max_length=100)
+    status: str | None = Query(None, pattern="^(active|inactive|pending)$")
+    created_after: datetime | None = Query(None)
+    created_before: datetime | None = Query(None)
+```
+
+## OpenAPI Documentation
+
+```python
+# main.py
+from fastapi import FastAPI
+from fastapi.openapi.utils import get_openapi
+
+app = FastAPI(
+    title="My API",
+    description="API for managing resources",
+    version="1.0.0",
+    docs_url="/docs",
+    redoc_url="/redoc",
+)
+
+def custom_openapi():
+    if app.openapi_schema:
+        return app.openapi_schema
+
+    openapi_schema = get_openapi(
+        title=app.title,
+        version=app.version,
+        description=app.description,
+        routes=app.routes,
+    )
+
+    # Add security scheme
+    openapi_schema["components"]["securitySchemes"] = {
+        "bearerAuth": {
+            "type": "http",
+            "scheme": "bearer",
+            "bearerFormat": "JWT",
+        }
+    }
+
+    app.openapi_schema = openapi_schema
+    return app.openapi_schema
+
+app.openapi = custom_openapi
+```
+
+## HTTP Status Codes
+
+| Code | Meaning | When to Use |
+|------|---------|-------------|
+| 200 | OK | Successful GET, PUT, PATCH |
+| 201 | Created | Successful POST creating resource |
+| 204 | No Content | Successful DELETE |
+| 400 | Bad Request | Invalid request body/params |
+| 401 | Unauthorized | Missing/invalid authentication |
+| 403 | Forbidden | Authenticated but not authorized |
+| 404 | Not Found | Resource doesn't exist |
+| 409 | Conflict | Duplicate resource (e.g., email exists) |
+| 422 | Unprocessable Entity | Validation error |
+| 500 | Server Error | Unexpected server error |
+
+## Anti-Patterns
+
+```python
+# BAD: Returning different shapes
+@router.get("/users/{id}")
+async def get_user(id: str):
+    user = await service.get_user(id)
+    if user:
+        return user  # UserResponse
+    return {"error": "not found"}  # Different shape!
+
+# GOOD: Consistent response or exception
+@router.get("/users/{id}", response_model=UserResponse)
+async def get_user(id: str):
+    user = await service.get_user(id)
+    if not user:
+        raise HTTPException(status_code=404, detail="User not found")
+    return user
+
+
+# BAD: Exposing internal details
+class UserResponse(BaseModel):
+    id: str
+    email: str
+    hashed_password: str  # NEVER expose!
+    internal_notes: str   # Internal only!
+
+# GOOD: Explicit public fields
+class UserResponse(BaseModel):
+    id: str
+    email: str
+    name: str
+    # Only fields clients need
+
+
+# BAD: No validation at boundary
+@router.post("/users")
+async def create_user(data: dict):  # Unvalidated!
+    return await service.create(data)
+
+# GOOD: Schema validation
+@router.post("/users", response_model=UserResponse)
+async def create_user(data: UserCreate):  # Validated!
+    return await service.create(data)
+```
--- a/.claude/skills/patterns/monorepo/SKILL.md
+++ b/.claude/skills/patterns/monorepo/SKILL.md
@@ -0,0 +1,404 @@
+---
+name: monorepo-patterns
+description: Monorepo workspace patterns for multi-package projects with shared dependencies, testing strategies, and CI/CD. Use when working in monorepo structures.
+---
+
+# Monorepo Patterns Skill
+
+## Recommended Structure
+
+```
+project/
+├── apps/
+│   ├── backend/                 # Python FastAPI
+│   │   ├── src/
+│   │   ├── tests/
+│   │   └── pyproject.toml
+│   └── frontend/                # React TypeScript
+│       ├── src/
+│       ├── tests/
+│       └── package.json
+├── packages/
+│   ├── shared-types/            # Shared TypeScript types
+│   │   ├── src/
+│   │   └── package.json
+│   └── ui-components/           # Shared React components
+│       ├── src/
+│       └── package.json
+├── infrastructure/
+│   ├── terraform/
+│   │   ├── environments/
+│   │   └── modules/
+│   └── ansible/
+│       ├── playbooks/
+│       └── roles/
+├── scripts/                     # Shared scripts
+├── docs/                        # Documentation
+├── .github/
+│   └── workflows/
+├── package.json                 # Root (workspaces config)
+├── pyproject.toml               # Python workspace config
+└── CLAUDE.md                    # Project-level guidance
+```
+
+## Workspace Configuration
+
+### npm Workspaces (Node.js)
+```jsonc
+// package.json (root)
+{
+  "name": "my-monorepo",
+  "private": true,
+  "workspaces": [
+    "apps/*",
+    "packages/*"
+  ],
+  "scripts": {
+    "dev": "npm run dev --workspaces --if-present",
+    "build": "npm run build --workspaces --if-present",
+    "test": "npm run test --workspaces --if-present",
+    "lint": "npm run lint --workspaces --if-present",
+    "typecheck": "npm run typecheck --workspaces --if-present"
+  },
+  "devDependencies": {
+    "typescript": "^5.6.0",
+    "vitest": "^3.2.0",
+    "@types/node": "^22.0.0"
+  }
+}
+```
+
+### UV Workspace (Python)
+```toml
+# pyproject.toml (root)
+[project]
+name = "my-monorepo"
+version = "0.0.0"
+requires-python = ">=3.11"
+
+[tool.uv.workspace]
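+# List Python members explicitly: uv requires a pyproject.toml in every matched directory
+members = ["apps/backend", "packages/shared-utils"]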
+
+[tool.uv.sources]
+shared-utils = { workspace = true }
+```
+
+## Package References
+
+### TypeScript Internal Packages
+```jsonc
+// packages/shared-types/package.json
+{
+  "name": "@myorg/shared-types",
+  "version": "0.0.0",
+  "private": true,
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "tsc",
+    "dev": "tsc --watch"
+  }
+}
+
+// apps/frontend/package.json
+{
+  "name": "@myorg/frontend",
+  "dependencies": {
+    "@myorg/shared-types": "*"
+  }
+}
+```
+
+### Python Internal Packages
+```toml
+# packages/shared-utils/pyproject.toml
+[project]
+name = "shared-utils"
+version = "0.1.0"
+dependencies = []
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+# apps/backend/pyproject.toml
+[project]
+name = "backend"
+dependencies = [
+  "shared-utils",  # Resolved via workspace
+]
+```
+
+## Testing Strategies
+
+### Run All Tests
+```bash
+# From root
+npm test                          # All Node packages
+uv run pytest                     # All Python packages
+
+# Specific workspace
+npm test --workspace=@myorg/frontend
+uv run pytest apps/backend/
+```
+
+### Test Dependencies Between Packages
+```typescript
+// packages/shared-types/src/user.ts
+export type User = {
+  id: string;
+  email: string;
+  name: string;
+};
+
+// apps/frontend/src/features/users/types.ts
+// Import from workspace package
+import type { User } from '@myorg/shared-types';
+
+export type UserListProps = {
+  users: User[];
+  onSelect: (user: User) => void;
+};
+```
+
+### Integration Tests Across Packages
+```typescript
+// apps/frontend/tests/integration/api.test.tsx
+import { User } from '@myorg/shared-types';
+import { renderWithProviders } from '../utils/render';
+
+describe('Frontend-Backend Integration', () => {
+  it('should display user from API', async () => {
+    const mockUser: User = {
+      id: 'user-1',
+      email: 'test@example.com',
+      name: 'Test User',
+    };
+
+    // Mock API response with shared type
+    server.use(
+      http.get('/api/users/user-1', () => HttpResponse.json(mockUser))
+    );
+
+    renderWithProviders(<UserProfile userId="user-1" />);
+
+    await expect(screen.findByText('Test User')).resolves.toBeInTheDocument();
+  });
+});
+```
+
+## CI/CD Patterns
+
+### Change Detection
+```yaml
+# .github/workflows/ci.yml
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      frontend: ${{ steps.changes.outputs.frontend }}
+      backend: ${{ steps.changes.outputs.backend }}
+      infrastructure: ${{ steps.changes.outputs.infrastructure }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: changes
+        with:
+          filters: |
+            frontend:
+              - 'apps/frontend/**'
+              - 'packages/shared-types/**'
+              - 'packages/ui-components/**'
+            backend:
+              - 'apps/backend/**'
+              - 'packages/shared-utils/**'
+            infrastructure:
+              - 'infrastructure/**'
+
+  frontend:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.frontend == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+      - run: npm ci
+      - run: npm run typecheck --workspace=@myorg/frontend
+      - run: npm run lint --workspace=@myorg/frontend
+      - run: npm run test --workspace=@myorg/frontend
+
+  backend:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.backend == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v4
+      - run: uv sync
+      - run: uv run ruff check apps/backend/
+      - run: uv run mypy apps/backend/
+      - run: uv run pytest apps/backend/ --cov --cov-fail-under=80
+```
+
+### Jenkinsfile for Monorepo
+```groovy
+// Jenkinsfile
+pipeline {
+    agent any
+
+    stages {
+        stage('Detect Changes') {
+            steps {
+                script {
+                    def changes = sh(
+                        script: 'git diff --name-only HEAD~1',
+                        returnStdout: true
+                    ).trim().split('\n')
+
+                    env.FRONTEND_CHANGED = changes.any { it.startsWith('apps/frontend/') || it.startsWith('packages/') }
+                    env.BACKEND_CHANGED = changes.any { it.startsWith('apps/backend/') || it.startsWith('packages/shared-utils/') }
+                    env.INFRA_CHANGED = changes.any { it.startsWith('infrastructure/') }
+                }
+            }
+        }
+
+        stage('Frontend') {
+            when {
+                expression { env.FRONTEND_CHANGED == 'true' }
+            }
+            steps {
+                dir('apps/frontend') {
+                    sh 'npm ci'
+                    sh 'npm run typecheck'
+                    sh 'npm run lint'
+                    sh 'npm run test'
+                }
+            }
+        }
+
+        stage('Backend') {
+            when {
+                expression { env.BACKEND_CHANGED == 'true' }
+            }
+            steps {
+                sh 'uv sync'
+                sh 'uv run ruff check apps/backend/'
+                sh 'uv run pytest apps/backend/ --cov --cov-fail-under=80'
+            }
+        }
+
+        stage('Infrastructure') {
+            when {
+                expression { env.INFRA_CHANGED == 'true' }
+            }
+            steps {
+                dir('infrastructure/terraform') {
+                    sh 'terraform init'
+                    sh 'terraform validate'
+                    sh 'terraform fmt -check -recursive'
+                }
+            }
+        }
+    }
+}
+```
+
+## Dependency Management
+
+### Shared Dependencies at Root
+```jsonc
+// package.json (root)
+{
+  "devDependencies": {
+    // Shared dev dependencies
+    "typescript": "^5.6.0",
+    "vitest": "^3.2.0",
+    "eslint": "^9.0.0",
+    "@types/node": "^22.0.0"
+  }
+}
+```
+
+### Package-Specific Dependencies
+```jsonc
+// apps/frontend/package.json
+{
+  "dependencies": {
+    // App-specific dependencies
+    "react": "^18.3.0",
+    "@tanstack/react-query": "^5.0.0"
+  }
+}
+```
+
+## Commands Quick Reference
+
+```bash
+# Install all dependencies
+npm install                       # Node (from root)
+uv sync                           # Python
+
+# Run in specific workspace
+npm run dev --workspace=@myorg/frontend
+npm run test --workspace=@myorg/shared-types
+
+# Run in all workspaces
+npm run build --workspaces
+npm run test --workspaces --if-present
+
+# Add dependency to specific package
+npm install lodash --workspace=@myorg/frontend
+uv add requests --package backend
+
+# Add shared dependency to root
+npm install -D prettier
+```
+
+## CLAUDE.md Placement
+
+### Root CLAUDE.md (Project-Wide)
+```markdown
+# Project Standards
+
+[Core standards that apply everywhere]
+```
+
+### Package-Specific CLAUDE.md
+```markdown
+# apps/frontend/CLAUDE.md
+
+## Frontend-Specific Standards
+
+- Use React Testing Library for component tests
+- Prefer Radix UI primitives
+- Use TanStack Query for server state
+```
+
+```markdown
+# apps/backend/CLAUDE.md
+
+## Backend-Specific Standards
+
+- Use pytest-asyncio for async tests
+- Pydantic v2 for all schemas
+- SQLAlchemy 2.0 async patterns
+```
+
+Skills in `~/.claude/skills/` are automatically available across all packages in the monorepo.
--- a/.claude/skills/patterns/observability/SKILL.md
+++ b/.claude/skills/patterns/observability/SKILL.md
@@ -0,0 +1,486 @@
+---
+name: observability
+description: Logging, metrics, and tracing patterns for application observability. Use when implementing monitoring, debugging, or production visibility.
+---
+
+# Observability Skill
+
+## Three Pillars
+
+1. **Logs** - Discrete events with context
+2. **Metrics** - Aggregated measurements over time
+3. **Traces** - Request flow across services
+
+## Structured Logging
+
+### Python (structlog)
+```python
+import logging
+
+import structlog
+from structlog.types import Processor
+
+def configure_logging(json_output: bool = True) -> None:
+    """Configure structured logging."""
+    processors: list[Processor] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+    ]
+
+    if json_output:
+        processors.append(structlog.processors.JSONRenderer())
+    else:
+        processors.append(structlog.dev.ConsoleRenderer())
+
+    structlog.configure(
+        processors=processors,
+        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
+        context_class=dict,
+        logger_factory=structlog.PrintLoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+# Usage
+logger = structlog.get_logger()
+
+# Add context that persists across log calls
+structlog.contextvars.bind_contextvars(
+    request_id="req-123",
+    user_id="user-456",
+)
+
+logger.info("order_created", order_id="order-789", total=150.00)
+# {"event": "order_created", "order_id": "order-789", "total": 150.0, "request_id": "req-123", "user_id": "user-456", "level": "info", "timestamp": "2024-01-15T10:30:00Z"}
+
+logger.error("payment_failed", order_id="order-789", error="insufficient_funds")
+```
+
+### TypeScript (pino)
+```typescript
+import pino from 'pino';
+
+const logger = pino({
+  level: process.env.LOG_LEVEL || 'info',
+  formatters: {
+    level: (label) => ({ level: label }),
+  },
+  timestamp: pino.stdTimeFunctions.isoTime,
+  redact: ['password', 'token', 'authorization'],
+});
+
+// Create child logger with bound context
+const requestLogger = logger.child({
+  requestId: 'req-123',
+  userId: 'user-456',
+});
+
+requestLogger.info({ orderId: 'order-789', total: 150.0 }, 'order_created');
+requestLogger.error({ orderId: 'order-789', error: 'insufficient_funds' }, 'payment_failed');
+
+// Express middleware
+import { randomUUID } from 'crypto';
+
+const loggingMiddleware = (req, res, next) => {
+  const requestId = req.headers['x-request-id'] || randomUUID();
+
+  req.log = logger.child({
+    requestId,
+    method: req.method,
+    path: req.path,
+    userAgent: req.headers['user-agent'],
+  });
+
+  const startTime = Date.now();
+
+  res.on('finish', () => {
+    req.log.info({
+      statusCode: res.statusCode,
+      durationMs: Date.now() - startTime,
+    }, 'request_completed');
+  });
+
+  next();
+};
+```
+
+### Log Levels
+
+| Level | When to Use |
+|-------|-------------|
+| `error` | Failures requiring attention |
+| `warn` | Unexpected but handled situations |
+| `info` | Business events (order created, user logged in) |
+| `debug` | Technical details for debugging |
+| `trace` | Very detailed tracing (rarely used in prod) |
+
+## Metrics
+
+### Python (prometheus-client)
+```python
+from prometheus_client import Counter, Histogram, Gauge, start_http_server
+import time
+
+# Define metrics
+REQUEST_COUNT = Counter(
+    'http_requests_total',
+    'Total HTTP requests',
+    ['method', 'endpoint', 'status']
+)
+
+REQUEST_LATENCY = Histogram(
+    'http_request_duration_seconds',
+    'HTTP request latency',
+    ['method', 'endpoint'],
+    buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 5.0]
+)
+
+ACTIVE_CONNECTIONS = Gauge(
+    'active_connections',
+    'Number of active connections'
+)
+
+ORDERS_PROCESSED = Counter(
+    'orders_processed_total',
+    'Total orders processed',
+    ['status']  # success, failed
+)
+
+# Usage
+def process_request(method: str, endpoint: str):
+    ACTIVE_CONNECTIONS.inc()
+    start_time = time.time()
+
+    try:
+        # Process request...
+        REQUEST_COUNT.labels(method=method, endpoint=endpoint, status='200').inc()
+    except Exception:
+        REQUEST_COUNT.labels(method=method, endpoint=endpoint, status='500').inc()
+        raise
+    finally:
+        REQUEST_LATENCY.labels(method=method, endpoint=endpoint).observe(
+            time.time() - start_time
+        )
+        ACTIVE_CONNECTIONS.dec()
+
+# FastAPI middleware
+from fastapi import FastAPI, Request
+from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
+from starlette.responses import Response
+
+app = FastAPI()
+
+@app.middleware("http")
+async def metrics_middleware(request: Request, call_next):
+    start_time = time.time()
+    response = await call_next(request)
+
+    REQUEST_COUNT.labels(
+        method=request.method,
+        endpoint=request.url.path,
+        status=response.status_code
+    ).inc()
+
+    REQUEST_LATENCY.labels(
+        method=request.method,
+        endpoint=request.url.path
+    ).observe(time.time() - start_time)
+
+    return response
+
+@app.get("/metrics")
+async def metrics():
+    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+```
+
+### TypeScript (prom-client)
+```typescript
+import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client';
+
+const register = new Registry();
+collectDefaultMetrics({ register });
+
+const httpRequestsTotal = new Counter({
+  name: 'http_requests_total',
+  help: 'Total HTTP requests',
+  labelNames: ['method', 'path', 'status'],
+  registers: [register],
+});
+
+const httpRequestDuration = new Histogram({
+  name: 'http_request_duration_seconds',
+  help: 'HTTP request duration',
+  labelNames: ['method', 'path'],
+  buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
+  registers: [register],
+});
+
+// Express middleware
+const metricsMiddleware = (req, res, next) => {
+  const end = httpRequestDuration.startTimer({ method: req.method, path: req.path });
+
+  res.on('finish', () => {
+    httpRequestsTotal.inc({ method: req.method, path: req.path, status: res.statusCode });
+    end();
+  });
+
+  next();
+};
+
+// Metrics endpoint
+app.get('/metrics', async (req, res) => {
+  res.set('Content-Type', register.contentType);
+  res.end(await register.metrics());
+});
+```
+
+### Key Metrics (RED Method)
+
+| Metric | Description |
+|--------|-------------|
+| **R**ate | Requests per second |
+| **E**rrors | Error rate (%) |
+| **D**uration | Latency (p50, p95, p99) |
+
+### Key Metrics (USE Method for Resources)
+
+| Metric | Description |
+|--------|-------------|
+| **U**tilization | % time resource is busy |
+| **S**aturation | Queue depth, backlog |
+| **E**rrors | Error count |
+
+## Distributed Tracing
+
+### Python (OpenTelemetry)
+```python
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
+from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
+
+def configure_tracing(service_name: str, otlp_endpoint: str) -> None:
+    """Configure OpenTelemetry tracing."""
+    resource = Resource.create({"service.name": service_name})
+
+    provider = TracerProvider(resource=resource)
+    processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=otlp_endpoint))
+    provider.add_span_processor(processor)
+
+    trace.set_tracer_provider(provider)
+
+    # Auto-instrument libraries
+    FastAPIInstrumentor().instrument()
+    SQLAlchemyInstrumentor().instrument()
+    HTTPXClientInstrumentor().instrument()
+
+# Manual instrumentation
+tracer = trace.get_tracer(__name__)
+
+async def process_order(order_id: str) -> dict:
+    with tracer.start_as_current_span("process_order") as span:
+        span.set_attribute("order.id", order_id)
+
+        # Child span for validation
+        with tracer.start_as_current_span("validate_order"):
+            validated = await validate_order(order_id)
+
+        # Child span for payment
+        with tracer.start_as_current_span("process_payment") as payment_span:
+            payment_span.set_attribute("payment.method", "card")
+            result = await charge_payment(order_id)
+
+        span.set_attribute("order.status", "completed")
+        return result
+```
+
+### TypeScript (OpenTelemetry)
+```typescript
+import { NodeSDK } from '@opentelemetry/sdk-node';
+import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
+import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
+import { Resource } from '@opentelemetry/resources';
+import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions';
+
+// Resource attributes that identify this service on every exported span.
+const resource = new Resource({
+  [SemanticResourceAttributes.SERVICE_NAME]: 'my-service',
+});
+
+// OTLP trace exporter; the target URL is read from the OTLP_ENDPOINT env var.
+const traceExporter = new OTLPTraceExporter({
+  url: process.env.OTLP_ENDPOINT,
+});
+
+// Wire resource, exporter and Node auto-instrumentations into one SDK instance.
+const sdk = new NodeSDK({
+  resource,
+  traceExporter,
+  instrumentations: [getNodeAutoInstrumentations()],
+});
+
+sdk.start();
+
+// Manual instrumentation
+import { trace, SpanStatusCode } from '@opentelemetry/api';
+
+const tracer = trace.getTracer('my-service');
+
+/**
+ * Validate and charge an order inside a `process_order` span.
+ *
+ * Validation and payment each run in their own child span. Every span is
+ * ended in a `finally` block so it is closed even when the wrapped call throws.
+ *
+ * @param orderId Identifier recorded on the root span as `order.id`.
+ * @returns The value resolved by `chargePayment`.
+ * @throws Re-throws any error from validation or payment after recording it
+ *         on the root span.
+ */
+async function processOrder(orderId: string) {
+  return tracer.startActiveSpan('process_order', async (span) => {
+    try {
+      span.setAttribute('order.id', orderId);
+
+      await tracer.startActiveSpan('validate_order', async (validateSpan) => {
+        try {
+          await validateOrder(orderId);
+        } finally {
+          // Without this, a validation failure would leave the span open forever.
+          validateSpan.end();
+        }
+      });
+
+      const result = await tracer.startActiveSpan('process_payment', async (paymentSpan) => {
+        try {
+          paymentSpan.setAttribute('payment.method', 'card');
+          return await chargePayment(orderId);
+        } finally {
+          paymentSpan.end();
+        }
+      });
+
+      span.setStatus({ code: SpanStatusCode.OK });
+      return result;
+    } catch (error) {
+      // The catch variable may be `unknown`; narrow it before reading `.message`.
+      const message = error instanceof Error ? error.message : String(error);
+      span.setStatus({ code: SpanStatusCode.ERROR, message });
+      span.recordException(error instanceof Error ? error : message);
+      throw error;
+    } finally {
+      span.end();
+    }
+  });
+}
+```
+
+## Health Checks
+
+```python
+import asyncio
+from enum import Enum
+
+from fastapi import FastAPI, HTTPException, Response
+from pydantic import BaseModel
+
+class HealthStatus(str, Enum):
+    """Health levels reported for a single component or for the whole service."""
+    HEALTHY = "healthy"
+    DEGRADED = "degraded"
+    UNHEALTHY = "unhealthy"
+
+class ComponentHealth(BaseModel):
+    """Health report for one named component."""
+    name: str
+    status: HealthStatus
+    # Optional detail text; left as None when there is nothing to report.
+    message: str | None = None
+
+class HealthResponse(BaseModel):
+    """Response body of the ``/health`` endpoint."""
+    # Overall status derived from the individual component results.
+    status: HealthStatus
+    version: str
+    # One entry per checked component.
+    components: list[ComponentHealth]
+
+async def check_database() -> ComponentHealth:
+    try:
+        await db.execute("SELECT 1")
+        return ComponentHealth(name="database", status=HealthStatus.HEALTHY)
+    except Exception as e:
+        return ComponentHealth(name="database", status=HealthStatus.UNHEALTHY, message=str(e))
+
+async def check_redis() -> ComponentHealth:
+    try:
+        await redis.ping()
+        return ComponentHealth(name="redis", status=HealthStatus.HEALTHY)
+    except Exception as e:
+        return ComponentHealth(name="redis", status=HealthStatus.DEGRADED, message=str(e))
+
+@app.get("/health", response_model=HealthResponse)
+async def health_check(response: Response):
+    components = await asyncio.gather(
+        check_database(),
+        check_redis(),
+    )
+
+    # Overall status is worst component status
+    if any(c.status == HealthStatus.UNHEALTHY for c in components):
+        overall = HealthStatus.UNHEALTHY
+        response.status_code = 503
+    elif any(c.status == HealthStatus.DEGRADED for c in components):
+        overall = HealthStatus.DEGRADED
+    else:
+        overall = HealthStatus.HEALTHY
+
+    return HealthResponse(
+        status=overall,
+        version="1.0.0",
+        components=components,
+    )
+
+@app.get("/ready")
+async def readiness_check():
+    """Kubernetes readiness probe - can we serve traffic?"""
+    # Check critical dependencies
+    await check_database()
+    return {"status": "ready"}
+
+@app.get("/live")
+async def liveness_check():
+    """Kubernetes liveness probe - is the process healthy?"""
+    return {"status": "alive"}
+```
+
+## Alerting Rules
+
+```yaml
+# prometheus-rules.yaml
+groups:
+  - name: application
+    rules:
+      # High error rate: share of 5xx responses among all requests over the
+      # last 5 minutes exceeds 5%, sustained for 5 minutes.
+      - alert: HighErrorRate
+        expr: |
+          sum(rate(http_requests_total{status=~"5.."}[5m]))
+          /
+          sum(rate(http_requests_total[5m])) > 0.05
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: "High error rate detected"
+          description: "Error rate is {{ $value | humanizePercentage }}"
+
+      # High latency: p95 request duration above 1 second for 5 minutes.
+      - alert: HighLatency
+        expr: |
+          histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "High latency detected"
+          description: "p95 latency is {{ $value }}s"
+
+      # Service down: the scrape target has reported up == 0 for 1 minute.
+      - alert: ServiceDown
+        expr: up == 0
+        for: 1m
+        labels:
+          severity: critical
+        annotations:
+          summary: "Service is down"
+          description: "{{ $labels.job }} on {{ $labels.instance }} has been unreachable for more than 1 minute"
+```
+
+## Best Practices
+
+### Logging
+- Use structured JSON logs
+- Include correlation/request IDs
+- Redact sensitive data
+- Use appropriate log levels
+- Avoid logging every event in hot paths; sample high-volume logs instead
+
+### Metrics
+- Use consistent naming conventions
+- Keep cardinality under control
+- Use histograms for latency (not averages)
+- Export business metrics alongside technical ones
+
+### Tracing
+- Instrument at service boundaries
+- Propagate context across services
+- Sample appropriately in production
+- Add relevant attributes to spans