diff --git a/.env.example b/.env.example index f820780..925b7e4 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,8 @@ REDIS_URL=redis://localhost:6379/0 SECRET_KEY=replace-with-a-long-random-secret JWT_ALGORITHM=HS256 ACCESS_TOKEN_EXPIRE_MINUTES=60 +ALLOWED_ORIGINS=http://localhost:8000,http://127.0.0.1:8000 +ENABLE_PUBLIC_REGISTRATION=true AWS_REGION=us-east-1 DEFAULT_INGRESS_DOMAIN=apps.local KUBERNETES_NAMESPACE_PREFIX=tenant @@ -12,4 +14,7 @@ KUBERNETES_DRY_RUN=true TERRAFORM_DRY_RUN=true TERRAFORM_STATE_BUCKET=replace-me-terraform-state TERRAFORM_LOCK_TABLE=replace-me-terraform-locks +TERRAFORM_JOB_BACKEND=background +TERRAFORM_JOB_REDIS_URL=redis://localhost:6379/1 +TERRAFORM_JOB_QUEUE_NAME=terraform-jobs RATE_LIMIT_REQUESTS_PER_HOUR=100 diff --git a/Dockerfile b/Dockerfile index 8596f91..e30d87c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,19 @@ FROM python:3.11-slim +ARG TARGETARCH +ARG TERRAFORM_VERSION=1.8.5 + # Create non-root user -RUN useradd -m appuser +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl unzip \ + && arch="${TARGETARCH:-amd64}" \ + && curl -fsSL "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_${arch}.zip" -o /tmp/terraform.zip \ + && unzip /tmp/terraform.zip -d /usr/local/bin \ + && chmod 0755 /usr/local/bin/terraform \ + && rm -f /tmp/terraform.zip \ + && apt-get purge -y --auto-remove curl unzip \ + && rm -rf /var/lib/apt/lists/* \ + && useradd -m appuser WORKDIR /app COPY requirements.txt ./ diff --git a/README.md b/README.md index e90e577..e33d495 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ For local development without a Kubernetes cluster or Terraform credentials, kee KUBERNETES_DRY_RUN=true TERRAFORM_DRY_RUN=true DATABASE_URL=sqlite:///./idp.db +ENABLE_PUBLIC_REGISTRATION=true ``` Install dependencies and run the API: @@ -133,13 +134,15 @@ curl -X POST http://localhost:8000/deployments \ ## Terraform -The 
infrastructure API renders [terraform/main.tf.j2](terraform/main.tf.j2) and can create or destroy AWS resources. It is dry-run by default. Before enabling real execution: +The infrastructure API records requests, returns `202 Accepted`, and queues Terraform work for a worker that updates the infrastructure status. Local development can use the in-process `background` backend, while production should use the Redis-backed worker queue. It renders [terraform/main.tf.j2](terraform/main.tf.j2) and can create or destroy AWS resources. It is dry-run by default. Before enabling real execution: - Create an encrypted S3 backend bucket. - Create a DynamoDB lock table. - Replace `TERRAFORM_STATE_BUCKET` and `TERRAFORM_LOCK_TABLE`. - Use IAM roles with least privilege. - Review generated plans before production use. +- For production, move the background job behind a durable queue or use Terraform Cloud, Atlantis, GitHub Actions, or Argo Workflows for plan approval and audit history. +- Set `TERRAFORM_JOB_BACKEND=redis` and run the worker with `python -m services.infra_worker`. Example: @@ -152,11 +155,16 @@ curl -X POST http://localhost:8000/infrastructure/create \ "cloud_provider": "aws", "config": { "aws_region": "us-east-1", - "eks_role_arn": "arn:aws:iam::123456789012:role/EKSRole" + "eks_role_arn": "arn:aws:iam::123456789012:role/EKSClusterRole", + "node_role_arn": "arn:aws:iam::123456789012:role/EKSNodeRole", + "state_bucket": "company-terraform-state", + "lock_table": "company-terraform-locks" } }' ``` +The initial response will have a status such as `queued`; poll `GET /infrastructure/{id}` for `provisioning`, `ready`, or `failed`. The Helm chart deploys a Terraform worker when `worker.enabled=true`. + ## Helm Deployment Render or install the API chart: @@ -173,6 +181,8 @@ helm upgrade --install idp-api helm/charts/idp-api \ --set secrets.secretKey='replace-with-long-random-secret' ``` +The chart intentionally fails if `image.tag` is empty. 
Use a release tag or digest rather than `latest`. + ## Security Implemented: @@ -182,15 +192,19 @@ Implemented: - Protected infrastructure, deployment, Kubernetes, and monitoring APIs - Redis-backed rate limiting with local fallback - Non-root Docker container +- Security headers and restricted CORS origin configuration +- Production startup validation for weak/default `SECRET_KEY` +- Helm defaults for public registration disabled, debug disabled, read-only root filesystem, and dropped Linux capabilities - Kubernetes RBAC and network-policy examples - No hardcoded production secret requirement in Helm Recommended before production: - Use AWS Secrets Manager, External Secrets Operator, or sealed-secrets. +- Keep public registration disabled unless you add an invite/admin onboarding flow. - Replace SQLite with managed PostgreSQL. - Use Alembic migrations. -- Run Terraform through a job queue or workflow engine rather than synchronous HTTP requests. +- Run Terraform through the Redis worker queue, Terraform Cloud, Atlantis, GitHub Actions, or another workflow engine with audit history. - Enforce tenant-aware namespace ownership. - Add admission policies with Kyverno or OPA Gatekeeper. - Use image allowlists and vulnerability scanning. 
diff --git a/api/routes/auth.py b/api/routes/auth.py index 55f9143..f7ef808 100644 --- a/api/routes/auth.py +++ b/api/routes/auth.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, HTTPException, Depends from sqlalchemy.orm import Session +from app.config import settings from database.models import User from auth.jwt_utils import get_password_hash, verify_password, create_access_token from auth.rbac import get_current_user @@ -10,6 +11,8 @@ @router.post("/register") def register(request: RegisterRequest, db: Session = Depends(get_db)): + if not settings.ENABLE_PUBLIC_REGISTRATION: + raise HTTPException(status_code=403, detail="Public registration is disabled") user = db.query(User).filter(User.username == request.username).first() if user: raise HTTPException(status_code=400, detail="Username already registered") diff --git a/api/routes/catalog.py b/api/routes/catalog.py index 27e7d1b..8851ef1 100644 --- a/api/routes/catalog.py +++ b/api/routes/catalog.py @@ -6,9 +6,9 @@ { "id": "nginx-web", "name": "Nginx web app", - "description": "Official nginx image that serves a default HTTP page on port 80.", + "description": "Official nginx image pinned to a stable tag for a tiny static web app.", "default_app_name": "nginx-web", - "image": "nginx:1.25", + "image": "nginx:1.25-alpine", "port": 80, "replicas": 2, "min_replicas": 1, @@ -30,7 +30,7 @@ { "id": "whoami-api", "name": "Whoami API", - "description": "Tiny HTTP app that returns request and container details.", + "description": "Tiny HTTP app that returns request metadata; useful for smoke tests.", "default_app_name": "whoami-api", "image": "traefik/whoami:v1.10", "port": 80, @@ -51,13 +51,26 @@ "max_replicas": 5, "cpu_threshold": 70, }, + { + "id": "echo-server", + "name": "Echo server", + "description": "Small request/response test service for ingress and header debugging.", + "default_app_name": "echo-server", + "image": "ealen/echo-server:0.9.2", + "port": 80, + "replicas": 2, + "min_replicas": 1, + 
"max_replicas": 5, + "cpu_threshold": 70, + }, ] IMAGE_CATALOG = [ - {"label": "nginx 1.25", "image": "nginx:1.25", "port": 80}, + {"label": "nginx 1.25 alpine", "image": "nginx:1.25-alpine", "port": 80}, {"label": "httpd 2.4", "image": "httpd:2.4", "port": 80}, {"label": "traefik whoami", "image": "traefik/whoami:v1.10", "port": 80}, {"label": "nginx hello demo", "image": "nginxdemos/hello:plain-text", "port": 80}, + {"label": "echo server", "image": "ealen/echo-server:0.9.2", "port": 80}, ] diff --git a/api/routes/infrastructure.py b/api/routes/infrastructure.py index b367be2..bf526f1 100644 --- a/api/routes/infrastructure.py +++ b/api/routes/infrastructure.py @@ -1,61 +1,77 @@ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException from sqlalchemy.orm import Session from api.schemas import InfrastructureCreateRequest, InfrastructureResponse from auth.rbac import get_current_user from database.models import Infrastructure, User from database.session import get_db -from services.infra_service import provision_infrastructure, destroy_infrastructure +from services.infra_queue import InfrastructureQueueError, enqueue_infrastructure_job +from services.infra_service import validate_infrastructure_config router = APIRouter() -@router.post("/create", response_model=InfrastructureResponse, status_code=201) + +@router.post("/create", response_model=InfrastructureResponse, status_code=202) def create_infrastructure( request: InfrastructureCreateRequest, + background_tasks: BackgroundTasks, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): + validation_error = validate_infrastructure_config(request.name, request.cloud_provider, request.config) + if validation_error: + raise HTTPException(status_code=400, detail=validation_error) + infra = Infrastructure( owner_id=current_user.id, name=request.name, cloud_provider=request.cloud_provider, config=request.config, - 
status="provisioning", + status="queued", ) db.add(infra) db.commit() db.refresh(infra) - result = provision_infrastructure(request.name, request.cloud_provider, request.config) - if result is True: - infra.status = "ready" - else: + try: + enqueue_infrastructure_job("provision", infra.id, background_tasks) + except InfrastructureQueueError as exc: infra.status = "failed" - infra.last_error = str(result) - db.add(infra) - db.commit() - db.refresh(infra) + infra.last_error = str(exc) + db.add(infra) + db.commit() + raise HTTPException(status_code=503, detail=str(exc)) return infra + @router.delete("/{id}") def delete_infrastructure( id: int, + background_tasks: BackgroundTasks, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): infra = db.query(Infrastructure).filter(Infrastructure.id == id, Infrastructure.owner_id == current_user.id).first() if not infra: raise HTTPException(status_code=404, detail="Infrastructure not found") - result = destroy_infrastructure(infra.name, infra.cloud_provider, infra.config or {}) - if result is True: - infra.status = "deleted" - db.add(infra) - db.commit() - return {"id": id, "status": "deleted"} - infra.status = "delete_failed" - infra.last_error = str(result) + if infra.status in {"delete_queued", "deleting", "deleted"}:  # already queued, running, or done: never enqueue a second destroy + return {"id": id, "status": infra.status} + if infra.status in {"queued", "provisioning"}: + raise HTTPException(status_code=409, detail="Infrastructure is still provisioning") + + infra.status = "delete_queued" + infra.last_error = None db.add(infra) db.commit() - raise HTTPException(status_code=500, detail=str(result)) + try: + enqueue_infrastructure_job("destroy", infra.id, background_tasks) + except InfrastructureQueueError as exc: + infra.status = "delete_failed" + infra.last_error = str(exc) + db.add(infra) + db.commit() + raise HTTPException(status_code=503, detail=str(exc)) + return {"id": id, "status": infra.status} + @router.get("/{id}", response_model=InfrastructureResponse) def 
get_infrastructure( diff --git a/api/schemas.py b/api/schemas.py index 9d86750..1f8c2a2 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -10,13 +10,13 @@ class TokenResponse(BaseModel): class RegisterRequest(BaseModel): - username: str = Field(..., min_length=3, max_length=64) + username: str = Field(..., min_length=3, max_length=64, regex=r"^[a-zA-Z0-9_.-]+$") password: str = Field(..., min_length=8, max_length=128) class LoginRequest(BaseModel): - username: str - password: str + username: str = Field(..., min_length=3, max_length=64) + password: str = Field(..., min_length=8, max_length=128) class InfrastructureCreateRequest(BaseModel): @@ -92,16 +92,16 @@ class ServiceExposeRequest(BaseModel): class IngressRequest(BaseModel): - namespace: str - name: str - service_name: str + namespace: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + name: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + service_name: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") service_port: int = Field(..., ge=1, le=65535) - host: str + host: str = Field(..., min_length=3, max_length=253, regex=r"^[a-z0-9]([-a-z0-9.]*[a-z0-9])?$") class AutoscalingRequest(BaseModel): - namespace: str = "default" - deployment: str + namespace: str = Field(default="default", min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + deployment: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") min_replicas: int = Field(..., ge=1) max_replicas: int = Field(..., ge=1) cpu_threshold: int = Field(..., ge=10, le=95) diff --git a/app/config.py b/app/config.py index b531bb3..bff1329 100644 --- a/app/config.py +++ b/app/config.py @@ -1,5 +1,10 @@ from functools import lru_cache -from pydantic import BaseSettings, Field +from pydantic import BaseSettings, Field, validator + + +PRODUCTION_ENVIRONMENTS = {"production", "prod", "staging"} 
+DEFAULT_SECRET_KEY = "change-me-in-production" + + class Settings(BaseSettings): PROJECT_NAME: str = "Cloud Infrastructure Provisioning API" @@ -8,9 +13,11 @@ class Settings(BaseSettings): DEBUG: bool = Field(default=True, env="APP_DEBUG") DATABASE_URL: str = Field(default="sqlite:///./idp.db", env="DATABASE_URL") REDIS_URL: str = Field(default="redis://localhost:6379/0", env="REDIS_URL") - SECRET_KEY: str = Field(default="change-me-in-production", env="SECRET_KEY") + SECRET_KEY: str = Field(default=DEFAULT_SECRET_KEY, env="SECRET_KEY") JWT_ALGORITHM: str = "HS256" ACCESS_TOKEN_EXPIRE_MINUTES: int = Field(default=60, env="ACCESS_TOKEN_EXPIRE_MINUTES") + ALLOWED_ORIGINS: str = Field(default="http://localhost:8000,http://127.0.0.1:8000", env="ALLOWED_ORIGINS") + ENABLE_PUBLIC_REGISTRATION: bool = Field(default=True, env="ENABLE_PUBLIC_REGISTRATION") AWS_REGION: str = Field(default="us-east-1", env="AWS_REGION") DEFAULT_INGRESS_DOMAIN: str = Field(default="apps.local", env="DEFAULT_INGRESS_DOMAIN") KUBERNETES_NAMESPACE_PREFIX: str = Field(default="tenant", env="KUBERNETES_NAMESPACE_PREFIX") @@ -18,9 +25,34 @@ class Settings(BaseSettings): TERRAFORM_DRY_RUN: bool = Field(default=True, env="TERRAFORM_DRY_RUN") TERRAFORM_STATE_BUCKET: str = Field(default="replace-me-terraform-state", env="TERRAFORM_STATE_BUCKET") TERRAFORM_LOCK_TABLE: str = Field(default="replace-me-terraform-locks", env="TERRAFORM_LOCK_TABLE") + TERRAFORM_JOB_BACKEND: str = Field(default="background", env="TERRAFORM_JOB_BACKEND") + TERRAFORM_JOB_REDIS_URL: str = Field(default="redis://localhost:6379/1", env="TERRAFORM_JOB_REDIS_URL") + TERRAFORM_JOB_QUEUE_NAME: str = Field(default="terraform-jobs", env="TERRAFORM_JOB_QUEUE_NAME") RATE_LIMIT_REQUESTS_PER_HOUR: int = Field(default=100, env="RATE_LIMIT_REQUESTS_PER_HOUR") REQUIRE_AUTH_FOR_PLATFORM_APIS: bool = Field(default=True, env="REQUIRE_AUTH_FOR_PLATFORM_APIS") + # always=True: pydantic skips validators for fields left at their default, which is exactly the insecure case to reject. + @validator("SECRET_KEY", always=True) + def require_strong_secret_for_non_local(cls, value, values): + 
environment = values.get("ENVIRONMENT", "local").lower() + if environment in PRODUCTION_ENVIRONMENTS and (value == DEFAULT_SECRET_KEY or len(value) < 32): + raise ValueError("SECRET_KEY must be changed to a random value of at least 32 characters") + return value + + @validator("TERRAFORM_JOB_BACKEND") + def validate_terraform_job_backend(cls, value): + if value not in {"background", "redis"}: + raise ValueError("TERRAFORM_JOB_BACKEND must be either 'background' or 'redis'") + return value + + @property + def allowed_origins_list(self) -> list[str]: + return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",") if origin.strip()] + + @property + def is_production_like(self) -> bool: + return self.ENVIRONMENT.lower() in PRODUCTION_ENVIRONMENTS + class Config: env_file = ".env" case_sensitive = True diff --git a/app/main.py b/app/main.py index 91cc939..7ddaf07 100644 --- a/app/main.py +++ b/app/main.py @@ -1,10 +1,12 @@ from fastapi import Depends, FastAPI +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import RedirectResponse from fastapi.staticfiles import StaticFiles from api.routes import auth, catalog, deployments, infrastructure, kubernetes, monitoring from auth.rate_limit import rate_limiter from app.config import settings from app.logger import setup_logging +from app.security import SecurityHeadersMiddleware from database.session import init_db setup_logging() @@ -17,6 +19,16 @@ "Internal Developer Platform API for infrastructure provisioning " "and Kubernetes application deployment." 
), + debug=settings.DEBUG, +) + +app.add_middleware(SecurityHeadersMiddleware) +app.add_middleware( + CORSMiddleware, + allow_origins=settings.allowed_origins_list, + allow_credentials=True, + allow_methods=["GET", "POST", "DELETE"], + allow_headers=["Authorization", "Content-Type"], ) diff --git a/app/security.py b/app/security.py new file mode 100644 index 0000000..7983b16 --- /dev/null +++ b/app/security.py @@ -0,0 +1,19 @@ +from collections.abc import Callable + +from fastapi import Request, Response +from starlette.middleware.base import BaseHTTPMiddleware + +from app.config import settings + + +class SecurityHeadersMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next: Callable) -> Response: + response = await call_next(request) + response.headers.setdefault("X-Content-Type-Options", "nosniff") + response.headers.setdefault("X-Frame-Options", "DENY") + response.headers.setdefault("Referrer-Policy", "no-referrer") + response.headers.setdefault("Permissions-Policy", "camera=(), microphone=(), geolocation=()") + response.headers.setdefault("Cross-Origin-Opener-Policy", "same-origin") + if settings.is_production_like: + response.headers.setdefault("Strict-Transport-Security", "max-age=31536000; includeSubDomains") + return response diff --git a/helm/charts/idp-api/templates/configmap.yaml b/helm/charts/idp-api/templates/configmap.yaml index ae96ac7..0aa8072 100644 --- a/helm/charts/idp-api/templates/configmap.yaml +++ b/helm/charts/idp-api/templates/configmap.yaml @@ -4,6 +4,11 @@ metadata: name: {{ include "idp-api.fullname" . 
}}-config data: ENVIRONMENT: {{ .Values.config.environment | quote }} + APP_DEBUG: {{ .Values.config.appDebug | quote }} + ALLOWED_ORIGINS: {{ .Values.config.allowedOrigins | quote }} + ENABLE_PUBLIC_REGISTRATION: {{ .Values.config.enablePublicRegistration | quote }} + TERRAFORM_JOB_BACKEND: {{ .Values.config.terraformJobBackend | quote }} + TERRAFORM_JOB_QUEUE_NAME: {{ .Values.config.terraformJobQueueName | quote }} AWS_REGION: {{ .Values.config.awsRegion | quote }} DEFAULT_INGRESS_DOMAIN: {{ .Values.config.defaultIngressDomain | quote }} KUBERNETES_DRY_RUN: {{ .Values.config.kubernetesDryRun | quote }} diff --git a/helm/charts/idp-api/templates/deployment.yaml b/helm/charts/idp-api/templates/deployment.yaml index 03bb6ea..123e141 100644 --- a/helm/charts/idp-api/templates/deployment.yaml +++ b/helm/charts/idp-api/templates/deployment.yaml @@ -17,12 +17,19 @@ spec: app: {{ include "idp-api.name" . }} spec: serviceAccountName: {{ include "idp-api.fullname" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} containers: - name: idp-api - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: "{{ .Values.image.repository }}:{{ required "image.tag is required; pin an immutable release tag or digest" .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} ports: - containerPort: 8000 + volumeMounts: + - name: tmp + mountPath: /tmp envFrom: - configMapRef: name: {{ include "idp-api.fullname" . 
}}-config @@ -42,3 +49,6 @@ spec: periodSeconds: 5 resources: {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} diff --git a/helm/charts/idp-api/templates/secret.yaml b/helm/charts/idp-api/templates/secret.yaml index 942efda..a409c63 100644 --- a/helm/charts/idp-api/templates/secret.yaml +++ b/helm/charts/idp-api/templates/secret.yaml @@ -6,4 +6,5 @@ type: Opaque stringData: DATABASE_URL: {{ required "secrets.databaseUrl is required" .Values.secrets.databaseUrl | quote }} REDIS_URL: {{ .Values.secrets.redisUrl | quote }} + TERRAFORM_JOB_REDIS_URL: {{ .Values.secrets.terraformJobRedisUrl | quote }} SECRET_KEY: {{ required "secrets.secretKey is required" .Values.secrets.secretKey | quote }} diff --git a/helm/charts/idp-api/templates/worker-deployment.yaml b/helm/charts/idp-api/templates/worker-deployment.yaml new file mode 100644 index 0000000..4fd25fb --- /dev/null +++ b/helm/charts/idp-api/templates/worker-deployment.yaml @@ -0,0 +1,41 @@ +{{- if .Values.worker.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "idp-api.fullname" . }}-worker + labels: + app: {{ include "idp-api.name" . }}-worker +spec: + replicas: {{ .Values.worker.replicaCount }} + selector: + matchLabels: + app: {{ include "idp-api.name" . }}-worker + template: + metadata: + labels: + app: {{ include "idp-api.name" . }}-worker + spec: + serviceAccountName: {{ include "idp-api.fullname" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: terraform-worker + image: "{{ .Values.image.repository }}:{{ required "image.tag is required; pin an immutable release tag or digest" .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["python", "-m", "services.infra_worker"] + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + volumeMounts: + - name: tmp + mountPath: /tmp + envFrom: + - configMapRef: + name: {{ include "idp-api.fullname" . 
}}-config + - secretRef: + name: {{ include "idp-api.fullname" . }}-secrets + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} +{{- end }} diff --git a/helm/charts/idp-api/values.yaml b/helm/charts/idp-api/values.yaml index 6910829..be485aa 100644 --- a/helm/charts/idp-api/values.yaml +++ b/helm/charts/idp-api/values.yaml @@ -1,13 +1,18 @@ replicaCount: 2 image: repository: your-docker-repo/idp-api - tag: latest + tag: "" pullPolicy: IfNotPresent service: type: ClusterIP port: 8000 config: environment: production + appDebug: "false" + allowedOrigins: "https://idp.local" + enablePublicRegistration: "false" + terraformJobBackend: "redis" + terraformJobQueueName: "terraform-jobs" awsRegion: us-east-1 defaultIngressDomain: apps.example.com kubernetesDryRun: "false" @@ -15,6 +20,7 @@ config: secrets: databaseUrl: "" redisUrl: "redis://redis-master:6379/0" + terraformJobRedisUrl: "redis://redis-master:6379/1" secretKey: "" ingress: enabled: true @@ -37,3 +43,17 @@ autoscaling: minReplicas: 2 maxReplicas: 10 targetCPUUtilizationPercentage: 70 +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 +securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL +worker: + enabled: true + replicaCount: 1 diff --git a/mongo-express.yaml b/mongo-express.yaml index 3ec25d7..363b27e 100644 --- a/mongo-express.yaml +++ b/mongo-express.yaml @@ -16,7 +16,12 @@ spec: spec: containers: - name: mongo-express - image: mongo-express + image: mongo-express:1.0.2-20 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL ports: - containerPort: 8081 env: diff --git a/mongodb-deployment.yaml b/mongodb-deployment.yaml index 1f26f6e..53aca87 100644 --- a/mongodb-deployment.yaml +++ b/mongodb-deployment.yaml @@ -16,7 +16,12 @@ spec: spec: containers: - name: mongodb - image: mongo + image: mongo:7.0 + securityContext: + 
allowPrivilegeEscalation: false + capabilities: + drop: + - ALL ports: - containerPort: 27017 env: @@ -42,4 +47,3 @@ spec: - protocol: TCP port: 27017 targetPort: 27017 - diff --git a/scripts/prod_checklist.md b/scripts/prod_checklist.md index 09b5d68..de3d437 100644 --- a/scripts/prod_checklist.md +++ b/scripts/prod_checklist.md @@ -4,6 +4,8 @@ - [ ] Terraform S3 backend configured - [ ] AWS resources provisioned (VPC, EKS, IAM) - [ ] Terraform state bucket secured +- [ ] Terraform worker deployed with `TERRAFORM_JOB_BACKEND=redis` +- [ ] Redis job queue monitored and backed up according to environment needs ## Kubernetes - [ ] Ingress controller deployed (NGINX/ALB) @@ -13,9 +15,15 @@ - [ ] Secrets created and referenced - [ ] App deployed via Helm - [ ] HTTPS enforced at ingress +- [ ] API image tag pinned to a release tag or digest +- [ ] Pod and container security contexts enabled ## Application - [ ] All secrets from env/K8s Secrets +- [ ] `SECRET_KEY` is a random value of at least 32 characters +- [ ] `APP_DEBUG=false` in production +- [ ] `ENABLE_PUBLIC_REGISTRATION=false` unless intentionally offering self-service signup +- [ ] `ALLOWED_ORIGINS` restricted to trusted dashboard origins - [ ] JWT, RBAC, rate limiting enabled - [ ] Liveness/readiness probes configured - [ ] Structured logging enabled @@ -35,6 +43,7 @@ - [ ] HTTPS everywhere - [ ] RBAC and network policies - [ ] Non-root containers +- [ ] Read-only root filesystem where supported - [ ] Regular secret rotation ## Advanced diff --git a/secret.yaml b/secret.yaml index 5330f9f..ef74b0b 100644 --- a/secret.yaml +++ b/secret.yaml @@ -3,6 +3,6 @@ kind: Secret metadata: name: mongodb-secret type: Opaque -data: - mongo-root-username: dXNlcm5hbWU= - mongo-root-password: cGFzc3dvcmQ= +stringData: + mongo-root-username: CHANGE_ME + mongo-root-password: CHANGE_ME_WITH_A_LONG_RANDOM_PASSWORD diff --git a/services/infra_jobs.py b/services/infra_jobs.py new file mode 100644 index 0000000..dfadbb2 --- 
/dev/null +++ b/services/infra_jobs.py @@ -0,0 +1,74 @@ +import logging +from typing import Callable + +from database.models import Infrastructure +from database.session import SessionLocal +from services.infra_service import destroy_infrastructure, provision_infrastructure + +logger = logging.getLogger(__name__) + + +TerraformAction = Callable[[str, str, dict], bool | str] + + +def _run_infrastructure_job( + infrastructure_id: int, + action: TerraformAction, + in_progress_status: str, + success_status: str, + failure_status: str, +) -> None: + """Run ``action`` for one Infrastructure row and persist its in-progress, success, or failure status.""" + db = SessionLocal() + infra = None + try: + infra = db.get(Infrastructure, infrastructure_id) + if infra is None: + logger.warning("Infrastructure job skipped; id %s no longer exists", infrastructure_id) + return + + infra.status = in_progress_status + infra.last_error = None + db.add(infra) + db.commit() + db.refresh(infra) + + result = action(infra.name, infra.cloud_provider, infra.config or {}) + infra.status = success_status if result is True else failure_status + infra.last_error = None if result is True else str(result) + db.add(infra) + db.commit() + except Exception as exc: + logger.exception("Infrastructure job failed for id %s", infrastructure_id) + # A session whose flush or commit raised must be rolled back before it can + # be queried or committed again, so reset it before recording the failure. + db.rollback() + if infra is None: + infra = db.get(Infrastructure, infrastructure_id) + if infra is not None: + infra.status = failure_status + infra.last_error = str(exc) + db.add(infra) + db.commit() + finally: + db.close() + + +def provision_infrastructure_job(infrastructure_id: int) -> None: + _run_infrastructure_job( + infrastructure_id, + provision_infrastructure, + in_progress_status="provisioning", + success_status="ready", + failure_status="failed", + ) + + +def destroy_infrastructure_job(infrastructure_id: int) -> None: + _run_infrastructure_job( + infrastructure_id, + destroy_infrastructure, + in_progress_status="deleting", + success_status="deleted", + failure_status="delete_failed", + ) diff --git a/services/infra_queue.py b/services/infra_queue.py new file mode 100644 
index 0000000..ea4383e --- /dev/null +++ b/services/infra_queue.py @@ -0,0 +1,68 @@ +import json +import logging +from datetime import UTC, datetime +from uuid import uuid4 + +import redis +from fastapi import BackgroundTasks +from redis.exceptions import RedisError + +from app.config import settings +from services.infra_jobs import destroy_infrastructure_job, provision_infrastructure_job + +logger = logging.getLogger(__name__) + +SUPPORTED_ACTIONS = {"provision", "destroy"} + + +class InfrastructureQueueError(RuntimeError): + pass + + +def _redis_client() -> redis.Redis: + return redis.Redis.from_url(settings.TERRAFORM_JOB_REDIS_URL, socket_connect_timeout=2, socket_timeout=2) + + +def enqueue_infrastructure_job( + action: str, + infrastructure_id: int, + background_tasks: BackgroundTasks | None = None, +) -> str: + if action not in SUPPORTED_ACTIONS: + raise InfrastructureQueueError(f"Unsupported infrastructure job action: {action}") + + job_id = str(uuid4()) + payload = { + "id": job_id, + "action": action, + "infrastructure_id": infrastructure_id, + "enqueued_at": datetime.now(UTC).isoformat(), + } + + if settings.TERRAFORM_JOB_BACKEND == "background": + if background_tasks is None: + raise InfrastructureQueueError("BackgroundTasks is required for background job backend") + job = provision_infrastructure_job if action == "provision" else destroy_infrastructure_job + background_tasks.add_task(job, infrastructure_id) + return job_id + + if settings.TERRAFORM_JOB_BACKEND != "redis": + raise InfrastructureQueueError("TERRAFORM_JOB_BACKEND must be either 'redis' or 'background'") + + try: + _redis_client().rpush(settings.TERRAFORM_JOB_QUEUE_NAME, json.dumps(payload)) + except RedisError as exc: + logger.exception("Failed to enqueue infrastructure job") + raise InfrastructureQueueError("Infrastructure job queue is unavailable") from exc + return job_id + + +def decode_job(raw_payload: bytes | str) -> dict: + if isinstance(raw_payload, bytes): + raw_payload = 
raw_payload.decode("utf-8") + payload = json.loads(raw_payload) + if payload.get("action") not in SUPPORTED_ACTIONS: + raise InfrastructureQueueError("Queued infrastructure job has an unsupported action") + if not isinstance(payload.get("infrastructure_id"), int): + raise InfrastructureQueueError("Queued infrastructure job is missing infrastructure_id") + return payload diff --git a/services/infra_service.py b/services/infra_service.py index 2410fac..b50d5c2 100644 --- a/services/infra_service.py +++ b/services/infra_service.py @@ -1,43 +1,158 @@ +import ipaddress import os +import re import subprocess import tempfile -from jinja2 import Template +from pathlib import Path +from typing import Any + +from jinja2 import StrictUndefined, Template + from app.config import settings -TERRAFORM_TEMPLATE = os.path.join(os.path.dirname(__file__), '../terraform/main.tf.j2') +TERRAFORM_TEMPLATE = Path(__file__).resolve().parent.parent / "terraform" / "main.tf.j2" +AWS_REGION_RE = re.compile(r"^[a-z]{2}-[a-z]+-\d$") +IAM_ROLE_ARN_RE = re.compile(r"^arn:aws:iam::\d{12}:role\/[A-Za-z0-9+=,.@_/-]+$") +DEFAULT_CLUSTER_VERSION = "1.29" +DEFAULT_NODE_INSTANCE_TYPES = ["t3.medium"] +DEFAULT_TAGS = { + "ManagedBy": "idp-api", + "Project": "internal-developer-platform", +} -def render_terraform_config(context: dict) -> str: +def render_terraform_config(context: dict[str, Any]) -> str: with open(TERRAFORM_TEMPLATE) as f: - template = Template(f.read()) + template = Template(f.read(), undefined=StrictUndefined) return template.render(**context) def run_terraform(directory: str, action: str = 'apply'): if settings.TERRAFORM_DRY_RUN: return True + if action not in {"apply", "destroy"}: + raise ValueError("Unsupported Terraform action") cmds = [ - ['terraform', 'init'], - ['terraform', 'plan', '-out=tfplan'], - ['terraform', 'apply', '-auto-approve', 'tfplan'] + ['terraform', 'init', '-input=false'], + ['terraform', 'validate'], + ['terraform', 'plan', '-input=false', '-lock-timeout=5m', 
def _validate_cidr(value: str, field_name: str) -> str:
    """Return ``value`` unchanged after checking that it is a CIDR block.

    Args:
        value: Candidate network in ``address/prefix`` notation.
        field_name: Name of the config field, used in the error message.

    Raises:
        ValueError: If ``value`` is not a string in ``address/prefix`` form
            that ``ipaddress.ip_network`` accepts.
    """
    # ip_network() also accepts ints, packed bytes and tuples, and reads a
    # bare address as a single-host network. None of those is a usable CIDR
    # string for the template, so reject them before parsing.
    if not isinstance(value, str) or "/" not in value:
        raise ValueError(f"{field_name} must be a valid CIDR block")
    try:
        ipaddress.ip_network(value)
    except ValueError as exc:
        raise ValueError(f"{field_name} must be a valid CIDR block") from exc
    return value


def _string_list(value: Any, field_name: str, default: list[str]) -> list[str]:
    """Return ``value`` as a validated list of non-empty strings.

    Args:
        value: User-supplied value, or ``None`` to fall back to ``default``.
        field_name: Name of the config field, used in the error message.
        default: List used when ``value`` is ``None``.

    Returns:
        A new list, so callers never share (and cannot mutate) the
        module-level defaults or the caller's own list.

    Raises:
        ValueError: If ``value`` is not a non-empty list of non-empty strings.
    """
    if value is None:
        return list(default)
    is_valid = (
        isinstance(value, list)
        and bool(value)
        and all(isinstance(item, str) and item for item in value)
    )
    if not is_valid:
        raise ValueError(f"{field_name} must be a non-empty list of strings")
    return list(value)


def _require_hcl_safe(value: Any, field_name: str, *, allow_empty: bool = False) -> str:
    """Return ``value`` if it can be embedded verbatim in an HCL quoted string.

    The Terraform template interpolates these values between double quotes,
    so quotes, backslashes, control characters and the ``${`` / ``%{``
    template markers would change the meaning of the generated configuration.

    Raises:
        ValueError: If ``value`` is not a string, is empty while
            ``allow_empty`` is false, or contains a disallowed character.
    """
    if not isinstance(value, str) or (not value and not allow_empty):
        raise ValueError(f"{field_name} must be a non-empty string")
    has_control = any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in value)
    if has_control or '"' in value or "\\" in value or "${" in value or "%{" in value:
        raise ValueError(f"{field_name} contains characters that are not allowed")
    return value


def _int_field(config: dict[str, Any], key: str, default: int) -> int:
    """Read an integer setting, reporting every bad value as ``ValueError``.

    ``int()`` raises ``TypeError`` for ``None``/lists and ``OverflowError``
    for infinities; callers of ``_build_context`` only catch ``ValueError``.
    """
    raw = config.get(key, default)
    if isinstance(raw, bool) or (isinstance(raw, float) and not raw.is_integer()):
        raise ValueError(f"{key} must be an integer")
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(f"{key} must be an integer") from exc


def _bool_field(config: dict[str, Any], key: str, default: bool) -> bool:
    """Read a boolean setting; ``None`` or a missing key selects ``default``.

    Truthiness coercion is deliberately avoided: ``bool("false")`` is
    ``True``, which would silently enable the setting.
    """
    raw = config.get(key)
    if raw is None:
        return default
    if not isinstance(raw, bool):
        raise ValueError(f"{key} must be a boolean")
    return raw


def _build_context(name: str, config: dict[str, Any]) -> dict[str, Any]:
    """Validate ``config`` and build the Terraform template context.

    Args:
        name: Cluster name; also used in resource names and the state key.
        config: User-supplied infrastructure settings. ``eks_role_arn`` and
            ``node_role_arn`` are required; everything else has a default.

    Returns:
        A dict with every variable the Terraform template reads.

    Raises:
        ValueError: For any invalid or missing setting. No other exception
            type is raised for bad input, because callers translate
            ``ValueError`` into a user-facing message.
    """
    cluster_name = _require_hcl_safe(name, "name")

    # NOTE(review): AWS_REGION_RE admits only three-segment names, so a
    # region such as us-gov-west-1 is rejected; confirm that is intended.
    # fullmatch() is used because "$" would also accept a trailing newline.
    aws_region = config.get("aws_region", settings.AWS_REGION)
    if not isinstance(aws_region, str) or not AWS_REGION_RE.fullmatch(aws_region):
        raise ValueError("aws_region must look like a valid AWS region, for example us-east-1")

    eks_role_arn = config.get("eks_role_arn")
    if not isinstance(eks_role_arn, str) or not IAM_ROLE_ARN_RE.fullmatch(eks_role_arn):
        raise ValueError("eks_role_arn is required and must be a valid IAM role ARN")
    node_role_arn = config.get("node_role_arn")
    if not isinstance(node_role_arn, str) or not IAM_ROLE_ARN_RE.fullmatch(node_role_arn):
        raise ValueError("node_role_arn is required and must be a valid IAM role ARN")

    state_bucket = config.get("state_bucket", settings.TERRAFORM_STATE_BUCKET)
    lock_table = config.get("lock_table", settings.TERRAFORM_LOCK_TABLE)
    if state_bucket == "replace-me-terraform-state" or lock_table == "replace-me-terraform-locks":
        raise ValueError("Terraform backend state_bucket and lock_table must be configured")
    _require_hcl_safe(state_bucket, "state_bucket")
    _require_hcl_safe(lock_table, "lock_table")

    public_subnet_cidrs = _string_list(
        config.get("public_subnet_cidrs"),
        "public_subnet_cidrs",
        ["10.0.0.0/20", "10.0.16.0/20"],
    )
    private_subnet_cidrs = _string_list(
        config.get("private_subnet_cidrs"),
        "private_subnet_cidrs",
        ["10.0.128.0/20", "10.0.144.0/20"],
    )
    if len(public_subnet_cidrs) < 2 or len(private_subnet_cidrs) < 2:
        raise ValueError("EKS requires at least two public and two private subnet CIDRs")

    node_min_size = _int_field(config, "node_min_size", 1)
    node_desired_size = _int_field(config, "node_desired_size", 2)
    node_max_size = _int_field(config, "node_max_size", 4)
    if not node_min_size <= node_desired_size <= node_max_size:
        raise ValueError("node sizing must satisfy node_min_size <= node_desired_size <= node_max_size")
    if node_min_size < 0 or node_max_size < 1:
        raise ValueError("node_min_size must be >= 0 and node_max_size must be >= 1")

    # Check the container type before iterating it; a list or null here
    # must produce a validation error, not an AttributeError.
    custom_tags = config.get("tags", {})
    if not isinstance(custom_tags, dict):
        raise ValueError("tags must be an object with string keys and values")
    for tag_key, tag_value in custom_tags.items():
        if not isinstance(tag_key, str) or not isinstance(tag_value, str):
            raise ValueError("tags must be an object with string keys and values")
        _require_hcl_safe(tag_key, "tags key")
        _require_hcl_safe(tag_value, "tags value", allow_empty=True)
    tags = {**DEFAULT_TAGS, **custom_tags}

    cluster_version = _require_hcl_safe(
        config.get("cluster_version", DEFAULT_CLUSTER_VERSION),
        "cluster_version",
    )
    vpc_cidr = _validate_cidr(config.get("vpc_cidr", "10.0.0.0/16"), "vpc_cidr")
    public_cidrs = [_validate_cidr(cidr, "public_subnet_cidrs") for cidr in public_subnet_cidrs]
    private_cidrs = [_validate_cidr(cidr, "private_subnet_cidrs") for cidr in private_subnet_cidrs]
    endpoint_public_access = _bool_field(config, "endpoint_public_access", False)
    endpoint_private_access = _bool_field(config, "endpoint_private_access", True)

    enabled_cluster_log_types = _string_list(
        config.get("enabled_cluster_log_types"),
        "enabled_cluster_log_types",
        ["api", "audit", "authenticator"],
    )
    node_instance_types = _string_list(
        config.get("node_instance_types"),
        "node_instance_types",
        DEFAULT_NODE_INSTANCE_TYPES,
    )
    # These lists are rendered as JSON inside the HCL file; JSON escaping
    # does not neutralise "${", so apply the same character check.
    for log_type in enabled_cluster_log_types:
        _require_hcl_safe(log_type, "enabled_cluster_log_types")
    for instance_type in node_instance_types:
        _require_hcl_safe(instance_type, "node_instance_types")

    return {
        "aws_region": aws_region,
        "cluster_name": cluster_name,
        "cluster_version": cluster_version,
        "eks_role_arn": eks_role_arn,
        "node_role_arn": node_role_arn,
        "state_bucket": state_bucket,
        "lock_table": lock_table,
        "vpc_cidr": vpc_cidr,
        "public_subnet_cidrs": public_cidrs,
        "private_subnet_cidrs": private_cidrs,
        "endpoint_public_access": endpoint_public_access,
        "endpoint_private_access": endpoint_private_access,
        "enabled_cluster_log_types": enabled_cluster_log_types,
        "node_instance_types": node_instance_types,
        "node_desired_size": node_desired_size,
        "node_min_size": node_min_size,
        "node_max_size": node_max_size,
        "tags": tags,
    }
def run_worker() -> None:
    """Consume infrastructure jobs from the Redis queue until shutdown.

    Installs SIGTERM/SIGINT handlers that set the module-level shutdown
    flag, then repeatedly pops one payload with BLPOP (5 second timeout),
    decodes it and dispatches it through ``process_job``. Every error is
    logged and the loop keeps running; after a Redis error the loop waits
    before retrying so an unreachable Redis does not cause a busy loop.

    NOTE(review): BLPOP removes the payload before it is processed, so a job
    that is in flight when the process dies is not redelivered.
    """
    # Function-scope import keeps this change independent of the module's
    # import block.
    import time

    setup_logging()
    signal.signal(signal.SIGTERM, _handle_shutdown)
    signal.signal(signal.SIGINT, _handle_shutdown)
    client = _redis_client()
    queue_name = settings.TERRAFORM_JOB_QUEUE_NAME
    logger.info("Terraform worker listening on Redis queue '%s'", queue_name)

    poll_timeout = 5
    initial_retry_delay = 1.0
    retry_delay = initial_retry_delay

    while not _shutdown_requested:
        try:
            item = client.blpop(queue_name, timeout=poll_timeout)
            # A successful round trip means Redis is healthy again.
            retry_delay = initial_retry_delay
            if item is None:
                continue
            _, raw_payload = item
            process_job(decode_job(raw_payload))
        except InfrastructureQueueError:
            logger.exception("Invalid infrastructure job payload")
        except RedisError:
            logger.exception("Redis error while reading infrastructure job queue")
            # Back off exponentially, capped at the poll timeout so shutdown
            # latency is no worse than during a normal blocking pop.
            time.sleep(retry_delay)
            retry_delay = min(retry_delay * 2, float(poll_timeout))
        except Exception:
            # Top-level boundary: one bad job must not stop the worker.
            logger.exception("Unhandled infrastructure worker error")
vpc_id = aws_vpc.main.id + cidr_block = local.private_subnet_cidrs[count.index] + availability_zone = data.aws_availability_zones.available.names[count.index] + + tags = { + Name = "{{ cluster_name }}-private-${count.index + 1}" + "kubernetes.io/role/internal-elb" = "1" + "kubernetes.io/cluster/{{ cluster_name }}" = "shared" + } +} + +resource "aws_route_table" "public" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = { + Name = "{{ cluster_name }}-public" + } +} + +resource "aws_route_table_association" "public" { + count = length(aws_subnet.public) + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id } resource "aws_security_group" "main" { name_prefix = "{{ cluster_name }}-eks-" - vpc_id = aws_vpc.main.id + description = "EKS control plane security group for {{ cluster_name }}" + vpc_id = aws_vpc.main.id + egress { from_port = 0 to_port = 0 protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } + + tags = { + Name = "{{ cluster_name }}-eks-control-plane" + } } resource "aws_eks_cluster" "main" { - name = "{{ cluster_name }}" - role_arn = "{{ eks_role_arn }}" + name = "{{ cluster_name }}" + role_arn = "{{ eks_role_arn }}" + version = "{{ cluster_version }}" + enabled_cluster_log_types = {{ enabled_cluster_log_types | tojson }} + vpc_config { - subnet_ids = [aws_subnet.main.id] - security_group_ids = [aws_security_group.main.id] + subnet_ids = concat(aws_subnet.public[*].id, aws_subnet.private[*].id) + security_group_ids = [aws_security_group.main.id] + endpoint_public_access = {{ endpoint_public_access | lower }} + endpoint_private_access = {{ endpoint_private_access | lower }} + } + + tags = { + Name = "{{ cluster_name }}" } } + +resource "aws_eks_node_group" "main" { + cluster_name = aws_eks_cluster.main.name + node_group_name = "{{ cluster_name }}-default" + node_role_arn = "{{ node_role_arn }}" + subnet_ids = aws_subnet.private[*].id + instance_types = 
{{ node_instance_types | tojson }} + + scaling_config { + desired_size = {{ node_desired_size }} + min_size = {{ node_min_size }} + max_size = {{ node_max_size }} + } + + update_config { + max_unavailable = 1 + } + + tags = { + Name = "{{ cluster_name }}-default-node-group" + } +} + +locals { + public_subnet_cidrs = {{ public_subnet_cidrs | tojson }} + private_subnet_cidrs = {{ private_subnet_cidrs | tojson }} +} + +output "cluster_name" { + value = aws_eks_cluster.main.name +} + +output "cluster_endpoint" { + value = aws_eks_cluster.main.endpoint + sensitive = true +} diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index cc172a5..e09b853 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -10,3 +10,8 @@ def test_login(client): response = client.post("/auth/login", json={"username": "testuser", "password": "testpass123"}) assert response.status_code == 200 assert "access_token" in response.json() + + +def test_register_rejects_invalid_username(client): + response = client.post("/auth/register", json={"username": "../bad", "password": "testpass123"}) + assert response.status_code == 422 diff --git a/tests/unit/test_health.py b/tests/unit/test_health.py index fade712..357d146 100644 --- a/tests/unit/test_health.py +++ b/tests/unit/test_health.py @@ -2,3 +2,5 @@ def test_health(client): response = client.get("/healthz") assert response.status_code == 200 assert response.json()["status"] == "ok" + assert response.headers["x-content-type-options"] == "nosniff" + assert response.headers["x-frame-options"] == "DENY" diff --git a/tests/unit/test_infra_service.py b/tests/unit/test_infra_service.py new file mode 100644 index 0000000..e98f977 --- /dev/null +++ b/tests/unit/test_infra_service.py @@ -0,0 +1,44 @@ +from services.infra_service import ( + _build_context, + provision_infrastructure, + render_terraform_config, + validate_infrastructure_config, +) +from services.infra_queue import decode_job + + +VALID_CONFIG = { + "aws_region": 
"us-east-1", + "eks_role_arn": "arn:aws:iam::123456789012:role/EKSClusterRole", + "node_role_arn": "arn:aws:iam::123456789012:role/EKSNodeRole", + "state_bucket": "company-terraform-state", + "lock_table": "company-terraform-locks", +} + + +def test_infra_requires_real_backend_and_role(): + result = provision_infrastructure("platform-dev", "aws", {"aws_region": "us-east-1"}) + assert "eks_role_arn is required" in result + + +def test_infra_validation_catches_bad_requests_before_queueing(): + result = validate_infrastructure_config("platform-dev", "aws", {"aws_region": "us-east-1"}) + assert "eks_role_arn is required" in result + + +def test_rendered_terraform_has_professional_eks_defaults(): + context = _build_context("platform-dev", VALID_CONFIG) + rendered = render_terraform_config(context) + + assert 'required_version = ">= 1.6.0"' in rendered + assert 'resource "aws_eks_node_group" "main"' in rendered + assert "endpoint_public_access = false" in rendered + assert 'enabled_cluster_log_types = ["api", "audit", "authenticator"]' in rendered + assert 'node_role_arn = "arn:aws:iam::123456789012:role/EKSNodeRole"' in rendered + assert "aws_subnet.private" in rendered + + +def test_decode_infrastructure_queue_job(): + payload = decode_job(b'{"action":"provision","infrastructure_id":42}') + assert payload["action"] == "provision" + assert payload["infrastructure_id"] == 42