From 73eef48cb3a55a3148ae6ed5f01925c70f3f31c6 Mon Sep 17 00:00:00 2001 From: tutorial Date: Wed, 13 May 2026 22:22:49 +0200 Subject: [PATCH] Add cloud infrastructure provisioning API --- .env.example | 15 ++ .github/workflows/ci-cd.yaml | 41 ++++ .gitignore | 7 + Dockerfile | 18 ++ README.md | 225 ++++++++++++++++-- api/routes/auth.py | 33 +++ api/routes/deployments.py | 59 +++++ api/routes/infrastructure.py | 69 ++++++ api/routes/kubernetes.py | 59 +++++ api/routes/monitoring.py | 19 ++ api/schemas.py | 105 ++++++++ app/config.py | 34 +++ app/logger.py | 9 + app/main.py | 57 +++++ auth/jwt_utils.py | 30 +++ auth/rate_limit.py | 28 +++ auth/rbac.py | 31 +++ database/models.py | 42 ++++ database/session.py | 22 ++ helm/charts/README.md | 14 ++ helm/charts/idp-api/Chart.yaml | 4 + helm/charts/idp-api/templates/_helpers.tpl | 11 + helm/charts/idp-api/templates/configmap.yaml | 10 + helm/charts/idp-api/templates/deployment.yaml | 44 ++++ helm/charts/idp-api/templates/hpa.yaml | 20 ++ helm/charts/idp-api/templates/ingress.yaml | 27 +++ helm/charts/idp-api/templates/secret.yaml | 9 + helm/charts/idp-api/templates/service.yaml | 13 + .../idp-api/templates/serviceaccount.yaml | 4 + helm/charts/idp-api/values.yaml | 39 +++ kubernetes/client.py | 18 ++ kubernetes/network-policy.yaml | 25 ++ kubernetes/rbac.yaml | 29 +++ monitoring/grafana-dashboard.json | 24 ++ monitoring/prometheus-scrape-config.yaml | 15 ++ requirements.txt | 16 ++ scripts/bootstrap.sh | 15 ++ scripts/migrate_db.sh | 11 + scripts/prod_checklist.md | 44 ++++ scripts/setup_env.sh | 4 + services/autoscaling_service.py | 40 ++++ services/deployment_service.py | 198 +++++++++++++++ services/infra_service.py | 68 ++++++ services/ingress_service.py | 51 ++++ services/k8s_service.py | 29 +++ services/kubernetes_client.py | 20 ++ services/monitoring_service.py | 31 +++ services/service_service.py | 35 +++ setup.cfg | 4 + terraform/main.tf.j2 | 43 ++++ tests/conftest.py | 14 ++ tests/unit/test_auth.py | 12 + 
tests/unit/test_health.py | 4 + tests/unit/test_namespace.py | 10 + 54 files changed, 1837 insertions(+), 21 deletions(-) create mode 100644 .env.example create mode 100644 .github/workflows/ci-cd.yaml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 api/routes/auth.py create mode 100644 api/routes/deployments.py create mode 100644 api/routes/infrastructure.py create mode 100644 api/routes/kubernetes.py create mode 100644 api/routes/monitoring.py create mode 100644 api/schemas.py create mode 100644 app/config.py create mode 100644 app/logger.py create mode 100644 app/main.py create mode 100644 auth/jwt_utils.py create mode 100644 auth/rate_limit.py create mode 100644 auth/rbac.py create mode 100644 database/models.py create mode 100644 database/session.py create mode 100644 helm/charts/README.md create mode 100644 helm/charts/idp-api/Chart.yaml create mode 100644 helm/charts/idp-api/templates/_helpers.tpl create mode 100644 helm/charts/idp-api/templates/configmap.yaml create mode 100644 helm/charts/idp-api/templates/deployment.yaml create mode 100644 helm/charts/idp-api/templates/hpa.yaml create mode 100644 helm/charts/idp-api/templates/ingress.yaml create mode 100644 helm/charts/idp-api/templates/secret.yaml create mode 100644 helm/charts/idp-api/templates/service.yaml create mode 100644 helm/charts/idp-api/templates/serviceaccount.yaml create mode 100644 helm/charts/idp-api/values.yaml create mode 100644 kubernetes/client.py create mode 100644 kubernetes/network-policy.yaml create mode 100644 kubernetes/rbac.yaml create mode 100644 monitoring/grafana-dashboard.json create mode 100644 monitoring/prometheus-scrape-config.yaml create mode 100644 requirements.txt create mode 100644 scripts/bootstrap.sh create mode 100644 scripts/migrate_db.sh create mode 100644 scripts/prod_checklist.md create mode 100644 scripts/setup_env.sh create mode 100644 services/autoscaling_service.py create mode 100644 services/deployment_service.py create mode 
100644 services/infra_service.py create mode 100644 services/ingress_service.py create mode 100644 services/k8s_service.py create mode 100644 services/kubernetes_client.py create mode 100644 services/monitoring_service.py create mode 100644 services/service_service.py create mode 100644 setup.cfg create mode 100644 terraform/main.tf.j2 create mode 100644 tests/conftest.py create mode 100644 tests/unit/test_auth.py create mode 100644 tests/unit/test_health.py create mode 100644 tests/unit/test_namespace.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f820780 --- /dev/null +++ b/.env.example @@ -0,0 +1,15 @@ +ENVIRONMENT=local +APP_DEBUG=true +DATABASE_URL=sqlite:///./idp.db +REDIS_URL=redis://localhost:6379/0 +SECRET_KEY=replace-with-a-long-random-secret +JWT_ALGORITHM=HS256 +ACCESS_TOKEN_EXPIRE_MINUTES=60 +AWS_REGION=us-east-1 +DEFAULT_INGRESS_DOMAIN=apps.local +KUBERNETES_NAMESPACE_PREFIX=tenant +KUBERNETES_DRY_RUN=true +TERRAFORM_DRY_RUN=true +TERRAFORM_STATE_BUCKET=replace-me-terraform-state +TERRAFORM_LOCK_TABLE=replace-me-terraform-locks +RATE_LIMIT_REQUESTS_PER_HOUR=100 diff --git a/.github/workflows/ci-cd.yaml b/.github/workflows/ci-cd.yaml new file mode 100644 index 0000000..2394d86 --- /dev/null +++ b/.github/workflows/ci-cd.yaml @@ -0,0 +1,41 @@ +name: CI/CD Pipeline + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + env: + DATABASE_URL: sqlite:///./test-idp.db + SECRET_KEY: ci-test-secret + KUBERNETES_DRY_RUN: "true" + TERRAFORM_DRY_RUN: "true" + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint + run: | + pip install flake8 + flake8 app api auth database services tests --max-line-length=120 + - name: Run tests + run: | + pytest + - name: Build Docker image 
+ run: | + docker build -t idp-api:${{ github.sha }} . + - name: Push Docker image (optional) + if: github.ref == 'refs/heads/main' + run: echo "Configure registry login and docker push here" + - name: Deploy to Kubernetes (optional) + run: echo "Deploy with Helm or ArgoCD after registry configuration" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6b5af3f --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.venv/ +__pycache__/ +*.py[cod] +*.db +.pytest_cache/ +.env +.history/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8596f91 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim + +# Create non-root user +RUN useradd -m appuser +WORKDIR /app + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt \ + && rm -rf /root/.cache + +COPY . . + +# Change ownership and permissions +RUN chown -R appuser:appuser /app +USER appuser + +ENV PYTHONUNBUFFERED=1 +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index b07f75b..72b9911 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,225 @@ -# Single Application Cluster Setup in Kubernetes +# Cloud Infrastructure Provisioning API -This guide outlines the process for deploying a single application within a Kubernetes cluster, from cluster creation to application access. +This project is an Internal Developer Platform API built on FastAPI, Kubernetes, Terraform, PostgreSQL/SQLite, Redis, Helm, Prometheus, Grafana, and JWT authentication. It extends the original Kubernetes manifests into a mini Heroku/Render/Railway-style platform for provisioning infrastructure and deploying containerized applications through REST APIs. +## Architecture -## Step 1: Write a Kubernetes Deployment YAML File +The API receives authenticated platform requests, validates input, stores metadata in the database, and orchestrates Kubernetes or Terraform operations through service-layer modules. 
-Define the application's deployment on Kubernetes: +Request flow for application deployment: -- Draft a YAML file (`deployment-file.yaml`) that specifies the deployment details, including the Docker image, ports, environment variables, and other configurations. +1. User authenticates with JWT. +2. API validates Docker image, namespace, port, replica, ingress, and autoscaling inputs. +3. A deployment row is created in the database. +4. Kubernetes service layer creates namespace, Deployment, Service, Ingress, and HPA. +5. Deployment status, URL, autoscaling settings, and errors are persisted. +6. Users query deployment status, logs, metrics, and cluster health through API endpoints. -## Step 2: Create a Kubernetes Deployment +## Project Structure -Deploy the application using `kubectl`: +```text +app/ FastAPI app, configuration, logging +api/ Route handlers and Pydantic schemas +auth/ JWT, RBAC, rate limiting +database/ SQLAlchemy models and session lifecycle +services/ Kubernetes, Terraform, deployment, monitoring logic +kubernetes/ Cluster RBAC and network policy examples +terraform/ AWS Terraform templates +helm/ Helm chart for the API itself +monitoring/ Prometheus and Grafana examples +scripts/ Bootstrap, migration, production checklist helpers +tests/ Unit tests +``` -- Run `kubectl apply -f ` to deploy the application, replacing `` with the path to the YAML file. +## API Endpoints -## Step 3: Expose The Deployment as a Service +Auth: -Make the deployment accessible: +- `POST /auth/register` +- `POST /auth/login` +- `GET /auth/me` -- Expose the deployment by executing: +Infrastructure: -Replace `` and `` with specific details to create a LoadBalancer service for external traffic. 
+- `POST /infrastructure/create` +- `GET /infrastructure/{id}` +- `DELETE /infrastructure/{id}` -## Step 4: Verify the Deployment +Deployments: -Check the status of the deployment: +- `POST /deployments` +- `GET /deployments/{id}` +- `DELETE /deployments/{id}` -- Confirm the application's pod is running with `kubectl get pods`. -- Check the service creation and the external IP assignment with `kubectl get services`. +Kubernetes: -## Step 5: Access The Application +- `POST /namespace/create` +- `POST /service/expose` +- `POST /autoscaling/create` +- `POST /kubernetes/ingress/create` -Visit the deployed application: +Monitoring: -- Use a web browser to navigate to the external IP address assigned to the service and access the application. +- `GET /cluster/health` +- `GET /metrics` +- `GET /logs/{pod}?namespace=default` -Following these steps will get the application deployed and accessible within a Kubernetes cluster. +Swagger/OpenAPI is available at `/docs`. +## Local Development -Screenshot 2023-04-02 at 20 16 24 +Create an environment file: + +```bash +cp .env.example .env +``` + +For local development without a Kubernetes cluster or Terraform credentials, keep: + +```bash +KUBERNETES_DRY_RUN=true +TERRAFORM_DRY_RUN=true +DATABASE_URL=sqlite:///./idp.db +``` + +Install dependencies and run the API: + +```bash +python3 -m venv .venv +./.venv/bin/pip install -r requirements.txt +./.venv/bin/uvicorn app.main:app --reload +``` + +Register and log in: + +```bash +curl -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d '{"username":"platform-user","password":"change-me-123"}' + +TOKEN=$(curl -s -X POST http://localhost:8000/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"platform-user","password":"change-me-123"}' | jq -r .access_token) +``` + +Deploy an application: + +```bash +curl -X POST http://localhost:8000/deployments \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + 
-d '{ + "name": "demo-api", + "image": "nginx:1.25", + "port": 80, + "replicas": 2, + "min_replicas": 1, + "max_replicas": 5, + "cpu_threshold": 70 + }' +``` + +## Terraform + +The infrastructure API renders [terraform/main.tf.j2](terraform/main.tf.j2) and can create or destroy AWS resources. It is dry-run by default. Before enabling real execution: + +- Create an encrypted S3 backend bucket. +- Create a DynamoDB lock table. +- Replace `TERRAFORM_STATE_BUCKET` and `TERRAFORM_LOCK_TABLE`. +- Use IAM roles with least privilege. +- Review generated plans before production use. + +Example: + +```bash +curl -X POST http://localhost:8000/infrastructure/create \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "platform-dev", + "cloud_provider": "aws", + "config": { + "aws_region": "us-east-1", + "eks_role_arn": "arn:aws:iam::123456789012:role/EKSRole" + } + }' +``` + +## Helm Deployment + +Render or install the API chart: + +```bash +helm template idp-api helm/charts/idp-api \ + --set secrets.databaseUrl='postgresql://user:pass@postgres:5432/idp' \ + --set secrets.secretKey='replace-with-long-random-secret' + +helm upgrade --install idp-api helm/charts/idp-api \ + --set image.repository=registry.example.com/idp-api \ + --set image.tag=v1 \ + --set secrets.databaseUrl='postgresql://user:pass@postgres:5432/idp' \ + --set secrets.secretKey='replace-with-long-random-secret' +``` + +## Security + +Implemented: + +- JWT authentication +- Role-aware user model +- Protected infrastructure, deployment, Kubernetes, and monitoring APIs +- Redis-backed rate limiting with local fallback +- Non-root Docker container +- Kubernetes RBAC and network-policy examples +- No hardcoded production secret requirement in Helm + +Recommended before production: + +- Use AWS Secrets Manager, External Secrets Operator, or sealed-secrets. +- Replace SQLite with managed PostgreSQL. +- Use Alembic migrations. 
+- Run Terraform through a job queue or workflow engine rather than synchronous HTTP requests. +- Enforce tenant-aware namespace ownership. +- Add admission policies with Kyverno or OPA Gatekeeper. +- Use image allowlists and vulnerability scanning. +- Require immutable image digests for production deployments. + +## Observability + +The API exposes Prometheus metrics at `/metrics`. Example scrape configuration and a Grafana dashboard starter live in `monitoring/`. + +Recommended production stack: + +- Prometheus Operator +- Grafana dashboards for API latency, error rate, Kubernetes deployment state, and Terraform failures +- Loki or OpenSearch for structured logs +- Alertmanager alerts for failed provisions, high error rate, and unhealthy clusters + +## CI/CD + +The GitHub Actions workflow installs dependencies, runs linting/tests, and builds the Docker image. Registry push and Kubernetes deployment are intentionally placeholders until registry, cluster, and secret strategy are configured. + +## Implementation Phases + +Phase 1: Architecture and folder structure are represented by the layered app layout. + +Phase 2: FastAPI backend includes auth, validation, database models, OpenAPI, health checks, and rate limiting. + +Phase 3: Kubernetes integration creates namespaces, deployments, services, ingress, HPA, status, logs, and safe deletes. + +Phase 4: Terraform automation renders AWS templates and supports apply/destroy with remote-state configuration. + +Phase 5: Monitoring exposes Prometheus metrics, cluster health, pod logs, and dashboard examples. + +Phase 6: CI/CD builds, lints, tests, and prepares image/deployment stages. + +Phase 7: Production hardening is documented in `scripts/prod_checklist.md` and should be completed before real cloud use. + +## Scaling Recommendations + +- Move long-running deploy/provision tasks to Celery, RQ, Temporal, or Argo Workflows. +- Add per-tenant quotas for namespaces, replicas, CPU, memory, and load balancers. 
+- Use GitOps with ArgoCD for reconciliation and auditability. +- Split API, worker, scheduler, and webhook receiver into separate deployments. +- Use PostgreSQL row-level ownership checks and explicit tenant IDs. +- Add blue/green and canary deployment strategies with Argo Rollouts or Flagger. diff --git a/api/routes/auth.py b/api/routes/auth.py new file mode 100644 index 0000000..55f9143 --- /dev/null +++ b/api/routes/auth.py @@ -0,0 +1,40 @@ +from fastapi import APIRouter, HTTPException, Depends +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session +from database.models import User +from auth.jwt_utils import get_password_hash, verify_password, create_access_token +from auth.rbac import get_current_user +from database.session import get_db +from api.schemas import LoginRequest, RegisterRequest, TokenResponse + +router = APIRouter() + +@router.post("/register") +def register(request: RegisterRequest, db: Session = Depends(get_db)): + """Create a user account; respond 400 when the username is already taken.""" + user = db.query(User).filter(User.username == request.username).first() + if user: + raise HTTPException(status_code=400, detail="Username already registered") + hashed_pw = get_password_hash(request.password) + new_user = User(username=request.username, hashed_password=hashed_pw) + db.add(new_user) + try: + db.commit() + except IntegrityError: + # A concurrent request inserted the same username after the lookup above. + db.rollback() + raise HTTPException(status_code=400, detail="Username already registered") + db.refresh(new_user) + return {"id": new_user.id, "username": new_user.username, "role": new_user.role} + +@router.post("/login", response_model=TokenResponse) +def login(request: LoginRequest, db: Session = Depends(get_db)): + user = db.query(User).filter(User.username == request.username).first() + if not user or not verify_password(request.password, user.hashed_password): + raise HTTPException(status_code=401, detail="Invalid credentials") + token = create_access_token({"sub": str(user.id), "role": user.role}) + return {"access_token": token, "token_type": "bearer"} + +@router.get("/me") +def get_me(current_user=Depends(get_current_user)): + return {"id": current_user.id, "username": current_user.username, "role": current_user.role} diff
--git a/api/routes/deployments.py b/api/routes/deployments.py new file mode 100644 index 0000000..5fe5917 --- /dev/null +++ b/api/routes/deployments.py @@ -0,0 +1,62 @@ +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.orm import Session + +from api.schemas import DeploymentCreateRequest, DeploymentResponse +from auth.rbac import get_current_user +from database.models import Deployment, User +from database.session import get_db +from services.deployment_service import ( + delete_kubernetes_deployment, + get_kubernetes_deployment_status, + provision_application, +) + +router = APIRouter() + +@router.post("", response_model=DeploymentResponse, status_code=201) +def create_deployment_route( + request: DeploymentCreateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + return provision_application(db, current_user, request) + +@router.get("/{id}", response_model=DeploymentResponse) +def get_deployment( + id: int, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + """Return the caller's deployment, refreshing its stored Kubernetes status while it is running.""" + deployment = db.query(Deployment).filter(Deployment.id == id, Deployment.owner_id == current_user.id).first() + if not deployment: + raise HTTPException(status_code=404, detail="Deployment not found") + if deployment.status == "running": + try: + deployment.metadata_json = { + **(deployment.metadata_json or {}), + "kubernetes_status": get_kubernetes_deployment_status(deployment.namespace, deployment.name), + } + db.add(deployment) + db.commit() + db.refresh(deployment) + except Exception as exc: + # Reset the session in case the commit itself failed; the error below is only returned, not persisted. + db.rollback() + deployment.metadata_json = {**(deployment.metadata_json or {}), "status_error": str(exc)} + return deployment + +@router.delete("/{id}") +def delete_deployment( + id: int, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + deployment = db.query(Deployment).filter(Deployment.id == id, Deployment.owner_id == current_user.id).first() + if not deployment: + raise
HTTPException(status_code=404, detail="Deployment not found") + delete_kubernetes_deployment(deployment.namespace, deployment.name) + deployment.status = "deleted" + db.add(deployment) + db.commit() + return {"id": id, "status": "deleted"} diff --git a/api/routes/infrastructure.py b/api/routes/infrastructure.py new file mode 100644 index 0000000..b367be2 --- /dev/null +++ b/api/routes/infrastructure.py @@ -0,0 +1,69 @@ +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.orm import Session + +from api.schemas import InfrastructureCreateRequest, InfrastructureResponse +from auth.rbac import get_current_user +from database.models import Infrastructure, User +from database.session import get_db +from services.infra_service import provision_infrastructure, destroy_infrastructure + +router = APIRouter() + +@router.post("/create", response_model=InfrastructureResponse, status_code=201) +def create_infrastructure( + request: InfrastructureCreateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + infra = Infrastructure( + owner_id=current_user.id, + name=request.name, + cloud_provider=request.cloud_provider, + config=request.config, + status="provisioning", + ) + db.add(infra) + db.commit() + db.refresh(infra) + result = provision_infrastructure(request.name, request.cloud_provider, request.config) + if result is True: + infra.status = "ready" + else: + infra.status = "failed" + infra.last_error = str(result) + db.add(infra) + db.commit() + db.refresh(infra) + return infra + +@router.delete("/{id}") +def delete_infrastructure( + id: int, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + infra = db.query(Infrastructure).filter(Infrastructure.id == id, Infrastructure.owner_id == current_user.id).first() + if not infra: + raise HTTPException(status_code=404, detail="Infrastructure not found") + result = destroy_infrastructure(infra.name, infra.cloud_provider, infra.config 
or {}) + if result is True: + infra.status = "deleted" + db.add(infra) + db.commit() + return {"id": id, "status": "deleted"} + infra.status = "delete_failed" + infra.last_error = str(result) + db.add(infra) + db.commit() + raise HTTPException(status_code=500, detail=str(result)) + +@router.get("/{id}", response_model=InfrastructureResponse) +def get_infrastructure( + id: int, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + infra = db.query(Infrastructure).filter(Infrastructure.id == id, Infrastructure.owner_id == current_user.id).first() + if not infra: + raise HTTPException(status_code=404, detail="Infrastructure not found") + return infra diff --git a/api/routes/kubernetes.py b/api/routes/kubernetes.py new file mode 100644 index 0000000..203b809 --- /dev/null +++ b/api/routes/kubernetes.py @@ -0,0 +1,59 @@ +from fastapi import APIRouter, Depends, HTTPException + +from api.schemas import AutoscalingRequest, IngressRequest, NamespaceRequest, ServiceExposeRequest +from auth.rbac import get_current_user +from services.autoscaling_service import create_hpa +from services.ingress_service import create_ingress +from services.k8s_service import create_namespace +from services.service_service import expose_service + +router = APIRouter() + +@router.post("/namespace/create") +def create_namespace_route(request: NamespaceRequest, current_user=Depends(get_current_user)): + success = create_namespace(request.name) + if success: + return {"msg": f"Namespace '{request.name}' created."} + raise HTTPException(status_code=500, detail="Failed to create namespace.") + +@router.post("/service/expose") +def expose_service_route(request: ServiceExposeRequest, current_user=Depends(get_current_user)): + success = expose_service( + namespace=request.namespace, + name=request.name, + port=request.port, + target_port=request.target_port, + type_=request.type + ) + if success: + return {"msg": f"Service '{request.name}' exposed in namespace 
'{request.namespace}'."} + raise HTTPException(status_code=500, detail="Failed to expose service.") + +@router.post("/autoscaling/create") +def create_autoscaling_route(request: AutoscalingRequest, current_user=Depends(get_current_user)): + success = create_hpa( + namespace=request.namespace, + deployment=request.deployment, + min_replicas=request.min_replicas, + max_replicas=request.max_replicas, + cpu_threshold=request.cpu_threshold + ) + if success: + return {"msg": f"Autoscaling for {request.namespace}/{request.deployment} configured."} + raise HTTPException(status_code=500, detail="Failed to configure autoscaling.") + + +@router.post("/ingress/create") +def create_ingress_route(request: IngressRequest, current_user=Depends(get_current_user)): + success = create_ingress( + namespace=request.namespace, + name=request.name, + service_name=request.service_name, + service_port=request.service_port, + host=request.host + ) + if success: + return { + "msg": f"Ingress '{request.name}' created for service '{request.service_name}' on host '{request.host}'." 
+ } + raise HTTPException(status_code=500, detail="Failed to create ingress.") diff --git a/api/routes/monitoring.py b/api/routes/monitoring.py new file mode 100644 index 0000000..ca2b4ed --- /dev/null +++ b/api/routes/monitoring.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter, Depends, Query +from services.monitoring_service import get_cluster_health, get_pod_logs +from fastapi import Response +from prometheus_client import generate_latest, CONTENT_TYPE_LATEST +from auth.rbac import get_current_user + +router = APIRouter() + +@router.get("/cluster/health") +def cluster_health(current_user=Depends(get_current_user)): + return get_cluster_health() + +@router.get("/metrics") +def metrics(current_user=Depends(get_current_user)): + return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) + +@router.get("/logs/{pod}") +def logs(pod: str, namespace: str = Query("default"), current_user=Depends(get_current_user)): + return get_pod_logs(namespace, pod) diff --git a/api/schemas.py b/api/schemas.py new file mode 100644 index 0000000..882e19c --- /dev/null +++ b/api/schemas.py @@ -0,0 +1,105 @@ +from datetime import datetime +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field, validator + + +class TokenResponse(BaseModel): + access_token: str + token_type: str = "bearer" + + +class RegisterRequest(BaseModel): + username: str = Field(..., min_length=3, max_length=64) + password: str = Field(..., min_length=8, max_length=128) + + +class LoginRequest(BaseModel): + username: str + password: str + + +class InfrastructureCreateRequest(BaseModel): + name: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + cloud_provider: str = Field(default="aws", regex=r"^aws$") + config: Dict[str, Any] = Field(default_factory=dict) + + +class InfrastructureResponse(BaseModel): + id: int + name: str + cloud_provider: str + status: str + config: Dict[str, Any] + last_error: Optional[str] = None + created_at: datetime + 
+ class Config: + orm_mode = True + + +class DeploymentCreateRequest(BaseModel): + image: str = Field(..., min_length=3, max_length=255) + name: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + namespace: Optional[str] = Field(default=None, max_length=63) + port: int = Field(default=80, ge=1, le=65535) + replicas: int = Field(default=1, ge=1, le=20) + min_replicas: int = Field(default=1, ge=1, le=20) + max_replicas: int = Field(default=3, ge=1, le=100) + cpu_threshold: int = Field(default=70, ge=10, le=95) + ingress_host: Optional[str] = Field(default=None, max_length=253) + env: Dict[str, str] = Field(default_factory=dict) + + @validator("max_replicas") + def max_gte_min(cls, value, values): + min_replicas = values.get("min_replicas", 1) + if value < min_replicas: + raise ValueError("max_replicas must be greater than or equal to min_replicas") + return value + + +class DeploymentResponse(BaseModel): + id: int + owner_id: int + name: str + namespace: str + image: str + port: int + replicas: int + ingress_host: Optional[str] = None + url: Optional[str] = None + status: str + metadata_json: Dict[str, Any] = {} + last_error: Optional[str] = None + created_at: datetime + + class Config: + orm_mode = True + + +class NamespaceRequest(BaseModel): + name: str = Field(..., min_length=3, max_length=63, regex=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + + +class ServiceExposeRequest(BaseModel): + namespace: str + name: str + port: int = Field(..., ge=1, le=65535) + target_port: int = Field(..., ge=1, le=65535) + type: str = Field(default="ClusterIP", regex=r"^(ClusterIP|NodePort|LoadBalancer)$") + + +class IngressRequest(BaseModel): + namespace: str + name: str + service_name: str + service_port: int = Field(..., ge=1, le=65535) + host: str + + +class AutoscalingRequest(BaseModel): + namespace: str = "default" + deployment: str + min_replicas: int = Field(..., ge=1) + max_replicas: int = Field(..., ge=1) + cpu_threshold: int = Field(..., 
ge=10, le=95) diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..b531bb3 --- /dev/null +++ b/app/config.py @@ -0,0 +1,34 @@ +from functools import lru_cache +from pydantic import BaseSettings, Field + +class Settings(BaseSettings): + PROJECT_NAME: str = "Cloud Infrastructure Provisioning API" + VERSION: str = "1.0.0" + ENVIRONMENT: str = Field(default="local", env="ENVIRONMENT") + DEBUG: bool = Field(default=True, env="APP_DEBUG") + DATABASE_URL: str = Field(default="sqlite:///./idp.db", env="DATABASE_URL") + REDIS_URL: str = Field(default="redis://localhost:6379/0", env="REDIS_URL") + SECRET_KEY: str = Field(default="change-me-in-production", env="SECRET_KEY") + JWT_ALGORITHM: str = "HS256" + ACCESS_TOKEN_EXPIRE_MINUTES: int = Field(default=60, env="ACCESS_TOKEN_EXPIRE_MINUTES") + AWS_REGION: str = Field(default="us-east-1", env="AWS_REGION") + DEFAULT_INGRESS_DOMAIN: str = Field(default="apps.local", env="DEFAULT_INGRESS_DOMAIN") + KUBERNETES_NAMESPACE_PREFIX: str = Field(default="tenant", env="KUBERNETES_NAMESPACE_PREFIX") + KUBERNETES_DRY_RUN: bool = Field(default=False, env="KUBERNETES_DRY_RUN") + TERRAFORM_DRY_RUN: bool = Field(default=True, env="TERRAFORM_DRY_RUN") + TERRAFORM_STATE_BUCKET: str = Field(default="replace-me-terraform-state", env="TERRAFORM_STATE_BUCKET") + TERRAFORM_LOCK_TABLE: str = Field(default="replace-me-terraform-locks", env="TERRAFORM_LOCK_TABLE") + RATE_LIMIT_REQUESTS_PER_HOUR: int = Field(default=100, env="RATE_LIMIT_REQUESTS_PER_HOUR") + REQUIRE_AUTH_FOR_PLATFORM_APIS: bool = Field(default=True, env="REQUIRE_AUTH_FOR_PLATFORM_APIS") + + class Config: + env_file = ".env" + case_sensitive = True + + +@lru_cache() +def get_settings() -> Settings: + return Settings() + + +settings = get_settings() diff --git a/app/logger.py b/app/logger.py new file mode 100644 index 0000000..8861b01 --- /dev/null +++ b/app/logger.py @@ -0,0 +1,9 @@ +import logging +import sys + +def setup_logging(): + logging.basicConfig( + 
level=logging.INFO, + format='{"time":"%(asctime)s","level":"%(levelname)s","logger":"%(name)s","message":"%(message)s"}', + handlers=[logging.StreamHandler(sys.stdout)] + ) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..e0da8b9 --- /dev/null +++ b/app/main.py @@ -0,0 +1,57 @@ +from fastapi import Depends, FastAPI +from api.routes import auth, infrastructure, deployments, kubernetes, monitoring +from auth.rate_limit import rate_limiter +from app.config import settings +from app.logger import setup_logging +from database.session import init_db + +setup_logging() + + +app = FastAPI( + title=settings.PROJECT_NAME, + version=settings.VERSION, + description=( + "Internal Developer Platform API for infrastructure provisioning " + "and Kubernetes application deployment." + ), +) + + +@app.on_event("startup") +def on_startup(): + init_db() + + +app.include_router(auth.router, prefix="/auth", tags=["auth"], dependencies=[Depends(rate_limiter)]) +app.include_router( + infrastructure.router, + prefix="/infrastructure", + tags=["infrastructure"], + dependencies=[Depends(rate_limiter)], +) +app.include_router( + deployments.router, + prefix="/deployments", + tags=["deployments"], + dependencies=[Depends(rate_limiter)], +) +app.include_router(kubernetes.router, prefix="/kubernetes", tags=["kubernetes"], dependencies=[Depends(rate_limiter)]) +app.include_router(monitoring.router, prefix="/monitoring", tags=["monitoring"], dependencies=[Depends(rate_limiter)]) +app.include_router(kubernetes.router, tags=["kubernetes"], dependencies=[Depends(rate_limiter)]) +app.include_router(monitoring.router, tags=["monitoring"], dependencies=[Depends(rate_limiter)]) + + +@app.get("/healthz") +def health_check(): + return {"status": "ok"} + + +@app.get("/readyz") +def readiness_check(): + return { + "status": "ready", + "environment": settings.ENVIRONMENT, + "kubernetes_dry_run": settings.KUBERNETES_DRY_RUN, + "terraform_dry_run": settings.TERRAFORM_DRY_RUN, + } diff 
--git a/auth/jwt_utils.py b/auth/jwt_utils.py new file mode 100644 index 0000000..41e838e --- /dev/null +++ b/auth/jwt_utils.py @@ -0,0 +1,30 @@ +from datetime import datetime, timedelta +from jose import JWTError, jwt +from passlib.context import CryptContext +from app.config import settings + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + +SECRET_KEY = settings.SECRET_KEY +ALGORITHM = settings.JWT_ALGORITHM +ACCESS_TOKEN_EXPIRE_MINUTES = settings.ACCESS_TOKEN_EXPIRE_MINUTES + +def verify_password(plain_password, hashed_password): + return pwd_context.verify(plain_password, hashed_password) + +def get_password_hash(password): + return pwd_context.hash(password) + +def create_access_token(data: dict, expires_delta: timedelta = None): + to_encode = data.copy() + expire = datetime.utcnow() + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) + to_encode.update({"exp": expire}) + encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt + +def decode_access_token(token: str): + try: + payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + return payload + except JWTError: + return None diff --git a/auth/rate_limit.py b/auth/rate_limit.py new file mode 100644 index 0000000..61dd7ac --- /dev/null +++ b/auth/rate_limit.py @@ -0,0 +1,28 @@ +import redis +from fastapi import Request, HTTPException +from app.config import settings + +try: + r = redis.Redis.from_url(settings.REDIS_URL, socket_connect_timeout=0.2, socket_timeout=0.2) +except Exception: + r = None + + +def rate_limiter(request: Request): + if r is None: + return + ip = request.client.host + key = f"rate_limit:{ip}" + try: + count = r.get(key) + if count and int(count) >= settings.RATE_LIMIT_REQUESTS_PER_HOUR: + raise HTTPException(status_code=429, detail="Rate limit exceeded") + pipe = r.pipeline() + pipe.incr(key, 1) + pipe.expire(key, 3600) + pipe.execute() + except HTTPException: + raise + except Exception: + # Keep local 
development and CI usable if Redis is not available. + return diff --git a/auth/rbac.py b/auth/rbac.py new file mode 100644 index 0000000..0ef0ed5 --- /dev/null +++ b/auth/rbac.py @@ -0,0 +1,31 @@ +from fastapi import Depends, HTTPException +from fastapi.security import OAuth2PasswordBearer +from jose import JWTError, jwt +from sqlalchemy.orm import Session +from app.config import settings +from database.models import User +from database.session import get_db + +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login") + +def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)): + try: + payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.JWT_ALGORITHM]) + user_id: str = payload.get("sub") + if user_id is None: + raise HTTPException(status_code=401, detail="Invalid credentials") + user = db.query(User).filter(User.id == int(user_id)).first() + if not user: + raise HTTPException(status_code=401, detail="Invalid credentials") + return user + except JWTError: + raise HTTPException(status_code=401, detail="Invalid credentials") + except ValueError: + raise HTTPException(status_code=401, detail="Invalid credentials") + +def require_role(required_role: str): + def role_checker(user=Depends(get_current_user)): + if user.role != required_role: + raise HTTPException(status_code=403, detail="Insufficient permissions") + return user + return role_checker diff --git a/database/models.py b/database/models.py new file mode 100644 index 0000000..4bf890a --- /dev/null +++ b/database/models.py @@ -0,0 +1,42 @@ +from sqlalchemy import Column, Integer, String, DateTime, JSON, Text +from sqlalchemy.ext.declarative import declarative_base +import datetime + +Base = declarative_base() + +class User(Base): + __tablename__ = 'users' + id = Column(Integer, primary_key=True, index=True) + username = Column(String, unique=True, index=True, nullable=False) + hashed_password = Column(String, nullable=False) + role = Column(String, 
default='user', nullable=False) + created_at = Column(DateTime, default=datetime.datetime.utcnow) + +class Deployment(Base): + __tablename__ = 'deployments' + id = Column(Integer, primary_key=True, index=True) + owner_id = Column(Integer, index=True, nullable=False) + name = Column(String, index=True, nullable=False) + namespace = Column(String, index=True, nullable=False) + image = Column(String, nullable=False) + port = Column(Integer, default=80) + replicas = Column(Integer, default=1) + ingress_host = Column(String) + url = Column(String) + status = Column(String, default='pending') + metadata_json = Column(JSON, default=dict) + last_error = Column(Text) + created_at = Column(DateTime, default=datetime.datetime.utcnow) + updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow) + +class Infrastructure(Base): + __tablename__ = 'infrastructure' + id = Column(Integer, primary_key=True, index=True) + owner_id = Column(Integer, index=True, nullable=False) + name = Column(String, index=True, nullable=False) + cloud_provider = Column(String, default='aws') + config = Column(JSON, default=dict) + status = Column(String, default='provisioning') + last_error = Column(Text) + created_at = Column(DateTime, default=datetime.datetime.utcnow) + updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow) diff --git a/database/session.py b/database/session.py new file mode 100644 index 0000000..e268be1 --- /dev/null +++ b/database/session.py @@ -0,0 +1,22 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from app.config import settings +from database.models import Base + + +connect_args = {"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {} +engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True, connect_args=connect_args) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def init_db() -> None: 
+ Base.metadata.create_all(bind=engine) + + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/helm/charts/README.md b/helm/charts/README.md new file mode 100644 index 0000000..1e6f06b --- /dev/null +++ b/helm/charts/README.md @@ -0,0 +1,14 @@ +# Helm Charts + +Place your Helm charts here for application deployments, ingress, monitoring, etc. + +Example structure: + +charts/ + my-app/ + Chart.yaml + values.yaml + templates/ + deployment.yaml + service.yaml + ingress.yaml diff --git a/helm/charts/idp-api/Chart.yaml b/helm/charts/idp-api/Chart.yaml new file mode 100644 index 0000000..47a5193 --- /dev/null +++ b/helm/charts/idp-api/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v2 +name: idp-api +version: 0.1.0 +description: A Helm chart for the Cloud Infrastructure Provisioning API diff --git a/helm/charts/idp-api/templates/_helpers.tpl b/helm/charts/idp-api/templates/_helpers.tpl new file mode 100644 index 0000000..2d42b28 --- /dev/null +++ b/helm/charts/idp-api/templates/_helpers.tpl @@ -0,0 +1,11 @@ +{{- define "idp-api.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "idp-api.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name (include "idp-api.name" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} diff --git a/helm/charts/idp-api/templates/configmap.yaml b/helm/charts/idp-api/templates/configmap.yaml new file mode 100644 index 0000000..ae96ac7 --- /dev/null +++ b/helm/charts/idp-api/templates/configmap.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "idp-api.fullname" . 
}}-config +data: + ENVIRONMENT: {{ .Values.config.environment | quote }} + AWS_REGION: {{ .Values.config.awsRegion | quote }} + DEFAULT_INGRESS_DOMAIN: {{ .Values.config.defaultIngressDomain | quote }} + KUBERNETES_DRY_RUN: {{ .Values.config.kubernetesDryRun | quote }} + TERRAFORM_DRY_RUN: {{ .Values.config.terraformDryRun | quote }} diff --git a/helm/charts/idp-api/templates/deployment.yaml b/helm/charts/idp-api/templates/deployment.yaml new file mode 100644 index 0000000..03bb6ea --- /dev/null +++ b/helm/charts/idp-api/templates/deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "idp-api.fullname" . }} + labels: + app: {{ include "idp-api.name" . }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "idp-api.name" . }} + template: + metadata: + labels: + app: {{ include "idp-api.name" . }} + spec: + serviceAccountName: {{ include "idp-api.fullname" . }} + containers: + - name: idp-api + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - containerPort: 8000 + envFrom: + - configMapRef: + name: {{ include "idp-api.fullname" . }}-config + - secretRef: + name: {{ include "idp-api.fullname" . }}-secrets + livenessProbe: + httpGet: + path: /healthz + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {{- toYaml .Values.resources | nindent 12 }} diff --git a/helm/charts/idp-api/templates/hpa.yaml b/helm/charts/idp-api/templates/hpa.yaml new file mode 100644 index 0000000..44f0eb8 --- /dev/null +++ b/helm/charts/idp-api/templates/hpa.yaml @@ -0,0 +1,20 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "idp-api.fullname" . 
}} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "idp-api.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} +{{- end }} diff --git a/helm/charts/idp-api/templates/ingress.yaml b/helm/charts/idp-api/templates/ingress.yaml new file mode 100644 index 0000000..b1080ef --- /dev/null +++ b/helm/charts/idp-api/templates/ingress.yaml @@ -0,0 +1,27 @@ +{{- if .Values.ingress.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "idp-api.fullname" . }} + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" +spec: + ingressClassName: {{ .Values.ingress.className }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ include "idp-api.fullname" $ }} + port: + number: 8000 + {{- end }} + {{- end }} + tls: + {{- toYaml .Values.ingress.tls | nindent 4 }} +{{- end }} diff --git a/helm/charts/idp-api/templates/secret.yaml b/helm/charts/idp-api/templates/secret.yaml new file mode 100644 index 0000000..942efda --- /dev/null +++ b/helm/charts/idp-api/templates/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "idp-api.fullname" . 
}}-secrets +type: Opaque +stringData: + DATABASE_URL: {{ required "secrets.databaseUrl is required" .Values.secrets.databaseUrl | quote }} + REDIS_URL: {{ .Values.secrets.redisUrl | quote }} + SECRET_KEY: {{ required "secrets.secretKey is required" .Values.secrets.secretKey | quote }} diff --git a/helm/charts/idp-api/templates/service.yaml b/helm/charts/idp-api/templates/service.yaml new file mode 100644 index 0000000..7b809a7 --- /dev/null +++ b/helm/charts/idp-api/templates/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "idp-api.fullname" . }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 8000 + protocol: TCP + name: http + selector: + app: {{ include "idp-api.name" . }} diff --git a/helm/charts/idp-api/templates/serviceaccount.yaml b/helm/charts/idp-api/templates/serviceaccount.yaml new file mode 100644 index 0000000..869c773 --- /dev/null +++ b/helm/charts/idp-api/templates/serviceaccount.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "idp-api.fullname" . 
}} diff --git a/helm/charts/idp-api/values.yaml b/helm/charts/idp-api/values.yaml new file mode 100644 index 0000000..6910829 --- /dev/null +++ b/helm/charts/idp-api/values.yaml @@ -0,0 +1,39 @@ +replicaCount: 2 +image: + repository: your-docker-repo/idp-api + tag: latest + pullPolicy: IfNotPresent +service: + type: ClusterIP + port: 8000 +config: + environment: production + awsRegion: us-east-1 + defaultIngressDomain: apps.example.com + kubernetesDryRun: "false" + terraformDryRun: "false" +secrets: + databaseUrl: "" + redisUrl: "redis://redis-master:6379/0" + secretKey: "" +ingress: + enabled: true + className: "nginx" + hosts: + - host: idp.local + paths: + - path: / + pathType: Prefix + tls: [] +resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 diff --git a/kubernetes/client.py b/kubernetes/client.py new file mode 100644 index 0000000..d3b1c7f --- /dev/null +++ b/kubernetes/client.py @@ -0,0 +1,18 @@ +from kubernetes import client, config +import os +import logging + +logger = logging.getLogger(__name__) + +def get_k8s_client(): + try: + if os.getenv("KUBERNETES_SERVICE_HOST"): + config.load_incluster_config() + logger.info("Loaded in-cluster Kubernetes config") + else: + config.load_kube_config() + logger.info("Loaded local kubeconfig") + return client.CoreV1Api(), client.AppsV1Api(), client.AutoscalingV1Api(), client.NetworkingV1Api() + except Exception as e: + logger.error(f"Failed to load Kubernetes config: {e}") + raise diff --git a/kubernetes/network-policy.yaml b/kubernetes/network-policy.yaml new file mode 100644 index 0000000..4be378c --- /dev/null +++ b/kubernetes/network-policy.yaml @@ -0,0 +1,25 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-idp-api + namespace: default +spec: + podSelector: + matchLabels: + app: idp-api + policyTypes: + - Ingress + - Egress + ingress: + - 
from: + - podSelector: {} + ports: + - protocol: TCP + port: 8000 + egress: + - to: + - ipBlock: + cidr: 0.0.0.0/0 + ports: + - protocol: TCP + port: 53 diff --git a/kubernetes/rbac.yaml b/kubernetes/rbac.yaml new file mode 100644 index 0000000..dd8216f --- /dev/null +++ b/kubernetes/rbac.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: idp-api + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: default + name: idp-api-role +rules: +- apiGroups: [""] + resources: ["pods", "services", "namespaces"] + verbs: ["get", "list", "create", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: idp-api-rolebinding + namespace: default +subjects: +- kind: ServiceAccount + name: idp-api + namespace: default +roleRef: + kind: Role + name: idp-api-role + apiGroup: rbac.authorization.k8s.io diff --git a/monitoring/grafana-dashboard.json b/monitoring/grafana-dashboard.json new file mode 100644 index 0000000..0831900 --- /dev/null +++ b/monitoring/grafana-dashboard.json @@ -0,0 +1,24 @@ +{ + "dashboard": { + "id": null, + "title": "IDP API Overview", + "panels": [ + { + "type": "graph", + "title": "Request Rate", + "targets": [ + { "expr": "rate(http_requests_total[5m])", "format": "time_series" } + ], + "datasource": "Prometheus" + }, + { + "type": "graph", + "title": "Error Rate", + "targets": [ + { "expr": "rate(http_requests_total{status=\"500\"}[5m])", "format": "time_series" } + ], + "datasource": "Prometheus" + } + ] + } +} diff --git a/monitoring/prometheus-scrape-config.yaml b/monitoring/prometheus-scrape-config.yaml new file mode 100644 index 0000000..a0f273e --- /dev/null +++ b/monitoring/prometheus-scrape-config.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-scrape-config + namespace: monitoring + labels: + app: prometheus + annotations: + prometheus.io/scrape: 'true' +data: + prometheus.yml: | + scrape_configs: 
+ - job_name: 'idp-api' + static_configs: + - targets: ['idp-api.default.svc.cluster.local:8000'] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..73261cc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +fastapi>=0.110,<0.111 +uvicorn[standard]>=0.27,<1.0 +pydantic>=1.10,<2.0 +kubernetes>=29,<31 +python-jose[cryptography]>=3.3,<4.0 +passlib[bcrypt]>=1.7,<2.0 +bcrypt>=4.0,<5.0 +psycopg2-binary>=2.9,<3.0 +sqlalchemy>=2.0,<3.0 +redis>=5.0,<6.0 +httpx>=0.27,<1.0 +jinja2>=3.1,<4.0 +python-dotenv>=1.0,<2.0 +prometheus-client>=0.20,<1.0 +pytest>=8.0,<9.0 +pytest-asyncio>=0.23,<1.0 diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh new file mode 100644 index 0000000..58b847e --- /dev/null +++ b/scripts/bootstrap.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +echo "[+] Bootstrapping local dev environment..." + +# Create Python venv +echo "[+] Creating Python virtual environment..." +python3 -m venv .venv +source .venv/bin/activate + +# Install dependencies +pip install --upgrade pip +pip install -r requirements.txt + +echo "[+] Done." diff --git a/scripts/migrate_db.sh b/scripts/migrate_db.sh new file mode 100644 index 0000000..a404bf3 --- /dev/null +++ b/scripts/migrate_db.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +# Example migration script using Alembic (if used) + +if [ ! -f alembic.ini ]; then + echo "[!] alembic.ini not found. Please set up Alembic." 
+ exit 1 +fi + +alembic upgrade head diff --git a/scripts/prod_checklist.md b/scripts/prod_checklist.md new file mode 100644 index 0000000..09b5d68 --- /dev/null +++ b/scripts/prod_checklist.md @@ -0,0 +1,44 @@ +# Production Readiness Checklist for IDP API + +## Infrastructure +- [ ] Terraform S3 backend configured +- [ ] AWS resources provisioned (VPC, EKS, IAM) +- [ ] Terraform state bucket secured + +## Kubernetes +- [ ] Ingress controller deployed (NGINX/ALB) +- [ ] Prometheus and Grafana installed +- [ ] RBAC and ServiceAccount applied +- [ ] NetworkPolicy applied +- [ ] Secrets created and referenced +- [ ] App deployed via Helm +- [ ] HTTPS enforced at ingress + +## Application +- [ ] All secrets from env/K8s Secrets +- [ ] JWT, RBAC, rate limiting enabled +- [ ] Liveness/readiness probes configured +- [ ] Structured logging enabled +- [ ] Prometheus metrics exposed + +## CI/CD +- [ ] GitHub Actions for build/test/deploy +- [ ] Security scanning (Trivy/Snyk) +- [ ] Automated DB migrations + +## Observability +- [ ] Prometheus scraping `/monitoring/metrics` +- [ ] Grafana dashboards imported +- [ ] Alerting configured + +## Security +- [ ] HTTPS everywhere +- [ ] RBAC and network policies +- [ ] Non-root containers +- [ ] Regular secret rotation + +## Advanced +- [ ] Multi-tenancy (namespaces, RBAC) +- [ ] GitOps (ArgoCD/Flux) +- [ ] Cost estimation +- [ ] Canary/blue-green deployments diff --git a/scripts/setup_env.sh b/scripts/setup_env.sh new file mode 100644 index 0000000..3407bbf --- /dev/null +++ b/scripts/setup_env.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +cp .env.example .env diff --git a/services/autoscaling_service.py b/services/autoscaling_service.py new file mode 100644 index 0000000..faf4358 --- /dev/null +++ b/services/autoscaling_service.py @@ -0,0 +1,40 @@ +from kubernetes import client +import logging +from app.config import settings +from services.kubernetes_client import get_k8s_client + +logger = logging.getLogger(__name__) + +def 
create_hpa(namespace: str, deployment: str, min_replicas: int, max_replicas: int, cpu_threshold: int): + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: hpa %s/%s would be created", namespace, deployment) + return True + try: + _, _, autoscaling_v1, _ = get_k8s_client() + hpa = client.V1HorizontalPodAutoscaler( + api_version="autoscaling/v1", + kind="HorizontalPodAutoscaler", + metadata=client.V1ObjectMeta(name=deployment, namespace=namespace), + spec=client.V1HorizontalPodAutoscalerSpec( + scale_target_ref=client.V1CrossVersionObjectReference( + api_version="apps/v1", + kind="Deployment", + name=deployment + ), + min_replicas=min_replicas, + max_replicas=max_replicas, + target_cpu_utilization_percentage=cpu_threshold + ) + ) + autoscaling_v1.create_namespaced_horizontal_pod_autoscaler(namespace=namespace, body=hpa) + logger.info("HPA for deployment '%s' created in namespace '%s'.", deployment, namespace) + return True + except client.rest.ApiException as e: + if e.status == 409: + logger.info("HPA for '%s' already exists.", deployment) + return True + logger.error("Error creating HPA: %s", e) + return False + except Exception as e: + logger.error("Unexpected error: %s", e) + return False diff --git a/services/deployment_service.py b/services/deployment_service.py new file mode 100644 index 0000000..010ec59 --- /dev/null +++ b/services/deployment_service.py @@ -0,0 +1,198 @@ +import logging +from typing import Dict, Optional + +from kubernetes import client +from kubernetes.client.rest import ApiException +from sqlalchemy.orm import Session + +from app.config import settings +from api.schemas import DeploymentCreateRequest +from database.models import Deployment, User +from services.kubernetes_client import get_k8s_client +from services.autoscaling_service import create_hpa +from services.ingress_service import create_ingress +from services.k8s_service import create_namespace +from services.service_service import expose_service + +logger = 
logging.getLogger(__name__) + + +def _default_namespace(user: User, name: str) -> str: + return f"{settings.KUBERNETES_NAMESPACE_PREFIX}-{user.id}-{name}"[:63].rstrip("-") + + +def _default_host(user: User, name: str) -> str: + return f"{name}-{user.id}.{settings.DEFAULT_INGRESS_DOMAIN}" + + +def validate_docker_image(image: str) -> None: + if " " in image or image.startswith(":") or image.endswith(":"): + raise ValueError("Invalid Docker image reference") + if "/" not in image and ":" not in image: + # Still allow official images, but force explicit tags in production. + logger.warning("Image '%s' has no registry or tag; use immutable tags in production", image) + + +def create_secret(namespace: str, name: str, data: Dict[str, str]) -> bool: + if not data: + return True + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: secret %s/%s would be created", namespace, name) + return True + try: + v1, _, _, _ = get_k8s_client() + secret = client.V1Secret( + api_version="v1", + kind="Secret", + metadata=client.V1ObjectMeta(name=name, namespace=namespace), + type="Opaque", + string_data=data, + ) + v1.create_namespaced_secret(namespace=namespace, body=secret) + return True + except ApiException as exc: + if exc.status == 409: + logger.info("Secret '%s' already exists.", name) + return True + logger.error("Error creating secret: %s", exc) + return False + + +def create_deployment( + namespace: str, + name: str, + image: str, + port: int = 80, + replicas: int = 1, + secret_name: Optional[str] = None, +): + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: deployment %s/%s with image %s would be created", namespace, name, image) + return True + try: + _, apps_v1, _, _ = get_k8s_client() + container = client.V1Container( + name=name, + image=image, + ports=[client.V1ContainerPort(container_port=port)], + env_from=[ + client.V1EnvFromSource(secret_ref=client.V1SecretEnvSource(name=secret_name)) + ] if secret_name else None, + ) + template = 
client.V1PodTemplateSpec( + metadata=client.V1ObjectMeta(labels={"app": name}), + spec=client.V1PodSpec(containers=[container]) + ) + spec = client.V1DeploymentSpec( + replicas=replicas, + template=template, + selector={'matchLabels': {'app': name}} + ) + deployment = client.V1Deployment( + api_version="apps/v1", + kind="Deployment", + metadata=client.V1ObjectMeta(name=name, namespace=namespace), + spec=spec + ) + apps_v1.create_namespaced_deployment(namespace=namespace, body=deployment) + logger.info("Deployment '%s' created in namespace '%s'.", name, namespace) + return True + except ApiException as e: + if e.status == 409: + logger.info("Deployment '%s' already exists.", name) + return True + logger.error("Error creating deployment: %s", e) + return False + except Exception as e: + logger.error("Unexpected error: %s", e) + return False + + +def delete_kubernetes_deployment(namespace: str, name: str) -> None: + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: Kubernetes resources for %s/%s would be deleted", namespace, name) + return + v1, apps_v1, autoscaling_v1, networking_v1 = get_k8s_client() + for delete_call, kwargs in [ + (networking_v1.delete_namespaced_ingress, {"name": name, "namespace": namespace}), + (autoscaling_v1.delete_namespaced_horizontal_pod_autoscaler, {"name": name, "namespace": namespace}), + (v1.delete_namespaced_service, {"name": name, "namespace": namespace}), + (apps_v1.delete_namespaced_deployment, {"name": name, "namespace": namespace}), + (v1.delete_namespaced_secret, {"name": f"{name}-env", "namespace": namespace}), + ]: + try: + delete_call(**kwargs) + except ApiException as exc: + if exc.status != 404: + raise + + +def get_kubernetes_deployment_status(namespace: str, name: str) -> Dict[str, Optional[int]]: + if settings.KUBERNETES_DRY_RUN: + return {"available_replicas": 1, "ready_replicas": 1, "replicas": 1} + _, apps_v1, _, _ = get_k8s_client() + deployment = apps_v1.read_namespaced_deployment(name=name, 
namespace=namespace) + status = deployment.status + return { + "available_replicas": status.available_replicas or 0, + "ready_replicas": status.ready_replicas or 0, + "replicas": status.replicas or 0, + } + + +def provision_application(db: Session, user: User, request: DeploymentCreateRequest) -> Deployment: + validate_docker_image(request.image) + namespace = request.namespace or _default_namespace(user, request.name) + ingress_host = request.ingress_host or _default_host(user, request.name) + deployment = Deployment( + owner_id=user.id, + name=request.name, + namespace=namespace, + image=request.image, + port=request.port, + replicas=request.replicas, + ingress_host=ingress_host, + url=f"https://{ingress_host}", + status="provisioning", + metadata_json={"env_keys": sorted(request.env.keys())}, + ) + db.add(deployment) + db.commit() + db.refresh(deployment) + + try: + steps = [ + create_namespace(namespace), + create_secret(namespace, f"{request.name}-env", request.env), + create_deployment( + namespace, + request.name, + request.image, + request.port, + request.replicas, + secret_name=f"{request.name}-env" if request.env else None, + ), + expose_service(namespace, request.name, 80, request.port), + create_ingress(namespace, request.name, request.name, 80, ingress_host), + create_hpa(namespace, request.name, request.min_replicas, request.max_replicas, request.cpu_threshold), + ] + if not all(steps): + raise RuntimeError("One or more Kubernetes resources failed to apply") + deployment.status = "running" + deployment.metadata_json = { + **(deployment.metadata_json or {}), + "autoscaling": { + "min_replicas": request.min_replicas, + "max_replicas": request.max_replicas, + "cpu_threshold": request.cpu_threshold, + }, + "dry_run": settings.KUBERNETES_DRY_RUN, + } + except Exception as exc: + logger.exception("Deployment provisioning failed") + deployment.status = "failed" + deployment.last_error = str(exc) + db.add(deployment) + db.commit() + db.refresh(deployment) + 
return deployment diff --git a/services/infra_service.py b/services/infra_service.py new file mode 100644 index 0000000..2410fac --- /dev/null +++ b/services/infra_service.py @@ -0,0 +1,68 @@ +import os +import subprocess +import tempfile +from jinja2 import Template +from app.config import settings + +TERRAFORM_TEMPLATE = os.path.join(os.path.dirname(__file__), '../terraform/main.tf.j2') + + +def render_terraform_config(context: dict) -> str: + with open(TERRAFORM_TEMPLATE) as f: + template = Template(f.read()) + return template.render(**context) + + +def run_terraform(directory: str, action: str = 'apply'): + if settings.TERRAFORM_DRY_RUN: + return True + cmds = [ + ['terraform', 'init'], + ['terraform', 'plan', '-out=tfplan'], + ['terraform', 'apply', '-auto-approve', 'tfplan'] + if action == 'apply' + else ['terraform', 'destroy', '-auto-approve'] + ] + for cmd in cmds: + proc = subprocess.run(cmd, cwd=directory, capture_output=True, text=True, timeout=1800) + if proc.returncode != 0: + raise Exception(f"Terraform {cmd[1]} failed: {proc.stderr}") + return True + + +def provision_infrastructure(name: str, cloud_provider: str, config: dict): + context = { + 'aws_region': config.get('aws_region', settings.AWS_REGION), + 'cluster_name': name, + 'eks_role_arn': config.get('eks_role_arn', 'arn:aws:iam::123456789012:role/EKSRole'), + 'state_bucket': config.get('state_bucket', settings.TERRAFORM_STATE_BUCKET), + 'lock_table': config.get('lock_table', settings.TERRAFORM_LOCK_TABLE), + } + with tempfile.TemporaryDirectory() as tmpdir: + tf_path = os.path.join(tmpdir, 'main.tf') + with open(tf_path, 'w') as f: + f.write(render_terraform_config(context)) + try: + run_terraform(tmpdir, 'apply') + return True + except Exception as e: + return str(e) + + +def destroy_infrastructure(name: str, cloud_provider: str, config: dict): + context = { + 'aws_region': config.get('aws_region', settings.AWS_REGION), + 'cluster_name': name, + 'eks_role_arn': config.get('eks_role_arn', 
'arn:aws:iam::123456789012:role/EKSRole'), + 'state_bucket': config.get('state_bucket', settings.TERRAFORM_STATE_BUCKET), + 'lock_table': config.get('lock_table', settings.TERRAFORM_LOCK_TABLE), + } + with tempfile.TemporaryDirectory() as tmpdir: + tf_path = os.path.join(tmpdir, 'main.tf') + with open(tf_path, 'w') as f: + f.write(render_terraform_config(context)) + try: + run_terraform(tmpdir, 'destroy') + return True + except Exception as e: + return str(e) diff --git a/services/ingress_service.py b/services/ingress_service.py new file mode 100644 index 0000000..6a53815 --- /dev/null +++ b/services/ingress_service.py @@ -0,0 +1,51 @@ +from kubernetes import client +import logging +from app.config import settings +from services.kubernetes_client import get_k8s_client + +logger = logging.getLogger(__name__) + +def create_ingress(namespace: str, name: str, service_name: str, service_port: int, host: str): + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: ingress %s/%s for host %s would be created", namespace, name, host) + return True + try: + _, _, _, networking_v1 = get_k8s_client() + ingress = client.V1Ingress( + api_version="networking.k8s.io/v1", + kind="Ingress", + metadata=client.V1ObjectMeta(name=name, namespace=namespace), + spec=client.V1IngressSpec( + rules=[ + client.V1IngressRule( + host=host, + http=client.V1HTTPIngressRuleValue( + paths=[ + client.V1HTTPIngressPath( + path="/", + path_type="Prefix", + backend=client.V1IngressBackend( + service=client.V1IngressServiceBackend( + name=service_name, + port=client.V1ServiceBackendPort(number=service_port) + ) + ) + ) + ] + ) + ) + ] + ) + ) + networking_v1.create_namespaced_ingress(namespace=namespace, body=ingress) + logger.info("Ingress '%s' created for service '%s' on host '%s'.", name, service_name, host) + return True + except client.rest.ApiException as e: + if e.status == 409: + logger.info("Ingress '%s' already exists.", name) + return True + logger.error("Error creating ingress: %s", e) + 
return False + except Exception as e: + logger.error("Unexpected error: %s", e) + return False diff --git a/services/k8s_service.py b/services/k8s_service.py new file mode 100644 index 0000000..d9d2f40 --- /dev/null +++ b/services/k8s_service.py @@ -0,0 +1,29 @@ +import logging + +from kubernetes.client import V1Namespace, V1ObjectMeta +from kubernetes.client.rest import ApiException + +from app.config import settings +from services.kubernetes_client import get_k8s_client + +logger = logging.getLogger(__name__) + +def create_namespace(name: str): + if settings.KUBERNETES_DRY_RUN: + logger.info("Dry run: namespace %s would be created", name) + return True + try: + v1, _, _, _ = get_k8s_client() + ns = V1Namespace(metadata=V1ObjectMeta(name=name)) + v1.create_namespace(ns) + logger.info("Namespace '%s' created.", name) + return True + except ApiException as e: + if e.status == 409: + logger.info("Namespace '%s' already exists.", name) + return True + logger.error("Error creating namespace: %s", e) + return False + except Exception as e: + logger.error("Unexpected error: %s", e) + return False diff --git a/services/kubernetes_client.py b/services/kubernetes_client.py new file mode 100644 index 0000000..53b28c1 --- /dev/null +++ b/services/kubernetes_client.py @@ -0,0 +1,20 @@ +import logging +import os + +from kubernetes import client, config + +logger = logging.getLogger(__name__) + + +def get_k8s_client(): + try: + if os.getenv("KUBERNETES_SERVICE_HOST"): + config.load_incluster_config() + logger.info("Loaded in-cluster Kubernetes config") + else: + config.load_kube_config() + logger.info("Loaded local kubeconfig") + return client.CoreV1Api(), client.AppsV1Api(), client.AutoscalingV1Api(), client.NetworkingV1Api() + except Exception: + logger.exception("Failed to load Kubernetes config") + raise diff --git a/services/monitoring_service.py b/services/monitoring_service.py new file mode 100644 index 0000000..759350a --- /dev/null +++ b/services/monitoring_service.py 
# --- services/monitoring_service.py ------------------------------------------

def get_cluster_health() -> dict:
    """Report cluster health based on the ``Ready`` condition of every node.

    Returns:
        ``{"status": "healthy", "mode": "dry-run"}`` in dry-run mode;
        ``{"status": "unhealthy"}`` as soon as one node has a ``Ready``
        condition whose status is not ``"True"``; ``{"status": "healthy"}``
        otherwise; ``{"status": "error", "detail": <message>}`` when the
        check itself fails. Nothing is raised.
    """
    if settings.KUBERNETES_DRY_RUN:
        return {"status": "healthy", "mode": "dry-run"}
    try:
        v1, _, _, _ = get_k8s_client()
        for node in v1.list_node().items:
            # status / conditions are optional fields and may be None; a node
            # that reports no conditions is skipped instead of raising
            # TypeError and turning the whole result into "error".
            node_status = node.status
            conditions = (node_status.conditions if node_status else None) or []
            for condition in conditions:
                if condition.type == "Ready" and condition.status != "True":
                    return {"status": "unhealthy"}
        return {"status": "healthy"}
    except Exception as e:
        logger.error("Error checking cluster health: %s", e)
        return {"status": "error", "detail": str(e)}


def get_pod_logs(namespace: str, pod: str) -> dict:
    """Fetch the logs of ``pod`` in ``namespace``.

    Args:
        namespace: Namespace the pod lives in.
        pod: Name of the pod.

    Returns:
        A dict with the keys ``pod``, ``namespace`` and ``logs`` in every
        mode. In dry-run mode ``logs`` is a fixed placeholder; on failure
        ``logs`` carries the error message (errors are logged, not raised).
    """
    if settings.KUBERNETES_DRY_RUN:
        return {"pod": pod, "namespace": namespace, "logs": "dry-run: Kubernetes API not called"}
    try:
        v1, _, _, _ = get_k8s_client()
        logs = v1.read_namespaced_pod_log(name=pod, namespace=namespace)
        # Same keys as the dry-run response so callers see one shape.
        return {"pod": pod, "namespace": namespace, "logs": logs}
    except Exception as e:
        logger.error("Error fetching logs for pod %s: %s", pod, e)
        return {"pod": pod, "namespace": namespace, "logs": str(e)}


# --- services/service_service.py ---------------------------------------------

def expose_service(namespace: str, name: str, port: int, target_port: int, type_: str = "ClusterIP") -> bool:
    """Create a Service that selects pods labelled ``app=<name>``.

    Args:
        namespace: Namespace the Service is created in.
        name: Name of the Service; also used as the ``app`` selector value.
        port: Port exposed by the Service.
        target_port: Port on the selected pods that traffic is forwarded to.
        type_: Kubernetes Service type, ``"ClusterIP"`` by default.

    Returns:
        True in dry-run mode, when the Service was created, or when the API
        answered 409 (it already exists); False on any other failure.
        Failures are logged, never raised.
    """
    if settings.KUBERNETES_DRY_RUN:
        logger.info("Dry run: service %s/%s would be exposed", namespace, name)
        return True
    try:
        v1, _, _, _ = get_k8s_client()
        spec = client.V1ServiceSpec(
            selector={"app": name},
            ports=[client.V1ServicePort(port=port, target_port=target_port)],
            type=type_,
        )
        service = client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(name=name, namespace=namespace),
            spec=spec,
        )
        v1.create_namespaced_service(namespace=namespace, body=service)
        logger.info("Service '%s' exposed on port %s in namespace '%s'.", name, port, namespace)
        return True
    except client.rest.ApiException as e:
        # 409 Conflict means the Service is already there; treated as success.
        if e.status == 409:
            logger.info("Service '%s' already exists.", name)
            return True
        logger.error("Error exposing service: %s", e)
        return False
    except Exception as e:
        # Anything that is not an API error is unexpected: keep the traceback.
        logger.exception("Unexpected error: %s", e)
        return False
# --- tests/conftest.py -------------------------------------------------------

@pytest.fixture(scope="module")
def client():
    """Yield a TestClient bound to the application, shared per test module."""
    with TestClient(app) as test_client:
        yield test_client


# --- tests/unit/test_auth.py -------------------------------------------------

def test_register(client):
    """Registering succeeds, or is rejected only because the user exists."""
    credentials = {"username": "testuser", "password": "testpass123"}
    resp = client.post("/auth/register", json=credentials)
    if resp.status_code != 400:
        assert resp.status_code == 200
    else:
        assert resp.json()["detail"] == "Username already registered"


def test_login(client):
    """A registered user can log in and receives an access token."""
    credentials = {"username": "testuser", "password": "testpass123"}
    # The outcome of the registration call is ignored here.
    client.post("/auth/register", json=credentials)
    resp = client.post("/auth/login", json=credentials)
    assert resp.status_code == 200
    assert "access_token" in resp.json()


# --- tests/unit/test_health.py -----------------------------------------------

def test_health(client):
    """The health endpoint answers 200 with status "ok"."""
    resp = client.get("/healthz")
    assert resp.status_code == 200
    assert resp.json()["status"] == "ok"


# --- tests/unit/test_namespace.py --------------------------------------------

def test_create_namespace(client):
    """An authenticated namespace-create request answers 200 or 500."""
    credentials = {"username": "namespacetest", "password": "testpass123"}
    client.post("/auth/register", json=credentials)
    token = client.post("/auth/login", json=credentials).json()["access_token"]
    resp = client.post(
        "/kubernetes/namespace/create",
        json={"name": "test-ns"},
        headers={"Authorization": f"Bearer {token}"},
    )
    assert resp.status_code in (200, 500)  # 500 if cluster not available