Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added team_docs/Adeolu_Mary_Oshadare_Role.pdf
Binary file not shown.
Binary file added team_docs/Linda_Oraegbunam_Role.pdf
Binary file not shown.
320 changes: 308 additions & 12 deletions team_docs/generate_role_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,27 +174,42 @@ def create_adeolu_doc():
pdf.bullet("Build data validation and quality checks for incoming satellite imagery")
pdf.bullet("Manage the data/ directory structure (raw, processed, satellite)")
pdf.bullet("Create EDA notebooks for spatial data exploration and visualization")
pdf.ln(2)
pdf.ln(1)

pdf.subsection_title("Sprint Progress - April 2026")
pdf.bullet("DONE: band_mapping.py - analysis-specific band mapping from config.yaml")
pdf.bullet("DONE: gee_downloader.py - GEE tile download with metadata")
pdf.bullet("DONE: preprocessing.py - SCL-based cloud masking")
pdf.bullet("DONE: dataset.py - PyTorch Dataset and DataLoader")
pdf.bullet("DONE: augmentation.py - training/validation transforms")
pdf.bullet("DONE: synthetic.py - synthetic fallback tile generation")
pdf.bullet("PENDING: Real GEE tile download in inference pipeline (not just NDVI stats)")
pdf.bullet("PENDING: Analysis-specific band mapping enforcement in inference preprocessing")
pdf.bullet("PENDING: Cloud masking integration at inference time before model forward pass")
pdf.bullet("PENDING: Synthetic fallback guardrails - clearly label synthetic tiles in metadata")
pdf.ln(1)

# Codebase Ownership
pdf.section_title("Your Codebase Ownership")
pdf.body_text("You are the primary owner of the following files and directories:")
pdf.code_block(
"src/climatevision/data/ # PRIMARY OWNER - Entire data module\n"
" sentinel2.py # Sentinel-2 downloader & preprocessor\n"
" landsat.py # Landsat data loader\n"
" dataset.py # PyTorch Dataset classes\n"
" preprocess.py # Cloud masking, normalization\n"
" augmentation.py # Data augmentation pipeline\n"
" band_mapping.py # Analysis-specific band mapping (ACTIVE)\n"
" gee_downloader.py # GEE tile downloader (ACTIVE)\n"
" preprocessing.py # Cloud masking, normalization (ACTIVE)\n"
" dataset.py # PyTorch Dataset & DataLoader (ACTIVE)\n"
" augmentation.py # Data augmentation pipeline (ACTIVE)\n"
" synthetic.py # Synthetic tile fallback (ACTIVE)\n"
" __init__.py # Module exports\n"
"\n"
"src/climatevision/utils/\n"
" geospatial.py # CO-OWNER - Geospatial utilities\n"
" visualization.py # CO-OWNER - Spatial visualizations\n"
"\n"
"scripts/\n"
" setup_gee.py # Google Earth Engine setup\n"
" download_data.py # Automated satellite data download\n"
" prepare_data.py # Automated satellite data download\n"
"\n"
"config.yaml # Band mapping & analysis type config\n"
"\n"
"data/ # Data directory structure\n"
" raw/ | processed/ | satellite/\n"
Expand Down Expand Up @@ -288,7 +303,41 @@ def create_adeolu_doc():
"# Follow browser prompt to authorise your GEE service account"
)

pdf.subsection_title("Step 2: Ingest Satellite Data")
pdf.subsection_title("Step 2: Analysis-Specific Band Mapping")
pdf.body_text("config.yaml defines bands per analysis type. Your band_mapping.py reads this:")
pdf.code_block(
"# config.yaml analysis type band definitions\n"
"deforestation: [B04, B03, B02, B08] # Red, Green, Blue, NIR -> 4ch -> 2cl\n"
"ice_melting: [B02, B03, B04, B11] # Blue, Green, Red, SWIR -> 4ch -> 3cl\n"
"flooding: [B03, B08, B11] # Green, NIR, SWIR -> 3ch -> 3cl\n"
"\n"
"# Python usage\n"
"from climatevision.data.band_mapping import get_bands_for_analysis, get_model_config\n"
"\n"
"bands = get_bands_for_analysis('flooding')\n"
"# -> ['B03', 'B08', 'B11']\n"
"\n"
"cfg = get_model_config('flooding')\n"
"# -> {'in_channels': 3, 'num_classes': 3, 'weights': 'models/unet_flood.pth'}"
)
pdf.ln(2)

pdf.subsection_title("Step 3: Cloud Masking with SCL Band")
pdf.body_text("Sentinel-2 Scene Classification Layer (SCL) pixel values:")
pdf.code_block(
"# SCL band values\n"
"KEEP = [4, 5, 6, 7, 10] # vegetation, bare soil, water, low cloud, thin cirrus\n"
"MASK = [0, 1, 2, 3, 8, 9] # NO_DATA, SATURATED, DARK, SHADOW, med/high cloud\n"
"\n"
"# Usage in preprocessing.py\n"
"from climatevision.data.preprocessing import apply_scl_cloud_mask\n"
"\n"
"clean_image = apply_scl_cloud_mask(image, scl_band)\n"
"# image: (C, H, W) scl_band: (H, W) uint8"
)
pdf.ln(2)

pdf.subsection_title("Step 4: Ingest Satellite Data")
pdf.code_block(
"# Download Sentinel-2 imagery for a bounding box and date range\n"
"python scripts/prepare_data.py \\\n"
Expand All @@ -301,7 +350,7 @@ def create_adeolu_doc():
"# Output: GeoTIFF tiles saved to data/raw/amazon_2023/"
)

pdf.subsection_title("Step 3: Preprocess & Build Dataset")
pdf.subsection_title("Step 5: Preprocess & Build Dataset")
pdf.code_block(
"# Run cloud masking, normalization, and 256x256 tiling\n"
"python - <<'EOF'\n"
Expand All @@ -323,7 +372,7 @@ def create_adeolu_doc():
"EOF"
)

pdf.subsection_title("Step 4: Compute Spectral Indices")
pdf.subsection_title("Step 6: Compute Spectral Indices")
pdf.code_block(
"# Calculate NDVI, EVI, and moisture indices from raw bands\n"
"python - <<'EOF'\n"
Expand All @@ -336,7 +385,7 @@ def create_adeolu_doc():
"EOF"
)

pdf.subsection_title("Step 5: Commit & Push Your Work")
pdf.subsection_title("Step 7: Commit & Push Your Work")
pdf.code_block(
"# Switch to your git identity\n"
"source team_docs/switch_user.sh adeolu\n"
Expand Down Expand Up @@ -2310,3 +2359,250 @@ def create_gold_doc():
create_paul_doc()
create_gold_doc()
print(f"\nAll 8 role documents generated in: {OUTPUT_DIR}")

def create_linda_doc():
    """Generate the role document PDF for Linda Oraegbunam.

    Builds a ``RoleDoc`` page by page (title block, role fit, responsibilities,
    sprint progress, codebase ownership, 3-month timeline, git workflow,
    collaborators and a step-by-step code pipeline), writes it to
    ``OUTPUT_DIR/Linda_Oraegbunam_Role.pdf`` and prints a confirmation line.

    NOTE(review): in the visible diff this function is defined after the
    ``create_*_doc()`` calls and no call to it is visible there - confirm it
    is actually invoked and that the "All 8 role documents" message is still
    accurate.
    """
    doc = RoleDoc("Linda Oraegbunam")
    doc.add_page()

    # --- Title block: name in bold, role subtitle in grey -------------------
    doc.set_font("Helvetica", "B", 18)
    doc.cell(0, 10, "Linda Oraegbunam", align="C", new_x="LMARGIN", new_y="NEXT")
    doc.set_font("Helvetica", "", 11)
    doc.set_text_color(100, 100, 100)
    doc.cell(0, 7, "Responsible AI & Model Governance Lead", align="C", new_x="LMARGIN", new_y="NEXT")
    doc.set_text_color(0, 0, 0)  # back to black for the rest of the document
    doc.ln(5)

    # --- Key facts -----------------------------------------------------------
    key_facts = [
        ("GitHub", "@obielin"),
        ("Access Level", "Maintainer"),
        ("Reports To", "@Goldokpa (Project Owner)"),
        ("Project Duration", "3 Months"),
    ]
    for label, value in key_facts:
        doc.key_value(label, value)
    doc.ln(3)

    # --- How the role fits ---------------------------------------------------
    doc.section_title("How This Role Fits You")
    fit_paragraphs = [
        (
            "Your FRSS fellowship, peer-reviewed publications in AI ethics, and open-source security tooling (skillguard) "
            "give you a governance lens that no one else on this team has. While others build models and pipelines, "
            "you ensure the outputs are trustworthy, explainable, and secure enough for NGOs and government agencies to act on."
        ),
        (
            "Your skillguard project - detecting prompt injection and supply chain attacks in agentic AI - maps directly "
            "to securing ClimateVision's inference pipeline. Your agentsync work on multi-agent orchestration applies to "
            "building autonomous monitoring agents that track deforestation regions without human intervention."
        ),
        (
            "Your HMRC experience operationalising 30+ governed KPIs and your Readrly.io MLOps work with fairness/bias "
            "evaluation frameworks mean you understand how to build auditability into production systems. That is exactly "
            "what ClimateVision needs: every prediction must be explainable, every model version auditable, and every "
            "regional disparity measurable."
        ),
    ]
    for paragraph in fit_paragraphs:
        doc.body_text(paragraph)
    doc.ln(2)

    # --- Role and responsibilities ------------------------------------------
    doc.section_title("Your Role on ClimateVision")
    doc.body_text(
        "You own the responsible AI, explainability, and governance layer - everything that turns black-box "
        "segmentation outputs into trustworthy, actionable intelligence for conservation stakeholders."
    )
    doc.subsection_title("Core Responsibilities")
    responsibilities = [
        "Build SHAP-based explainability for U-Net segmentation predictions - explain WHY each pixel was classified",
        "Implement bias and fairness audits across geographic regions (Amazon, Congo, Southeast Asia)",
        "Create anomaly detection for inference inputs and outputs - flag unusual predictions for human review",
        "Design model governance framework: version tracking, audit trails, compliance documentation",
        "Conduct AI security audit of FastAPI inference pipeline (OWASP Agentic Top 10, input validation)",
        "Build automated impact reports using LLM/NLP pipelines from model outputs",
        "Integrate responsible AI evaluation into CI/CD - automated fairness checks on every model release",
    ]
    for item in responsibilities:
        doc.bullet(item)
    doc.ln(2)

    doc.subsection_title("Sprint Progress - April 2026")
    sprint_items = [
        "PENDING: SHAP explainability integration for segmentation masks",
        "PENDING: Regional bias audit framework",
        "PENDING: Anomaly detection for inference pipeline",
        "PENDING: Model governance and audit trail system",
        "PENDING: AI security audit of API endpoints",
        "PENDING: LLM-based automated impact report generation",
    ]
    for item in sprint_items:
        doc.bullet(item)
    doc.ln(1)

    # --- Codebase ownership --------------------------------------------------
    doc.section_title("Your Codebase Ownership")
    doc.body_text("You are the primary owner of the following files and directories:")
    ownership_tree = (
        "src/climatevision/governance/ # PRIMARY OWNER - New module\n"
        " explainability.py # SHAP explainability for segmentation\n"
        " bias_audit.py # Regional fairness evaluation\n"
        " anomaly_detector.py # Input/output anomaly detection\n"
        " audit_logger.py # Model audit trails\n"
        " __init__.py\n"
        "\n"
        "src/climatevision/security/ # PRIMARY OWNER - New module\n"
        " api_security.py # API input validation & sanitization\n"
        " pipeline_guard.py # Inference pipeline security checks\n"
        " __init__.py\n"
        "\n"
        "src/climatevision/reports/ # PRIMARY OWNER - New module\n"
        " llm_reporter.py # LLM-based impact report generation\n"
        " __init__.py\n"
        "\n"
        "scripts/\n"
        " audit_model.py # Run full governance audit on a model\n"
        " security_scan.py # OWASP-style scan of API endpoints\n"
        "\n"
        "notebooks/\n"
        " 06_explainability.ipynb # SHAP visualization notebook\n"
        " 07_bias_audit.ipynb # Regional fairness analysis\n"
        " 08_security_audit.ipynb # API security assessment"
    )
    doc.code_block(ownership_tree)
    doc.ln(2)

    # --- Delivery timeline: one (heading, [(period, tasks), ...]) per month --
    doc.section_title("Your 3-Month Delivery Timeline")
    timeline = [
        ("MONTH 1: Explainability & Bias (Weeks 1-4)", [
            ("Week 1-2: SHAP Explainability", [
                "Integrate SHAP (DeepExplainer) into U-Net forward pass for pixel-level attribution",
                "Build explainability.py - generate heatmaps showing which bands/regions drove predictions",
                "Create 06_explainability.ipynb with visual examples across all 3 analysis types",
                "Add /api/explain endpoint returning SHAP values for a given prediction",
            ]),
            ("Week 3-4: Bias & Fairness Audit", [
                "Build bias_audit.py - evaluate model performance disparity across regions",
                "Implement demographic parity and equalized odds metrics for geographic splits",
                "Create automated bias report generation (JSON + PDF)",
                "Write 07_bias_audit.ipynb demonstrating fairness evaluation",
            ]),
        ]),
        ("MONTH 2: Security & Anomaly Detection (Weeks 5-8)", [
            ("Week 5-6: AI Security Audit", [
                "Map ClimateVision API to OWASP Agentic Top 10 and MITRE ATLAS frameworks",
                "Build api_security.py - input sanitization, rate limiting, payload validation",
                "Implement pipeline_guard.py - detect adversarial inputs to inference pipeline",
                "Run security_scan.py against all endpoints and document findings",
            ]),
            ("Week 7-8: Anomaly Detection", [
                "Build anomaly_detector.py - isolation forest + statistical checks for predictions",
                "Flag predictions with confidence outside historical norms",
                "Create anomaly alert integration with Olufemi's alert system",
                "Add /api/anomalies endpoint for querying flagged predictions",
            ]),
        ]),
        ("MONTH 3: Governance & Reporting (Weeks 9-12)", [
            ("Week 9-10: Model Governance", [
                "Build audit_logger.py - immutable audit trail for every model version and prediction",
                "Integrate with MLflow for governance metadata tagging",
                "Create model card generator (automated from training config + evaluation metrics)",
                "Implement CI gate: block releases that fail fairness or security thresholds",
            ]),
            ("Week 11-12: LLM Impact Reports & Documentation", [
                "Build llm_reporter.py - generate natural-language impact reports from prediction data",
                "Integrate with Francis's carbon analytics for comprehensive stakeholder reports",
                "Write governance documentation and responsible AI deployment guide",
                "Final security sign-off and production readiness review",
            ]),
        ]),
    ]
    for month_heading, fortnights in timeline:
        doc.month_block(month_heading, fortnights)

    # --- Git workflow --------------------------------------------------------
    doc.section_title("Your Git Workflow")
    doc.code_block(
        "# Create feature branches from develop\n"
        "git checkout develop\n"
        "git pull origin develop\n"
        "git checkout -b feature/governance-shap-explainability\n"
        "\n"
        "# Your branch naming convention:\n"
        "feature/governance-* (governance & explainability features)\n"
        "feature/security-* (security & audit features)\n"
        "feature/reports-* (LLM report generation)\n"
        "fix/governance-* (bug fixes)"
    )
    doc.body_text(
        "All PRs go to the develop branch. Tag @edoh-Onuh for ML architecture reviews, "
        "Olufemi for API integration, and @franchaise for analytics alignment."
    )
    doc.ln(3)

    # --- Collaborators -------------------------------------------------------
    doc.section_title("Your Key Collaborators")
    collaborators = [
        "@edoh-Onuh (ML Lead) - His models are what you explain and audit. Coordinate on model architectures, checkpoint formats, and prediction outputs.",
        "Olufemi Taiwo (API Lead) - Your /api/explain and /api/anomalies endpoints extend his API. Align on schemas, auth, and rate limiting.",
        "@franchaise (Analytics Lead) - His carbon metrics feed your LLM impact reports. Coordinate on data contracts and report templates.",
        "@cutewizzy11 (Full-Stack) - Frontend visualisations of SHAP heatmaps and anomaly dashboards need his React expertise.",
    ]
    for person in collaborators:
        doc.bullet(person)

    # --- Code pipeline: each step is a subsection title plus a code block ----
    doc.section_title("Your Code Pipeline")
    doc.body_text("Your pipeline covers explainability generation, bias evaluation, security scanning, and governance reporting.")

    pipeline_steps = [
        (
            "Step 1: Environment Setup",
            (
                "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
                "cd ClimateVision\n"
                "pip install -r requirements.txt\n"
                "pip install shap mlflow transformers torch\n"
                "\n"
                "# Verify dependencies\n"
                "python -c \"import shap, transformers, torch; print('Governance stack ready')\""
            ),
        ),
        (
            "Step 2: Generate SHAP Explanations",
            (
                "# Run SHAP explainability on a prediction mask\n"
                "python - <<'EOF'\n"
                "from climatevision.governance.explainability import explain_prediction\n"
                "explanation = explain_prediction(\n"
                " model_path='models/unet_deforestation.pth',\n"
                " image_path='data/test/amazon_tile.tif',\n"
                " analysis_type='deforestation'\n"
                ")\n"
                "print(f\"Top contributing bands: {explanation['top_bands']}\")\n"
                "print(f\"Spatial attribution saved to: {explanation['heatmap_path']}\")\n"
                "EOF"
            ),
        ),
        (
            "Step 3: Run Bias Audit",
            (
                "# Evaluate fairness across geographic regions\n"
                "python - <<'EOF'\n"
                "from climatevision.governance.bias_audit import run_bias_audit\n"
                "report = run_bias_audit(\n"
                " model_path='models/unet_deforestation.pth',\n"
                " regions=['amazon', 'congo', 'southeast_asia'],\n"
                " metric='equalized_odds'\n"
                ")\n"
                "print(f\"Fairness score: {report['score']:.3f}\")\n"
                "print(f\"Disparity detected: {report['disparity_regions']}\")\n"
                "EOF"
            ),
        ),
        (
            "Step 4: Security Scan",
            (
                "# Run OWASP-style scan on API endpoints\n"
                "python scripts/security_scan.py --target http://localhost:8000\n"
                "\n"
                "# Output: security_report.json with findings and remediation"
            ),
        ),
        (
            "Step 5: Generate LLM Impact Report",
            (
                "# Generate natural-language report from prediction data\n"
                "python - <<'EOF'\n"
                "from climatevision.reports.llm_reporter import generate_impact_report\n"
                "report = generate_impact_report(\n"
                " run_id=12345,\n"
                " region='amazon',\n"
                " include_shap=True,\n"
                " include_carbon=True\n"
                ")\n"
                "print(report['summary'])\n"
                "# -> 'The model detected 1,247 hectares of deforestation with 94% confidence...'\n"
                "EOF"
            ),
        ),
        (
            "Step 6: Commit & Push Your Work",
            (
                "# Switch to your git identity (create one if needed)\n"
                "git config user.name 'Linda Oraegbunam'\n"
                "git config user.email 'linda@placeholder.com'\n"
                "\n"
                "git checkout develop && git pull origin develop\n"
                "git checkout -b feature/governance-shap-explainability\n"
                "\n"
                "git add src/climatevision/governance/\n"
                "git add notebooks/06_explainability.ipynb\n"
                "git commit -m \"feat(governance): add SHAP explainability for segmentation predictions\"\n"
                "\n"
                "git push origin feature/governance-shap-explainability"
            ),
        ),
    ]
    for step_title, step_code in pipeline_steps:
        doc.subsection_title(step_title)
        doc.code_block(step_code)

    # --- Write the PDF and report success -----------------------------------
    target_path = os.path.join(OUTPUT_DIR, "Linda_Oraegbunam_Role.pdf")
    doc.output(target_path)
    print("Created: Linda_Oraegbunam_Role.pdf")
Loading