Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion infra/dcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ terraform {
source = "hashicorp/random"
version = ">= 3.0"
}
time = {
source = "hashicorp/time"
version = ">= 0.7.0"
}
}
}

Expand Down Expand Up @@ -56,6 +60,14 @@ resource "google_project_service" "apis" {
disable_on_destroy = false
}

resource "time_sleep" "wait_for_foundation" {
create_duration = "90s"

depends_on = [
google_project_service.apis
]
}

locals {
global_config = {
project_id = var.project_id
Expand Down Expand Up @@ -149,5 +161,5 @@ module "stack" {
redis_config = local.redis_config
ingestion_config = local.ingestion_config

depends_on = [google_project_service.apis]
foundation_dependency = time_sleep.wait_for_foundation.id
}
22 changes: 5 additions & 17 deletions infra/dcp/modules/datacommons_services/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,12 @@ locals {
resource "google_service_account" "serving_sa" {
account_id = "${local.name_prefix}dc-srvs-sa"
display_name = "Data Commons Serving Service Account"
}

resource "google_project_iam_member" "serving_sa_roles" {
for_each = setsubtract(toset([
"roles/compute.networkViewer",
"roles/redis.editor",
"roles/storage.objectViewer",
"roles/vpcaccess.user",
# TODO: Review this overly broad permission.
"roles/iam.serviceAccountUser",
"roles/secretmanager.secretAccessor",
"roles/spanner.databaseUser",
"roles/workflows.invoker"
]), var.use_spanner ? [] : ["roles/spanner.databaseUser"])

project = var.project_id
member = "serviceAccount:${google_service_account.serving_sa.email}"
role = each.value
depends_on = [var.foundation_dependency]
}



resource "google_cloud_run_v2_service" "dc_web_service" {
name = "${local.name_prefix}dc-datacommons-service"
location = var.region
Expand Down Expand Up @@ -66,6 +52,8 @@ resource "google_cloud_run_v2_service" "dc_web_service" {
value = var.project_id
}



dynamic "env" {
for_each = var.secret_env_vars
content {
Expand Down
8 changes: 8 additions & 0 deletions infra/dcp/modules/datacommons_services/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,11 @@ variable "secret_env_vars" {
version = string
}))
}

variable "foundation_dependency" {
description = "An artificial dependency to delay resource creation until APIs are ready."
type = any
default = null
}


16 changes: 4 additions & 12 deletions infra/dcp/modules/ingestion/dataflow/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,15 @@ resource "google_service_account" "dataflow_sa" {
count = var.deploy ? 1 : 0
account_id = "${local.name_prefix}dc-ing-df-sa"
display_name = "Data Commons Ingestion Dataflow SA"
}

resource "google_project_iam_member" "ingestion_spanner_user" {
count = var.deploy && var.use_spanner ? 1 : 0
project = var.project_id
role = "roles/spanner.databaseUser"
member = "serviceAccount:${google_service_account.dataflow_sa[0].email}"
depends_on = [var.foundation_dependency]
}



resource "google_project_iam_member" "dataflow_worker" {
count = var.deploy ? 1 : 0
project = var.project_id
role = "roles/dataflow.worker"
member = "serviceAccount:${google_service_account.dataflow_sa[0].email}"
}




# This is only needed to trigger the services restart to pick up the GCS embeddings change

Expand Down
6 changes: 6 additions & 0 deletions infra/dcp/modules/ingestion/dataflow/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ variable "use_spanner" {
default = true
}

variable "foundation_dependency" {
description = "An artificial dependency to delay resource creation until APIs are ready."
type = any
default = null
}

17 changes: 2 additions & 15 deletions infra/dcp/modules/ingestion/helper_service/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,7 @@ resource "google_cloud_run_v2_service" "ingestion_helper" {
}
}

resource "google_project_iam_member" "helper_spanner_user" {
count = var.deploy && var.use_spanner ? 1 : 0
project = var.project_id
role = "roles/spanner.databaseUser"
member = "serviceAccount:${google_service_account.helper_sa[0].email}"
}



resource "google_storage_bucket_iam_member" "helper_bucket_access" {
Expand All @@ -97,15 +92,7 @@ resource "google_storage_bucket_iam_member" "helper_bucket_access" {
member = "serviceAccount:${google_service_account.helper_sa[0].email}"
}

resource "google_project_iam_member" "helper_bq_roles" {
for_each = toset(var.deploy && var.enable_bigquery_postprocessing ? [
"roles/bigquery.dataEditor",
"roles/bigquery.jobUser"
] : [])
project = var.project_id
role = each.value
member = "serviceAccount:${google_service_account.helper_sa[0].email}"
}


resource "google_bigquery_connection_iam_member" "helper_connection_user" {
count = var.deploy && var.enable_bigquery_connection ? 1 : 0
Expand Down
10 changes: 5 additions & 5 deletions infra/dcp/modules/ingestion/preprocessing_job/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ locals {
resource "google_service_account" "preprocessing_sa" {
account_id = "${local.name_prefix}dc-ing-pre-sa"
display_name = "Data Commons Ingestion Preprocessing SA"

depends_on = [var.foundation_dependency]
}

resource "google_cloud_run_v2_job" "dc_data_job" {
Expand Down Expand Up @@ -44,6 +46,8 @@ resource "google_cloud_run_v2_job" "dc_data_job" {
}
}



env {
name = "GCS_BUCKET"
value = var.bucket_name
Expand Down Expand Up @@ -118,10 +122,6 @@ resource "google_secret_manager_secret_iam_member" "preprocessing_maps_key_acces
member = "serviceAccount:${google_service_account.preprocessing_sa.email}"
}

resource "google_project_iam_member" "preprocessing_workflow_invoker" {
project = var.project_id
role = "roles/workflows.invoker"
member = "serviceAccount:${google_service_account.preprocessing_sa.email}"
}



8 changes: 8 additions & 0 deletions infra/dcp/modules/ingestion/preprocessing_job/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,11 @@ variable "maps_api_key_secret_id" {
description = "Secret ID for Maps API key"
default = ""
}

variable "foundation_dependency" {
description = "An artificial dependency to delay resource creation until APIs are ready."
type = any
default = null
}


8 changes: 2 additions & 6 deletions infra/dcp/modules/ingestion/workflow/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ resource "google_workflows_workflow" "ingestion_orchestrator" {
- init:
assign:
- project_id: '${var.project_id}'

- workflow_id: '$${sys.get_env("GOOGLE_CLOUD_WORKFLOW_EXECUTION_ID")}'
- version: '$${"version-" + string(int(sys.now()))}'
- bucket_name: '$${text.split(input.tempLocation, "/")[2]}'
Expand Down Expand Up @@ -214,9 +215,4 @@ resource "google_cloud_run_v2_service_iam_member" "helper_invoker" {
member = "serviceAccount:${google_service_account.workflow_sa[0].email}"
}

resource "google_project_iam_member" "workflow_run_viewer" {
count = var.deploy ? 1 : 0
project = var.project_id
role = "roles/run.viewer"
member = "serviceAccount:${google_service_account.workflow_sa[0].email}"
}

2 changes: 2 additions & 0 deletions infra/dcp/modules/ingestion/workflow/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,5 @@ variable "enable_redis_cache_clearing" {
description = "Flag to enable Redis cache clearing"
default = false
}


9 changes: 3 additions & 6 deletions infra/dcp/modules/spanner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ resource "google_spanner_instance" "main" {
processing_units = var.processing_units
force_destroy = !var.stateful_deletion_protection
edition = "ENTERPRISE"

depends_on = [var.foundation_dependency]
}

resource "google_spanner_database" "database" {
Expand Down Expand Up @@ -51,12 +53,7 @@ resource "google_spanner_database_iam_member" "spanner_reader" {
member = "serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}"
}

resource "google_project_iam_member" "bq_sa_spanner_viewer" {
count = var.enable_bigquery_connection ? 1 : 0
project = var.project_id
role = "roles/spanner.viewer"
member = "serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}"
}



# Create the BigQuery Reservation for Federation queries
Expand Down
6 changes: 6 additions & 0 deletions infra/dcp/modules/spanner/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,9 @@ variable "bigquery_reservation_max_slots" {
description = "Max slots for BigQuery reservation autoscale"
default = 400
}

variable "foundation_dependency" {
description = "An artificial dependency to delay resource creation until APIs are ready."
type = any
default = null
}
81 changes: 81 additions & 0 deletions infra/dcp/modules/stack/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ module "spanner" {
create_bigquery_reservation = var.spanner_config.create_bigquery_reservation
bigquery_reservation_slot_capacity = var.spanner_config.bigquery_reservation_slot_capacity
bigquery_reservation_max_slots = var.spanner_config.bigquery_reservation_max_slots
foundation_dependency = var.foundation_dependency
}


Expand All @@ -114,6 +115,8 @@ module "storage" {
# Shared vars
project_id = var.global.project_id
namespace = var.global.namespace

foundation_dependency = var.foundation_dependency
}

module "ingestion_preprocessing_job" {
Expand All @@ -138,6 +141,7 @@ module "ingestion_preprocessing_job" {
secret_env_vars = local.datacommons_services_secrets
dc_api_key_secret_id = module.auth.dc_api_key_secret_id
maps_api_key_secret_id = module.auth.maps_api_key_secret_id
foundation_dependency = var.foundation_dependency

depends_on = [module.auth]
}
Expand All @@ -150,6 +154,7 @@ module "ingestion_dataflow" {
namespace = var.global.namespace
ingestion_bucket_name = module.storage.artifacts_bucket_name
use_spanner = var.spanner_config.enable
foundation_dependency = var.foundation_dependency
}


Expand Down Expand Up @@ -249,6 +254,7 @@ module "datacommons_services" {
use_spanner = var.spanner_config.enable
env_vars = local.cloud_run_shared_env_variables
secret_env_vars = local.datacommons_services_secrets
foundation_dependency = var.foundation_dependency

depends_on = [module.ingestion_preprocessing_job]
}
Expand Down Expand Up @@ -352,6 +358,81 @@ resource "google_cloud_run_v2_service_iam_member" "workflow_serving_developer" {
member = "serviceAccount:${module.ingestion_workflow.service_account_email}"
}

data "google_bigquery_default_service_account" "bq_sa" {
project = var.global.project_id
}
Comment on lines +361 to +363

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The google_bigquery_default_service_account data source is evaluated during the Terraform plan phase. If the BigQuery API (bigquery.googleapis.com) is not already enabled in the target GCP project (which is common during initial bootstrapping), this data source will fail to evaluate, blocking the plan.\n\nTo prevent this, add an explicit dependency on var.foundation_dependency (which waits for the APIs to be enabled) to defer the data source evaluation until the apply phase.

data "google_bigquery_default_service_account" "bq_sa" {
  project = var.global.project_id

  depends_on = [var.foundation_dependency]
}


resource "google_project_iam_member" "bq_sa_spanner_viewer" {
count = var.spanner_config.enable && var.spanner_config.enable_bigquery_connection ? 1 : 0
project = var.global.project_id
role = "roles/spanner.viewer"
member = "serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}"
}

resource "google_project_iam_member" "serving_sa_roles" {
for_each = var.datacommons_services_config.enable ? setsubtract(toset([
"roles/compute.networkViewer",
"roles/redis.editor",
"roles/storage.objectViewer",
"roles/vpcaccess.user",
"roles/iam.serviceAccountUser",
"roles/secretmanager.secretAccessor",
"roles/spanner.databaseUser",
"roles/workflows.invoker"
]), var.spanner_config.enable ? [] : ["roles/spanner.databaseUser"]) : []

project = var.global.project_id
member = "serviceAccount:${module.datacommons_services[0].service_account_email}"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Referencing module.datacommons_services[0] directly can cause an evaluation error (index out of bounds / empty tuple) during plan or apply if var.datacommons_services_config.enable is false (meaning the module's count is 0).\n\nUsing the one() function with the splat operator [*] is a safer and more idiomatic way to handle conditional modules in Terraform, as it safely returns null if the list is empty.

  member  = "serviceAccount:${one(module.datacommons_services[*].service_account_email)}"

role = each.value
}

resource "google_project_iam_member" "helper_spanner_user" {
count = var.ingestion_config.enable_ingestion && var.spanner_config.enable ? 1 : 0
project = var.global.project_id
role = "roles/spanner.databaseUser"
member = "serviceAccount:${module.ingestion_helper_service.service_account_email}"
}

resource "google_project_iam_member" "helper_bq_roles" {
for_each = toset(var.ingestion_config.enable_ingestion && var.ingestion_config.workflow_enable_bigquery_postprocessing ? [
"roles/bigquery.dataEditor",
"roles/bigquery.jobUser"
] : [])
project = var.global.project_id
role = each.value
member = "serviceAccount:${module.ingestion_helper_service.service_account_email}"
}
Comment on lines +396 to +404

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The roles/bigquery.dataEditor role is granted at the project level for the ingestion helper service account. To adhere to the principle of least privilege, BigQuery data editor roles should be granted at the dataset level rather than the project level. Project-level access should be restricted to roles like roles/bigquery.jobUser which are required to run jobs in the project.\n\nPlease remove roles/bigquery.dataEditor from the project-level IAM binding and instead grant it at the dataset level using google_bigquery_dataset_iam_member.

resource "google_project_iam_member" "helper_bq_roles" {
  for_each = toset(var.ingestion_config.enable_ingestion && var.ingestion_config.workflow_enable_bigquery_postprocessing ? [
    "roles/bigquery.jobUser"
  ] : [])
  project = var.global.project_id
  role    = each.value
  member  = "serviceAccount:${module.ingestion_helper_service.service_account_email}"
}
References
  1. Grant BigQuery roles, such as 'roles/bigquery.dataEditor', at the dataset level rather than the project level to adhere to the principle of least privilege.


resource "google_project_iam_member" "workflow_run_viewer" {
count = var.ingestion_config.enable_ingestion ? 1 : 0
project = var.global.project_id
role = "roles/run.viewer"
member = "serviceAccount:${module.ingestion_workflow.service_account_email}"
}

resource "google_project_iam_member" "preprocessing_workflow_invoker" {
count = var.ingestion_config.enable_ingestion ? 1 : 0
project = var.global.project_id
role = "roles/workflows.invoker"
member = "serviceAccount:${module.ingestion_preprocessing_job[0].service_account_email}"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Referencing module.ingestion_preprocessing_job[0] directly can cause an evaluation error (index out of bounds / empty tuple) during plan or apply if var.ingestion_config.enable_ingestion is false (meaning the module's count is 0).\n\nUsing the one() function with the splat operator [*] is a safer and more idiomatic way to handle conditional modules in Terraform, as it safely returns null if the list is empty.

  member  = "serviceAccount:${one(module.ingestion_preprocessing_job[*].service_account_email)}"

}

resource "google_project_iam_member" "ingestion_spanner_user" {
count = var.ingestion_config.enable_ingestion && var.spanner_config.enable ? 1 : 0
project = var.global.project_id
role = "roles/spanner.databaseUser"
member = "serviceAccount:${module.ingestion_dataflow.service_account_email}"
}

resource "google_project_iam_member" "dataflow_worker" {
count = var.ingestion_config.enable_ingestion ? 1 : 0
project = var.global.project_id
role = "roles/dataflow.worker"
member = "serviceAccount:${module.ingestion_dataflow.service_account_email}"
}






Expand Down
6 changes: 6 additions & 0 deletions infra/dcp/modules/stack/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,9 @@ variable "ingestion_config" {
helper_service_image = string
})
}

variable "foundation_dependency" {
description = "An artificial dependency to delay resource creation until APIs are ready."
type = any
default = null
}
2 changes: 2 additions & 0 deletions infra/dcp/modules/storage/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@ resource "google_storage_bucket" "artifacts_bucket" {
location = var.region
uniform_bucket_level_access = true
force_destroy = !var.stateful_deletion_protection

depends_on = [var.foundation_dependency]
}

Loading
Loading