diff --git a/README.md b/README.md index 9e143d414..541a872f8 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ from upstream in the cluster: ```bash BUCKET= CLUSTER= -gsutil cp presto/presto.sh gs://${BUCKET}/ +gcloud storage cp presto/presto.sh gs://${BUCKET}/ gcloud dataproc clusters create ${CLUSTER} --initialization-actions gs://${BUCKET}/presto.sh ``` diff --git a/alluxio/alluxio.sh b/alluxio/alluxio.sh index 9d41820d4..3775c6dbf 100644 --- a/alluxio/alluxio.sh +++ b/alluxio/alluxio.sh @@ -41,7 +41,7 @@ download_file() { local -r uri="$1" if [[ "${uri}" == gs://* ]]; then - gsutil cp "${uri}" ./ + gcloud storage cp "${uri}" ./ else # TODO Add metadata header tag to the wget for filtering out in download metrics. wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}" diff --git a/beam/README.md b/beam/README.md index e03de8c27..0aecdd1a7 100644 --- a/beam/README.md +++ b/beam/README.md @@ -62,7 +62,7 @@ Then, upload the jar to a Cloud Storage path that clusters can access during initialization. 
```bash -gsutil cp \ +gcloud storage cp \ ./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \ /beam-runners-flink_2.11-job-server-latest-SNAPSHOT.jar ``` diff --git a/beam/beam.sh b/beam/beam.sh index 2ce1640bf..f3fe96145 100755 --- a/beam/beam.sh +++ b/beam/beam.sh @@ -40,7 +40,7 @@ function download_snapshot() { readonly snapshot_url="${1}" readonly protocol="$(echo "${snapshot_url}" | head -c5)" if [ "${protocol}" = "gs://" ]; then - gsutil cp "${snapshot_url}" "${LOCAL_JAR_NAME}" + gcloud storage cp "${snapshot_url}" "${LOCAL_JAR_NAME}" else curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}" fi diff --git a/conda/README.md b/conda/README.md index a4227dc65..59ec32b36 100644 --- a/conda/README.md +++ b/conda/README.md @@ -77,8 +77,8 @@ Where `create-my-cluster.sh` specifies a list of conda and/or pip packages to in ``` #!/usr/bin/env bash -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh @@ -100,9 +100,9 @@ Similarly, one can also specify a [conda environment yml file](https://github.co CONDA_ENV_YAML_GSC_LOC="gs://my-bucket/path/to/conda-environment.yml" CONDA_ENV_YAML_PATH="/root/conda-environment.yml" echo "Downloading conda environment at $CONDA_ENV_YAML_GSC_LOC to $CONDA_ENV_YAML_PATH ... " -gsutil -m cp -r $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . -gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . 
+gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh . +gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh . chmod 755 ./*conda*.sh diff --git a/connectors/connectors.sh b/connectors/connectors.sh index 22157dafa..ee985e27f 100755 --- a/connectors/connectors.sh +++ b/connectors/connectors.sh @@ -128,7 +128,7 @@ update_connector_url() { find "${vm_connectors_dir}/" -name "${pattern}" -delete - gsutil cp -P "${url}" "${vm_connectors_dir}/" + gcloud storage cp --preserve-posix "${url}" "${vm_connectors_dir}/" local -r jar_name=${url##*/} diff --git a/dask/README.md b/dask/README.md index 69d70738b..ec0f6909b 100644 --- a/dask/README.md +++ b/dask/README.md @@ -136,7 +136,7 @@ You can also `ssh` into the cluster and execute Dask jobs from Python files. To run jobs, you can either `scp` a file onto your cluster or use `gsutil` on the cluster to download the Python file. -`gcloud compute ssh --command="gsutil cp gs://path/to/file.py .; +`gcloud compute ssh --command="gcloud storage cp gs://path/to/file.py .; python file.py` ### Accessing Web UIs diff --git a/hbase/hbase.sh b/hbase/hbase.sh index 10724dab9..7325b17fd 100755 --- a/hbase/hbase.sh +++ b/hbase/hbase.sh @@ -223,7 +223,7 @@ EOF kadmin.local -q "addprinc -randkey hbase/${m}.${DOMAIN}@${REALM}" echo "Generating hbase keytab..." kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${m}.keytab hbase/${m}.${DOMAIN}" - gsutil cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ + gcloud storage cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \ "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${m}.keytab" done @@ -232,17 +232,17 @@ EOF kadmin.local -q "addprinc -randkey hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" echo "Generating hbase keytab..." 
kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}" -    gsutil cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \ +    gcloud storage cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \       "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab"   done    touch /tmp/_success -  gsutil cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" +  gcloud storage cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" fi  success=1 while [[ $success == "1" ]]; do   sleep 1   success=$( -    gsutil -q stat "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" +    gcloud storage objects describe "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" > /dev/null 2>&1 echo $? ) done @@ -255,7 +255,7 @@ EOF   fi    # Copy keytab to machine -  gsutil cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path +  gcloud storage cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path # Change owner of keytab to hbase with read only permissions if [ -f $hbase_keytab_path ]; then diff --git a/hive-llap/llap.sh b/hive-llap/llap.sh index 5009fb92a..b3af46a91 100644 --- a/hive-llap/llap.sh +++ b/hive-llap/llap.sh @@ -69,7 +69,7 @@ function download_init_actions() { # Download initialization actions locally. This will download the start_llap.sh file to the cluster for execution Check if metadata is supplied echo "downalod init actions supplied as metadata..." 
mkdir -p "${INIT_ACTIONS_DIR}" - gsutil cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" + gcloud storage cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}" chmod 700 "${INIT_ACTIONS_DIR}/start_llap.sh" } diff --git a/mlvm/mlvm.sh b/mlvm/mlvm.sh index 320edfdc3..3227ac102 100644 --- a/mlvm/mlvm.sh +++ b/mlvm/mlvm.sh @@ -97,9 +97,9 @@ function download_init_actions() { # Download initialization actions locally. mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask} - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" - gsutil -m rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/" + gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/" find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \; } @@ -167,7 +167,7 @@ function install_spark_nlp() { function install_connectors() { local -r url="gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${SPARK_BIGQUERY_VERSION}.jar" - gsutil cp "${url}" "${CONNECTORS_DIR}/" + gcloud storage cp "${url}" "${CONNECTORS_DIR}/" local -r jar_name=${url##*/}