Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public class BuiltInMetricsConstant {
ImmutableList.of(EEF_FALLBACK_COUNT_NAME, EEF_CALL_STATUS_NAME);

public static final String SPANNER_RESOURCE_TYPE = "spanner_instance_client";
public static final String UNDEFINED_PROJECT_ID = "undefined-project";

public static final AttributeKey<String> PROJECT_ID_KEY = AttributeKey.stringKey("project_id");
public static final AttributeKey<String> INSTANCE_ID_KEY = AttributeKey.stringKey("instance_id");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.google.cloud.opentelemetry.detection.AttributeKeys;
import com.google.cloud.opentelemetry.detection.DetectedPlatform;
import com.google.cloud.opentelemetry.detection.GCPPlatformDetector;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
Expand Down Expand Up @@ -75,10 +76,13 @@ final class BuiltInMetricsProvider {
private static final String default_location = "global";

private OpenTelemetry openTelemetry;
private String projectId;
private boolean mismatchedProjectIdLogged;
private Thread shutdownHook;

private BuiltInMetricsProvider() {}

OpenTelemetry getOrCreateOpenTelemetry(
synchronized OpenTelemetry getOrCreateOpenTelemetry(
String projectId,
@Nullable Credentials credentials,
@Nullable String monitoringHost,
Expand All @@ -88,12 +92,13 @@ OpenTelemetry getOrCreateOpenTelemetry(
SdkMeterProviderBuilder sdkMeterProviderBuilder = SdkMeterProvider.builder();
BuiltInMetricsView.registerBuiltinMetrics(
SpannerCloudMonitoringExporter.create(
projectId, credentials, monitoringHost, universeDomain),
this::getProjectId, credentials, monitoringHost, universeDomain),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rahul2393 I did not understand this solution. getOrCreateOpenTelemetry is called from GapicSpannerRPC while creating the SpannerClient. At that time, the projectId shared here could be, for example, the projectId of the GKE instance.

So in this case, will we be initialising SpannerCloudMonitoringExporter with a null projectId? By this time setProjectIdIfAbsent won't have been called yet; it is only called later, during database init.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the flow is:

  1. SpannerClient init → OpenTelemetry/exporter may be created, project supplier returns null
  2. no export happens yet because no database project is known
  3. getDatabaseClient(DatabaseId) → database project is set once
  4. future metric exports use that database project

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are also passing the projectId on the next line to create the OpenTelemetry Resource.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also do

monitoredResourceBuilder.putLabels(PROJECT_ID_KEY.getKey(), projectId);

So the resource created during SDK initialization may contain the early/default project, but before sending
createServiceTimeSeries, we overwrite the monitored resource label with the database project from the exporter supplier.

sdkMeterProviderBuilder);
sdkMeterProviderBuilder.setResource(Resource.create(createResourceAttributes(projectId)));
SdkMeterProvider sdkMeterProvider = sdkMeterProviderBuilder.build();
this.openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build();
Runtime.getRuntime().addShutdownHook(new Thread(sdkMeterProvider::close));
this.shutdownHook = new Thread(sdkMeterProvider::close);
Runtime.getRuntime().addShutdownHook(this.shutdownHook);
}
return this.openTelemetry;
} catch (IOException ex) {
Expand All @@ -106,6 +111,47 @@ OpenTelemetry getOrCreateOpenTelemetry(
}
}

/**
 * Records the fallback project ID for built-in metrics, keeping the first value that is set.
 *
 * <p>The first non-empty {@code projectId} passed in is stored. Later calls never overwrite it;
 * a later call with a different project logs a single warning (the warning flag is cleared again
 * by {@code reset()}).
 *
 * <p>A {@code null} or empty {@code projectId} is ignored. Without this guard an empty string
 * would be pinned as the fallback and block every later, valid project, and a {@code null}
 * passed after initialization would produce a misleading "project null" mismatch warning.
 *
 * @param projectId the project ID to use as the fallback; ignored when {@code null} or empty
 */
synchronized void setProjectIdIfAbsent(String projectId) {
  if (projectId == null || projectId.isEmpty()) {
    // Nothing usable to record, and nothing meaningful to compare against.
    return;
  }
  if (this.projectId == null) {
    this.projectId = projectId;
    return;
  }
  if (this.projectId.equals(projectId) || mismatchedProjectIdLogged) {
    // Same project, or the mismatch has already been reported once.
    return;
  }
  mismatchedProjectIdLogged = true;
  logger.log(
      Level.WARNING,
      "Built-in metrics fallback project is already initialized to project {0}. Non-Spanner"
          + " metrics without project information will be exported using that project instead"
          + " of project {1}.",
      new Object[] {this.projectId, projectId});
}

/**
 * Returns the {@link OpenTelemetry} instance currently held by this provider.
 *
 * @return the held instance, or {@code null} when none is held (for example after
 *     {@code reset()} has cleared it)
 */
@Nullable
synchronized OpenTelemetry getOpenTelemetry() {
return this.openTelemetry;
}

/**
 * Returns the fallback project ID currently stored in this provider.
 *
 * @return the stored project ID; may be {@code null}, since the field is cleared by
 *     {@code reset()} and, in the code reviewed here, only populated by
 *     {@code setProjectIdIfAbsent}
 */
synchronized String getProjectId() {
return this.projectId;
}

/**
 * Test-only hook that returns this provider to its initial, empty state.
 *
 * <p>Closes the meter provider of the held SDK instance (if the held instance is an
 * {@link OpenTelemetrySdk}), unregisters the JVM shutdown hook if one was recorded, and then
 * clears every cached field.
 */
@VisibleForTesting
synchronized void reset() {
  final OpenTelemetry current = this.openTelemetry;
  if (current instanceof OpenTelemetrySdk) {
    final OpenTelemetrySdk sdk = (OpenTelemetrySdk) current;
    sdk.getSdkMeterProvider().close();
  }

  final Thread registeredHook = this.shutdownHook;
  if (registeredHook != null) {
    try {
      Runtime.getRuntime().removeShutdownHook(registeredHook);
    } catch (IllegalStateException ignored) {
      // The JVM is already shutting down, so the hook can no longer be removed.
    }
  }

  // Clear all cached state so the next caller starts from scratch.
  this.shutdownHook = null;
  this.mismatchedProjectIdLogged = false;
  this.projectId = null;
  this.openTelemetry = null;
}

// TODO: Remove when
// https://github.com/GoogleCloudPlatform/opentelemetry-operations-java/issues/421
// has been fixed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@
import io.opentelemetry.sdk.metrics.data.AggregationTemporality;
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.metrics.export.MetricExporter;
import io.opentelemetry.sdk.resources.Resource;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
Expand All @@ -69,13 +71,12 @@ class SpannerCloudMonitoringExporter implements MetricExporter {
// This is the quota limit from Cloud Monitoring. More details in
// https://cloud.google.com/monitoring/quotas#custom_metrics_quotas.
private static final int EXPORT_BATCH_SIZE_LIMIT = 200;
private final AtomicBoolean spannerExportFailureLogged = new AtomicBoolean(false);
private final AtomicBoolean lastExportSkippedData = new AtomicBoolean(false);
private final Set<String> spannerExportFailureLoggedProjects = ConcurrentHashMap.newKeySet();
private final MetricServiceClient client;
private final String spannerProjectId;
private final Supplier<String> fallbackProjectIdSupplier;

static SpannerCloudMonitoringExporter create(
String projectId,
Supplier<String> fallbackProjectIdSupplier,
@Nullable Credentials credentials,
@Nullable String monitoringHost,
String universeDomain)
Expand Down Expand Up @@ -114,13 +115,19 @@ static SpannerCloudMonitoringExporter create(
settingsBuilder.createServiceTimeSeriesSettings().setSimpleTimeoutNoRetriesDuration(timeout);

return new SpannerCloudMonitoringExporter(
projectId, MetricServiceClient.create(settingsBuilder.build()));
fallbackProjectIdSupplier, MetricServiceClient.create(settingsBuilder.build()));
}

@VisibleForTesting
SpannerCloudMonitoringExporter(String projectId, MetricServiceClient client) {
SpannerCloudMonitoringExporter(MetricServiceClient client) {
this(() -> null, client);
}

@VisibleForTesting
SpannerCloudMonitoringExporter(
Supplier<String> fallbackProjectIdSupplier, MetricServiceClient client) {
this.client = client;
this.spannerProjectId = projectId;
this.fallbackProjectIdSupplier = fallbackProjectIdSupplier;
}

@Override
Expand All @@ -140,37 +147,16 @@ MetricServiceClient getMetricServiceClient() {

/** Export client built in metrics */
private CompletableResultCode exportSpannerClientMetrics(Collection<MetricData> collection) {
// Filter spanner metrics. Only include metrics that contain a valid project.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you can safely remove this code when using this strategy. Or technically you can, but this then changes the current behavior of clients that use multiple different projects, which I don't think is a side-effect that we want from this fix. Previously, metrics with mismatched project IDs would be filtered out and not exported. Now, they are all set to whatever project ID is used by the first DatabaseClient that is created and then exported. I think that the latter is a degradation from the previous behavior, where they were dropped, as now you risk getting metrics from a 'wrong' database into a different project.

An alternative to setting a fixed project ID that is used for all metrics is to dynamically collect and then batch export the metrics per project ID (which in the vast majority of cases would be just one project). That would remove the requirement to try to set a project ID the first time a DatabaseClient is created, which would simplify the code a bit. See here for an example: https://github.com/googleapis/google-cloud-java/compare/spanner-export-metrics-per-project (Note: That sample has not been further refined, so it might need a bit of polishing before it is ready to use)

The upsides and downsides of the strategy in https://github.com/googleapis/google-cloud-java/compare/spanner-export-metrics-per-project are:

  • Upside: It correctly exports Spanner metrics to the correct project, even when a client creates multiple DatabaseClients with different project IDs.
  • Downside: It uses the default Project ID of the environment (so for example the GKE project ID) for core gRPC metrics that are not handled by the Spanner interceptor.

The downside mentioned above could partly be mitigated by combining it with the strategy in this pull request, and dynamically setting the project ID that is used for non-Spanner metrics to the project ID of the first DatabaseClient.

List<MetricData> spannerMetricData = collection.stream().collect(Collectors.toList());

// Log warnings for metrics that will be skipped.
boolean mustFilter = false;
if (spannerMetricData.stream()
.map(metricData -> metricData.getResource())
.anyMatch(this::shouldSkipPointDataDueToProjectId)) {
logger.log(
Level.WARNING, "Some metric data contain a different projectId. These will be skipped.");
mustFilter = true;
}

if (mustFilter) {
spannerMetricData =
spannerMetricData.stream()
.filter(this::shouldSkipMetricData)
.collect(Collectors.toList());
}
lastExportSkippedData.set(mustFilter);

// Skips exporting if there's none
if (spannerMetricData.isEmpty()) {
if (collection.isEmpty()) {
return CompletableResultCode.ofSuccess();
}

List<TimeSeries> spannerTimeSeries;
try {
spannerTimeSeries =
SpannerCloudMonitoringExporterUtils.convertToSpannerTimeSeries(
spannerMetricData, this.spannerProjectId);
collection, fallbackProjectIdSupplier.get());
} catch (Throwable e) {
logger.log(
Level.WARNING,
Expand All @@ -179,37 +165,60 @@ private CompletableResultCode exportSpannerClientMetrics(Collection<MetricData>
return CompletableResultCode.ofFailure();
}

ProjectName projectName = ProjectName.of(spannerProjectId);
if (spannerTimeSeries.isEmpty()) {
return CompletableResultCode.ofSuccess();
}

Map<String, List<TimeSeries>> timeSeriesByProject =
spannerTimeSeries.stream()
.collect(
Collectors.groupingBy(
timeSeries ->
timeSeries
.getResource()
.getLabelsMap()
.get(BuiltInMetricsConstant.PROJECT_ID_KEY.getKey())));

List<ApiFuture<List<Empty>>> futures = new ArrayList<>();
for (Map.Entry<String, List<TimeSeries>> entry : timeSeriesByProject.entrySet()) {
ProjectName projectName = ProjectName.of(entry.getKey());
ApiFuture<List<Empty>> future = exportTimeSeriesInBatch(projectName, entry.getValue());
ApiFutures.addCallback(
future,
new ApiFutureCallback<List<Empty>>() {
@Override
public void onFailure(Throwable throwable) {
logExportFailure(throwable, projectName);
}

@Override
public void onSuccess(List<Empty> ignored) {
spannerExportFailureLoggedProjects.remove(projectName.getProject());
}
},
MoreExecutors.directExecutor());
futures.add(future);
}

ApiFuture<List<Empty>> futureList = exportTimeSeriesInBatch(projectName, spannerTimeSeries);
ApiFuture<List<List<Empty>>> groupedFuture = ApiFutures.allAsList(futures);
ApiFuture<List<Empty>> futureList =
ApiFutures.transform(
groupedFuture,
groupedResults ->
groupedResults.stream().flatMap(List::stream).collect(Collectors.toList()),
MoreExecutors.directExecutor());

CompletableResultCode spannerExportCode = new CompletableResultCode();
ApiFutures.addCallback(
futureList,
new ApiFutureCallback<List<Empty>>() {
@Override
public void onFailure(Throwable throwable) {
if (spannerExportFailureLogged.compareAndSet(false, true)) {
String msg = "createServiceTimeSeries request failed for spanner metrics.";
if (throwable instanceof PermissionDeniedException) {
// TODO: Add the link of public documentation when available in the log message.
msg +=
String.format(
" Need monitoring metric writer permission on project=%s. Follow"
+ " https://cloud.google.com/spanner/docs/view-manage-client-side-metrics#access-client-side-metrics"
+ " to set up permissions",
projectName.getProject());
}
logger.log(Level.WARNING, msg, throwable);
}
spannerExportCode.fail();
}

@Override
public void onSuccess(List<Empty> empty) {
// When an export succeeded reset the export failure flag to false so if there's a
// transient failure it'll be logged.
spannerExportFailureLogged.set(false);
spannerExportCode.succeed();
}
},
Expand All @@ -218,16 +227,22 @@ public void onSuccess(List<Empty> empty) {
return spannerExportCode;
}

private boolean shouldSkipMetricData(MetricData metricData) {
return shouldSkipPointDataDueToProjectId(metricData.getResource());
}

private boolean shouldSkipPointDataDueToProjectId(Resource resource) {
return !spannerProjectId.equals(SpannerCloudMonitoringExporterUtils.getProjectId(resource));
}

boolean lastExportSkippedData() {
return this.lastExportSkippedData.get();
/**
 * Logs a warning for a failed export to the given project, at most once per project while that
 * project remains in {@code spannerExportFailureLoggedProjects}.
 *
 * <p>For a {@link PermissionDeniedException} the message explains which permission is needed and
 * where to find the setup instructions; for any other failure it only names the project.
 *
 * @param throwable the failure reported by the export call
 * @param projectName the project the failed export was addressed to
 */
private void logExportFailure(Throwable throwable, ProjectName projectName) {
  final String project = projectName.getProject();
  // add() returns false when this project's failure has already been logged.
  if (!spannerExportFailureLoggedProjects.add(project)) {
    return;
  }

  final String detail =
      throwable instanceof PermissionDeniedException
          ? String.format(
              " Need monitoring metric writer permission on project=%s. Follow"
                  + " https://cloud.google.com/spanner/docs/view-manage-client-side-metrics"
                  + "#access-client-side-metrics"
                  + " to set up permissions",
              project)
          : String.format(" project=%s.", project);

  logger.log(
      Level.WARNING,
      "createServiceTimeSeries request failed for spanner metrics." + detail,
      throwable);
}

private ApiFuture<List<Empty>> exportTimeSeriesInBatch(
Expand Down
Loading
Loading