-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix(spanner): derive built-in metrics project from database client #13262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ | |
| import com.google.cloud.opentelemetry.detection.AttributeKeys; | ||
| import com.google.cloud.opentelemetry.detection.DetectedPlatform; | ||
| import com.google.cloud.opentelemetry.detection.GCPPlatformDetector; | ||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.base.Strings; | ||
| import com.google.common.hash.HashFunction; | ||
| import com.google.common.hash.Hashing; | ||
|
|
@@ -75,10 +76,13 @@ final class BuiltInMetricsProvider { | |
| private static final String default_location = "global"; | ||
|
|
||
| private OpenTelemetry openTelemetry; | ||
| private String projectId; | ||
| private boolean mismatchedProjectIdLogged; | ||
| private Thread shutdownHook; | ||
|
|
||
| private BuiltInMetricsProvider() {} | ||
|
|
||
| OpenTelemetry getOrCreateOpenTelemetry( | ||
| synchronized OpenTelemetry getOrCreateOpenTelemetry( | ||
| String projectId, | ||
| @Nullable Credentials credentials, | ||
| @Nullable String monitoringHost, | ||
|
|
@@ -88,12 +92,13 @@ OpenTelemetry getOrCreateOpenTelemetry( | |
| SdkMeterProviderBuilder sdkMeterProviderBuilder = SdkMeterProvider.builder(); | ||
| BuiltInMetricsView.registerBuiltinMetrics( | ||
| SpannerCloudMonitoringExporter.create( | ||
| projectId, credentials, monitoringHost, universeDomain), | ||
| this::getProjectId, credentials, monitoringHost, universeDomain), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We are also passing the projectId on the next line to create the OpenTelemetry Resource.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We also do `monitoredResourceBuilder.putLabels(PROJECT_ID_KEY.getKey(), projectId);`. So the resource created during SDK initialization may contain the early/default project, but before sending |
||
| sdkMeterProviderBuilder); | ||
| sdkMeterProviderBuilder.setResource(Resource.create(createResourceAttributes(projectId))); | ||
| SdkMeterProvider sdkMeterProvider = sdkMeterProviderBuilder.build(); | ||
| this.openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); | ||
| Runtime.getRuntime().addShutdownHook(new Thread(sdkMeterProvider::close)); | ||
| this.shutdownHook = new Thread(sdkMeterProvider::close); | ||
| Runtime.getRuntime().addShutdownHook(this.shutdownHook); | ||
| } | ||
| return this.openTelemetry; | ||
| } catch (IOException ex) { | ||
|
|
@@ -106,6 +111,47 @@ OpenTelemetry getOrCreateOpenTelemetry( | |
| } | ||
| } | ||
|
|
||
| synchronized void setProjectIdIfAbsent(String projectId) { | ||
| if (this.projectId == null) { | ||
| this.projectId = projectId; | ||
| } else if (!this.projectId.equals(projectId) && !mismatchedProjectIdLogged) { | ||
| mismatchedProjectIdLogged = true; | ||
| logger.log( | ||
| Level.WARNING, | ||
| "Built-in metrics fallback project is already initialized to project {0}. Non-Spanner" | ||
| + " metrics without project information will be exported using that project instead" | ||
| + " of project {1}.", | ||
| new Object[] {this.projectId, projectId}); | ||
| } | ||
| } | ||
|
|
||
| @Nullable | ||
| synchronized OpenTelemetry getOpenTelemetry() { | ||
| return this.openTelemetry; | ||
| } | ||
|
|
||
| synchronized String getProjectId() { | ||
| return this.projectId; | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| synchronized void reset() { | ||
| if (this.openTelemetry instanceof OpenTelemetrySdk) { | ||
| ((OpenTelemetrySdk) this.openTelemetry).getSdkMeterProvider().close(); | ||
| } | ||
| if (this.shutdownHook != null) { | ||
| try { | ||
| Runtime.getRuntime().removeShutdownHook(this.shutdownHook); | ||
| } catch (IllegalStateException ignored) { | ||
| // The JVM is already shutting down. | ||
| } | ||
| } | ||
| this.openTelemetry = null; | ||
| this.projectId = null; | ||
| this.mismatchedProjectIdLogged = false; | ||
| this.shutdownHook = null; | ||
| } | ||
|
|
||
| // TODO: Remove when | ||
| // https://github.com/GoogleCloudPlatform/opentelemetry-operations-java/issues/421 | ||
| // has been fixed. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,13 +42,15 @@ | |
| import io.opentelemetry.sdk.metrics.data.AggregationTemporality; | ||
| import io.opentelemetry.sdk.metrics.data.MetricData; | ||
| import io.opentelemetry.sdk.metrics.export.MetricExporter; | ||
| import io.opentelemetry.sdk.resources.Resource; | ||
| import java.io.IOException; | ||
| import java.time.Duration; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.concurrent.atomic.AtomicBoolean; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.function.Supplier; | ||
| import java.util.logging.Level; | ||
| import java.util.logging.Logger; | ||
| import java.util.stream.Collectors; | ||
|
|
@@ -69,13 +71,12 @@ class SpannerCloudMonitoringExporter implements MetricExporter { | |
| // This is the quota limit from Cloud Monitoring. More details in | ||
| // https://cloud.google.com/monitoring/quotas#custom_metrics_quotas. | ||
| private static final int EXPORT_BATCH_SIZE_LIMIT = 200; | ||
| private final AtomicBoolean spannerExportFailureLogged = new AtomicBoolean(false); | ||
| private final AtomicBoolean lastExportSkippedData = new AtomicBoolean(false); | ||
| private final Set<String> spannerExportFailureLoggedProjects = ConcurrentHashMap.newKeySet(); | ||
| private final MetricServiceClient client; | ||
| private final String spannerProjectId; | ||
| private final Supplier<String> fallbackProjectIdSupplier; | ||
|
|
||
| static SpannerCloudMonitoringExporter create( | ||
| String projectId, | ||
| Supplier<String> fallbackProjectIdSupplier, | ||
| @Nullable Credentials credentials, | ||
| @Nullable String monitoringHost, | ||
| String universeDomain) | ||
|
|
@@ -114,13 +115,19 @@ static SpannerCloudMonitoringExporter create( | |
| settingsBuilder.createServiceTimeSeriesSettings().setSimpleTimeoutNoRetriesDuration(timeout); | ||
|
|
||
| return new SpannerCloudMonitoringExporter( | ||
| projectId, MetricServiceClient.create(settingsBuilder.build())); | ||
| fallbackProjectIdSupplier, MetricServiceClient.create(settingsBuilder.build())); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| SpannerCloudMonitoringExporter(String projectId, MetricServiceClient client) { | ||
| SpannerCloudMonitoringExporter(MetricServiceClient client) { | ||
| this(() -> null, client); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| SpannerCloudMonitoringExporter( | ||
| Supplier<String> fallbackProjectIdSupplier, MetricServiceClient client) { | ||
| this.client = client; | ||
| this.spannerProjectId = projectId; | ||
| this.fallbackProjectIdSupplier = fallbackProjectIdSupplier; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -140,37 +147,16 @@ MetricServiceClient getMetricServiceClient() { | |
|
|
||
| /** Export client built in metrics */ | ||
| private CompletableResultCode exportSpannerClientMetrics(Collection<MetricData> collection) { | ||
| // Filter spanner metrics. Only include metrics that contain a valid project. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think you can safely remove this code when using this strategy. Or technically you can, but this then changes the current behavior of clients that use multiple different projects, which I don't think is a side-effect that we want from this fix. Previously, metrics with mismatched project IDs would be filtered out and not exported. Now, they are all set to whatever project ID is used by the first `DatabaseClient`. An alternative to setting a fixed project ID that is used for all metrics is to dynamically collect and then batch export the metrics per project ID (which in the vast majority of cases would be just one project). That would remove the requirement to try to set a project ID the first time a `DatabaseClient` is created. The up and downsides of the strategy in https://github.com/googleapis/google-cloud-java/compare/spanner-export-metrics-per-project are:
The downside mentioned above could partly be mitigated by combining it with the strategy in this pull request, and dynamically setting the project ID that is used for non-Spanner metrics to the project ID of the first DatabaseClient. |
||
| List<MetricData> spannerMetricData = collection.stream().collect(Collectors.toList()); | ||
|
|
||
| // Log warnings for metrics that will be skipped. | ||
| boolean mustFilter = false; | ||
| if (spannerMetricData.stream() | ||
| .map(metricData -> metricData.getResource()) | ||
| .anyMatch(this::shouldSkipPointDataDueToProjectId)) { | ||
| logger.log( | ||
| Level.WARNING, "Some metric data contain a different projectId. These will be skipped."); | ||
| mustFilter = true; | ||
| } | ||
|
|
||
| if (mustFilter) { | ||
| spannerMetricData = | ||
| spannerMetricData.stream() | ||
| .filter(this::shouldSkipMetricData) | ||
| .collect(Collectors.toList()); | ||
| } | ||
| lastExportSkippedData.set(mustFilter); | ||
|
|
||
| // Skips exporting if there's none | ||
| if (spannerMetricData.isEmpty()) { | ||
| if (collection.isEmpty()) { | ||
| return CompletableResultCode.ofSuccess(); | ||
| } | ||
|
|
||
| List<TimeSeries> spannerTimeSeries; | ||
| try { | ||
| spannerTimeSeries = | ||
| SpannerCloudMonitoringExporterUtils.convertToSpannerTimeSeries( | ||
| spannerMetricData, this.spannerProjectId); | ||
| collection, fallbackProjectIdSupplier.get()); | ||
| } catch (Throwable e) { | ||
| logger.log( | ||
| Level.WARNING, | ||
|
|
@@ -179,37 +165,60 @@ private CompletableResultCode exportSpannerClientMetrics(Collection<MetricData> | |
| return CompletableResultCode.ofFailure(); | ||
| } | ||
|
|
||
| ProjectName projectName = ProjectName.of(spannerProjectId); | ||
| if (spannerTimeSeries.isEmpty()) { | ||
| return CompletableResultCode.ofSuccess(); | ||
| } | ||
|
|
||
| Map<String, List<TimeSeries>> timeSeriesByProject = | ||
| spannerTimeSeries.stream() | ||
| .collect( | ||
| Collectors.groupingBy( | ||
| timeSeries -> | ||
| timeSeries | ||
| .getResource() | ||
| .getLabelsMap() | ||
| .get(BuiltInMetricsConstant.PROJECT_ID_KEY.getKey()))); | ||
|
|
||
| List<ApiFuture<List<Empty>>> futures = new ArrayList<>(); | ||
| for (Map.Entry<String, List<TimeSeries>> entry : timeSeriesByProject.entrySet()) { | ||
| ProjectName projectName = ProjectName.of(entry.getKey()); | ||
| ApiFuture<List<Empty>> future = exportTimeSeriesInBatch(projectName, entry.getValue()); | ||
| ApiFutures.addCallback( | ||
| future, | ||
| new ApiFutureCallback<List<Empty>>() { | ||
| @Override | ||
| public void onFailure(Throwable throwable) { | ||
| logExportFailure(throwable, projectName); | ||
| } | ||
|
|
||
| @Override | ||
| public void onSuccess(List<Empty> ignored) { | ||
| spannerExportFailureLoggedProjects.remove(projectName.getProject()); | ||
| } | ||
| }, | ||
| MoreExecutors.directExecutor()); | ||
| futures.add(future); | ||
| } | ||
|
|
||
| ApiFuture<List<Empty>> futureList = exportTimeSeriesInBatch(projectName, spannerTimeSeries); | ||
| ApiFuture<List<List<Empty>>> groupedFuture = ApiFutures.allAsList(futures); | ||
| ApiFuture<List<Empty>> futureList = | ||
| ApiFutures.transform( | ||
| groupedFuture, | ||
| groupedResults -> | ||
| groupedResults.stream().flatMap(List::stream).collect(Collectors.toList()), | ||
| MoreExecutors.directExecutor()); | ||
|
|
||
| CompletableResultCode spannerExportCode = new CompletableResultCode(); | ||
| ApiFutures.addCallback( | ||
| futureList, | ||
| new ApiFutureCallback<List<Empty>>() { | ||
| @Override | ||
| public void onFailure(Throwable throwable) { | ||
| if (spannerExportFailureLogged.compareAndSet(false, true)) { | ||
| String msg = "createServiceTimeSeries request failed for spanner metrics."; | ||
| if (throwable instanceof PermissionDeniedException) { | ||
| // TODO: Add the link of public documentation when available in the log message. | ||
| msg += | ||
| String.format( | ||
| " Need monitoring metric writer permission on project=%s. Follow" | ||
| + " https://cloud.google.com/spanner/docs/view-manage-client-side-metrics#access-client-side-metrics" | ||
| + " to set up permissions", | ||
| projectName.getProject()); | ||
| } | ||
| logger.log(Level.WARNING, msg, throwable); | ||
| } | ||
| spannerExportCode.fail(); | ||
| } | ||
|
|
||
| @Override | ||
| public void onSuccess(List<Empty> empty) { | ||
| // When an export succeeded reset the export failure flag to false so if there's a | ||
| // transient failure it'll be logged. | ||
| spannerExportFailureLogged.set(false); | ||
| spannerExportCode.succeed(); | ||
| } | ||
| }, | ||
|
|
@@ -218,16 +227,22 @@ public void onSuccess(List<Empty> empty) { | |
| return spannerExportCode; | ||
| } | ||
|
|
||
| private boolean shouldSkipMetricData(MetricData metricData) { | ||
| return shouldSkipPointDataDueToProjectId(metricData.getResource()); | ||
| } | ||
|
|
||
| private boolean shouldSkipPointDataDueToProjectId(Resource resource) { | ||
| return !spannerProjectId.equals(SpannerCloudMonitoringExporterUtils.getProjectId(resource)); | ||
| } | ||
|
|
||
| boolean lastExportSkippedData() { | ||
| return this.lastExportSkippedData.get(); | ||
| private void logExportFailure(Throwable throwable, ProjectName projectName) { | ||
| if (spannerExportFailureLoggedProjects.add(projectName.getProject())) { | ||
| String msg = "createServiceTimeSeries request failed for spanner metrics."; | ||
| if (throwable instanceof PermissionDeniedException) { | ||
| msg += | ||
| String.format( | ||
| " Need monitoring metric writer permission on project=%s. Follow" | ||
| + " https://cloud.google.com/spanner/docs/view-manage-client-side-metrics" | ||
| + "#access-client-side-metrics" | ||
| + " to set up permissions", | ||
| projectName.getProject()); | ||
| } else { | ||
| msg += String.format(" project=%s.", projectName.getProject()); | ||
| } | ||
| logger.log(Level.WARNING, msg, throwable); | ||
| } | ||
| } | ||
|
|
||
| private ApiFuture<List<Empty>> exportTimeSeriesInBatch( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rahul2393 I did not understand this solution. `getOrCreateOpenTelemetry` is called from GapicSpannerRPC while creating the SpannerClient. At that time, the `projectId` shared here could be the projectId of the GKE instance, for example. So in this case we will be initialising SpannerCloudMonitoringExporter with a null projectId? By this time `setProjectIdIfAbsent` won't have been called; it is called later, during database init.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the flow is:
1. `SpannerClient` init → OpenTelemetry/exporter may be created; the project supplier returns `null`.
2. `getDatabaseClient(DatabaseId)` → the database project is set once.