diff --git a/pom.xml b/pom.xml index cc60063..1cbb3f1 100644 --- a/pom.xml +++ b/pom.xml @@ -30,7 +30,6 @@ url-shortener-demo jdbc project-course - slo slo-workload diff --git a/slo-workload/README.md b/slo-workload/README.md index 7b874bf..4397789 100644 --- a/slo-workload/README.md +++ b/slo-workload/README.md @@ -5,14 +5,20 @@ reliability of YDB Java clients under load and chaos using the [YDB SLO action](https://github.com/ydb-platform/ydb-slo-action). Each submodule is a self-contained, runnable workload that follows the same -contract as the SDK SLO workload in [`../slo`](../slo): it reads its -configuration from environment variables, runs setup/run/teardown phases, and -pushes OpenTelemetry (OTLP) metrics that the action scrapes and compares -between the current PR run and a baseline run. +contract: it reads its configuration from environment variables, runs +setup/run/teardown phases, and pushes OpenTelemetry (OTLP) metrics that the +action scrapes and compares between the current PR run and a baseline run. + +Shared harness code lives in [`core`](core) (`Config`, `Metrics`, KV row +model, rate-limited runner). Every workload plugs a `KvClient` adapter into +that runner so all of them emit the same metric contract. 
| Module | Component under test | Description | | --- | --- | --- | +| [`query`](query) | `ydb-java-sdk` (query client) | Native SDK KV workload | | [`jdbc`](jdbc) | `ydb-jdbc-driver` | Plain JDBC KV workload (no framework) | +| [`spring-data-jdbc`](spring-data-jdbc) | `ydb-jdbc-driver` + `spring-data-jdbc-ydb` + `spring-ydb-retry` | Spring Data JDBC KV workload | +| [`spring-data-jpa`](spring-data-jpa) | `ydb-jdbc-driver` + Hibernate 6 + `spring-ydb-retry` | Spring Data JPA KV workload | ## How a workload behaves @@ -76,11 +82,27 @@ KV tunables are passed on the command line and parsed by JCommander: --partition-size Auto-partitioning partition size in MB (default 1) --min-partition-count Minimum number of table partitions (default 6) --max-partition-count Maximum number of table partitions (default 1000) ---duration Override WORKLOAD_DURATION when > 0 +--duration / --time Override WORKLOAD_DURATION when > 0 +--shutdown-time Extra grace seconds for in-flight ops on shutdown (default 30) +--max-attempts Per-operation attempt cap, initial + retries (default 10) +--max-workers Hard cap on workers per operation type (default 64) ``` -Unknown flags are ignored, so a workload accepts command strings designed for -other SDKs without erroring. +Unknown flags are rejected — a typo in the ydb-slo-action invocation should +fail loudly rather than silently fall back to defaults. + +The Spring-backed workloads expose one more knob via the `SLO_HIKARI_POOL_SIZE` +environment variable (default `130`, sized for `2 × max-workers` plus headroom). +Raise it together with `--max-workers` or the workload measures Hikari +contention rather than the JDBC driver. + +### Cross-implementation comparability + +Every implementation derives the primary-key `hash` column from `id` with the +same client-side mix (`RowGenerator.numericHash`). 
A table written by the +`query` workload is therefore byte-compatible with the `jdbc` and Spring-Data +workloads — useful when one prefills the table and another reads from it +during cross-driver experiments. ## How CI uses this module diff --git a/slo/pom.xml b/slo-workload/core/pom.xml similarity index 51% rename from slo/pom.xml rename to slo-workload/core/pom.xml index 8c3f49c..fb3ddf5 100644 --- a/slo/pom.xml +++ b/slo-workload/core/pom.xml @@ -6,83 +6,66 @@ tech.ydb.examples - ydb-sdk-examples + slo-workload 1.1.0-SNAPSHOT + ../pom.xml - ydb-slo-workload - YDB SLO workload - SLO workload application for testing YDB Java SDK reliability under load and chaos - - - 1.82 - 1.59.0 - 2.2.2 - + slo-workload-core + YDB SLO workload core + + Driver-agnostic core of the YDB SLO workloads: OTLP metrics, env config, + the KV row model and the load-generating runner. Every concrete workload + (native query client, plain JDBC, Spring Data) plugs a KvClient into this + runner so all of them emit the exact same metric contract. 
+ - - tech.ydb - ydb-sdk-query - - com.beust jcommander - ${jcommander.version} org.hdrhistogram HdrHistogram - ${hdrhistogram.version} + + + + com.google.guava + guava io.opentelemetry opentelemetry-api - ${opentelemetry.version} io.opentelemetry opentelemetry-sdk - ${opentelemetry.version} io.opentelemetry opentelemetry-sdk-metrics - ${opentelemetry.version} io.opentelemetry opentelemetry-exporter-otlp - ${opentelemetry.version} - org.apache.logging.log4j - log4j-slf4j2-impl + org.slf4j + slf4j-api - ydb-slo-workload org.apache.maven.plugins - maven-dependency-plugin - - - org.apache.maven.plugins - maven-jar-plugin + maven-compiler-plugin - - - true - libs/ - tech.ydb.slo.Main - - + ${maven.compiler.release} diff --git a/slo/src/main/java/tech/ydb/slo/Config.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/Config.java similarity index 65% rename from slo/src/main/java/tech/ydb/slo/Config.java rename to slo-workload/core/src/main/java/tech/ydb/slo/core/Config.java index 2efdc7e..7fbaa94 100644 --- a/slo/src/main/java/tech/ydb/slo/Config.java +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/Config.java @@ -1,20 +1,8 @@ -package tech.ydb.slo; - -/** - * Configuration for the SLO workload, populated from environment variables - * provided by the YDB SLO action runtime. - * - *

The action sets these variables on the workload container: - *

    - *
  • {@code YDB_CONNECTION_STRING} or {@code YDB_ENDPOINT} + {@code YDB_DATABASE} — YDB connection
  • - *
  • {@code WORKLOAD_REF} — value used as the {@code ref} label on all metrics
  • - *
  • {@code WORKLOAD_NAME} — workload name (also used as part of the table path)
  • - *
  • {@code WORKLOAD_DURATION} — workload run duration in seconds (0 = unlimited)
  • - *
  • {@code OTEL_EXPORTER_OTLP_ENDPOINT} — OTLP endpoint for pushing metrics
  • - *
- */ +package tech.ydb.slo.core; + public final class Config { private final String connectionString; + private final String jdbcUrl; private final String token; private final String ref; private final String workloadName; @@ -23,6 +11,7 @@ public final class Config { private Config( String connectionString, + String jdbcUrl, String token, String ref, String workloadName, @@ -30,6 +19,7 @@ private Config( String otlpEndpoint ) { this.connectionString = connectionString; + this.jdbcUrl = jdbcUrl; this.token = token; this.ref = ref; this.workloadName = workloadName; @@ -37,10 +27,18 @@ private Config( this.otlpEndpoint = otlpEndpoint; } + + public String connectionString() { return connectionString; } + + + public String jdbcUrl() { + return jdbcUrl; + } + public String token() { return token; } @@ -61,33 +59,45 @@ public String otlpEndpoint() { return otlpEndpoint; } - /** - * Loads configuration from environment variables. - * - * @return configuration instance - * @throws IllegalStateException if required variables are missing or invalid - */ - public static Config fromEnv() { + + + public static Config fromEnv(String defaultWorkloadName) { String connectionString = resolveConnectionString(); if (connectionString == null || connectionString.isEmpty()) { throw new IllegalStateException( - "YDB connection is not configured: set YDB_CONNECTION_STRING or YDB_ENDPOINT + YDB_DATABASE" + "YDB connection is not configured: set YDB_CONNECTION_STRING, " + + "YDB_JDBC_URL or YDB_ENDPOINT + YDB_DATABASE" ); } String token = envOrDefault("YDB_TOKEN", ""); String ref = envOrDefault("WORKLOAD_REF", "unknown"); - String workloadName = envOrDefault("WORKLOAD_NAME", "java-slo-workload"); + String workloadName = envOrDefault("WORKLOAD_NAME", defaultWorkloadName); int durationSeconds = parseInt(envOrDefault("WORKLOAD_DURATION", "600"), 600); String otlpEndpoint = envOrDefault("OTEL_EXPORTER_OTLP_ENDPOINT", ""); - return new Config(connectionString, token, ref, workloadName, 
durationSeconds, otlpEndpoint); + return new Config( + connectionString, + toJdbcUrl(connectionString), + token, + ref, + workloadName, + durationSeconds, + otlpEndpoint + ); } + + private static String resolveConnectionString() { + String jdbc = System.getenv("YDB_JDBC_URL"); + if (jdbc != null && !jdbc.isEmpty()) { + return stripJdbcPrefix(jdbc); + } + String cs = System.getenv("YDB_CONNECTION_STRING"); if (cs != null && !cs.isEmpty()) { - return cs; + return stripJdbcPrefix(cs); } String endpoint = System.getenv("YDB_ENDPOINT"); @@ -95,9 +105,26 @@ private static String resolveConnectionString() { if (endpoint == null || endpoint.isEmpty() || database == null || database.isEmpty()) { return null; } + return composeConnectionString(endpoint, database); + } + + private static String stripJdbcPrefix(String value) { + if (value.startsWith("jdbc:ydb:")) { + return value.substring("jdbc:ydb:".length()); + } + return value; + } + + + + private static String toJdbcUrl(String connectionString) { + if (connectionString.startsWith("jdbc:")) { + return connectionString; + } + return "jdbc:ydb:" + connectionString; + } - // Compose connection string in the form expected by GrpcTransport.forConnectionString: - // grpc://host:port/database + private static String composeConnectionString(String endpoint, String database) { if (endpoint.endsWith("/") && database.startsWith("/")) { return endpoint + database.substring(1); } diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/Launcher.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/Launcher.java new file mode 100644 index 0000000..eecf16a --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/Launcher.java @@ -0,0 +1,163 @@ +package tech.ydb.slo.core; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.ParameterException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import tech.ydb.slo.core.kv.KvClient; +import tech.ydb.slo.core.kv.KvWorkloadParams; +import 
tech.ydb.slo.core.kv.WorkloadRunner; + +public final class Launcher { + private static final Logger logger = LoggerFactory.getLogger(Launcher.class); + + + + @FunctionalInterface + public interface ClientFactory { + KvClient create(Config config, KvWorkloadParams params, String tablePath) throws Exception; + } + + private Launcher() { + + } + + + + public static void launch( + String programName, + String defaultWorkloadName, + String[] args, + ClientFactory factory + ) { + System.exit(run(programName, defaultWorkloadName, args, factory)); + } + + + + public static int run( + String programName, + String defaultWorkloadName, + String[] args, + ClientFactory factory + ) { + Config config; + try { + config = Config.fromEnv(defaultWorkloadName); + } catch (IllegalStateException e) { + logger.error("invalid environment configuration: {}", e.getMessage()); + return 2; + } + + KvWorkloadParams params = new KvWorkloadParams(); + try { + JCommander.newBuilder() + .programName(programName) + + + .acceptUnknownOptions(false) + .addObject(params) + .build() + .parse(args); + } catch (ParameterException e) { + logger.error("invalid CLI arguments: {}", e.getMessage()); + return 2; + } + + + if (params.durationSeconds() <= 0) { + params.setDurationSeconds(config.durationSeconds()); + } + + String tablePath = tablePathFor(config); + + logger.info("starting SLO workload: name={}, ref={}, duration={}s, readRps={}, writeRps={}, table={}", + config.workloadName(), + config.ref(), + params.durationSeconds(), + params.readRps(), + params.writeRps(), + tablePath); + + int exitCode = 0; + Metrics metrics = Metrics.create(config); + KvClient client; + try { + client = factory.create(config, params, tablePath); + } catch (Exception e) { + logger.error("failed to create workload client", e); + closeQuietly(metrics, "metrics"); + return 1; + } + + WorkloadRunner runner = new WorkloadRunner(client, metrics, params, tablePath); + try { + runner.setup(); + runner.run(); + } catch 
(InterruptedException e) { + Thread.currentThread().interrupt(); + logger.warn("workload interrupted"); + exitCode = 1; + } catch (Throwable t) { + logger.error("workload failed", t); + exitCode = 1; + } finally { + try { + runner.teardown(); + } catch (Throwable t) { + logger.warn("teardown failed", t); + } + try { + metrics.flush(); + } catch (Throwable t) { + logger.warn("metrics flush failed", t); + } + closeQuietly(metrics, "metrics"); + closeQuietly(client, "workload client"); + } + + return exitCode; + } + + + + public static String tablePathFor(Config config) { + return sanitize(config.workloadName()) + "_" + sanitize(config.ref()); + } + + private static void closeQuietly(AutoCloseable closeable, String name) { + if (closeable == null) { + return; + } + try { + closeable.close(); + } catch (Throwable t) { + logger.warn("failed to close {}: {}", name, t.toString()); + } + } + + + + private static String sanitize(String value) { + StringBuilder sb = new StringBuilder(value.length()); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + if (Character.isLetterOrDigit(c) || c == '_') { + sb.append(c); + } else { + sb.append('_'); + } + } + if (sb.length() == 0) { + sb.append('_'); + } else if (Character.isDigit(sb.charAt(0))) { + sb.insert(0, '_'); + } + + if (sb.length() > 64) { + sb.setLength(64); + } + return sb.toString(); + } +} diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Metrics.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/Metrics.java similarity index 67% rename from slo-workload/jdbc/src/main/java/tech/ydb/slo/Metrics.java rename to slo-workload/core/src/main/java/tech/ydb/slo/core/Metrics.java index 767a747..0867444 100644 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Metrics.java +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/Metrics.java @@ -1,4 +1,4 @@ -package tech.ydb.slo; +package tech.ydb.slo.core; import java.time.Duration; import java.util.Map; @@ -19,29 +19,6 @@ import 
org.HdrHistogram.AtomicHistogram; import org.HdrHistogram.Histogram; -/** - * Collects and pushes SLO workload metrics to the OTLP endpoint configured by - * the YDB SLO action runtime. - * - *

Metrics emitted (matching the contract from - * {@code ydb-platform/ydb-slo-action}): - *

    - *
  • {@code sdk.operations.total} — counter, labeled by - * {@code operation_type} and {@code operation_status}
  • - *
  • {@code sdk.errors.total} — counter, labeled by - * {@code operation_type} and {@code error_kind}
  • - *
  • {@code sdk.retry.attempts.total} — counter, labeled by - * {@code operation_type} and {@code operation_status}
  • - *
  • {@code sdk.pending.operations} — up/down counter, labeled by - * {@code operation_type}
  • - *
  • {@code sdk.operation.latency.p50.seconds} / - * {@code .p95.seconds} / {@code .p99.seconds} — - * observable gauges fed from per-operation HDR histograms
  • - *
- * - *

Every metric carries the {@code ref} label so the report action can - * separate current and baseline series. - */ public final class Metrics implements AutoCloseable { public enum OperationType { @@ -83,7 +60,7 @@ public String label() { private static final AttributeKey ATTR_REF = AttributeKey.stringKey("ref"); - // HDR histograms record latencies in microseconds with high precision up to 60 s. + private static final long HDR_MIN_MICROS = 1L; private static final long HDR_MAX_MICROS = 60L * 1_000_000L; private static final int HDR_SIGNIFICANT_DIGITS = 3; @@ -95,7 +72,7 @@ public String label() { private final LongCounter retryAttemptsTotal; private final LongUpDownCounter pendingOperations; - private final Map histograms = new ConcurrentHashMap<>(); + private final Map histograms; private Metrics( SdkMeterProvider meterProvider, @@ -103,7 +80,8 @@ private Metrics( LongCounter operationsTotal, LongCounter errorsTotal, LongCounter retryAttemptsTotal, - LongUpDownCounter pendingOperations + LongUpDownCounter pendingOperations, + Map histograms ) { this.meterProvider = meterProvider; this.ref = ref; @@ -111,14 +89,11 @@ private Metrics( this.errorsTotal = errorsTotal; this.retryAttemptsTotal = retryAttemptsTotal; this.pendingOperations = pendingOperations; + this.histograms = histograms; } - /* - * Builds a {@code Metrics} instance configured to push OTLP metrics every - * second to the endpoint from {@code config.otlpEndpoint()}. If the - * endpoint is empty, all metrics are still observable in-process but never - * exported. - */ + + public static Metrics create(Config config) { String ref = config.ref(); @@ -167,26 +142,26 @@ public static Metrics create(Config config) { Map histograms = new ConcurrentHashMap<>(); - // Pre-create one histogram per operation_type so the first export - // already produces gauge series. 
We only track successful operations: - // failure latency is dominated by retry budgets / timeouts and would - // skew the percentiles without telling us anything useful about SDK - // performance. The SLO action's metrics.yaml filters by - // operation_status="success" anyway. + + + + + + for (OperationType type : OperationType.values()) { histograms.put(type, newHistogram()); } - // Build the three percentile gauges as raw observers — their values - // are produced by a single batch callback below, which reads - // p50/p95/p99 from the same histogram snapshot and then resets the - // histogram. Reading all three percentiles from one snapshot avoids - // races where p99 could be observed against a freshly-reset histogram - // populated by p50, and resetting after each export means the gauge - // reflects only latencies recorded during the last export interval — - // matching the JS SDK's behaviour and avoiding cold-start tail drag - // on the JVM (without reset, JIT-warmup outliers stick to p99 for - // the rest of the run). + + + + + + + + + + ObservableDoubleMeasurement p50Observer = meter.gaugeBuilder("sdk.operation.latency.p50.seconds") .setUnit("s") .setDescription("p50 operation latency in seconds") @@ -207,23 +182,22 @@ public static Metrics create(Config config) { p50Observer, p95Observer, p99Observer ); - Metrics metrics = new Metrics( + return new Metrics( provider, ref, operationsTotal, errorsTotal, retryAttemptsTotal, - pendingOperations + pendingOperations, + histograms ); - metrics.histograms.putAll(histograms); - return metrics; } private static String metricsEndpoint(String otlpEndpoint) { - // OTLP HTTP exporter expects the full /v1/metrics path. The SLO action - // sets OTEL_EXPORTER_OTLP_ENDPOINT to the base URL (e.g. - // http://ydb-prometheus:9090/api/v1/otlp), so we append the suffix - // unless the user has already provided it. + + + + String trimmed = otlpEndpoint.endsWith("/") ? 
otlpEndpoint.substring(0, otlpEndpoint.length() - 1) : otlpEndpoint; @@ -233,10 +207,8 @@ private static String metricsEndpoint(String otlpEndpoint) { return trimmed + "/v1/metrics"; } - /* - * Records a started operation and returns a span used to record the - * outcome. - */ + + public Span startOperation(OperationType type) { pendingOperations.add(1, Attributes.of( ATTR_REF, ref, @@ -245,10 +217,8 @@ public Span startOperation(OperationType type) { return new Span(this, type, System.nanoTime()); } - /** - * Forces a final flush of pending metrics. Should be called before exit - * to make sure the report action sees the last seconds of data. - */ + + public void flush() { meterProvider.forceFlush().join(10, TimeUnit.SECONDS); } @@ -278,12 +248,12 @@ private void recordOutcome( ATTR_OPERATION_TYPE, type.label() )); - // Latency is recorded only for successful operations. Failed - // operations spend most of their time inside the retry budget / - // timeout machinery, so their latency reflects the retry policy - // rather than the SDK's performance. Mixing those samples into the - // percentile gauges produces noisy spikes during chaos scenarios - // and tells us nothing actionable. + + + + + + if (status == OperationStatus.SUCCESS) { Histogram histogram = histograms.computeIfAbsent(type, k -> newHistogram()); long clamped = Math.max(HDR_MIN_MICROS, Math.min(HDR_MAX_MICROS, latencyMicros)); @@ -297,13 +267,8 @@ private void recordOutcome( } } - /** - * Observes p50/p95/p99 for every populated histogram in one go and then - * resets the histogram. Called from a single OTel batch callback so all - * three percentiles are read from a consistent snapshot — without that, - * a concurrent record could land between the p50 and p99 reads and - * produce inconsistent values across gauges. 
- */ + + private static void observeAndResetPercentiles( Map histograms, String ref, @@ -313,23 +278,21 @@ private static void observeAndResetPercentiles( ) { for (Map.Entry entry : histograms.entrySet()) { OperationType type = entry.getKey(); - Histogram histogram = entry.getValue(); + Histogram live = entry.getValue(); - long p50Micros; - long p95Micros; - long p99Micros; - if (histogram.getTotalCount() == 0) { + Histogram snapshot = live.copy(); + live.reset(); + if (snapshot.getTotalCount() == 0) { continue; } - p50Micros = histogram.getValueAtPercentile(50.0); - p95Micros = histogram.getValueAtPercentile(95.0); - p99Micros = histogram.getValueAtPercentile(99.0); - histogram.reset(); - - // Percentile gauges are always tagged with operation_status="success" - // because we only record successful samples (see recordOutcome). - // The SLO action's metrics.yaml filters on this same label, so the - // gauges line up with what the report expects. + long p50Micros = snapshot.getValueAtPercentile(50.0); + long p95Micros = snapshot.getValueAtPercentile(95.0); + long p99Micros = snapshot.getValueAtPercentile(99.0); + + + + + Attributes attrs = Attributes.of( ATTR_REF, ref, ATTR_OPERATION_TYPE, type.label(), @@ -345,9 +308,6 @@ private static Histogram newHistogram() { return new AtomicHistogram(HDR_MIN_MICROS, HDR_MAX_MICROS, HDR_SIGNIFICANT_DIGITS); } - /** - * One in-flight operation. Call exactly one of the {@code finish} methods. 
- */ public static final class Span { private final Metrics metrics; private final OperationType type; diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvClient.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvClient.java new file mode 100644 index 0000000..25371f6 --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvClient.java @@ -0,0 +1,21 @@ +package tech.ydb.slo.core.kv; + +public interface KvClient extends AutoCloseable { + + + + void createTable(KvWorkloadParams params, String tablePath) throws Exception; + + + + void dropTable(String tablePath); + + + + KvSession openSession() throws Exception; + + @Override + default void close() throws Exception { + + } +} diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSchema.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSchema.java new file mode 100644 index 0000000..b3e6eae --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSchema.java @@ -0,0 +1,36 @@ +package tech.ydb.slo.core.kv; + +public final class KvSchema { + + public static final String CREATE_TABLE_TEMPLATE = "" + + "CREATE TABLE IF NOT EXISTS `%s` (" + + " hash Uint64," + + " id Uint64," + + " payload_str Utf8," + + " payload_double Double," + + " payload_timestamp Timestamp," + + " payload_hash Uint64," + + " PRIMARY KEY (hash, id)" + + ") WITH (" + + " UNIFORM_PARTITIONS = %d," + + " AUTO_PARTITIONING_BY_SIZE = ENABLED," + + " AUTO_PARTITIONING_PARTITION_SIZE_MB = %d," + + " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d," + + " AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = %d" + + ")"; + + public static final String DROP_TABLE_TEMPLATE = "DROP TABLE IF EXISTS `%s`"; + + public static final String UPSERT_TEMPLATE = "" + + "UPSERT INTO `%s` (" + + " hash, id, payload_str, payload_double, payload_timestamp, payload_hash" + + ") VALUES (?, ?, ?, ?, ?, ?)"; + + public static final String SELECT_TEMPLATE = "" + + "SELECT id, payload_str, payload_double, 
payload_timestamp, payload_hash" + + " FROM `%s`" + + " WHERE id = ? AND hash = ?"; + + private KvSchema() { + } +} diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSession.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSession.java new file mode 100644 index 0000000..4c7dab3 --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvSession.java @@ -0,0 +1,17 @@ +package tech.ydb.slo.core.kv; + +public interface KvSession extends AutoCloseable { + + + + OpOutcome read(long id, int timeoutMs); + + + + OpOutcome write(Row row, int timeoutMs); + + @Override + default void close() { + + } +} diff --git a/slo/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvWorkloadParams.java similarity index 73% rename from slo/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java rename to slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvWorkloadParams.java index 9adf6e7..2fbe33c 100644 --- a/slo/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/KvWorkloadParams.java @@ -1,15 +1,7 @@ -package tech.ydb.slo.kv; +package tech.ydb.slo.core.kv; import com.beust.jcommander.Parameter; -/** - * Tunable parameters for the KV workload. - * - *

Defaults match the SLO workloads in the Go and JavaScript SDKs so the - * three runs are comparable. JCommander annotations let the operator override - * any field from the command line, e.g. - * {@code --read-rps 500 --write-rps 50}. - */ @SuppressWarnings("FieldMayBeFinal") public final class KvWorkloadParams { @@ -62,11 +54,29 @@ public final class KvWorkloadParams { private int maxPartitionCount = 1_000; @Parameter( - names = {"--duration"}, + names = {"--duration", "--time"}, description = "Run duration in seconds (overrides WORKLOAD_DURATION when > 0)" ) private int durationSeconds = 0; + @Parameter( + names = {"--shutdown-time"}, + description = "Extra seconds, on top of --duration, given to in-flight ops before force-shutdown" + ) + private int shutdownTimeSeconds = 30; + + @Parameter( + names = {"--max-attempts"}, + description = "Maximum total attempts per operation (initial + retries)" + ) + private int maxAttempts = 10; + + @Parameter( + names = {"--max-workers"}, + description = "Hard cap on the number of worker threads per operation type" + ) + private int maxWorkers = 64; + public int readRps() { return readRps; } @@ -99,12 +109,8 @@ public int maxPartitionCount() { return maxPartitionCount; } - /** - * Effective run duration. If the CLI flag was omitted (left at 0), falls - * back to the value supplied via the {@code WORKLOAD_DURATION} environment - * variable through {@code Config}. 
- * @return Effective run duration value - */ + + public int durationSeconds() { return durationSeconds; } @@ -112,4 +118,16 @@ public int durationSeconds() { public void setDurationSeconds(int durationSeconds) { this.durationSeconds = durationSeconds; } + + public int shutdownTimeSeconds() { + return shutdownTimeSeconds; + } + + public int maxAttempts() { + return maxAttempts; + } + + public int maxWorkers() { + return maxWorkers; + } } diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/OpOutcome.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/OpOutcome.java new file mode 100644 index 0000000..34bda8a --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/OpOutcome.java @@ -0,0 +1,37 @@ +package tech.ydb.slo.core.kv; + +public final class OpOutcome { + private final boolean success; + private final int retryAttempts; + private final String errorKind; + + private OpOutcome(boolean success, int retryAttempts, String errorKind) { + this.success = success; + this.retryAttempts = Math.max(0, retryAttempts); + this.errorKind = errorKind; + } + + + + public static OpOutcome success(int retryAttempts) { + return new OpOutcome(true, retryAttempts, null); + } + + + + public static OpOutcome error(int retryAttempts, String errorKind) { + return new OpOutcome(false, retryAttempts, errorKind); + } + + public boolean isSuccess() { + return success; + } + + public int retryAttempts() { + return retryAttempts; + } + + public String errorKind() { + return errorKind; + } +} diff --git a/slo/src/main/java/tech/ydb/slo/kv/Row.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/Row.java similarity index 60% rename from slo/src/main/java/tech/ydb/slo/kv/Row.java rename to slo-workload/core/src/main/java/tech/ydb/slo/core/kv/Row.java index 9f1c292..1d10f08 100644 --- a/slo/src/main/java/tech/ydb/slo/kv/Row.java +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/Row.java @@ -1,24 +1,7 @@ -package tech.ydb.slo.kv; +package 
tech.ydb.slo.core.kv; import java.time.Instant; -/** - * A single row of the KV workload table. - * - *

The schema mirrors the one used by SLO workloads in other YDB SDKs - * (Go, JavaScript) so reports across SDKs are comparable: - *

- * hash              Uint64 (primary key, computed server-side via Digest::NumericHash(id))
- * id                Uint64 (primary key)
- * payload_str       Utf8
- * payload_double    Double
- * payload_timestamp Timestamp
- * payload_hash      Uint64
- * 
- * - *

The {@code hash} column is computed by YDB at insert time via - * {@code Digest::NumericHash($id)}, so we don't carry it on the client. - */ public final class Row { private final long id; private final String payloadStr; diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/RowGenerator.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/RowGenerator.java similarity index 71% rename from slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/RowGenerator.java rename to slo-workload/core/src/main/java/tech/ydb/slo/core/kv/RowGenerator.java index 04fdb9d..60765b0 100644 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/RowGenerator.java +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/RowGenerator.java @@ -1,17 +1,10 @@ -package tech.ydb.slo.kv; +package tech.ydb.slo.core.kv; import java.time.Instant; import java.util.Base64; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicLong; -/** - * Generates rows for the KV workload. - * - *

Each row gets a monotonically increasing {@code id} and a random payload. - * The format mirrors the SLO workloads in the Go and JS SDKs so the resulting - * tables are comparable. - */ public final class RowGenerator { private static final int MIN_PAYLOAD_LENGTH = 20; private static final int MAX_PAYLOAD_LENGTH = 40; @@ -22,20 +15,15 @@ public RowGenerator(long startId) { this.nextId = new AtomicLong(startId); } - /** - * Generates a new row with a fresh monotonically increasing id. - * @return a new row - */ + + public Row generate() { long id = nextId.getAndIncrement(); return generate(id); } - /** - * Generates a row with an explicit id (used during prefill to control IDs). - * @param id row id - * @return a new row - */ + + public static Row generate(long id) { long payloadHash = ThreadLocalRandom.current().nextLong(); double payloadDouble = ThreadLocalRandom.current().nextDouble(); @@ -52,4 +40,13 @@ private static String randomPayloadString() { ThreadLocalRandom.current().nextBytes(bytes); return Base64.getEncoder().withoutPadding().encodeToString(bytes); } + + + + public static long numericHash(long id) { + long z = id + 0x9E3779B97F4A7C15L; + z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L; + z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL; + return z ^ (z >>> 31); + } } diff --git a/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/WorkloadRunner.java b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/WorkloadRunner.java new file mode 100644 index 0000000..b3a5c69 --- /dev/null +++ b/slo-workload/core/src/main/java/tech/ydb/slo/core/kv/WorkloadRunner.java @@ -0,0 +1,309 @@ +package tech.ydb.slo.core.kv; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import 
com.google.common.util.concurrent.RateLimiter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import tech.ydb.slo.core.Metrics; + +public final class WorkloadRunner { + private static final Logger logger = LoggerFactory.getLogger(WorkloadRunner.class); + + + + private static final double PREFILL_SUCCESS_THRESHOLD = 0.5; + + private final KvClient client; + private final Metrics metrics; + private final KvWorkloadParams params; + private final String tablePath; + private final RowGenerator generator; + + public WorkloadRunner(KvClient client, Metrics metrics, KvWorkloadParams params, String tablePath) { + this.client = client; + this.metrics = metrics; + this.params = params; + this.tablePath = tablePath; + this.generator = new RowGenerator(params.prefillCount()); + } + + + + public void setup() throws Exception { + logger.info("creating table {}", tablePath); + client.createTable(params, tablePath); + logger.info("table {} created", tablePath); + + if (params.prefillCount() <= 0) { + logger.info("prefill count <= 0, skipping prefill"); + return; + } + + logger.info("prefilling {} rows into {}", params.prefillCount(), tablePath); + int parallelism = Math.min(params.maxWorkers(), Math.max(1, params.prefillCount())); + ExecutorService prefillPool = Executors.newFixedThreadPool( + parallelism, namedThreadFactory("slo-prefill-") + ); + AtomicLong nextId = new AtomicLong(0); + AtomicInteger failed = new AtomicInteger(); + AtomicInteger sessionOpenFailures = new AtomicInteger(); + try { + for (int w = 0; w < parallelism; w++) { + prefillPool.execute(() -> { + try (KvSession session = client.openSession()) { + long id; + while ((id = nextId.getAndIncrement()) < params.prefillCount()) { + OpOutcome outcome = session.write( + RowGenerator.generate(id), params.writeTimeoutMs()); + if (!outcome.isSuccess()) { + int f = failed.incrementAndGet(); + if (f <= 5) { + logger.warn("prefill row {} failed: {}", id, outcome.errorKind()); + } + } + } + } catch (Exception 
e) { + + + + + + sessionOpenFailures.incrementAndGet(); + long firstUnclaimed = nextId.getAndSet(params.prefillCount()); + if (firstUnclaimed < params.prefillCount()) { + failed.addAndGet((int) (params.prefillCount() - firstUnclaimed)); + } + logger.error("prefill worker failed to open session: {}", e.toString()); + } + }); + } + } finally { + prefillPool.shutdown(); + if (!prefillPool.awaitTermination(5, TimeUnit.MINUTES)) { + prefillPool.shutdownNow(); + } + } + + int total = params.prefillCount(); + int failedCount = failed.get(); + int succeeded = total - failedCount; + if (sessionOpenFailures.get() == parallelism) { + throw new IllegalStateException( + "all " + parallelism + " prefill workers failed to open a session — " + + "check YDB connectivity and credentials" + ); + } + if (succeeded < total * PREFILL_SUCCESS_THRESHOLD) { + throw new IllegalStateException( + "prefill completed with " + failedCount + " failed rows out of " + total + + " (success rate < " + (int) (PREFILL_SUCCESS_THRESHOLD * 100) + + "%); reads would target an empty key-space, refusing to run" + ); + } + if (failedCount > 0) { + logger.warn("prefill completed with {} failed rows out of {}", failedCount, total); + } else { + logger.info("prefill completed"); + } + } + + + + public void run() throws InterruptedException { + long durationSeconds = params.durationSeconds(); + long endNanos = durationSeconds > 0 + ? 
System.nanoTime() + TimeUnit.SECONDS.toNanos(durationSeconds) + : Long.MAX_VALUE; + + + + + AtomicLong writesIssued = new AtomicLong(); + + int readWorkers = workerCount(params.readRps()); + int writeWorkers = workerCount(params.writeRps()); + + if (readWorkers == 0 && writeWorkers == 0) { + logger.warn("both read and write RPS are <= 0, run phase has nothing to do"); + return; + } + + ExecutorService readPool = null; + ExecutorService writePool = null; + try { + if (readWorkers > 0) { + readPool = Executors.newFixedThreadPool(readWorkers, namedThreadFactory("slo-read-")); + RateLimiter readLimiter = RateLimiter.create(params.readRps()); + for (int i = 0; i < readWorkers; i++) { + readPool.execute(() -> readWorkerLoop(endNanos, readLimiter, writesIssued)); + } + } else { + logger.info("read RPS <= 0, skipping read workers"); + } + + if (writeWorkers > 0) { + writePool = Executors.newFixedThreadPool(writeWorkers, namedThreadFactory("slo-write-")); + RateLimiter writeLimiter = RateLimiter.create(params.writeRps()); + for (int i = 0; i < writeWorkers; i++) { + writePool.execute(() -> writeWorkerLoop(endNanos, writeLimiter, writesIssued)); + } + } else { + logger.info("write RPS <= 0, skipping write workers"); + } + + + long graceNanos = TimeUnit.SECONDS.toNanos(params.shutdownTimeSeconds()); + long waitNanos = durationSeconds > 0 + ? 
Math.max(0L, endNanos - System.nanoTime()) + graceNanos + : Long.MAX_VALUE; + + if (readPool != null) { + readPool.shutdown(); + } + if (writePool != null) { + writePool.shutdown(); + } + + if (readPool != null) { + long started = System.nanoTime(); + if (!readPool.awaitTermination(waitNanos, TimeUnit.NANOSECONDS)) { + logger.warn("read pool did not drain within deadline, forcing shutdown"); + readPool.shutdownNow(); + } + waitNanos = Math.max(0L, waitNanos - (System.nanoTime() - started)); + } + if (writePool != null) { + if (!writePool.awaitTermination(waitNanos, TimeUnit.NANOSECONDS)) { + logger.warn("write pool did not drain within deadline, forcing shutdown"); + writePool.shutdownNow(); + } + } + } finally { + forceShutdown(readPool, "read pool"); + forceShutdown(writePool, "write pool"); + } + } + + + + public void teardown() { + logger.info("dropping table {}", tablePath); + client.dropTable(tablePath); + } + + + + private void readWorkerLoop(long endNanos, RateLimiter limiter, AtomicLong writesIssued) { + try (KvSession session = client.openSession()) { + while (!Thread.currentThread().isInterrupted()) { + long remaining = endNanos - System.nanoTime(); + if (remaining <= 0) { + return; + } + + + + if (!limiter.tryAcquire(remaining, TimeUnit.NANOSECONDS)) { + return; + } + try { + readOnce(session, writesIssued.get()); + } catch (Throwable t) { + logger.warn("read op threw unexpectedly: {}", t.toString()); + } + } + } catch (Exception e) { + logger.warn("read worker failed to open session: {}", e.toString()); + } + } + + private void writeWorkerLoop(long endNanos, RateLimiter limiter, AtomicLong writesIssued) { + try (KvSession session = client.openSession()) { + while (!Thread.currentThread().isInterrupted()) { + long remaining = endNanos - System.nanoTime(); + if (remaining <= 0) { + return; + } + if (!limiter.tryAcquire(remaining, TimeUnit.NANOSECONDS)) { + return; + } + try { + writeOnce(session, generator.generate()); + writesIssued.incrementAndGet(); + 
} catch (Throwable t) { + logger.warn("write op threw unexpectedly: {}", t.toString()); + } + } + } catch (Exception e) { + logger.warn("write worker failed to open session: {}", e.toString()); + } + } + + + + private void readOnce(KvSession session, long writesObserved) { + long upperBound = Math.max(1L, params.prefillCount() + writesObserved); + long id = ThreadLocalRandom.current().nextLong(upperBound); + + Metrics.Span span = metrics.startOperation(Metrics.OperationType.READ); + OpOutcome outcome = session.read(id, params.readTimeoutMs()); + if (outcome.isSuccess()) { + span.finishSuccess(outcome.retryAttempts()); + } else { + span.finishError(outcome.retryAttempts(), outcome.errorKind()); + logger.debug("read {} failed: {}", id, outcome.errorKind()); + } + } + + private void writeOnce(KvSession session, Row row) { + Metrics.Span span = metrics.startOperation(Metrics.OperationType.WRITE); + OpOutcome outcome = session.write(row, params.writeTimeoutMs()); + if (outcome.isSuccess()) { + span.finishSuccess(outcome.retryAttempts()); + } else { + span.finishError(outcome.retryAttempts(), outcome.errorKind()); + logger.debug("write {} failed: {}", row.id(), outcome.errorKind()); + } + } + + + + private int workerCount(int rps) { + if (rps <= 0) { + return 0; + } + return Math.min(params.maxWorkers(), Math.max(1, rps)); + } + + private static ThreadFactory namedThreadFactory(String prefix) { + AtomicInteger counter = new AtomicInteger(); + return r -> { + Thread t = new Thread(r, prefix + counter.getAndIncrement()); + t.setDaemon(true); + return t; + }; + } + + private static void forceShutdown(ExecutorService pool, String name) { + if (pool == null || pool.isTerminated()) { + return; + } + logger.warn("{} still active in cleanup, forcing shutdown", name); + pool.shutdownNow(); + try { + if (!pool.awaitTermination(5, TimeUnit.SECONDS)) { + logger.warn("{} did not terminate after shutdownNow", name); + } + } catch (InterruptedException e) { + 
Thread.currentThread().interrupt(); + } + } +} diff --git a/slo-workload/jdbc/Dockerfile b/slo-workload/jdbc/Dockerfile index 943be48..1331818 100644 --- a/slo-workload/jdbc/Dockerfile +++ b/slo-workload/jdbc/Dockerfile @@ -5,7 +5,9 @@ # and run parameters from environment variables and pushes OTLP metrics to the # endpoint the action provides. # -# Build context: the `ydb-java-examples` repository root. +# Build context: the `ydb-java-examples` repository root. For ydb-jdbc-driver +# CI the context may also contain `./ydb-jdbc-driver`; when present, the driver +# is installed from source and the workload is pinned to that exact version. # # Optional build args: # MAVEN_IMAGE Builder image. Defaults to `maven:3.9-eclipse-temurin-17`. @@ -23,9 +25,26 @@ COPY . /src ARG YDB_JDBC_VERSION="" -# Pin the JDBC driver version under test when provided, then build only the -# workload module (and the parent context it needs). -RUN if [ -n "${YDB_JDBC_VERSION}" ]; then \ +# Install the JDBC driver from source when the driver checkout is present in +# the build context. Otherwise use YDB_JDBC_VERSION when provided. 
+RUN if [ -d /src/ydb-jdbc-driver ]; then \ + cd /src/ydb-jdbc-driver && \ + mvn -B -q \ + -DskipTests \ + -Dmaven.javadoc.skip=true \ + -Dmaven.source.skip=true \ + -Dgpg.skip=true \ + install && \ + mvn -B -q help:evaluate -Dexpression=project.version -DforceStdout > /tmp/ydb-jdbc.version && \ + YDB_JDBC_VERSION="$(cat /tmp/ydb-jdbc.version)" && \ + cd /src && \ + echo "Pinning ydb-jdbc-driver to source-built ${YDB_JDBC_VERSION}" && \ + mvn -B -q versions:set-property \ + -Dproperty=ydb.jdbc.version \ + -DnewVersion="${YDB_JDBC_VERSION}" \ + -DgenerateBackupPoms=false \ + -pl slo-workload ; \ + elif [ -n "${YDB_JDBC_VERSION}" ]; then \ echo "Pinning ydb-jdbc-driver to ${YDB_JDBC_VERSION}" && \ mvn -B -q versions:set-property \ -Dproperty=ydb.jdbc.version \ diff --git a/slo-workload/jdbc/README.md b/slo-workload/jdbc/README.md index 19ee337..f28d8c9 100644 --- a/slo-workload/jdbc/README.md +++ b/slo-workload/jdbc/README.md @@ -40,14 +40,21 @@ relying on server-side YQL builtins inside parameterized statements. ## Retries -Operations are retried with exponential backoff (up to 10 attempts). An error -is considered retryable when the driver throws a `SQLRecoverableException` or -`SQLTransientException` (which covers the driver's -`YdbRetryableException`, `YdbConditionallyRetryableException`, -`YdbUnavailbaleException` and `YdbTimeoutException`). The number of retries is -recorded in `sdk_retry_attempts_total`, and the failure reason is reported via -the `error_kind` label on `sdk_errors_total` (using the YDB status code when -available). +Reads (`SELECT`) and writes (`UPSERT`) are both idempotent, so retries decide +based on the YDB status code: a `YdbStatusable` whose +`StatusCode.isRetryable(true)` is true is retried (this covers ABORTED, +OVERLOADED, UNAVAILABLE, BAD_SESSION, SESSION_BUSY, UNDETERMINED on idempotent +operations). Anything else falls back to the JDBC marker types +`SQLRecoverableException` / `SQLTransientException`. 
The retry attempt cap is +configurable via `--max-attempts` (default 10) and backoff is capped at 1s. +Connection-level errors (`SQLRecoverableException`, +`SQLTransientConnectionException`, `SQLNonTransientConnectionException`) +invalidate the worker's cached connection before the next attempt opens a +fresh one. + +The number of retries is recorded in `sdk_retry_attempts_total`, and the +failure reason is reported via the `error_kind` label on `sdk_errors_total` +(using the YDB status code when available). ## Files @@ -58,18 +65,18 @@ jdbc/ ├── README.md └── src/main/ ├── java/tech/ydb/slo/ - │ ├── Config.java Reads env vars, resolves the JDBC URL - │ ├── Main.java Entry point - │ ├── Metrics.java OTLP metrics + HDR histograms - │ └── kv/ - │ ├── KvWorkload.java Setup/run/teardown loop over JDBC - │ ├── KvWorkloadParams.java JCommander-bound CLI flags - │ ├── Row.java Row data class - │ └── RowGenerator.java Random payload generator + │ ├── Main.java Entry point, loads the JDBC driver + │ └── jdbc/ + │ └── JdbcKvClient.java KvClient: connection lifecycle + retry └── resources/ └── log4j2.xml Console logging config ``` +The shared harness (`Config`, `Metrics`, `KvWorkloadParams`, `Row`, +`RowGenerator`, `WorkloadRunner`, `Launcher`) lives in +[`../core`](../core/src/main/java/tech/ydb/slo/core), so every implementation +emits the same metric contract. 
+ ## Building and running locally ```bash diff --git a/slo-workload/jdbc/pom.xml b/slo-workload/jdbc/pom.xml index c4b174e..20475df 100644 --- a/slo-workload/jdbc/pom.xml +++ b/slo-workload/jdbc/pom.xml @@ -11,55 +11,22 @@ ../pom.xml - jdbc + slo-workload-jdbc jar JDBC SLO workload SLO workload exercising the YDB JDBC driver, compatible with ydb-slo-action - - tech.ydb.jdbc - ydb-jdbc-driver - - - - - com.beust - jcommander - - - - - com.google.guava - guava - - - - - org.hdrhistogram - HdrHistogram + tech.ydb.examples + slo-workload-core - - io.opentelemetry - opentelemetry-api - - - io.opentelemetry - opentelemetry-sdk - - - io.opentelemetry - opentelemetry-sdk-metrics - - - io.opentelemetry - opentelemetry-exporter-otlp + tech.ydb.jdbc + ydb-jdbc-driver - org.apache.logging.log4j log4j-slf4j2-impl @@ -72,18 +39,11 @@ org.apache.maven.plugins maven-compiler-plugin - - 17 - - - org.apache.maven.plugins maven-dependency-plugin - org.apache.maven.plugins maven-jar-plugin diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Config.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/Config.java deleted file mode 100644 index 2b1ca3a..0000000 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Config.java +++ /dev/null @@ -1,148 +0,0 @@ -package tech.ydb.slo; - -/** - * Configuration for the JDBC SLO workload, populated from environment - * variables provided by the YDB SLO action runtime. - * - *

The action sets these variables on the workload container: - *

    - *
  • {@code YDB_CONNECTION_STRING} or {@code YDB_ENDPOINT} + {@code YDB_DATABASE} — YDB connection
  • - *
  • {@code WORKLOAD_REF} — value used as the {@code ref} label on all metrics
  • - *
  • {@code WORKLOAD_NAME} — workload name (also used as part of the table path)
  • - *
  • {@code WORKLOAD_DURATION} — workload run duration in seconds (0 = unlimited)
  • - *
  • {@code OTEL_EXPORTER_OTLP_ENDPOINT} — OTLP endpoint for pushing metrics
  • - *
- * - *

Because the component under test here is the JDBC driver, the - * YDB connection is expressed as a JDBC URL ({@code jdbc:ydb:...}). The URL is - * resolved in this order: {@code YDB_JDBC_URL} (used verbatim), then - * {@code YDB_CONNECTION_STRING} (prefixed with {@code jdbc:ydb:}), then - * {@code YDB_ENDPOINT} + {@code YDB_DATABASE}. - */ -public final class Config { - private final String jdbcUrl; - private final String token; - private final String ref; - private final String workloadName; - private final int durationSeconds; - private final String otlpEndpoint; - - private Config( - String jdbcUrl, - String token, - String ref, - String workloadName, - int durationSeconds, - String otlpEndpoint - ) { - this.jdbcUrl = jdbcUrl; - this.token = token; - this.ref = ref; - this.workloadName = workloadName; - this.durationSeconds = durationSeconds; - this.otlpEndpoint = otlpEndpoint; - } - - public String jdbcUrl() { - return jdbcUrl; - } - - public String token() { - return token; - } - - public String ref() { - return ref; - } - - public String workloadName() { - return workloadName; - } - - public int durationSeconds() { - return durationSeconds; - } - - public String otlpEndpoint() { - return otlpEndpoint; - } - - /** - * Loads configuration from environment variables. 
- * - * @return configuration instance - * @throws IllegalStateException if required variables are missing or invalid - */ - public static Config fromEnv() { - String jdbcUrl = resolveJdbcUrl(); - if (jdbcUrl == null || jdbcUrl.isEmpty()) { - throw new IllegalStateException( - "YDB connection is not configured: set YDB_JDBC_URL, " - + "YDB_CONNECTION_STRING or YDB_ENDPOINT + YDB_DATABASE" - ); - } - - String token = envOrDefault("YDB_TOKEN", ""); - String ref = envOrDefault("WORKLOAD_REF", "unknown"); - String workloadName = envOrDefault("WORKLOAD_NAME", "java-slo-jdbc-workload"); - int durationSeconds = parseInt(envOrDefault("WORKLOAD_DURATION", "600"), 600); - String otlpEndpoint = envOrDefault("OTEL_EXPORTER_OTLP_ENDPOINT", ""); - - return new Config(jdbcUrl, token, ref, workloadName, durationSeconds, otlpEndpoint); - } - - private static String resolveJdbcUrl() { - String explicit = System.getenv("YDB_JDBC_URL"); - if (explicit != null && !explicit.isEmpty()) { - return explicit; - } - - String connectionString = System.getenv("YDB_CONNECTION_STRING"); - if (connectionString != null && !connectionString.isEmpty()) { - return toJdbcUrl(connectionString); - } - - String endpoint = System.getenv("YDB_ENDPOINT"); - String database = System.getenv("YDB_DATABASE"); - if (endpoint == null || endpoint.isEmpty() || database == null || database.isEmpty()) { - return null; - } - return toJdbcUrl(composeConnectionString(endpoint, database)); - } - - /** - * Turns a YDB connection string ({@code grpc://host:port/database}) into a - * JDBC URL understood by the YDB JDBC driver. If the value already starts - * with {@code jdbc:}, it is returned unchanged. 
- */ - private static String toJdbcUrl(String connectionString) { - if (connectionString.startsWith("jdbc:")) { - return connectionString; - } - return "jdbc:ydb:" + connectionString; - } - - private static String composeConnectionString(String endpoint, String database) { - // Compose a connection string in the form grpc://host:port/database. - if (endpoint.endsWith("/") && database.startsWith("/")) { - return endpoint + database.substring(1); - } - if (!endpoint.endsWith("/") && !database.startsWith("/")) { - return endpoint + "/" + database; - } - return endpoint + database; - } - - private static String envOrDefault(String name, String defaultValue) { - String value = System.getenv(name); - return (value == null || value.isEmpty()) ? defaultValue : value; - } - - private static int parseInt(String value, int defaultValue) { - try { - return Integer.parseInt(value); - } catch (NumberFormatException e) { - return defaultValue; - } - } -} diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Main.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/Main.java index 997f8b9..b370983 100644 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/Main.java +++ b/slo-workload/jdbc/src/main/java/tech/ydb/slo/Main.java @@ -1,160 +1,30 @@ package tech.ydb.slo; -import java.util.Properties; +import tech.ydb.slo.core.Launcher; +import tech.ydb.slo.jdbc.JdbcKvClient; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.ParameterException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import tech.ydb.slo.kv.KvWorkload; -import tech.ydb.slo.kv.KvWorkloadParams; - -/** - * Entry point of the JDBC SLO workload. - * - *

Reads connection details and run parameters from environment variables - * (see {@link Config}), parses workload-specific flags from the command line - * (see {@link KvWorkloadParams}), and runs the KV workload phases — setup, - * run, teardown — pushing metrics to the OTLP endpoint configured by the YDB - * SLO action runtime. - * - *

Exit codes: - *

    - *
  • {@code 0} — workload completed successfully
  • - *
  • {@code 1} — workload failed (an unhandled exception or interrupted run)
  • - *
  • {@code 2} — invalid CLI arguments or environment configuration
  • - *
- */ public final class Main { - private static final Logger logger = LoggerFactory.getLogger(Main.class); - private static final String YDB_DRIVER_CLASS = "tech.ydb.jdbc.YdbDriver"; private Main() { - // utility class + } public static void main(String[] args) { - Config config; try { - config = Config.fromEnv(); - } catch (IllegalStateException e) { - logger.error("invalid environment configuration: {}", e.getMessage()); - System.exit(2); - return; - } - - KvWorkloadParams params = new KvWorkloadParams(); - try { - JCommander.newBuilder() - .programName("ydb-slo-jdbc-workload") - .acceptUnknownOptions(true) - .addObject(params) - .build() - .parse(args); - } catch (ParameterException e) { - logger.error("invalid CLI arguments: {}", e.getMessage()); - System.exit(2); - return; - } - - // CLI duration takes precedence over WORKLOAD_DURATION when supplied. - if (params.durationSeconds() <= 0) { - params.setDurationSeconds(config.durationSeconds()); - } - - try { - // The driver auto-registers via the JDBC SPI, but loading it - // explicitly fails fast with a clear message if it's missing. Class.forName(YDB_DRIVER_CLASS); } catch (ClassNotFoundException e) { - logger.error("YDB JDBC driver not found on classpath: {}", YDB_DRIVER_CLASS); + org.slf4j.LoggerFactory.getLogger(Main.class) + .error("YDB JDBC driver not found on classpath: {}", YDB_DRIVER_CLASS); System.exit(1); return; } - logger.info("starting SLO workload: name={}, ref={}, duration={}s, readRps={}, writeRps={}, url={}", - config.workloadName(), - config.ref(), - params.durationSeconds(), - params.readRps(), - params.writeRps(), - config.jdbcUrl()); - - // The table path embeds workload name and ref so concurrent runs of - // the current and baseline images don't step on each other. Both - // components are sanitized: WORKLOAD_NAME comes from the action input - // and is normally already safe, but we don't trust user input to be - // a valid YDB identifier. 
- String tablePath = sanitize(config.workloadName()) + "_" + sanitize(config.ref()); - - Properties connectionProperties = new Properties(); - if (config.token() != null && !config.token().isEmpty()) { - connectionProperties.setProperty("token", config.token()); - } - - int exitCode = 0; - Metrics metrics = Metrics.create(config); - KvWorkload workload = new KvWorkload( - config.jdbcUrl(), connectionProperties, metrics, params, tablePath + Launcher.launch( + "ydb-slo-jdbc-workload", + "java-jdbc-kv", + args, + (config, params, tablePath) -> new JdbcKvClient(config, params, tablePath) ); - - try { - workload.setup(); - workload.run(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - logger.warn("workload interrupted"); - exitCode = 1; - } catch (Throwable t) { - logger.error("workload failed", t); - exitCode = 1; - } finally { - try { - workload.teardown(); - } catch (Throwable t) { - logger.warn("teardown failed", t); - } - - try { - metrics.flush(); - } catch (Throwable t) { - logger.warn("metrics flush failed", t); - } - - closeQuietly(metrics, "metrics"); - } - - System.exit(exitCode); - } - - private static void closeQuietly(AutoCloseable closeable, String name) { - if (closeable == null) { - return; - } - try { - closeable.close(); - } catch (Throwable t) { - logger.warn("failed to close {}: {}", name, t.toString()); - } - } - - /** - * Replaces characters that aren't valid in YDB table names with underscores. - * Refs from CI may include slashes ({@code release/1.2}) or dots, which - * the action permits in metrics labels but YDB rejects in table paths. 
- */ - private static String sanitize(String value) { - StringBuilder sb = new StringBuilder(value.length()); - for (int i = 0; i < value.length(); i++) { - char c = value.charAt(i); - if (Character.isLetterOrDigit(c) || c == '_') { - sb.append(c); - } else { - sb.append('_'); - } - } - return sb.toString(); } } diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/jdbc/JdbcKvClient.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/jdbc/JdbcKvClient.java new file mode 100644 index 0000000..970ff60 --- /dev/null +++ b/slo-workload/jdbc/src/main/java/tech/ydb/slo/jdbc/JdbcKvClient.java @@ -0,0 +1,268 @@ +package tech.ydb.slo.jdbc; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLNonTransientConnectionException; +import java.sql.SQLRecoverableException; +import java.sql.SQLTransientConnectionException; +import java.sql.SQLTransientException; +import java.sql.Statement; +import java.sql.Timestamp; +import java.util.Properties; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import tech.ydb.jdbc.exception.YdbStatusable; +import tech.ydb.slo.core.Config; +import tech.ydb.slo.core.kv.KvClient; +import tech.ydb.slo.core.kv.KvSchema; +import tech.ydb.slo.core.kv.KvSession; +import tech.ydb.slo.core.kv.KvWorkloadParams; +import tech.ydb.slo.core.kv.OpOutcome; +import tech.ydb.slo.core.kv.Row; +import tech.ydb.slo.core.kv.RowGenerator; + +public final class JdbcKvClient implements KvClient { + private static final Logger logger = LoggerFactory.getLogger(JdbcKvClient.class); + + private static final long INITIAL_BACKOFF_MS = 10L; + private static final long MAX_BACKOFF_MS = 1_000L; + + private final String jdbcUrl; + private final Properties connectionProperties; + private final String tablePath; + private final int maxAttempts; + + public JdbcKvClient(Config config, KvWorkloadParams params, String tablePath) { + this.jdbcUrl = 
config.jdbcUrl(); + this.tablePath = tablePath; + this.maxAttempts = Math.max(1, params.maxAttempts()); + this.connectionProperties = new Properties(); + if (config.token() != null && !config.token().isEmpty()) { + connectionProperties.setProperty("token", config.token()); + } + } + + @Override + public void createTable(KvWorkloadParams params, String tablePath) throws SQLException { + try (Connection conn = openConnection(); + Statement stmt = conn.createStatement()) { + stmt.execute(String.format( + KvSchema.CREATE_TABLE_TEMPLATE, + tablePath, + params.minPartitionCount(), + params.partitionSizeMb(), + params.minPartitionCount(), + params.maxPartitionCount() + )); + } + } + + @Override + public void dropTable(String tablePath) { + try (Connection conn = openConnection(); + Statement stmt = conn.createStatement()) { + stmt.execute(String.format(KvSchema.DROP_TABLE_TEMPLATE, tablePath)); + } catch (SQLException e) { + logger.warn("failed to drop table {}: {}", tablePath, e.toString()); + } + } + + @Override + public KvSession openSession() throws SQLException { + return new JdbcKvSession(); + } + + private Connection openConnection() throws SQLException { + return DriverManager.getConnection(jdbcUrl, connectionProperties); + } + + private final class JdbcKvSession implements KvSession { + private Connection connection; + private PreparedStatement readStmt; + private PreparedStatement writeStmt; + + @Override + public OpOutcome read(long id, int timeoutMs) { + long hash = RowGenerator.numericHash(id); + int attempts = 0; + SQLException last = null; + while (attempts < maxAttempts) { + attempts++; + try { + read(id, hash, timeoutSeconds(timeoutMs)); + return OpOutcome.success(attempts - 1); + } catch (SQLException e) { + last = e; + if (!isRetryable(e) || attempts >= maxAttempts) { + break; + } + invalidateOnConnectionError(e); + if (!backoff(attempts)) { + + + + break; + } + } + } + return OpOutcome.error(attempts - 1, classifyError(last)); + } + + @Override + 
public OpOutcome write(Row row, int timeoutMs) { + long hash = RowGenerator.numericHash(row.id()); + int attempts = 0; + SQLException last = null; + while (attempts < maxAttempts) { + attempts++; + try { + write(row, hash, timeoutSeconds(timeoutMs)); + return OpOutcome.success(attempts - 1); + } catch (SQLException e) { + last = e; + if (!isRetryable(e) || attempts >= maxAttempts) { + break; + } + invalidateOnConnectionError(e); + if (!backoff(attempts)) { + break; + } + } + } + return OpOutcome.error(attempts - 1, classifyError(last)); + } + + @Override + public void close() { + closeQuietly(readStmt); + closeQuietly(writeStmt); + closeQuietly(connection); + readStmt = null; + writeStmt = null; + connection = null; + } + + private Connection connection() throws SQLException { + if (connection == null || connection.isClosed()) { + connection = openConnection(); + readStmt = null; + writeStmt = null; + } + return connection; + } + + private PreparedStatement readStmt() throws SQLException { + Connection conn = connection(); + if (readStmt == null) { + readStmt = conn.prepareStatement(String.format(KvSchema.SELECT_TEMPLATE, tablePath)); + } + return readStmt; + } + + private PreparedStatement writeStmt() throws SQLException { + Connection conn = connection(); + if (writeStmt == null) { + writeStmt = conn.prepareStatement(String.format(KvSchema.UPSERT_TEMPLATE, tablePath)); + } + return writeStmt; + } + + private void read(long id, long hash, int timeoutSeconds) throws SQLException { + PreparedStatement stmt = readStmt(); + stmt.setQueryTimeout(timeoutSeconds); + stmt.setLong(1, id); + stmt.setLong(2, hash); + try (ResultSet rs = stmt.executeQuery()) { + while (rs.next()) { + rs.getLong("id"); + } + } + } + + private void write(Row row, long hash, int timeoutSeconds) throws SQLException { + PreparedStatement stmt = writeStmt(); + stmt.setQueryTimeout(timeoutSeconds); + stmt.setLong(1, hash); + stmt.setLong(2, row.id()); + stmt.setString(3, row.payloadStr()); + 
stmt.setDouble(4, row.payloadDouble()); + stmt.setTimestamp(5, Timestamp.from(row.payloadTimestamp())); + stmt.setLong(6, row.payloadHash()); + stmt.executeUpdate(); + } + + private void invalidateOnConnectionError(SQLException e) { + if (isConnectionError(e)) { + close(); + } + } + } + + private static int timeoutSeconds(int timeoutMs) { + return Math.max(1, (timeoutMs + 999) / 1000); + } + + + + private static boolean isRetryable(SQLException e) { + if (e instanceof YdbStatusable) { + try { + return ((YdbStatusable) e).getStatus().getCode().isRetryable(true); + } catch (RuntimeException ignored) { + + } + } + return e instanceof SQLRecoverableException || e instanceof SQLTransientException; + } + + private static boolean isConnectionError(SQLException e) { + return e instanceof SQLRecoverableException + || e instanceof SQLTransientConnectionException + || e instanceof SQLNonTransientConnectionException; + } + + private static String classifyError(SQLException e) { + if (e == null) { + return "unknown"; + } + if (e instanceof YdbStatusable) { + try { + return "ydb/" + ((YdbStatusable) e).getStatus().getCode().name().toLowerCase(); + } catch (RuntimeException ignored) { + + } + } + return e.getClass().getSimpleName().toLowerCase(); + } + + + + private static boolean backoff(int attempt) { + long delay = Math.min(MAX_BACKOFF_MS, INITIAL_BACKOFF_MS * (1L << Math.min(attempt - 1, 20))); + try { + Thread.sleep(delay); + return true; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return false; + } + } + + private static void closeQuietly(AutoCloseable closeable) { + if (closeable == null) { + return; + } + try { + closeable.close(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } catch (Exception ignored) { + + } + } +} diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkload.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkload.java deleted file mode 100644 index dd74c32..0000000 
--- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkload.java +++ /dev/null @@ -1,566 +0,0 @@ -package tech.ydb.slo.kv; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLRecoverableException; -import java.sql.SQLTransientConnectionException; -import java.sql.SQLTransientException; -import java.sql.Statement; -import java.sql.Timestamp; -import java.util.Properties; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -import com.google.common.util.concurrent.RateLimiter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import tech.ydb.jdbc.exception.YdbStatusable; -import tech.ydb.slo.Metrics; - -/** - * Key-value workload for the SLO test, driving the YDB JDBC driver. - * - *

The workload creates a partitioned table, prefills it with rows, and then - * runs read and write loops at fixed RPS for the configured duration. Each - * operation is timed and retried with exponential backoff; the outcome is - * recorded into {@link Metrics} so the SLO action can compare current and - * baseline runs. - * - *

Schema and queries mirror the KV workloads in the Go and JavaScript SDKs - * so the produced metrics are directly comparable across SDKs. Unlike the - * query-service workload, the primary-key {@code hash} column is derived on - * the client (see {@link #numericHash(long)}) instead of via the server-side - * {@code Digest::NumericHash}, which keeps the parameterized JDBC statements - * free of type-inference ambiguity. - * - *

Concurrency model: each operation type (read / write) gets a dedicated - * thread pool sized to the configured RPS. Every worker thread owns its own - * JDBC {@link Connection} (the YDB driver's connections are not thread-safe), - * pulls a permit from a shared Guava {@link RateLimiter}, and executes the - * operation inline. There is no separate driver thread and no work queue. - */ -public final class KvWorkload { - private static final Logger logger = LoggerFactory.getLogger(KvWorkload.class); - - private static final String CREATE_TABLE_QUERY_TEMPLATE = "" - + "CREATE TABLE IF NOT EXISTS `%s` (" - + " hash Uint64," - + " id Uint64," - + " payload_str Utf8," - + " payload_double Double," - + " payload_timestamp Timestamp," - + " payload_hash Uint64," - + " PRIMARY KEY (hash, id)" - + ") WITH (" - + " UNIFORM_PARTITIONS = %d," - + " AUTO_PARTITIONING_BY_SIZE = ENABLED," - + " AUTO_PARTITIONING_PARTITION_SIZE_MB = %d," - + " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d," - + " AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = %d" - + ")"; - - private static final String DROP_TABLE_QUERY_TEMPLATE = "DROP TABLE `%s`"; - - private static final String WRITE_QUERY_TEMPLATE = "" - + "UPSERT INTO `%s` (" - + " hash, id, payload_str, payload_double, payload_timestamp, payload_hash" - + ") VALUES (?, ?, ?, ?, ?, ?)"; - - private static final String READ_QUERY_TEMPLATE = "" - + "SELECT id, payload_str, payload_double, payload_timestamp, payload_hash" - + " FROM `%s`" - + " WHERE id = ? AND hash = ?"; - - /* - * Hard cap on the number of worker threads spawned for a single operation - * type. The SLO targets a few hundred RPS in CI; allowing more workers - * than this just wastes threads on JIT-warmup contention without - * improving throughput. - */ - private static final int MAX_WORKERS = 64; - - /* - * Maximum number of attempts (initial + retries) per operation before it - * is recorded as a failure. 
Mirrors the order of magnitude of the - * query-service SessionRetryContext default. - */ - private static final int MAX_ATTEMPTS = 10; - - private static final long INITIAL_BACKOFF_MS = 10L; - private static final long MAX_BACKOFF_MS = 1_000L; - - /* - * Extra time, on top of the workload duration, given to worker pools to - * complete their last in-flight operations before {@link #run()} forces - * shutdown. - */ - private static final long SHUTDOWN_GRACE_SECONDS = 30L; - - private final String jdbcUrl; - private final Properties connectionProperties; - private final Metrics metrics; - private final KvWorkloadParams params; - private final String tablePath; - private final RowGenerator generator; - - public KvWorkload( - String jdbcUrl, - Properties connectionProperties, - Metrics metrics, - KvWorkloadParams params, - String tablePath - ) { - this.jdbcUrl = jdbcUrl; - this.connectionProperties = connectionProperties; - this.metrics = metrics; - this.params = params; - this.tablePath = tablePath; - this.generator = new RowGenerator(params.prefillCount()); - } - - /* - * Creates the table (if missing) and prefills it with - * {@code params.prefillCount()} rows using a bounded pool of worker - * connections. 
- */ - public void setup() throws InterruptedException, SQLException { - logger.info("creating table {}", tablePath); - try (Connection conn = openConnection(); - Statement stmt = conn.createStatement()) { - stmt.execute(String.format( - CREATE_TABLE_QUERY_TEMPLATE, - tablePath, - params.minPartitionCount(), - params.partitionSizeMb(), - params.minPartitionCount(), - params.maxPartitionCount() - )); - } - logger.info("table {} created", tablePath); - - if (params.prefillCount() <= 0) { - logger.info("prefill count <= 0, skipping prefill"); - return; - } - - logger.info("prefilling {} rows into {}", params.prefillCount(), tablePath); - int parallelism = Math.min(MAX_WORKERS, Math.max(1, params.prefillCount())); - ExecutorService prefillPool = Executors.newFixedThreadPool( - parallelism, namedThreadFactory("slo-prefill-") - ); - AtomicLong nextId = new AtomicLong(0); - AtomicInteger failed = new AtomicInteger(); - try { - for (int w = 0; w < parallelism; w++) { - prefillPool.execute(() -> { - try (WorkerConnection wc = new WorkerConnection()) { - long id; - while ((id = nextId.getAndIncrement()) < params.prefillCount()) { - SQLException err = writeWithRetry(wc, RowGenerator.generate(id), - params.writeTimeoutMs(), null); - if (err != null) { - int f = failed.incrementAndGet(); - if (f <= 5) { - logger.warn("prefill row {} failed: {}", id, err.toString()); - } - } - } - } - }); - } - } finally { - prefillPool.shutdown(); - if (!prefillPool.awaitTermination(5, TimeUnit.MINUTES)) { - prefillPool.shutdownNow(); - } - } - if (failed.get() > 0) { - logger.warn("prefill completed with {} failed rows out of {}", - failed.get(), params.prefillCount()); - } else { - logger.info("prefill completed"); - } - } - - /* - * Runs the workload until the configured deadline or thread interruption. - */ - public void run() throws InterruptedException { - long durationSeconds = params.durationSeconds(); - long endNanos = durationSeconds > 0 - ? 
System.nanoTime() + TimeUnit.SECONDS.toNanos(durationSeconds) - : Long.MAX_VALUE; - - // Track how many writes have completed so reads target a key-space - // that's actually been populated. The generator was constructed with - // nextId = prefillCount, so writes pick up where prefill left off. - AtomicLong writesIssued = new AtomicLong(); - - int readWorkers = workerCount(params.readRps()); - int writeWorkers = workerCount(params.writeRps()); - - if (readWorkers == 0 && writeWorkers == 0) { - logger.warn("both read and write RPS are <= 0, run phase has nothing to do"); - return; - } - - ExecutorService readPool = null; - ExecutorService writePool = null; - try { - if (readWorkers > 0) { - readPool = Executors.newFixedThreadPool(readWorkers, namedThreadFactory("slo-read-")); - RateLimiter readLimiter = RateLimiter.create(params.readRps()); - for (int i = 0; i < readWorkers; i++) { - readPool.execute(() -> readWorkerLoop(endNanos, readLimiter, writesIssued)); - } - } else { - logger.info("read RPS <= 0, skipping read workers"); - } - - if (writeWorkers > 0) { - writePool = Executors.newFixedThreadPool(writeWorkers, namedThreadFactory("slo-write-")); - RateLimiter writeLimiter = RateLimiter.create(params.writeRps()); - for (int i = 0; i < writeWorkers; i++) { - writePool.execute(() -> writeWorkerLoop(endNanos, writeLimiter, writesIssued)); - } - } else { - logger.info("write RPS <= 0, skipping write workers"); - } - - // Wait for workers to drain naturally as they hit the deadline. - long graceNanos = TimeUnit.SECONDS.toNanos(SHUTDOWN_GRACE_SECONDS); - long waitNanos = durationSeconds > 0 - ? 
Math.max(0L, endNanos - System.nanoTime()) + graceNanos - : Long.MAX_VALUE; - - if (readPool != null) { - readPool.shutdown(); - } - if (writePool != null) { - writePool.shutdown(); - } - - if (readPool != null) { - long started = System.nanoTime(); - if (!readPool.awaitTermination(waitNanos, TimeUnit.NANOSECONDS)) { - logger.warn("read pool did not drain within deadline, forcing shutdown"); - readPool.shutdownNow(); - } - waitNanos = Math.max(0L, waitNanos - (System.nanoTime() - started)); - } - if (writePool != null) { - if (!writePool.awaitTermination(waitNanos, TimeUnit.NANOSECONDS)) { - logger.warn("write pool did not drain within deadline, forcing shutdown"); - writePool.shutdownNow(); - } - } - } finally { - forceShutdown(readPool, "read pool"); - forceShutdown(writePool, "write pool"); - } - } - - /* - * Drops the workload table. Called from the {@code finally} block in - * {@code Main} so the database is left clean even on failure. - */ - public void teardown() { - logger.info("dropping table {}", tablePath); - try (Connection conn = openConnection(); - Statement stmt = conn.createStatement()) { - stmt.execute(String.format(DROP_TABLE_QUERY_TEMPLATE, tablePath)); - logger.info("table {} dropped", tablePath); - } catch (SQLException e) { - logger.warn("failed to drop table {}: {}", tablePath, e.toString()); - } - } - - // --- worker loops ------------------------------------------------------ - - private void readWorkerLoop(long endNanos, RateLimiter limiter, AtomicLong writesIssued) { - try (WorkerConnection wc = new WorkerConnection()) { - while (System.nanoTime() < endNanos && !Thread.currentThread().isInterrupted()) { - limiter.acquire(); - try { - readOnce(wc, writesIssued.get()); - } catch (Throwable t) { - logger.warn("read op threw unexpectedly: {}", t.toString()); - } - } - } - } - - private void writeWorkerLoop(long endNanos, RateLimiter limiter, AtomicLong writesIssued) { - try (WorkerConnection wc = new WorkerConnection()) { - while 
(System.nanoTime() < endNanos && !Thread.currentThread().isInterrupted()) { - limiter.acquire(); - try { - writeOnce(wc, generator.generate()); - writesIssued.incrementAndGet(); - } catch (Throwable t) { - logger.warn("write op threw unexpectedly: {}", t.toString()); - } - } - } - } - - // --- single operations ------------------------------------------------- - - private void readOnce(WorkerConnection wc, long writesObserved) { - long upperBound = Math.max(1L, params.prefillCount() + writesObserved); - long id = ThreadLocalRandom.current().nextLong(upperBound); - long hash = numericHash(id); - - Metrics.Span span = metrics.startOperation(Metrics.OperationType.READ); - int attempts = 0; - SQLException last = null; - while (attempts < MAX_ATTEMPTS) { - attempts++; - try { - wc.read(id, hash, timeoutSeconds(params.readTimeoutMs())); - span.finishSuccess(attempts - 1); - return; - } catch (SQLException e) { - last = e; - if (!isRetryable(e) || attempts >= MAX_ATTEMPTS) { - break; - } - wc.invalidateOnConnectionError(e); - backoff(attempts); - } - } - span.finishError(attempts - 1, classifyError(last)); - logger.debug("read {} failed: {}", id, last == null ? "?" : last.toString()); - } - - private void writeOnce(WorkerConnection wc, Row row) { - Metrics.Span span = metrics.startOperation(Metrics.OperationType.WRITE); - int[] attemptsOut = new int[1]; - SQLException err = writeWithRetry(wc, row, params.writeTimeoutMs(), attemptsOut); - if (err == null) { - span.finishSuccess(attemptsOut[0] - 1); - } else { - span.finishError(Math.max(0, attemptsOut[0] - 1), classifyError(err)); - logger.debug("write {} failed: {}", row.id(), err.toString()); - } - } - - /* - * Writes a single row with retry. When {@code attemptsOut} is non-null, the - * total number of attempts is written to its first element. Returns - * {@code null} on success or the last {@link SQLException} on failure. - * Used both by the run phase (with metrics handled by the caller) and - * prefill (silent). 
- */ - private SQLException writeWithRetry(WorkerConnection wc, Row row, int timeoutMs, int[] attemptsOut) { - long hash = numericHash(row.id()); - int attempts = 0; - SQLException last = null; - while (attempts < MAX_ATTEMPTS) { - attempts++; - try { - wc.write(row, hash, timeoutSeconds(timeoutMs)); - if (attemptsOut != null) { - attemptsOut[0] = attempts; - } - return null; - } catch (SQLException e) { - last = e; - if (!isRetryable(e) || attempts >= MAX_ATTEMPTS) { - break; - } - wc.invalidateOnConnectionError(e); - backoff(attempts); - } - } - if (attemptsOut != null) { - attemptsOut[0] = attempts; - } - return last; - } - - // --- helpers ----------------------------------------------------------- - - private Connection openConnection() throws SQLException { - return DriverManager.getConnection(jdbcUrl, connectionProperties); - } - - private static int timeoutSeconds(int timeoutMs) { - return Math.max(1, (timeoutMs + 999) / 1000); - } - - private static boolean isRetryable(SQLException e) { - return e instanceof SQLRecoverableException || e instanceof SQLTransientException; - } - - private static boolean isConnectionError(SQLException e) { - return e instanceof SQLRecoverableException || e instanceof SQLTransientConnectionException; - } - - private static String classifyError(SQLException e) { - if (e == null) { - return "unknown"; - } - if (e instanceof YdbStatusable) { - try { - return "ydb/" + ((YdbStatusable) e).getStatus().getCode().name().toLowerCase(); - } catch (RuntimeException ignored) { - // fall through to the generic classification - } - } - return e.getClass().getSimpleName().toLowerCase(); - } - - private static void backoff(int attempt) { - long delay = Math.min(MAX_BACKOFF_MS, INITIAL_BACKOFF_MS * (1L << Math.min(attempt - 1, 20))); - try { - Thread.sleep(delay); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - /** - * Derives the primary-key {@code hash} column from {@code id} using a - * SplitMix64-style 
mix. Reads and writes both call this so they always - * target the same key. The exact function does not need to match YQL's - * {@code Digest::NumericHash}; it only needs to be deterministic and - * well distributed across partitions. - * @param id row id - * @return derived hash value - */ - private static long numericHash(long id) { - long z = id + 0x9E3779B97F4A7C15L; - z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L; - z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL; - return z ^ (z >>> 31); - } - - private static int workerCount(int rps) { - if (rps <= 0) { - return 0; - } - return Math.min(MAX_WORKERS, Math.max(1, rps)); - } - - private static ThreadFactory namedThreadFactory(String prefix) { - AtomicInteger counter = new AtomicInteger(); - return r -> { - Thread t = new Thread(r, prefix + counter.getAndIncrement()); - t.setDaemon(true); - return t; - }; - } - - private static void forceShutdown(ExecutorService pool, String name) { - if (pool == null || pool.isTerminated()) { - return; - } - logger.warn("{} still active in cleanup, forcing shutdown", name); - pool.shutdownNow(); - try { - if (!pool.awaitTermination(5, TimeUnit.SECONDS)) { - logger.warn("{} did not terminate after shutdownNow", name); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - /** - * A JDBC connection owned by a single worker thread, together with lazily - * prepared read/write statements. On a connection-level error the holder - * is invalidated so the next operation transparently reconnects. 
- */ - private final class WorkerConnection implements AutoCloseable { - private Connection connection; - private PreparedStatement readStmt; - private PreparedStatement writeStmt; - - private Connection connection() throws SQLException { - if (connection == null || connection.isClosed()) { - connection = openConnection(); - readStmt = null; - writeStmt = null; - } - return connection; - } - - private PreparedStatement readStmt() throws SQLException { - Connection conn = connection(); - if (readStmt == null) { - readStmt = conn.prepareStatement(String.format(READ_QUERY_TEMPLATE, tablePath)); - } - return readStmt; - } - - private PreparedStatement writeStmt() throws SQLException { - Connection conn = connection(); - if (writeStmt == null) { - writeStmt = conn.prepareStatement(String.format(WRITE_QUERY_TEMPLATE, tablePath)); - } - return writeStmt; - } - - void read(long id, long hash, int timeoutSeconds) throws SQLException { - PreparedStatement stmt = readStmt(); - stmt.setQueryTimeout(timeoutSeconds); - stmt.setLong(1, id); - stmt.setLong(2, hash); - try (ResultSet rs = stmt.executeQuery()) { - // Touch the result set so we exercise the deserialization path. 
- while (rs.next()) { - rs.getLong("id"); - } - } - } - - void write(Row row, long hash, int timeoutSeconds) throws SQLException { - PreparedStatement stmt = writeStmt(); - stmt.setQueryTimeout(timeoutSeconds); - stmt.setLong(1, hash); - stmt.setLong(2, row.id()); - stmt.setString(3, row.payloadStr()); - stmt.setDouble(4, row.payloadDouble()); - stmt.setTimestamp(5, Timestamp.from(row.payloadTimestamp())); - stmt.setLong(6, row.payloadHash()); - stmt.executeUpdate(); - } - - void invalidateOnConnectionError(SQLException e) { - if (isConnectionError(e)) { - close(); - } - } - - @Override - public void close() { - closeQuietly(readStmt); - closeQuietly(writeStmt); - closeQuietly(connection); - readStmt = null; - writeStmt = null; - connection = null; - } - - private void closeQuietly(AutoCloseable closeable) { - if (closeable == null) { - return; - } - try { - closeable.close(); - } catch (Exception ignored) { - // best-effort cleanup - } - } - } -} diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java deleted file mode 100644 index 41f2156..0000000 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/KvWorkloadParams.java +++ /dev/null @@ -1,115 +0,0 @@ -package tech.ydb.slo.kv; - -import com.beust.jcommander.Parameter; - -/** - * Tunable parameters for the KV workload. - * - *

Defaults match the SLO workloads in the Go and JavaScript SDKs so the - * runs are comparable. JCommander annotations let the operator override - * any field from the command line, e.g. - * {@code --read-rps 500 --write-rps 50}. - */ -@SuppressWarnings("FieldMayBeFinal") -public final class KvWorkloadParams { - - @Parameter( - names = {"--read-rps"}, - description = "Target read operations per second" - ) - private int readRps = 1000; - - @Parameter( - names = {"--write-rps"}, - description = "Target write operations per second" - ) - private int writeRps = 100; - - @Parameter( - names = {"--read-timeout-ms"}, - description = "Per-attempt read timeout in milliseconds" - ) - private int readTimeoutMs = 10_000; - - @Parameter( - names = {"--write-timeout-ms"}, - description = "Per-attempt write timeout in milliseconds" - ) - private int writeTimeoutMs = 10_000; - - @Parameter( - names = {"--prefill-count"}, - description = "Number of rows to prefill before the run phase" - ) - private int prefillCount = 1_000; - - @Parameter( - names = {"--partition-size"}, - description = "Auto-partitioning partition size in MB" - ) - private int partitionSizeMb = 1; - - @Parameter( - names = {"--min-partition-count"}, - description = "Minimum number of table partitions" - ) - private int minPartitionCount = 6; - - @Parameter( - names = {"--max-partition-count"}, - description = "Maximum number of table partitions" - ) - private int maxPartitionCount = 1_000; - - @Parameter( - names = {"--duration"}, - description = "Run duration in seconds (overrides WORKLOAD_DURATION when > 0)" - ) - private int durationSeconds = 0; - - public int readRps() { - return readRps; - } - - public int writeRps() { - return writeRps; - } - - public int readTimeoutMs() { - return readTimeoutMs; - } - - public int writeTimeoutMs() { - return writeTimeoutMs; - } - - public int prefillCount() { - return prefillCount; - } - - public int partitionSizeMb() { - return partitionSizeMb; - } - - public int 
minPartitionCount() { - return minPartitionCount; - } - - public int maxPartitionCount() { - return maxPartitionCount; - } - - /** - * Effective run duration. If the CLI flag was omitted (left at 0), falls - * back to the value supplied via the {@code WORKLOAD_DURATION} environment - * variable through {@code Config}. - * @return Effective run duration value - */ - public int durationSeconds() { - return durationSeconds; - } - - public void setDurationSeconds(int durationSeconds) { - this.durationSeconds = durationSeconds; - } -} diff --git a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/Row.java b/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/Row.java deleted file mode 100644 index a53ba89..0000000 --- a/slo-workload/jdbc/src/main/java/tech/ydb/slo/kv/Row.java +++ /dev/null @@ -1,64 +0,0 @@ -package tech.ydb.slo.kv; - -import java.time.Instant; - -/** - * A single row of the KV workload table. - * - *

The schema mirrors the one used by SLO workloads in other YDB SDKs - * (Go, JavaScript): - *

- * hash              Uint64 (primary key, derived from id)
- * id                Uint64 (primary key)
- * payload_str       Utf8
- * payload_double    Double
- * payload_timestamp Timestamp
- * payload_hash      Uint64
- * 
- * - *

The {@code hash} primary-key column is derived from {@code id}; the JDBC - * workload computes it on the client (see {@code KvWorkload}) so reads and - * writes target the same key without relying on server-side YQL builtins in - * parameterized statements. - */ -public final class Row { - private final long id; - private final String payloadStr; - private final double payloadDouble; - private final Instant payloadTimestamp; - private final long payloadHash; - - public Row( - long id, - String payloadStr, - double payloadDouble, - Instant payloadTimestamp, - long payloadHash - ) { - this.id = id; - this.payloadStr = payloadStr; - this.payloadDouble = payloadDouble; - this.payloadTimestamp = payloadTimestamp; - this.payloadHash = payloadHash; - } - - public long id() { - return id; - } - - public String payloadStr() { - return payloadStr; - } - - public double payloadDouble() { - return payloadDouble; - } - - public Instant payloadTimestamp() { - return payloadTimestamp; - } - - public long payloadHash() { - return payloadHash; - } -} diff --git a/slo-workload/pom.xml b/slo-workload/pom.xml index b89ae23..a43b80f 100644 --- a/slo-workload/pom.xml +++ b/slo-workload/pom.xml @@ -17,29 +17,35 @@ SLO workload applications for validating YDB Java clients under load and chaos - 17 - 2.3.20 1.59.0 2.2.2 33.4.0-jre - + 1.82 - - - jdk17-examples - - [17 - - - jdbc - - - + 3.5.7 + 0.10.0 + 1.2.2 + 1.4.0 + 1.0.0 + - + + tech.ydb.examples + slo-workload-core + ${project.version} + + + + tech.ydb + ydb-sdk-bom + ${ydb.sdk.version} + pom + import + + io.opentelemetry opentelemetry-bom @@ -48,13 +54,50 @@ import - + + org.springframework.boot + spring-boot-dependencies + ${spring.boot.version} + pom + import + + + + com.beust + jcommander + ${jcommander.version} + + tech.ydb.jdbc ydb-jdbc-driver ${ydb.jdbc.version} + + tech.ydb + spring-ydb-retry + ${spring.ydb.retry.version} + + + + tech.ydb.dialects + spring-data-jdbc-ydb + ${spring.data.jdbc.ydb.version} + + + + 
tech.ydb.dialects + hibernate-ydb-dialect + ${hibernate.ydb.dialect.version} + + + + tech.ydb.dialects + flyway-ydb-dialect + ${flyway.ydb.dialect.version} + + org.hdrhistogram HdrHistogram @@ -69,17 +112,30 @@ - - - - - org.apache.maven.plugins - maven-compiler-plugin - - ${maven.compiler.release} - - - - - + + + jdk17-slo-workload + + [17 + + + core + query + jdbc + spring-data-jdbc + spring-data-jpa + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 17 + + + + + + diff --git a/slo/Dockerfile b/slo-workload/query/Dockerfile similarity index 85% rename from slo/Dockerfile rename to slo-workload/query/Dockerfile index 3643e4b..6ab28df 100644 --- a/slo/Dockerfile +++ b/slo-workload/query/Dockerfile @@ -63,12 +63,12 @@ RUN cd /src/ydb-java-examples && \ -DnewVersion="${SDK_VERSION}" \ -DgenerateBackupPoms=false -# Build only the slo module (and its required parent/BOM context). The -# examples parent pom lists many modules; `-pl slo -am` keeps the build -# focused on what the workload actually needs. +# Build only the query workload module (and its required parent/core context). +# The examples parent pom lists many modules; `-pl slo-workload/query -am` +# keeps the build focused on what the workload actually needs. RUN cd /src/ydb-java-examples && \ mvn -B -q \ - -pl slo -am \ + -pl slo-workload/query -am \ -DskipTests \ -Dmaven.javadoc.skip=true \ package @@ -78,10 +78,10 @@ FROM ${RUNTIME_IMAGE} WORKDIR /app -# Copy the executable jar plus its transitive dependencies. The slo pom is +# Copy the executable jar plus its transitive dependencies. The query pom is # configured to drop dependencies into target/libs and to set the manifest # Class-Path to libs/, so a single `java -jar` call is enough. 
-COPY --from=workload-build /src/ydb-java-examples/slo/target/ydb-slo-workload.jar /app/ydb-slo-workload.jar -COPY --from=workload-build /src/ydb-java-examples/slo/target/libs /app/libs +COPY --from=workload-build /src/ydb-java-examples/slo-workload/query/target/ydb-slo-query-workload.jar /app/ydb-slo-query-workload.jar +COPY --from=workload-build /src/ydb-java-examples/slo-workload/query/target/libs /app/libs -ENTRYPOINT ["java", "-jar", "/app/ydb-slo-workload.jar"] +ENTRYPOINT ["java", "-jar", "/app/ydb-slo-query-workload.jar"] diff --git a/slo-workload/query/README.md b/slo-workload/query/README.md new file mode 100644 index 0000000..f139af1 --- /dev/null +++ b/slo-workload/query/README.md @@ -0,0 +1,81 @@ +# Query SLO workload + +A YDB Java SDK SLO workload that exercises the native +[query client](https://github.com/ydb-platform/ydb-java-sdk) under load and +chaos. Schema and metric contract match the JDBC and Spring-Data workloads in +this directory so reports across SDK flavors are directly comparable. + +> See the [parent README](../README.md) for the shared metrics, environment +> variables, CLI flags and CI flow. + +## What it does + +Standalone jar (`tech.ydb.slo.Main`) that runs three phases against a +partitioned KV table: + +1. **Setup** — `CREATE TABLE IF NOT EXISTS` plus a prefill of `--prefill-count` + rows. Setup aborts loudly if more than half the prefill writes fail, so a + misconfigured cluster never produces green metrics on an empty table. +2. **Run** — dedicated read and write thread pools, each paced by a Guava + `RateLimiter` to the target RPS, running until the configured duration. +3. **Teardown** — `DROP TABLE IF EXISTS`. + +Operations go through `tech.ydb.query.tools.SessionRetryContext`. Its retry +budget is configured from `--max-attempts` so the query workload exhibits the +same retry pressure as the JDBC workload under chaos. 
+ +## Schema + +``` +hash Uint64 -- primary key, computed client-side from id +id Uint64 -- primary key +payload_str Utf8 +payload_double Double +payload_timestamp Timestamp +payload_hash Uint64 +``` + +The primary-key `hash` column is derived from `id` with the same SplitMix64 +mix as every other workload in this module (`RowGenerator.numericHash`), so a +table written by `query` is byte-compatible with one written by `jdbc` / +`spring-data-jdbc` / `spring-data-jpa`. + +## Files + +``` +query/ +├── Dockerfile +├── pom.xml +├── README.md +└── src/main/ + ├── java/tech/ydb/slo/ + │ ├── Main.java Entry point + │ └── query/ + │ └── QueryKvClient.java KvClient: SessionRetryContext-backed ops + └── resources/ + └── log4j2.xml Console logging config +``` + +The shared harness (`Config`, `Metrics`, `KvWorkloadParams`, `Row`, +`RowGenerator`, `WorkloadRunner`, `Launcher`) lives in +[`../core`](../core/src/main/java/tech/ydb/slo/core). + +## Building and running locally + +```bash +# From the repository root +mvn -pl slo-workload/query -am -DskipTests package + +export YDB_CONNECTION_STRING="grpc://localhost:2136/local" +export WORKLOAD_REF=local +export WORKLOAD_NAME=java-slo-query + +java -jar slo-workload/query/target/ydb-slo-query-workload.jar \ + --duration 60 --read-rps 100 --write-rps 10 --prefill-count 100 +``` + +Build the container image (context is the repository root): + +```bash +docker build -f slo-workload/query/Dockerfile -t ydb-slo-query-workload . 
+``` diff --git a/slo-workload/query/pom.xml b/slo-workload/query/pom.xml new file mode 100644 index 0000000..4ea8add --- /dev/null +++ b/slo-workload/query/pom.xml @@ -0,0 +1,64 @@ + + + 4.0.0 + + + tech.ydb.examples + slo-workload + 1.1.0-SNAPSHOT + ../pom.xml + + + slo-workload-query + YDB SLO query workload + + SLO workload for the native YDB Java SDK query client, consumed by + ydb-platform/ydb-slo-action + + + + + tech.ydb.examples + slo-workload-core + + + + tech.ydb + ydb-sdk-query + + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + + + ydb-slo-query-workload + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-dependency-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + libs/ + tech.ydb.slo.Main + + + + + + + diff --git a/slo-workload/query/src/main/java/tech/ydb/slo/Main.java b/slo-workload/query/src/main/java/tech/ydb/slo/Main.java new file mode 100644 index 0000000..dbb5500 --- /dev/null +++ b/slo-workload/query/src/main/java/tech/ydb/slo/Main.java @@ -0,0 +1,19 @@ +package tech.ydb.slo; + +import tech.ydb.slo.core.Launcher; +import tech.ydb.slo.query.QueryKvClient; + +public final class Main { + private Main() { + + } + + public static void main(String[] args) { + Launcher.launch( + "ydb-slo-query-workload", + "java-query-kv", + args, + (config, params, tablePath) -> new QueryKvClient(config, params, tablePath) + ); + } +} diff --git a/slo-workload/query/src/main/java/tech/ydb/slo/query/QueryKvClient.java b/slo-workload/query/src/main/java/tech/ydb/slo/query/QueryKvClient.java new file mode 100644 index 0000000..7448f8a --- /dev/null +++ b/slo-workload/query/src/main/java/tech/ydb/slo/query/QueryKvClient.java @@ -0,0 +1,210 @@ +package tech.ydb.slo.query; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicInteger; + +import tech.ydb.auth.AuthProvider; +import tech.ydb.auth.NopAuthProvider; +import tech.ydb.auth.TokenAuthProvider; +import 
tech.ydb.common.transaction.TxMode; +import tech.ydb.core.Result; +import tech.ydb.core.Status; +import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.query.QueryClient; +import tech.ydb.query.settings.ExecuteQuerySettings; +import tech.ydb.query.tools.QueryReader; +import tech.ydb.query.tools.SessionRetryContext; +import tech.ydb.slo.core.Config; +import tech.ydb.slo.core.kv.KvClient; +import tech.ydb.slo.core.kv.KvSchema; +import tech.ydb.slo.core.kv.KvSession; +import tech.ydb.slo.core.kv.KvWorkloadParams; +import tech.ydb.slo.core.kv.OpOutcome; +import tech.ydb.slo.core.kv.Row; +import tech.ydb.slo.core.kv.RowGenerator; +import tech.ydb.table.query.Params; +import tech.ydb.table.result.ResultSetReader; +import tech.ydb.table.values.PrimitiveValue; + +public final class QueryKvClient implements KvClient { + private static final String WRITE_QUERY_TEMPLATE = "" + + "DECLARE $hash AS Uint64;" + + "DECLARE $id AS Uint64;" + + "DECLARE $payload_str AS Utf8;" + + "DECLARE $payload_double AS Double;" + + "DECLARE $payload_timestamp AS Timestamp;" + + "DECLARE $payload_hash AS Uint64;" + + "UPSERT INTO `%s` (" + + " hash, id, payload_str, payload_double, payload_timestamp, payload_hash" + + ") VALUES (" + + " $hash," + + " $id," + + " $payload_str," + + " $payload_double," + + " $payload_timestamp," + + " $payload_hash" + + ");"; + + private static final String READ_QUERY_TEMPLATE = "" + + "DECLARE $id AS Uint64;" + + "DECLARE $hash AS Uint64;" + + "SELECT id, payload_str, payload_double, payload_timestamp, payload_hash" + + " FROM `%s`" + + " WHERE id = $id AND hash = $hash;"; + + private final SessionRetryContext retryCtx; + private final String tablePath; + private final GrpcTransport transport; + private final QueryClient queryClient; + + public QueryKvClient(Config config, KvWorkloadParams params, String tablePath) { + this.tablePath = tablePath; + AuthProvider provider = NopAuthProvider.INSTANCE; + if (config.token() != null && !config.token().isEmpty()) { + 
provider = new TokenAuthProvider(config.token()); + } + this.transport = GrpcTransport.forConnectionString(config.connectionString()) + .withAuthProvider(provider) + .build(); + this.queryClient = QueryClient.newClient(transport).build(); + + // --max-attempts counts the initial attempt plus retries, while maxRetries() counts + // only the retries after it, so the retry budget is maxAttempts - 1 (never negative). + this.retryCtx = SessionRetryContext.create(queryClient) + .maxRetries(Math.max(0, params.maxAttempts() - 1)) + .build(); + } + + @Override + public void createTable(KvWorkloadParams params, String tablePath) { + Status status = retryCtx.supplyResult(session -> + session.createQuery( + String.format( + KvSchema.CREATE_TABLE_TEMPLATE, + tablePath, + params.minPartitionCount(), + params.partitionSizeMb(), + params.minPartitionCount(), + params.maxPartitionCount() + ), + TxMode.NONE + ).execute() + ).join().getStatus(); + status.expectSuccess("failed to create table " + tablePath); + } + + @Override + public void dropTable(String tablePath) { + Status status = retryCtx.supplyResult(session -> + session.createQuery( + String.format(KvSchema.DROP_TABLE_TEMPLATE, tablePath), + TxMode.NONE + ).execute() + ).join().getStatus(); + if (!status.isSuccess()) { + org.slf4j.LoggerFactory.getLogger(QueryKvClient.class) + .warn("failed to drop table {}: {}", tablePath, status); + } + } + + @Override + public KvSession openSession() { + return new QueryKvSession(retryCtx, tablePath); + } + + @Override + public void close() { + try { + queryClient.close(); + } catch (Exception ignored) { + // best-effort shutdown: still close the transport below + } + try { + transport.close(); + } catch (Exception ignored) { + // best-effort shutdown: nothing left to release + } + } + + private static final class QueryKvSession implements KvSession { + private final SessionRetryContext retryCtx; + private final String tablePath; + + private QueryKvSession(SessionRetryContext retryCtx, String tablePath) { + this.retryCtx = retryCtx; + this.tablePath = tablePath; + } + + @Override + public OpOutcome read(long id, int timeoutMs) { + long hash = RowGenerator.numericHash(id); + AtomicInteger attempts = new AtomicInteger(); + ExecuteQuerySettings settings =
ExecuteQuerySettings.newBuilder() + .withRequestTimeout(Duration.ofMillis(timeoutMs)) + .build(); + + Result result = retryCtx.supplyResult(session -> { + attempts.incrementAndGet(); + return QueryReader.readFrom(session.createQuery( + String.format(READ_QUERY_TEMPLATE, tablePath), + TxMode.SNAPSHOT_RO, + Params.of( + "$id", PrimitiveValue.newUint64(id), + "$hash", PrimitiveValue.newUint64(hash) + ), + settings + )); + }).join(); + + int retryAttempts = Math.max(0, attempts.get() - 1); + if (!result.getStatus().isSuccess()) { + return OpOutcome.error(retryAttempts, classifyStatus(result.getStatus())); + } + + QueryReader reader = result.getValue(); + if (reader.getResultSetCount() > 0) { + ResultSetReader rs = reader.getResultSet(0); + while (rs.next()) { + rs.getColumn("id").getUint64(); + } + } + return OpOutcome.success(retryAttempts); + } + + @Override + public OpOutcome write(Row row, int timeoutMs) { + long hash = RowGenerator.numericHash(row.id()); + AtomicInteger attempts = new AtomicInteger(); + ExecuteQuerySettings settings = ExecuteQuerySettings.newBuilder() + .withRequestTimeout(Duration.ofMillis(timeoutMs)) + .build(); + + Status status = retryCtx.supplyStatus(session -> { + attempts.incrementAndGet(); + return session.createQuery( + String.format(WRITE_QUERY_TEMPLATE, tablePath), + TxMode.SERIALIZABLE_RW, + Params.of( + "$hash", PrimitiveValue.newUint64(hash), + "$id", PrimitiveValue.newUint64(row.id()), + "$payload_str", PrimitiveValue.newText(row.payloadStr()), + "$payload_double", PrimitiveValue.newDouble(row.payloadDouble()), + "$payload_timestamp", PrimitiveValue.newTimestamp(row.payloadTimestamp()), + "$payload_hash", PrimitiveValue.newUint64(row.payloadHash()) + ), + settings + ).execute().thenApply(Result::getStatus); + }).join(); + + int retryAttempts = Math.max(0, attempts.get() - 1); + if (status.isSuccess()) { + return OpOutcome.success(retryAttempts); + } + return OpOutcome.error(retryAttempts, classifyStatus(status)); + } + } + + private 
static String classifyStatus(Status status) { + return "ydb/" + status.getCode().name().toLowerCase(); + } +} diff --git a/slo/src/main/resources/log4j2.xml b/slo-workload/query/src/main/resources/log4j2.xml similarity index 100% rename from slo/src/main/resources/log4j2.xml rename to slo-workload/query/src/main/resources/log4j2.xml diff --git a/slo-workload/spring-data-jdbc/Dockerfile b/slo-workload/spring-data-jdbc/Dockerfile new file mode 100644 index 0000000..84cf054 --- /dev/null +++ b/slo-workload/spring-data-jdbc/Dockerfile @@ -0,0 +1,57 @@ +# Multi-stage Dockerfile for the Spring Data JDBC SLO workload. +# +# Build context: the `ydb-java-examples` repository root. For ydb-jdbc-driver +# CI the context may also contain `./ydb-jdbc-driver`; when present, the driver +# is installed from source and the workload is pinned to that exact version. +# +# Optional build args: +# MAVEN_IMAGE Builder image. Defaults to `maven:3.9-eclipse-temurin-17`. +# RUNTIME_IMAGE Runtime image. Defaults to `eclipse-temurin:17-jre`. +# YDB_JDBC_VERSION Override the ydb-jdbc-driver version under test. + +ARG MAVEN_IMAGE=maven:3.9-eclipse-temurin-17 +ARG RUNTIME_IMAGE=eclipse-temurin:17-jre + +FROM ${MAVEN_IMAGE} AS workload-build + +WORKDIR /src +COPY . 
/src + +ARG YDB_JDBC_VERSION="" + +RUN if [ -d /src/ydb-jdbc-driver ]; then \ + cd /src/ydb-jdbc-driver && \ + mvn -B -q \ + -DskipTests \ + -Dmaven.javadoc.skip=true \ + -Dmaven.source.skip=true \ + -Dgpg.skip=true \ + install && \ + mvn -B -q help:evaluate -Dexpression=project.version -DforceStdout > /tmp/ydb-jdbc.version && \ + YDB_JDBC_VERSION="$(cat /tmp/ydb-jdbc.version)" && \ + cd /src && \ + echo "Pinning ydb-jdbc-driver to source-built ${YDB_JDBC_VERSION}" && \ + mvn -B -q versions:set-property \ + -Dproperty=ydb.jdbc.version \ + -DnewVersion="${YDB_JDBC_VERSION}" \ + -DgenerateBackupPoms=false \ + -pl slo-workload ; \ + elif [ -n "${YDB_JDBC_VERSION}" ]; then \ + echo "Pinning ydb-jdbc-driver to ${YDB_JDBC_VERSION}" && \ + mvn -B -q versions:set-property \ + -Dproperty=ydb.jdbc.version \ + -DnewVersion="${YDB_JDBC_VERSION}" \ + -DgenerateBackupPoms=false \ + -pl slo-workload ; \ + fi && \ + mvn -B -q -pl slo-workload/spring-data-jdbc -am \ + -DskipTests \ + -Dmaven.javadoc.skip=true \ + package + +FROM ${RUNTIME_IMAGE} + +WORKDIR /app +COPY --from=workload-build /src/slo-workload/spring-data-jdbc/target/ydb-slo-spring-data-jdbc-workload.jar /app/ydb-slo-spring-data-jdbc-workload.jar + +ENTRYPOINT ["java", "-jar", "/app/ydb-slo-spring-data-jdbc-workload.jar"] diff --git a/slo-workload/spring-data-jdbc/pom.xml b/slo-workload/spring-data-jdbc/pom.xml new file mode 100644 index 0000000..17ae9a4 --- /dev/null +++ b/slo-workload/spring-data-jdbc/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + + tech.ydb.examples + slo-workload + 1.1.0-SNAPSHOT + ../pom.xml + + + slo-workload-spring-data-jdbc + Spring Data JDBC SLO workload + + SLO workload exercising Spring Data JDBC with spring-ydb-retry over the YDB JDBC driver + + + + + tech.ydb.examples + slo-workload-core + + + + org.springframework.boot + spring-boot-starter-data-jdbc + + + + tech.ydb.dialects + spring-data-jdbc-ydb + + + + tech.ydb + spring-ydb-retry + + + + tech.ydb.jdbc + ydb-jdbc-driver + + + + 
org.apache.logging.log4j + log4j-slf4j2-impl + + + + + ydb-slo-spring-data-jdbc-workload + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.springframework.boot + spring-boot-maven-plugin + ${spring.boot.version} + + + + repackage + + + + + tech.ydb.slo.springjdbc.SloSpringDataJdbcApplication + + + + + diff --git a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/KvOperationService.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/KvOperationService.java new file mode 100644 index 0000000..477870e --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/KvOperationService.java @@ -0,0 +1,56 @@ +package tech.ydb.slo.springjdbc; + +import java.sql.Timestamp; + +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Service; + +import tech.ydb.retry.YdbTransactional; +import tech.ydb.slo.core.kv.KvSchema; +import tech.ydb.slo.core.kv.Row; + +@Service +public class KvOperationService { + + private static final ThreadLocal ATTEMPTS = ThreadLocal.withInitial(() -> new int[1]); + + /** Resets the calling thread's attempt counter to zero. */ + public static void resetAttempts() { + ATTEMPTS.get()[0] = 0; + } + + /** Returns how often read/write bumped the calling thread's counter since the last reset. */ + public static int currentAttempts() { + return ATTEMPTS.get()[0]; + } + + private final JdbcTemplate jdbc; + + public KvOperationService(JdbcTemplate jdbc) { + this.jdbc = jdbc; + } + + @YdbTransactional(readOnly = true) + public void read(String tablePath, long id, long hash) { + ATTEMPTS.get()[0]++; + jdbc.query(String.format(KvSchema.SELECT_TEMPLATE, tablePath), rs -> { + // RowCallbackHandler runs once per row with the cursor already on it; + // calling rs.next() here would skip the row being delivered. + rs.getLong("id"); + }, id, hash); + } + + @YdbTransactional(idempotent = true) + public void write(String tablePath, Row row, long hash) { + ATTEMPTS.get()[0]++; + jdbc.update( + String.format(KvSchema.UPSERT_TEMPLATE, tablePath), + hash, + row.id(), + row.payloadStr(), + row.payloadDouble(), + Timestamp.from(row.payloadTimestamp()), + row.payloadHash() + ); + } +} diff --git
a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloExitCodeHolder.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloExitCodeHolder.java new file mode 100644 index 0000000..1e9eb08 --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloExitCodeHolder.java @@ -0,0 +1,18 @@ +package tech.ydb.slo.springjdbc; + +import org.springframework.boot.ExitCodeGenerator; +import org.springframework.stereotype.Component; + +@Component +public class SloExitCodeHolder implements ExitCodeGenerator { + private int exitCode; + + void setExitCode(int exitCode) { + this.exitCode = exitCode; + } + + @Override + public int getExitCode() { + return exitCode; + } +} diff --git a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloInfrastructureConfiguration.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloInfrastructureConfiguration.java new file mode 100644 index 0000000..e07e764 --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloInfrastructureConfiguration.java @@ -0,0 +1,45 @@ +package tech.ydb.slo.springjdbc; + +import javax.sql.DataSource; + +import com.zaxxer.hikari.HikariDataSource; +import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.transaction.annotation.EnableTransactionManagement; + +import tech.ydb.slo.core.Config; + +@Configuration +@EnableTransactionManagement +public class SloInfrastructureConfiguration { + + @Bean + Config sloConfig() { + return Config.fromEnv("java-spring-data-jdbc-kv"); + } + + // Bind spring.datasource.hikari.* (maximum-pool-size etc.) onto this hand-built pool; + // Boot's own Hikari auto-configuration backs off once a DataSource bean is defined. + @Bean + @ConfigurationProperties("spring.datasource.hikari") + DataSource sloDataSource(Config sloConfig) { + DataSourceProperties properties = new DataSourceProperties(); + properties.setDriverClassName("tech.ydb.jdbc.YdbDriver"); +
properties.setUrl(sloConfig.jdbcUrl()); + HikariDataSource dataSource = properties.initializeDataSourceBuilder() + .type(HikariDataSource.class) + .build(); + if (sloConfig.token() != null && !sloConfig.token().isEmpty()) { + dataSource.addDataSourceProperty("token", sloConfig.token()); + } + return dataSource; + } + + @Bean + JdbcTemplate sloJdbcTemplate(DataSource sloDataSource) { + return new JdbcTemplate(sloDataSource); + } +} diff --git a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloSpringDataJdbcApplication.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloSpringDataJdbcApplication.java new file mode 100644 index 0000000..e552d0d --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloSpringDataJdbcApplication.java @@ -0,0 +1,17 @@ +package tech.ydb.slo.springjdbc; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.WebApplicationType; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.builder.SpringApplicationBuilder; + +@SpringBootApplication +public class SloSpringDataJdbcApplication { + + public static void main(String[] args) { + int exitCode = SpringApplication.exit(new SpringApplicationBuilder(SloSpringDataJdbcApplication.class) + .web(WebApplicationType.NONE) + .run(args)); + System.exit(exitCode); + } +} diff --git a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloWorkloadRunner.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloWorkloadRunner.java new file mode 100644 index 0000000..1522ca2 --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SloWorkloadRunner.java @@ -0,0 +1,32 @@ +package tech.ydb.slo.springjdbc; + +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; +import org.springframework.stereotype.Component; + +import 
tech.ydb.slo.core.Launcher; + +@Component +public class SloWorkloadRunner implements ApplicationRunner { + private final SpringJdbcKvClient client; + private final SloExitCodeHolder exitCodeHolder; + + public SloWorkloadRunner( + SpringJdbcKvClient client, + SloExitCodeHolder exitCodeHolder + ) { + this.client = client; + this.exitCodeHolder = exitCodeHolder; + } + + @Override + public void run(ApplicationArguments args) { + int exitCode = Launcher.run( + "ydb-slo-spring-data-jdbc-workload", + "java-spring-data-jdbc-kv", + args.getSourceArgs(), + (config, params, tablePath) -> client.forTable(tablePath) + ); + exitCodeHolder.setExitCode(exitCode); + } +} diff --git a/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SpringJdbcKvClient.java b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SpringJdbcKvClient.java new file mode 100644 index 0000000..cdc599f --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/java/tech/ydb/slo/springjdbc/SpringJdbcKvClient.java @@ -0,0 +1,142 @@ +package tech.ydb.slo.springjdbc; + +import java.sql.SQLException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Component; + +import tech.ydb.jdbc.exception.YdbStatusable; +import tech.ydb.slo.core.kv.KvClient; +import tech.ydb.slo.core.kv.KvSchema; +import tech.ydb.slo.core.kv.KvSession; +import tech.ydb.slo.core.kv.KvWorkloadParams; +import tech.ydb.slo.core.kv.OpOutcome; +import tech.ydb.slo.core.kv.Row; +import tech.ydb.slo.core.kv.RowGenerator; + +@Component +public class SpringJdbcKvClient { + private final JdbcTemplate jdbc; + private final KvOperationService operations; + + public SpringJdbcKvClient(JdbcTemplate jdbc, KvOperationService operations) { + this.jdbc = jdbc; + this.operations = operations; + } + + + + public KvClient forTable(String tablePath) { + return new BoundClient(jdbc, operations, tablePath); + } + + private static 
final class BoundClient implements KvClient { + private static final Logger logger = LoggerFactory.getLogger(BoundClient.class); + + private final JdbcTemplate jdbc; + private final KvOperationService operations; + private final String tablePath; + + BoundClient(JdbcTemplate jdbc, KvOperationService operations, String tablePath) { + this.jdbc = jdbc; + this.operations = operations; + this.tablePath = tablePath; + } + + @Override + public void createTable(KvWorkloadParams params, String table) { + jdbc.execute(String.format( + KvSchema.CREATE_TABLE_TEMPLATE, + table, + params.minPartitionCount(), + params.partitionSizeMb(), + params.minPartitionCount(), + params.maxPartitionCount() + )); + } + + @Override + public void dropTable(String table) { + try { + jdbc.execute(String.format(KvSchema.DROP_TABLE_TEMPLATE, table)); + } catch (RuntimeException e) { + logger.warn("failed to drop table {}: {}", table, e.toString()); + } + } + + @Override + public KvSession openSession() { + return new SpringJdbcKvSession(operations, tablePath); + } + } + + private static final class SpringJdbcKvSession implements KvSession { + private final KvOperationService operations; + private final String tablePath; + + private SpringJdbcKvSession(KvOperationService operations, String tablePath) { + this.operations = operations; + this.tablePath = tablePath; + } + + @Override + public OpOutcome read(long id, int timeoutMs) { + KvOperationService.resetAttempts(); + try { + long hash = RowGenerator.numericHash(id); + operations.read(tablePath, id, hash); + return OpOutcome.success(Math.max(0, KvOperationService.currentAttempts() - 1)); + } catch (RuntimeException e) { + return OpOutcome.error( + Math.max(0, KvOperationService.currentAttempts() - 1), + classifyError(e) + ); + } + } + + @Override + public OpOutcome write(Row row, int timeoutMs) { + KvOperationService.resetAttempts(); + try { + long hash = RowGenerator.numericHash(row.id()); + operations.write(tablePath, row, hash); + return 
OpOutcome.success(Math.max(0, KvOperationService.currentAttempts() - 1)); + } catch (RuntimeException e) { + return OpOutcome.error( + Math.max(0, KvOperationService.currentAttempts() - 1), + classifyError(e) + ); + } + } + + + + private static String classifyError(Throwable e) { + Throwable current = e; + while (current != null) { + if (current instanceof YdbStatusable) { + try { + return "ydb/" + ((YdbStatusable) current).getStatus().getCode().name().toLowerCase(); + } catch (RuntimeException ignored) { + + } + } + current = current.getCause(); + } + current = e; + while (current != null) { + if (current instanceof SQLException) { + String state = ((SQLException) current).getSQLState(); + if (state != null && !state.isEmpty()) { + return "sql/" + state; + } + return "sql/" + current.getClass().getSimpleName().toLowerCase(); + } + current = current.getCause(); + } + return e.getClass().getSimpleName().toLowerCase(); + } + } +} diff --git a/slo-workload/spring-data-jdbc/src/main/resources/application.yml b/slo-workload/spring-data-jdbc/src/main/resources/application.yml new file mode 100644 index 0000000..faede28 --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/resources/application.yml @@ -0,0 +1,16 @@ +spring: + main: + banner-mode: "off" + web-application-type: none + datasource: + hikari: + maximum-pool-size: ${SLO_HIKARI_POOL_SIZE:130} + minimum-idle: 16 + connection-timeout: 10000 + +logging: + level: + root: info + tech.ydb: info + tech.ydb.slo: info + org.springframework: warn diff --git a/slo-workload/spring-data-jdbc/src/main/resources/log4j2.xml b/slo-workload/spring-data-jdbc/src/main/resources/log4j2.xml new file mode 100644 index 0000000..bbc8ea2 --- /dev/null +++ b/slo-workload/spring-data-jdbc/src/main/resources/log4j2.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/slo-workload/spring-data-jpa/Dockerfile b/slo-workload/spring-data-jpa/Dockerfile new file mode 100644 index 0000000..c42e1c5 --- /dev/null +++ 
b/slo-workload/spring-data-jpa/Dockerfile @@ -0,0 +1,57 @@ +# Multi-stage Dockerfile for the Spring Data JPA / Hibernate 6 SLO workload. +# +# Build context: the `ydb-java-examples` repository root. For ydb-jdbc-driver +# CI the context may also contain `./ydb-jdbc-driver`; when present, the driver +# is installed from source and the workload is pinned to that exact version. +# +# Optional build args: +# MAVEN_IMAGE Builder image. Defaults to `maven:3.9-eclipse-temurin-17`. +# RUNTIME_IMAGE Runtime image. Defaults to `eclipse-temurin:17-jre`. +# YDB_JDBC_VERSION Override the ydb-jdbc-driver version under test. + +ARG MAVEN_IMAGE=maven:3.9-eclipse-temurin-17 +ARG RUNTIME_IMAGE=eclipse-temurin:17-jre + +FROM ${MAVEN_IMAGE} AS workload-build + +WORKDIR /src +COPY . /src + +ARG YDB_JDBC_VERSION="" + +RUN if [ -d /src/ydb-jdbc-driver ]; then \ + cd /src/ydb-jdbc-driver && \ + mvn -B -q \ + -DskipTests \ + -Dmaven.javadoc.skip=true \ + -Dmaven.source.skip=true \ + -Dgpg.skip=true \ + install && \ + mvn -B -q help:evaluate -Dexpression=project.version -DforceStdout > /tmp/ydb-jdbc.version && \ + YDB_JDBC_VERSION="$(cat /tmp/ydb-jdbc.version)" && \ + cd /src && \ + echo "Pinning ydb-jdbc-driver to source-built ${YDB_JDBC_VERSION}" && \ + mvn -B -q versions:set-property \ + -Dproperty=ydb.jdbc.version \ + -DnewVersion="${YDB_JDBC_VERSION}" \ + -DgenerateBackupPoms=false \ + -pl slo-workload ; \ + elif [ -n "${YDB_JDBC_VERSION}" ]; then \ + echo "Pinning ydb-jdbc-driver to ${YDB_JDBC_VERSION}" && \ + mvn -B -q versions:set-property \ + -Dproperty=ydb.jdbc.version \ + -DnewVersion="${YDB_JDBC_VERSION}" \ + -DgenerateBackupPoms=false \ + -pl slo-workload ; \ + fi && \ + mvn -B -q -pl slo-workload/spring-data-jpa -am \ + -DskipTests \ + -Dmaven.javadoc.skip=true \ + package + +FROM ${RUNTIME_IMAGE} + +WORKDIR /app +COPY --from=workload-build /src/slo-workload/spring-data-jpa/target/ydb-slo-spring-data-jpa-workload.jar /app/ydb-slo-spring-data-jpa-workload.jar + +ENTRYPOINT 
["java", "-jar", "/app/ydb-slo-spring-data-jpa-workload.jar"] diff --git a/slo-workload/spring-data-jpa/pom.xml b/slo-workload/spring-data-jpa/pom.xml new file mode 100644 index 0000000..201204f --- /dev/null +++ b/slo-workload/spring-data-jpa/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + + tech.ydb.examples + slo-workload + 1.1.0-SNAPSHOT + ../pom.xml + + + slo-workload-spring-data-jpa + Spring Data JPA (Hibernate 6) SLO workload + + SLO workload exercising Spring Data JPA / Hibernate 6 with spring-ydb-retry over the YDB JDBC driver + + + + + tech.ydb.examples + slo-workload-core + + + + org.springframework.boot + spring-boot-starter-data-jpa + + + + tech.ydb.dialects + hibernate-ydb-dialect + + + + tech.ydb + spring-ydb-retry + + + + tech.ydb.jdbc + ydb-jdbc-driver + + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + + + ydb-slo-spring-data-jpa-workload + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.springframework.boot + spring-boot-maven-plugin + ${spring.boot.version} + + + + repackage + + + + + tech.ydb.slo.springjpa.SloSpringDataJpaApplication + + + + + diff --git a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/KvOperationService.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/KvOperationService.java new file mode 100644 index 0000000..6dcc396 --- /dev/null +++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/KvOperationService.java @@ -0,0 +1,74 @@ +package tech.ydb.slo.springjpa; + +import java.sql.Timestamp; +import java.util.List; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.FlushModeType; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import org.hibernate.jpa.AvailableHints; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import tech.ydb.retry.YdbTransactional; +import tech.ydb.slo.core.kv.KvSchema; +import tech.ydb.slo.core.kv.Row; + 
+@Service +public class KvOperationService { + + private static final ThreadLocal ATTEMPTS = ThreadLocal.withInitial(() -> new int[1]); + + + public static void resetAttempts() { + ATTEMPTS.get()[0] = 0; + } + + + public static int currentAttempts() { + return ATTEMPTS.get()[0]; + } + + @PersistenceContext + private EntityManager entityManager; + + @YdbTransactional(readOnly = true) + public void read(String tablePath, long id, long hash) { + ATTEMPTS.get()[0]++; + Query query = entityManager.createNativeQuery(String.format(KvSchema.SELECT_TEMPLATE, tablePath)) + .setParameter(1, id) + .setParameter(2, hash) + .setHint(AvailableHints.HINT_READ_ONLY, true); + query.setFlushMode(FlushModeType.COMMIT); + @SuppressWarnings("unchecked") + List rows = query.getResultList(); + for (Object[] row : rows) { + if (row[0] != null) { + ((Number) row[0]).longValue(); + } + } + } + + @YdbTransactional(idempotent = true) + public void write(String tablePath, Row row, long hash) { + ATTEMPTS.get()[0]++; + Query query = entityManager.createNativeQuery(String.format(KvSchema.UPSERT_TEMPLATE, tablePath)) + .setParameter(1, hash) + .setParameter(2, row.id()) + .setParameter(3, row.payloadStr()) + .setParameter(4, row.payloadDouble()) + .setParameter(5, Timestamp.from(row.payloadTimestamp())) + .setParameter(6, row.payloadHash()); + query.setFlushMode(FlushModeType.COMMIT); + query.executeUpdate(); + + + entityManager.clear(); + } + + @Transactional + public void executeDdl(String ddl) { + entityManager.createNativeQuery(ddl).executeUpdate(); + } +} diff --git a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloExitCodeHolder.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloExitCodeHolder.java new file mode 100644 index 0000000..1fbc841 --- /dev/null +++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloExitCodeHolder.java @@ -0,0 +1,18 @@ +package tech.ydb.slo.springjpa; + +import 
org.springframework.boot.ExitCodeGenerator; +import org.springframework.stereotype.Component; + +@Component +public class SloExitCodeHolder implements ExitCodeGenerator { + private int exitCode; + + void setExitCode(int exitCode) { + this.exitCode = exitCode; + } + + @Override + public int getExitCode() { + return exitCode; + } +} diff --git a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloInfrastructureConfiguration.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloInfrastructureConfiguration.java new file mode 100644 index 0000000..c467732 --- /dev/null +++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloInfrastructureConfiguration.java @@ -0,0 +1,38 @@ +package tech.ydb.slo.springjpa; + +import javax.sql.DataSource; + +import com.zaxxer.hikari.HikariDataSource; +import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import tech.ydb.slo.core.Config; + +@Configuration +public class SloInfrastructureConfiguration { + + @Bean + Config sloConfig() { + return Config.fromEnv("java-spring-data-jpa-kv"); + } + + // Bind spring.datasource.hikari.* onto this hand-built pool; Boot's Hikari + // auto-configuration backs off once a DataSource bean is defined. + // NOTE(review): this module's application.yml is not visible here - confirm the keys exist. + @Bean + @ConfigurationProperties("spring.datasource.hikari") + DataSource sloDataSource(Config sloConfig) { + DataSourceProperties properties = new DataSourceProperties(); + properties.setDriverClassName("tech.ydb.jdbc.YdbDriver"); + properties.setUrl(sloConfig.jdbcUrl()); + HikariDataSource dataSource = properties.initializeDataSourceBuilder() + .type(HikariDataSource.class) + .build(); + if (sloConfig.token() != null && !sloConfig.token().isEmpty()) { + dataSource.addDataSourceProperty("token", sloConfig.token()); + } + return dataSource; + } +} diff --git a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloSpringDataJpaApplication.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloSpringDataJpaApplication.java new file mode 100644 index 0000000..32f9656 --- /dev/null
+++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloSpringDataJpaApplication.java @@ -0,0 +1,17 @@ +package tech.ydb.slo.springjpa; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.WebApplicationType; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.builder.SpringApplicationBuilder; + +@SpringBootApplication +public class SloSpringDataJpaApplication { + + public static void main(String[] args) { + int exitCode = SpringApplication.exit(new SpringApplicationBuilder(SloSpringDataJpaApplication.class) + .web(WebApplicationType.NONE) + .run(args)); + System.exit(exitCode); + } +} diff --git a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloWorkloadRunner.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloWorkloadRunner.java new file mode 100644 index 0000000..37c663d --- /dev/null +++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SloWorkloadRunner.java @@ -0,0 +1,29 @@ +package tech.ydb.slo.springjpa; + +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; +import org.springframework.stereotype.Component; + +import tech.ydb.slo.core.Launcher; + +@Component +public class SloWorkloadRunner implements ApplicationRunner { + private final SpringJpaKvClient client; + private final SloExitCodeHolder exitCodeHolder; + + public SloWorkloadRunner(SpringJpaKvClient client, SloExitCodeHolder exitCodeHolder) { + this.client = client; + this.exitCodeHolder = exitCodeHolder; + } + + @Override + public void run(ApplicationArguments args) { + int exitCode = Launcher.run( + "ydb-slo-spring-data-jpa-workload", + "java-spring-data-jpa-kv", + args.getSourceArgs(), + (config, params, tablePath) -> client.forTable(tablePath) + ); + exitCodeHolder.setExitCode(exitCode); + } +} diff --git 
a/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SpringJpaKvClient.java b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SpringJpaKvClient.java
new file mode 100644
index 0000000..f70121d
--- /dev/null
+++ b/slo-workload/spring-data-jpa/src/main/java/tech/ydb/slo/springjpa/SpringJpaKvClient.java
@@ -0,0 +1,159 @@
+package tech.ydb.slo.springjpa;
+
+import java.sql.SQLException;
+import java.util.Locale;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+
+import tech.ydb.jdbc.exception.YdbStatusable;
+import tech.ydb.slo.core.kv.KvClient;
+import tech.ydb.slo.core.kv.KvSchema;
+import tech.ydb.slo.core.kv.KvSession;
+import tech.ydb.slo.core.kv.KvWorkloadParams;
+import tech.ydb.slo.core.kv.OpOutcome;
+import tech.ydb.slo.core.kv.Row;
+import tech.ydb.slo.core.kv.RowGenerator;
+
+/**
+ * Spring-managed factory of {@link KvClient} adapters that delegate every
+ * operation to {@link KvOperationService}.
+ */
+@Component
+public class SpringJpaKvClient {
+    private final KvOperationService operations;
+
+    public SpringJpaKvClient(KvOperationService operations) {
+        this.operations = operations;
+    }
+
+    /**
+     * Creates a client bound to one table.
+     *
+     * @param tablePath table used by the sessions the returned client opens
+     * @return a new {@link KvClient} for {@code tablePath}
+     */
+    public KvClient forTable(String tablePath) {
+        return new BoundClient(operations, tablePath);
+    }
+
+    /** {@link KvClient} whose sessions all target a single table. */
+    private static final class BoundClient implements KvClient {
+        private static final Logger logger = LoggerFactory.getLogger(BoundClient.class);
+
+        private final KvOperationService operations;
+        private final String tablePath;
+
+        BoundClient(KvOperationService operations, String tablePath) {
+            this.operations = operations;
+            this.tablePath = tablePath;
+        }
+
+        @Override
+        public void createTable(KvWorkloadParams params, String table) {
+            operations.executeDdl(String.format(
+                KvSchema.CREATE_TABLE_TEMPLATE,
+                table,
+                params.minPartitionCount(),
+                params.partitionSizeMb(),
+                params.minPartitionCount(),
+                params.maxPartitionCount()
+            ));
+        }
+
+        /** Best-effort drop: a runtime failure is logged and not propagated. */
+        @Override
+        public void dropTable(String table) {
+            try {
+                operations.executeDdl(String.format(KvSchema.DROP_TABLE_TEMPLATE, table));
+            } catch (RuntimeException e) {
+                logger.warn("failed to drop table {}: {}", table, e.toString());
+            }
+        }
+
+        @Override
+        public KvSession openSession() {
+            return new SpringJpaKvSession(operations, tablePath);
+        }
+    }
+
+    /**
+     * {@link KvSession} that reports runtime failures as error outcomes instead
+     * of propagating them.
+     */
+    private static final class SpringJpaKvSession implements KvSession {
+        private final KvOperationService operations;
+        private final String tablePath;
+
+        private SpringJpaKvSession(KvOperationService operations, String tablePath) {
+            this.operations = operations;
+            this.tablePath = tablePath;
+        }
+
+        // NOTE(review): timeoutMs is not passed on to KvOperationService in read/write;
+        // confirm the per-operation timeout is enforced elsewhere.
+        @Override
+        public OpOutcome read(long id, int timeoutMs) {
+            KvOperationService.resetAttempts();
+            try {
+                long hash = RowGenerator.numericHash(id);
+                operations.read(tablePath, id, hash);
+                return OpOutcome.success(Math.max(0, KvOperationService.currentAttempts() - 1));
+            } catch (RuntimeException e) {
+                return OpOutcome.error(
+                    Math.max(0, KvOperationService.currentAttempts() - 1),
+                    classifyError(e)
+                );
+            }
+        }
+
+        @Override
+        public OpOutcome write(Row row, int timeoutMs) {
+            KvOperationService.resetAttempts();
+            try {
+                long hash = RowGenerator.numericHash(row.id());
+                operations.write(tablePath, row, hash);
+                return OpOutcome.success(Math.max(0, KvOperationService.currentAttempts() - 1));
+            } catch (RuntimeException e) {
+                return OpOutcome.error(
+                    Math.max(0, KvOperationService.currentAttempts() - 1),
+                    classifyError(e)
+                );
+            }
+        }
+
+        /**
+         * Maps a failure to an error-kind label.
+         *
+         * <p>The cause chain is searched first for a {@link YdbStatusable}
+         * ({@code ydb/<status code>}), then for a {@link SQLException}
+         * ({@code sql/<SQLState>}, or {@code sql/<class name>} when the state is
+         * absent); otherwise the simple class name of {@code e} is returned. Names
+         * are lower-cased with {@link Locale#ROOT} so the label does not vary with
+         * the JVM default locale.
+         */
+        private static String classifyError(Throwable e) {
+            for (Throwable current = e; current != null; current = current.getCause()) {
+                if (current instanceof YdbStatusable) {
+                    try {
+                        String code = ((YdbStatusable) current).getStatus().getCode().name();
+                        return "ydb/" + code.toLowerCase(Locale.ROOT);
+                    } catch (RuntimeException ignored) {
+                        // Status unavailable on this link; keep walking the cause chain.
+                    }
+                }
+            }
+            for (Throwable current = e; current != null; current = current.getCause()) {
+                if (current instanceof SQLException) {
+                    String state = ((SQLException) current).getSQLState();
+                    if (state != null && !state.isEmpty()) {
+                        return "sql/" + state;
+                    }
+                    return "sql/" + current.getClass().getSimpleName().toLowerCase(Locale.ROOT);
+                }
+            }
+            return e.getClass().getSimpleName().toLowerCase(Locale.ROOT);
+        }
+    }
+}
diff --git a/slo-workload/spring-data-jpa/src/main/resources/application.yml b/slo-workload/spring-data-jpa/src/main/resources/application.yml
new file mode 100644
index 0000000..39aef42
--- /dev/null
+++ b/slo-workload/spring-data-jpa/src/main/resources/application.yml
@@ -0,0 +1,25 @@
+spring:
+  main:
+    banner-mode: "off"
+    web-application-type: none
+  datasource:
+    driver-class-name: tech.ydb.jdbc.YdbDriver
+    hikari:
+      maximum-pool-size: ${SLO_HIKARI_POOL_SIZE:130}
+      minimum-idle: 16
+      connection-timeout: 10000
+  jpa:
+    open-in-view: false
+    hibernate:
+      ddl-auto: none
+    properties:
+      hibernate:
+        dialect: tech.ydb.hibernate.dialect.YdbDialect
+
+logging:
+  level:
+    root: info
+    tech.ydb: info
+    tech.ydb.slo: info
+    org.springframework: warn
+    org.hibernate: warn
diff --git a/slo-workload/spring-data-jpa/src/main/resources/log4j2.xml b/slo-workload/spring-data-jpa/src/main/resources/log4j2.xml
new file mode 100644
index 0000000..bbc8ea2
--- /dev/null
+++ b/slo-workload/spring-data-jpa/src/main/resources/log4j2.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slo/README.md b/slo/README.md
deleted file mode 100644
index 9fdb08c..0000000
--- a/slo/README.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# YDB Java SDK SLO workload
-
-This module contains the workload application used by the [YDB SLO Action](https://github.com/ydb-platform/ydb-slo-action) to test the reliability of the YDB Java SDK under load and chaos.
-
-It is a sibling of the SLO workloads in [`ydb-go-sdk`](https://github.com/ydb-platform/ydb-go-sdk/tree/master/tests/slo) and [`ydb-js-sdk`](https://github.com/ydb-platform/ydb-js-sdk/tree/main/tests/slo): the schema, queries and metrics are kept compatible so reports across SDKs are directly comparable.
-
-## What it does
-
-The workload runs three phases:
-
-1. **Setup** — creates a partitioned KV table and prefills it with rows.
-2.
**Run** — drives concurrent read and write loops at fixed RPS for the configured duration. Each operation is timed and retried via `tech.ydb.query.tools.SessionRetryContext`; the outcome is recorded as Prometheus-compatible metrics that the action scrapes via OTLP. -3. **Teardown** — drops the workload table even if the run failed, so the cluster is left clean. - -While the workload runs, the SLO action injects chaos (node restarts, network black holes, container pauses). The metrics show how well the SDK copes with those failures. - -## Metrics - -Every metric carries a `ref` label whose value is taken from the `WORKLOAD_REF` environment variable. This is how the report action separates the **current** PR run from the **baseline** run. - -Names below are shown in Prometheus form (with underscores). Internally the workload uses the OpenTelemetry naming convention with dots (e.g. `sdk.operations.total`); the OTLP → Prometheus conversion replaces dots with underscores automatically, so this is what you see when you query Prometheus or write rules in `metrics.yaml`. 
- -| Metric | Type | Labels | -| ----------------------------------- | --------------- | ------------------------------------------------------- | -| `sdk_operations_total` | counter | `operation_type`, `operation_status` | -| `sdk_errors_total` | counter | `operation_type`, `error_kind` | -| `sdk_retry_attempts_total` | counter | `operation_type`, `operation_status` | -| `sdk_pending_operations` | up/down counter | `operation_type` | -| `sdk_operation_latency_p50_seconds` | gauge | `operation_type`, `operation_status` (always `success`) | -| `sdk_operation_latency_p95_seconds` | gauge | `operation_type`, `operation_status` (always `success`) | -| `sdk_operation_latency_p99_seconds` | gauge | `operation_type`, `operation_status` (always `success`) | - -Latency percentiles are computed from per-operation HDR histograms and reflect only successful operations — failure latency is dominated by retry budgets and timeouts and would mask real SDK regressions during chaos. Counters (`sdk_operations_total`, `sdk_errors_total`) cover both branches, so availability is computed correctly. 
- -## Inputs - -The workload reads connection details and run parameters from environment variables provided by the action: - -| Variable | Description | -| ------------------------------- | ------------------------------------------------ | -| `YDB_CONNECTION_STRING` | YDB connection string (preferred) | -| `YDB_ENDPOINT` + `YDB_DATABASE` | Legacy, used if `YDB_CONNECTION_STRING` is unset | -| `WORKLOAD_REF` | Value of the `ref` label on every metric | -| `WORKLOAD_NAME` | Workload name (used to compose the table name) | -| `WORKLOAD_DURATION` | Run duration in seconds | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP HTTP endpoint to push metrics to | - -KV-specific tunables are passed via the command line and parsed by JCommander: - -``` ---read-rps Target read RPS (default 1000) ---write-rps Target write RPS (default 100) ---read-timeout-ms Per-attempt read timeout in milliseconds (default 10000) ---write-timeout-ms Per-attempt write timeout in milliseconds (default 10000) ---prefill-count Rows to prefill before the run phase (default 1000) ---partition-size Auto-partitioning partition size in MB (default 1) ---min-partition-count Minimum number of table partitions (default 6) ---max-partition-count Maximum number of table partitions (default 1000) ---duration Override WORKLOAD_DURATION when > 0 -``` - -Unknown flags are ignored, so the workload accepts `workload_current_command` strings designed for other SDKs without erroring. - -## How CI uses this module - -The CI lives in [`ydb-java-sdk/.github/workflows/slo.yml`](https://github.com/ydb-platform/ydb-java-sdk/blob/master/.github/workflows/slo.yml), not here. The flow is: - -1. Check out the SDK PR (`current`) and the merge-base SDK commit (`baseline`). -2. Check out `ydb-java-examples` for the workload sources. -3. For each version, run `.github/scripts/build-slo-image.sh` from the SDK repo. 
The script assembles a build context with the SDK and examples checkouts side by side and feeds it to [`slo/Dockerfile`](Dockerfile), which: - - Builds the SDK from source and installs it into an in-image local Maven repository. - - Pins `ydb.sdk.version` in the examples parent pom to that version. - - Builds the `slo` module against the freshly-installed SDK. -4. Pass the two images (`ydb-app-current`, `ydb-app-baseline`) to `ydb-platform/ydb-slo-action/init@v2`. -5. After the run, [`ydb-platform/ydb-slo-action/report@v2`](https://github.com/ydb-platform/ydb-slo-action) compares the two and posts a summary to the PR. - -The build is fully self-contained — the SDK under test does not need to be published to a remote Maven repository. - -## Building locally - -The workload can be built standalone against a published SDK version. From the `ydb-java-examples` repository root: - -```bash -mvn -pl slo -am -DskipTests package -``` - -The resulting jar is at `slo/target/ydb-slo-workload.jar`. To run it against a local YDB: - -```bash -export YDB_CONNECTION_STRING="grpc://localhost:2136?database=/local" -export WORKLOAD_REF=local -export WORKLOAD_NAME=java-query-kv -export WORKLOAD_DURATION=60 - -java -jar slo/target/ydb-slo-workload.jar --read-rps 100 --write-rps 10 --prefill-count 100 -``` - -If `OTEL_EXPORTER_OTLP_ENDPOINT` is not set, metrics are still recorded in-process but never exported — handy for verifying that the workload itself runs cleanly before pushing to CI. 
- -## Files - -``` -slo/ -├── Dockerfile Multi-stage build (SDK + workload) -├── pom.xml Maven module descriptor -├── README.md This file -└── src/main/ - ├── java/tech/ydb/slo/ - │ ├── Config.java Reads action env vars - │ ├── Main.java Entry point - │ ├── Metrics.java OTLP metrics + HDR histograms - │ └── kv/ - │ ├── KvWorkload.java Setup/run/teardown loop - │ ├── KvWorkloadParams.java JCommander-bound CLI flags - │ ├── Row.java Row data class - │ └── RowGenerator.java Random payload generator - └── resources/ - └── log4j2.xml Console logging config -``` diff --git a/slo/src/main/java/tech/ydb/slo/Main.java b/slo/src/main/java/tech/ydb/slo/Main.java deleted file mode 100644 index 28f76ac..0000000 --- a/slo/src/main/java/tech/ydb/slo/Main.java +++ /dev/null @@ -1,154 +0,0 @@ -package tech.ydb.slo; - -import com.beust.jcommander.JCommander; -import com.beust.jcommander.ParameterException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import tech.ydb.auth.AuthProvider; -import tech.ydb.auth.NopAuthProvider; -import tech.ydb.auth.TokenAuthProvider; -import tech.ydb.core.grpc.GrpcTransport; -import tech.ydb.query.QueryClient; -import tech.ydb.slo.kv.KvWorkload; -import tech.ydb.slo.kv.KvWorkloadParams; - -/** - * Entry point of the SLO workload. - * - *

<p>Reads connection details and run parameters from environment variables
- * (see {@link Config}), parses workload-specific flags from the command line
- * (see {@link KvWorkloadParams}), and runs the KV workload phases — setup,
- * run, teardown — pushing metrics to the OTLP endpoint configured by the YDB
- * SLO action runtime.
- *
- * <p>Exit codes:
- * <ul>
- *   <li>{@code 0} — workload completed successfully</li>
- *   <li>{@code 1} — workload failed (an unhandled exception or interrupted run)</li>
- *   <li>{@code 2} — invalid CLI arguments</li>
- * </ul>
- */ -public final class Main { - private static final Logger logger = LoggerFactory.getLogger(Main.class); - - private Main() { - // utility class - } - - public static void main(String[] args) { - Config config; - try { - config = Config.fromEnv(); - } catch (IllegalStateException e) { - logger.error("invalid environment configuration: {}", e.getMessage()); - System.exit(2); - return; - } - - KvWorkloadParams params = new KvWorkloadParams(); - try { - JCommander.newBuilder() - .programName("ydb-slo-workload") - .acceptUnknownOptions(true) - .addObject(params) - .build() - .parse(args); - } catch (ParameterException e) { - logger.error("invalid CLI arguments: {}", e.getMessage()); - System.exit(2); - return; - } - - // CLI duration takes precedence over WORKLOAD_DURATION when supplied. - if (params.durationSeconds() <= 0) { - params.setDurationSeconds(config.durationSeconds()); - } - - logger.info("starting SLO workload: name={}, ref={}, duration={}s, readRps={}, writeRps={}", - config.workloadName(), - config.ref(), - params.durationSeconds(), - params.readRps(), - params.writeRps()); - - // The table path embeds workload name and ref so concurrent runs of - // the current and baseline images don't step on each other. Both - // components are sanitized: WORKLOAD_NAME comes from the action input - // and is normally already safe, but we don't trust user input to be - // a valid YDB identifier. 
- String tablePath = sanitize(config.workloadName()) + "_" + sanitize(config.ref()); - - int exitCode = 0; - Metrics metrics = Metrics.create(config); - AuthProvider provider = NopAuthProvider.INSTANCE; - if (config.token() != null && !config.token().isEmpty()) { - provider = new TokenAuthProvider(config.token()); - } - GrpcTransport transport = GrpcTransport.forConnectionString(config.connectionString()) - .withAuthProvider(provider) - .build(); - QueryClient queryClient = QueryClient.newClient(transport).build(); - - KvWorkload workload = new KvWorkload(queryClient, metrics, params, tablePath); - - try { - workload.setup(); - workload.run(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - logger.warn("workload interrupted"); - exitCode = 1; - } catch (Throwable t) { - logger.error("workload failed", t); - exitCode = 1; - } finally { - try { - workload.teardown(); - } catch (Throwable t) { - logger.warn("teardown failed", t); - } - - try { - metrics.flush(); - } catch (Throwable t) { - logger.warn("metrics flush failed", t); - } - - closeQuietly(metrics, "metrics"); - closeQuietly(queryClient, "query client"); - closeQuietly(transport, "transport"); - } - - System.exit(exitCode); - } - - private static void closeQuietly(AutoCloseable closeable, String name) { - if (closeable == null) { - return; - } - try { - closeable.close(); - } catch (Throwable t) { - logger.warn("failed to close {}: {}", name, t.toString()); - } - } - - /** - * Replaces characters that aren't valid in YDB table names with underscores. - * Refs from CI may include slashes ({@code release/1.2}) or dots, which - * the action permits in metrics labels but YDB rejects in table paths. 
- */ - private static String sanitize(String value) { - StringBuilder sb = new StringBuilder(value.length()); - for (int i = 0; i < value.length(); i++) { - char c = value.charAt(i); - if (Character.isLetterOrDigit(c) || c == '_') { - sb.append(c); - } else { - sb.append('_'); - } - } - return sb.toString(); - } -} diff --git a/slo/src/main/java/tech/ydb/slo/Metrics.java b/slo/src/main/java/tech/ydb/slo/Metrics.java deleted file mode 100644 index 767a747..0000000 --- a/slo/src/main/java/tech/ydb/slo/Metrics.java +++ /dev/null @@ -1,381 +0,0 @@ -package tech.ydb.slo; - -import java.time.Duration; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; - -import io.opentelemetry.api.common.AttributeKey; -import io.opentelemetry.api.common.Attributes; -import io.opentelemetry.api.metrics.LongCounter; -import io.opentelemetry.api.metrics.LongUpDownCounter; -import io.opentelemetry.api.metrics.Meter; -import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; -import io.opentelemetry.exporter.otlp.http.metrics.OtlpHttpMetricExporter; -import io.opentelemetry.sdk.metrics.SdkMeterProvider; -import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; -import io.opentelemetry.sdk.metrics.export.PeriodicMetricReader; -import io.opentelemetry.sdk.resources.Resource; -import org.HdrHistogram.AtomicHistogram; -import org.HdrHistogram.Histogram; - -/** - * Collects and pushes SLO workload metrics to the OTLP endpoint configured by - * the YDB SLO action runtime. - * - *

<p>Metrics emitted (matching the contract from
- * {@code ydb-platform/ydb-slo-action}):
- * <ul>
- *   <li>{@code sdk.operations.total} — counter, labeled by
- *       {@code operation_type} and {@code operation_status}</li>
- *   <li>{@code sdk.errors.total} — counter, labeled by
- *       {@code operation_type} and {@code error_kind}</li>
- *   <li>{@code sdk.retry.attempts.total} — counter, labeled by
- *       {@code operation_type} and {@code operation_status}</li>
- *   <li>{@code sdk.pending.operations} — up/down counter, labeled by
- *       {@code operation_type}</li>
- *   <li>{@code sdk.operation.latency.p50.seconds} /
- *       {@code .p95.seconds} / {@code .p99.seconds} —
- *       observable gauges fed from per-operation HDR histograms</li>
- * </ul>
- *
- * <p>
Every metric carries the {@code ref} label so the report action can - * separate current and baseline series. - */ -public final class Metrics implements AutoCloseable { - - public enum OperationType { - READ("read"), - WRITE("write"); - - private final String label; - - OperationType(String label) { - this.label = label; - } - - public String label() { - return label; - } - } - - public enum OperationStatus { - SUCCESS("success"), - ERROR("error"); - - private final String label; - - OperationStatus(String label) { - this.label = label; - } - - public String label() { - return label; - } - } - - private static final AttributeKey ATTR_OPERATION_TYPE = - AttributeKey.stringKey("operation_type"); - private static final AttributeKey ATTR_OPERATION_STATUS = - AttributeKey.stringKey("operation_status"); - private static final AttributeKey ATTR_ERROR_KIND = - AttributeKey.stringKey("error_kind"); - private static final AttributeKey ATTR_REF = - AttributeKey.stringKey("ref"); - - // HDR histograms record latencies in microseconds with high precision up to 60 s. 
- private static final long HDR_MIN_MICROS = 1L; - private static final long HDR_MAX_MICROS = 60L * 1_000_000L; - private static final int HDR_SIGNIFICANT_DIGITS = 3; - - private final SdkMeterProvider meterProvider; - private final String ref; - private final LongCounter operationsTotal; - private final LongCounter errorsTotal; - private final LongCounter retryAttemptsTotal; - private final LongUpDownCounter pendingOperations; - - private final Map histograms = new ConcurrentHashMap<>(); - - private Metrics( - SdkMeterProvider meterProvider, - String ref, - LongCounter operationsTotal, - LongCounter errorsTotal, - LongCounter retryAttemptsTotal, - LongUpDownCounter pendingOperations - ) { - this.meterProvider = meterProvider; - this.ref = ref; - this.operationsTotal = operationsTotal; - this.errorsTotal = errorsTotal; - this.retryAttemptsTotal = retryAttemptsTotal; - this.pendingOperations = pendingOperations; - } - - /* - * Builds a {@code Metrics} instance configured to push OTLP metrics every - * second to the endpoint from {@code config.otlpEndpoint()}. If the - * endpoint is empty, all metrics are still observable in-process but never - * exported. 
- */ - public static Metrics create(Config config) { - String ref = config.ref(); - - Resource resource = Resource.getDefault().toBuilder() - .put("service.name", config.workloadName()) - .put("ref", ref) - .put("sdk", "java") - .build(); - - SdkMeterProviderBuilder providerBuilder = SdkMeterProvider.builder() - .setResource(resource); - - if (config.otlpEndpoint() != null && !config.otlpEndpoint().isEmpty()) { - OtlpHttpMetricExporter exporter = OtlpHttpMetricExporter.builder() - .setEndpoint(metricsEndpoint(config.otlpEndpoint())) - .setTimeout(Duration.ofSeconds(10)) - .build(); - providerBuilder.registerMetricReader( - PeriodicMetricReader.builder(exporter) - .setInterval(Duration.ofSeconds(1)) - .build() - ); - } - - SdkMeterProvider provider = providerBuilder.build(); - Meter meter = provider.get("slo-workload-" + config.workloadName()); - - LongCounter operationsTotal = meter.counterBuilder("sdk.operations.total") - .setDescription("Total number of operations") - .setUnit("{operation}") - .build(); - - LongCounter errorsTotal = meter.counterBuilder("sdk.errors.total") - .setDescription("Total number of errors") - .setUnit("{error}") - .build(); - - LongCounter retryAttemptsTotal = meter.counterBuilder("sdk.retry.attempts.total") - .setDescription("Total number of retry attempts") - .setUnit("{attempt}") - .build(); - - LongUpDownCounter pendingOperations = meter.upDownCounterBuilder("sdk.pending.operations") - .setDescription("Currently in-flight operations") - .build(); - - Map histograms = new ConcurrentHashMap<>(); - - // Pre-create one histogram per operation_type so the first export - // already produces gauge series. We only track successful operations: - // failure latency is dominated by retry budgets / timeouts and would - // skew the percentiles without telling us anything useful about SDK - // performance. The SLO action's metrics.yaml filters by - // operation_status="success" anyway. 
- for (OperationType type : OperationType.values()) { - histograms.put(type, newHistogram()); - } - - // Build the three percentile gauges as raw observers — their values - // are produced by a single batch callback below, which reads - // p50/p95/p99 from the same histogram snapshot and then resets the - // histogram. Reading all three percentiles from one snapshot avoids - // races where p99 could be observed against a freshly-reset histogram - // populated by p50, and resetting after each export means the gauge - // reflects only latencies recorded during the last export interval — - // matching the JS SDK's behaviour and avoiding cold-start tail drag - // on the JVM (without reset, JIT-warmup outliers stick to p99 for - // the rest of the run). - ObservableDoubleMeasurement p50Observer = meter.gaugeBuilder("sdk.operation.latency.p50.seconds") - .setUnit("s") - .setDescription("p50 operation latency in seconds") - .buildObserver(); - - ObservableDoubleMeasurement p95Observer = meter.gaugeBuilder("sdk.operation.latency.p95.seconds") - .setUnit("s") - .setDescription("p95 operation latency in seconds") - .buildObserver(); - - ObservableDoubleMeasurement p99Observer = meter.gaugeBuilder("sdk.operation.latency.p99.seconds") - .setUnit("s") - .setDescription("p99 operation latency in seconds") - .buildObserver(); - - meter.batchCallback( - () -> observeAndResetPercentiles(histograms, ref, p50Observer, p95Observer, p99Observer), - p50Observer, p95Observer, p99Observer - ); - - Metrics metrics = new Metrics( - provider, - ref, - operationsTotal, - errorsTotal, - retryAttemptsTotal, - pendingOperations - ); - metrics.histograms.putAll(histograms); - return metrics; - } - - private static String metricsEndpoint(String otlpEndpoint) { - // OTLP HTTP exporter expects the full /v1/metrics path. The SLO action - // sets OTEL_EXPORTER_OTLP_ENDPOINT to the base URL (e.g. 
- // http://ydb-prometheus:9090/api/v1/otlp), so we append the suffix - // unless the user has already provided it. - String trimmed = otlpEndpoint.endsWith("/") - ? otlpEndpoint.substring(0, otlpEndpoint.length() - 1) - : otlpEndpoint; - if (trimmed.endsWith("/v1/metrics")) { - return trimmed; - } - return trimmed + "/v1/metrics"; - } - - /* - * Records a started operation and returns a span used to record the - * outcome. - */ - public Span startOperation(OperationType type) { - pendingOperations.add(1, Attributes.of( - ATTR_REF, ref, - ATTR_OPERATION_TYPE, type.label() - )); - return new Span(this, type, System.nanoTime()); - } - - /** - * Forces a final flush of pending metrics. Should be called before exit - * to make sure the report action sees the last seconds of data. - */ - public void flush() { - meterProvider.forceFlush().join(10, TimeUnit.SECONDS); - } - - @Override - public void close() { - meterProvider.shutdown().join(10, TimeUnit.SECONDS); - } - - private void recordOutcome( - OperationType type, - OperationStatus status, - int attempts, - long latencyMicros, - String errorKind - ) { - Attributes opAttrs = Attributes.of( - ATTR_REF, ref, - ATTR_OPERATION_TYPE, type.label(), - ATTR_OPERATION_STATUS, status.label() - ); - - operationsTotal.add(1, opAttrs); - retryAttemptsTotal.add(Math.max(0L, attempts), opAttrs); - pendingOperations.add(-1, Attributes.of( - ATTR_REF, ref, - ATTR_OPERATION_TYPE, type.label() - )); - - // Latency is recorded only for successful operations. Failed - // operations spend most of their time inside the retry budget / - // timeout machinery, so their latency reflects the retry policy - // rather than the SDK's performance. Mixing those samples into the - // percentile gauges produces noisy spikes during chaos scenarios - // and tells us nothing actionable. 
- if (status == OperationStatus.SUCCESS) { - Histogram histogram = histograms.computeIfAbsent(type, k -> newHistogram()); - long clamped = Math.max(HDR_MIN_MICROS, Math.min(HDR_MAX_MICROS, latencyMicros)); - histogram.recordValue(clamped); - } else { - errorsTotal.add(1, Attributes.of( - ATTR_REF, ref, - ATTR_OPERATION_TYPE, type.label(), - ATTR_ERROR_KIND, errorKind == null ? "unknown" : errorKind - )); - } - } - - /** - * Observes p50/p95/p99 for every populated histogram in one go and then - * resets the histogram. Called from a single OTel batch callback so all - * three percentiles are read from a consistent snapshot — without that, - * a concurrent record could land between the p50 and p99 reads and - * produce inconsistent values across gauges. - */ - private static void observeAndResetPercentiles( - Map histograms, - String ref, - ObservableDoubleMeasurement p50Out, - ObservableDoubleMeasurement p95Out, - ObservableDoubleMeasurement p99Out - ) { - for (Map.Entry entry : histograms.entrySet()) { - OperationType type = entry.getKey(); - Histogram histogram = entry.getValue(); - - long p50Micros; - long p95Micros; - long p99Micros; - if (histogram.getTotalCount() == 0) { - continue; - } - p50Micros = histogram.getValueAtPercentile(50.0); - p95Micros = histogram.getValueAtPercentile(95.0); - p99Micros = histogram.getValueAtPercentile(99.0); - histogram.reset(); - - // Percentile gauges are always tagged with operation_status="success" - // because we only record successful samples (see recordOutcome). - // The SLO action's metrics.yaml filters on this same label, so the - // gauges line up with what the report expects. 
- Attributes attrs = Attributes.of( - ATTR_REF, ref, - ATTR_OPERATION_TYPE, type.label(), - ATTR_OPERATION_STATUS, OperationStatus.SUCCESS.label() - ); - p50Out.record(p50Micros / 1_000_000.0, attrs); - p95Out.record(p95Micros / 1_000_000.0, attrs); - p99Out.record(p99Micros / 1_000_000.0, attrs); - } - } - - private static Histogram newHistogram() { - return new AtomicHistogram(HDR_MIN_MICROS, HDR_MAX_MICROS, HDR_SIGNIFICANT_DIGITS); - } - - /** - * One in-flight operation. Call exactly one of the {@code finish} methods. - */ - public static final class Span { - private final Metrics metrics; - private final OperationType type; - private final long startNanos; - private boolean finished; - - private Span(Metrics metrics, OperationType type, long startNanos) { - this.metrics = metrics; - this.type = type; - this.startNanos = startNanos; - } - - public void finishSuccess(int attempts) { - finish(OperationStatus.SUCCESS, attempts, null); - } - - public void finishError(int attempts, String errorKind) { - finish(OperationStatus.ERROR, attempts, errorKind); - } - - private void finish(OperationStatus status, int attempts, String errorKind) { - if (finished) { - return; - } - finished = true; - long latencyMicros = (System.nanoTime() - startNanos) / 1_000L; - metrics.recordOutcome(type, status, attempts, latencyMicros, errorKind); - } - } - -} diff --git a/slo/src/main/java/tech/ydb/slo/kv/KvWorkload.java b/slo/src/main/java/tech/ydb/slo/kv/KvWorkload.java deleted file mode 100644 index 90b6cd2..0000000 --- a/slo/src/main/java/tech/ydb/slo/kv/KvWorkload.java +++ /dev/null @@ -1,463 +0,0 @@ -package tech.ydb.slo.kv; - -import java.time.Duration; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -import com.google.common.util.concurrent.RateLimiter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import tech.ydb.common.transaction.TxMode; -import tech.ydb.core.Result; -import tech.ydb.core.Status; -import tech.ydb.query.QueryClient; -import tech.ydb.query.settings.ExecuteQuerySettings; -import tech.ydb.query.tools.QueryReader; -import tech.ydb.query.tools.SessionRetryContext; -import tech.ydb.slo.Metrics; -import tech.ydb.table.query.Params; -import tech.ydb.table.result.ResultSetReader; -import tech.ydb.table.values.PrimitiveValue; - -/** - * Key-value workload for the SLO test. - * - *

<p>The workload creates a partitioned table, prefills it with rows, and then
- * runs read and write loops at fixed RPS for the configured duration. Each
- * operation is timed and retried via {@link SessionRetryContext}; the outcome
- * is recorded into {@link Metrics} so the SLO action can compare current and
- * baseline runs.
- *
- * <p>Schema and queries mirror the KV workloads in the Go and JavaScript SDKs
- * so the produced metrics are directly comparable across SDKs.
- *
- * <p>
<p>Concurrency model: each operation type (read / write) gets a dedicated
- * thread pool whose size follows the configured RPS, capped at
- * {@code MAX_WORKERS}. Every worker thread pulls a permit from a shared Guava
- * {@link RateLimiter} and executes the operation inline. There is no separate
- * driver thread and no work queue, which removes the unbounded backlog risk
- * under chaos and keeps the worker count proportional to the actual
- * concurrency budget.
- */
-public final class KvWorkload {
-    private static final Logger logger = LoggerFactory.getLogger(KvWorkload.class);
-
-    private static final String CREATE_TABLE_QUERY_TEMPLATE = ""
-            + "CREATE TABLE IF NOT EXISTS `%s` ("
-            + " hash Uint64,"
-            + " id Uint64,"
-            + " payload_str Utf8,"
-            + " payload_double Double,"
-            + " payload_timestamp Timestamp,"
-            + " payload_hash Uint64,"
-            + " PRIMARY KEY (hash, id)"
-            + ") WITH ("
-            + " UNIFORM_PARTITIONS = %d,"
-            + " AUTO_PARTITIONING_BY_SIZE = ENABLED,"
-            + " AUTO_PARTITIONING_PARTITION_SIZE_MB = %d,"
-            + " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d,"
-            + " AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = %d"
-            + ")";
-
-    private static final String DROP_TABLE_QUERY_TEMPLATE = "DROP TABLE `%s`";
-
-    private static final String WRITE_QUERY_TEMPLATE = ""
-            + "DECLARE $id AS Uint64;"
-            + "DECLARE $payload_str AS Utf8;"
-            + "DECLARE $payload_double AS Double;"
-            + "DECLARE $payload_timestamp AS Timestamp;"
-            + "DECLARE $payload_hash AS Uint64;"
-            + "UPSERT INTO `%s` ("
-            + " id, hash, payload_str, payload_double, payload_timestamp, payload_hash"
-            + ") VALUES ("
-            + " $id,"
-            + " Digest::NumericHash($id),"
-            + " $payload_str,"
-            + " $payload_double,"
-            + " $payload_timestamp,"
-            + " $payload_hash"
-            + ");";
-
-    private static final String READ_QUERY_TEMPLATE = ""
-            + "DECLARE $id AS Uint64;"
-            + "SELECT id, payload_str, payload_double, payload_timestamp, payload_hash"
-            + " FROM `%s`"
-            + " WHERE id = $id AND hash = Digest::NumericHash($id);";
-
-    /*
-     * Hard cap on the number of worker threads spawned for a single operation
-     * type. The SLO targets a few hundred RPS in CI; allowing more workers
-     * than this just wastes threads on JIT-warmup contention without
-     * improving throughput.
-     */
-    private static final int MAX_WORKERS = 64;
-
-    /*
-     * Extra time, on top of the workload duration, given to worker pools to
-     * complete their last in-flight operations before {@link #run()} forces
-     * shutdown. Picked to be larger than the default per-attempt timeout so
-     * a request that started just before the deadline can finish cleanly.
-     */
-    private static final long SHUTDOWN_GRACE_SECONDS = 30L;
-
-    private final SessionRetryContext retryCtx;
-    private final Metrics metrics;
-    private final KvWorkloadParams params;
-    private final String tablePath;
-
-    // The table path never changes, so the read/write query text is formatted
-    // once here instead of on every operation.
-    private final String readQuery;
-    private final String writeQuery;
-
-    private final RowGenerator generator;
-
-    /*
-     * Builds the workload around a retry context created from
-     * {@code queryClient}. The row generator starts at
-     * {@code params.prefillCount()}, i.e. right after the ids used by prefill.
-     */
-    public KvWorkload(QueryClient queryClient, Metrics metrics, KvWorkloadParams params, String tablePath) {
-        this.retryCtx = SessionRetryContext.create(queryClient).build();
-        this.metrics = metrics;
-        this.params = params;
-        this.tablePath = tablePath;
-        this.readQuery = String.format(READ_QUERY_TEMPLATE, tablePath);
-        this.writeQuery = String.format(WRITE_QUERY_TEMPLATE, tablePath);
-        this.generator = new RowGenerator(params.prefillCount());
-    }
-
-    /*
-     * Creates the table (if missing) and prefills it with
-     * {@code params.prefillCount()} rows. Prefill uses a fixed-size thread pool
-     * so we don't open thousands of sessions in parallel on slow runners.
-     * Failed prefill rows are counted and logged but do not abort setup.
-     */
-    public void setup() throws InterruptedException {
-        logger.info("creating table {}", tablePath);
-        Status createStatus = retryCtx.supplyResult(session ->
-                session.createQuery(
-                        String.format(
-                                CREATE_TABLE_QUERY_TEMPLATE,
-                                tablePath,
-                                params.minPartitionCount(),
-                                params.partitionSizeMb(),
-                                params.minPartitionCount(),
-                                params.maxPartitionCount()
-                        ),
-                        TxMode.NONE
-                ).execute()
-        ).join().getStatus();
-        createStatus.expectSuccess("failed to create table " + tablePath);
-        logger.info("table {} created", tablePath);
-
-        logger.info("prefilling {} rows into {}", params.prefillCount(), tablePath);
-        // Between 1 and MAX_WORKERS threads, never more threads than rows.
-        int parallelism = Math.max(1, Math.min(params.prefillCount(), MAX_WORKERS));
-        ExecutorService prefillPool = Executors.newFixedThreadPool(
-                parallelism, namedThreadFactory("slo-prefill-")
-        );
-        try {
-            List<CompletableFuture<Status>> futures = new ArrayList<>();
-            for (long i = 0; i < params.prefillCount(); i++) {
-                final long id = i;
-                futures.add(CompletableFuture.supplyAsync(
-                        () -> writeRowSilently(RowGenerator.generate(id)),
-                        prefillPool
-                ));
-            }
-
-            int failed = 0;
-            for (CompletableFuture<Status> f : futures) {
-                Status s = f.join();
-                if (!s.isSuccess()) {
-                    failed++;
-                    // Only the first few failures are logged individually.
-                    if (failed <= 5) {
-                        logger.warn("prefill row failed: {}", s);
-                    }
-                }
-            }
-            if (failed > 0) {
-                logger.warn("prefill completed with {} failed rows out of {}", failed, params.prefillCount());
-            } else {
-                logger.info("prefill completed");
-            }
-        } finally {
-            prefillPool.shutdown();
-            if (!prefillPool.awaitTermination(30, TimeUnit.SECONDS)) {
-                prefillPool.shutdownNow();
-            }
-        }
-    }
-
-    /*
-     * Runs the workload until the configured deadline or thread interruption.
-     *
-     * <p>Read and write workers run concurrently on dedicated thread pools.
-     * Each worker pulls a permit from its rate limiter and executes the
-     * operation inline, so there is no shared work queue and no driver
-     * thread. Non-positive RPS disables the corresponding loop entirely
-     * (useful for write-only or read-only smoke tests).
-     */
-    public void run() throws InterruptedException {
-        long durationSeconds = params.durationSeconds();
-        long endNanos = durationSeconds > 0
-                ? System.nanoTime() + TimeUnit.SECONDS.toNanos(durationSeconds)
-                : Long.MAX_VALUE;
-
-        // Counts write operations that have finished, successfully or not, so
-        // reads can extend their key range past the prefilled rows. The
-        // generator itself was constructed with nextId = prefillCount, so
-        // writes pick up where prefill left off.
-        AtomicLong writesIssued = new AtomicLong();
-
-        int readWorkers = workerCount(params.readRps());
-        int writeWorkers = workerCount(params.writeRps());
-
-        if (readWorkers == 0 && writeWorkers == 0) {
-            logger.warn("both read and write RPS are <= 0, run phase has nothing to do");
-            return;
-        }
-
-        ExecutorService readPool = null;
-        ExecutorService writePool = null;
-        try {
-            if (readWorkers > 0) {
-                readPool = Executors.newFixedThreadPool(readWorkers, namedThreadFactory("slo-read-"));
-                RateLimiter readLimiter = RateLimiter.create(params.readRps());
-                for (int i = 0; i < readWorkers; i++) {
-                    readPool.execute(() -> workerLoop(
-                            endNanos, readLimiter,
-                            () -> readOnce(writesIssued.get()),
-                            "read"
-                    ));
-                }
-            } else {
-                logger.info("read RPS <= 0, skipping read workers");
-            }
-
-            if (writeWorkers > 0) {
-                writePool = Executors.newFixedThreadPool(writeWorkers, namedThreadFactory("slo-write-"));
-                RateLimiter writeLimiter = RateLimiter.create(params.writeRps());
-                for (int i = 0; i < writeWorkers; i++) {
-                    writePool.execute(() -> workerLoop(
-                            endNanos, writeLimiter,
-                            () -> {
-                                writeOnce(generator.generate());
-                                writesIssued.incrementAndGet();
-                            },
-                            "write"
-                    ));
-                }
-            } else {
-                logger.info("write RPS <= 0, skipping write workers");
-            }
-
-            // Wait for workers to drain naturally as they hit the deadline.
-            // shutdown() lets in-flight ops finish; awaitTermination caps the
-            // wait at duration + grace so the run phase can't hang past the
-            // configured budget. Workers are stopped via shutdownNow() in
-            // the finally block if they exceed the grace window.
-            long graceNanos = TimeUnit.SECONDS.toNanos(SHUTDOWN_GRACE_SECONDS);
-            long waitNanos = durationSeconds > 0
-                    ? Math.max(0L, endNanos - System.nanoTime()) + graceNanos
-                    : Long.MAX_VALUE;
-
-            if (readPool != null) {
-                readPool.shutdown();
-            }
-            if (writePool != null) {
-                writePool.shutdown();
-            }
-
-            // Both pools share one wait budget: whatever the read pool uses
-            // up is subtracted before waiting on the write pool.
-            long readWaitNanos = waitNanos;
-            if (readPool != null) {
-                long started = System.nanoTime();
-                if (!readPool.awaitTermination(readWaitNanos, TimeUnit.NANOSECONDS)) {
-                    logger.warn("read pool did not drain within deadline, forcing shutdown");
-                    readPool.shutdownNow();
-                }
-                waitNanos = Math.max(0L, waitNanos - (System.nanoTime() - started));
-            }
-            if (writePool != null) {
-                if (!writePool.awaitTermination(waitNanos, TimeUnit.NANOSECONDS)) {
-                    logger.warn("write pool did not drain within deadline, forcing shutdown");
-                    writePool.shutdownNow();
-                }
-            }
-        } finally {
-            forceShutdown(readPool, "read pool");
-            forceShutdown(writePool, "write pool");
-        }
-    }
-
-    /*
-     * Drops the workload table. Called from the {@code finally} block in
-     * {@code Main} so the database is left clean even on failure. A failed
-     * drop is logged, not thrown.
-     */
-    public void teardown() {
-        logger.info("dropping table {}", tablePath);
-        Status status = retryCtx.supplyResult(session ->
-                session.createQuery(
-                        String.format(DROP_TABLE_QUERY_TEMPLATE, tablePath),
-                        TxMode.NONE
-                ).execute()
-        ).join().getStatus();
-        if (!status.isSuccess()) {
-            logger.warn("failed to drop table {}: {}", tablePath, status);
-        } else {
-            logger.info("table {} dropped", tablePath);
-        }
-    }
-
-    // --- internals ---------------------------------------------------------
-
-    /*
-     * Loops on a single worker thread until the deadline or interruption,
-     * pacing each iteration through the shared rate limiter and running the
-     * operation inline. No work queue is involved — backpressure comes
-     * naturally from the limiter blocking the worker.
-     *
-     * <p>When a deadline is set, the permit wait is bounded by the time left:
-     * a worker whose permit would only arrive after the deadline exits
-     * instead of starting one more operation past it.
-     */
-    private void workerLoop(long endNanos, RateLimiter limiter, Runnable singleOp, String name) {
-        // Long.MAX_VALUE is the "no deadline" sentinel set by run(); computing
-        // a remaining time from it could overflow, so it is handled apart.
-        boolean unbounded = endNanos == Long.MAX_VALUE;
-        while (!Thread.currentThread().isInterrupted()) {
-            if (unbounded) {
-                limiter.acquire();
-            } else {
-                long remainingNanos = endNanos - System.nanoTime();
-                if (remainingNanos <= 0
-                        || !limiter.tryAcquire(remainingNanos, TimeUnit.NANOSECONDS)) {
-                    return;
-                }
-            }
-            try {
-                singleOp.run();
-            } catch (Throwable t) {
-                // Deliberately broad: one failing operation must not kill the
-                // worker. The throwable is passed last so the stack trace is
-                // logged as well.
-                logger.warn("{} op threw unexpectedly: {}", name, t.toString(), t);
-            }
-        }
-    }
-
-    /*
-     * Computes the number of worker threads for a given RPS target: one per
-     * requested RPS, capped at MAX_WORKERS.
-     * Returns 0 for non-positive RPS so the caller skips the loop entirely.
-     */
-    private static int workerCount(int rps) {
-        if (rps <= 0) {
-            return 0;
-        }
-        return Math.min(MAX_WORKERS, Math.max(1, rps));
-    }
-
-    /*
-     * Picks a random id in [0, upperBound) and reads it back from the table,
-     * where upperBound is the prefill count plus the number of writes
-     * observed so far (at least 1). Ids in the prefilled range are expected
-     * to exist; ids in the just-written range usually do, but a read that
-     * returns no row is still recorded as a success.
-     */
-    private void readOnce(long writesObserved) {
-        long upperBound = Math.max(1L, params.prefillCount() + writesObserved);
-        long id = ThreadLocalRandom.current().nextLong(upperBound);
-
-        Metrics.Span span = metrics.startOperation(Metrics.OperationType.READ);
-        AtomicInteger attempts = new AtomicInteger();
-        ExecuteQuerySettings settings = ExecuteQuerySettings.newBuilder()
-                .withRequestTimeout(Duration.ofMillis(params.readTimeoutMs()))
-                .build();
-
-        Result<QueryReader> result = retryCtx.supplyResult(session -> {
-            attempts.incrementAndGet();
-            return QueryReader.readFrom(session.createQuery(
-                    readQuery,
-                    TxMode.SNAPSHOT_RO,
-                    Params.of("$id", PrimitiveValue.newUint64(id)),
-                    settings
-            ));
-        }).join();
-
-        // The first attempt is not a retry.
-        int retryAttempts = Math.max(0, attempts.get() - 1);
-
-        if (!result.getStatus().isSuccess()) {
-            span.finishError(retryAttempts, classifyStatus(result.getStatus()));
-            return;
-        }
-
-        // Touch the result set so we exercise the deserialization path.
-        // For ids in the prefilled range the row is guaranteed to exist;
-        // for ids in the just-written range it almost always exists, so
-        // the absence branch is rare but harmless.
-        QueryReader reader = result.getValue();
-        if (reader.getResultSetCount() > 0) {
-            ResultSetReader rs = reader.getResultSet(0);
-            while (rs.next()) {
-                rs.getColumn("id").getUint64();
-            }
-        }
-
-        span.finishSuccess(retryAttempts);
-    }
-
-    /*
-     * Writes one row and records the outcome, including the number of
-     * retries, on a WRITE metrics span.
-     */
-    private void writeOnce(Row row) {
-        Metrics.Span span = metrics.startOperation(Metrics.OperationType.WRITE);
-        AtomicInteger attempts = new AtomicInteger();
-
-        Status status = writeRowInternal(row, attempts);
-        int retryAttempts = Math.max(0, attempts.get() - 1);
-
-        if (status.isSuccess()) {
-            span.finishSuccess(retryAttempts);
-        } else {
-            span.finishError(retryAttempts, classifyStatus(status));
-            logger.debug("write {} failed: {}", row.id(), status);
-        }
-    }
-
-    /*
-     * Writes a single row without recording metrics. Used during prefill so
-     * the histogram of operation latencies is not polluted with bulk-load
-     * timings.
-     */
-    private Status writeRowSilently(Row row) {
-        return writeRowInternal(row, new AtomicInteger());
-    }
-
-    /*
-     * Upserts the row through the retry context and returns the final status.
-     * {@code attempts} is incremented once per attempt so callers can derive
-     * the retry count.
-     */
-    private Status writeRowInternal(Row row, AtomicInteger attempts) {
-        ExecuteQuerySettings settings = ExecuteQuerySettings.newBuilder()
-                .withRequestTimeout(Duration.ofMillis(params.writeTimeoutMs()))
-                .build();
-        return retryCtx.supplyStatus(session -> {
-            attempts.incrementAndGet();
-            return session.createQuery(
-                    writeQuery,
-                    TxMode.SERIALIZABLE_RW,
-                    Params.of(
-                            "$id", PrimitiveValue.newUint64(row.id()),
-                            "$payload_str", PrimitiveValue.newText(row.payloadStr()),
-                            "$payload_double", PrimitiveValue.newDouble(row.payloadDouble()),
-                            "$payload_timestamp", PrimitiveValue.newTimestamp(row.payloadTimestamp()),
-                            "$payload_hash", PrimitiveValue.newUint64(row.payloadHash())
-                    ),
-                    settings
-            ).execute().thenApply(Result::getStatus);
-        }).join();
-    }
-
-    /*
-     * Maps a status to the error label reported to metrics:
-     * {@code "ydb/"} followed by the lower-cased status code name.
-     */
-    private static String classifyStatus(Status status) {
-        // Locale.ROOT keeps the label identical on every JVM default locale.
-        return "ydb/" + status.getCode().name().toLowerCase(java.util.Locale.ROOT);
-    }
-
-    /*
-     * Returns a factory of daemon threads named {@code prefix + N}, with N
-     * counting up from 0.
-     */
-    private static ThreadFactory namedThreadFactory(String prefix) {
-        AtomicInteger counter = new AtomicInteger();
-        return r -> {
-            Thread t = new Thread(r, prefix + counter.getAndIncrement());
-            t.setDaemon(true);
-            return t;
-        };
-    }
-
-    /*
-     * Final cleanup for an executor service. The graceful shutdown is done
-     * inline in {@link #run()} so deadlines line up with workload duration;
-     * this method is the safety net invoked from the {@code finally} block,
-     * forcing shutdown if the pool somehow survived.
-     */
-    private static void forceShutdown(ExecutorService pool, String name) {
-        if (pool == null || pool.isTerminated()) {
-            return;
-        }
-        logger.warn("{} still active in cleanup, forcing shutdown", name);
-        pool.shutdownNow();
-        try {
-            if (!pool.awaitTermination(5, TimeUnit.SECONDS)) {
-                logger.warn("{} did not terminate after shutdownNow", name);
-            }
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-        }
-    }
-}
diff --git a/slo/src/main/java/tech/ydb/slo/kv/RowGenerator.java b/slo/src/main/java/tech/ydb/slo/kv/RowGenerator.java
deleted file mode 100644
index 9daa0da..0000000
--- a/slo/src/main/java/tech/ydb/slo/kv/RowGenerator.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package tech.ydb.slo.kv;
-
-import java.time.Instant;
-import java.util.Base64;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicLong;
-
-/**
- * Generates rows for the KV workload.
- *
-

<p>Every row carries a monotonically increasing {@code id} plus a randomly
- * generated payload. The {@code hash} column is not produced on the client:
- * it is computed server-side via {@code Digest::NumericHash($id)} when the
- * row is inserted. The layout mirrors the SLO workloads in the Go and JS SDKs
- * so the resulting tables are interchangeable.
- */
-public final class RowGenerator {
-    private static final int MIN_PAYLOAD_LENGTH = 20;
-    private static final int MAX_PAYLOAD_LENGTH = 40;
-
-    private final AtomicLong nextId;
-
-    public RowGenerator(long startId) {
-        this.nextId = new AtomicLong(startId);
-    }
-
-    /**
-     * Produces a row whose id is the next value of the internal counter.
-     * @return a new row
-     */
-    public Row generate() {
-        return generate(nextId.getAndIncrement());
-    }
-
-    /**
-     * Produces a row for a caller-chosen id (used during prefill to control IDs).
-     * @param id identifier assigned to the generated row
-     * @return a new row
-     */
-    public static Row generate(long id) {
-        ThreadLocalRandom random = ThreadLocalRandom.current();
-        long hash = random.nextLong();
-        double number = random.nextDouble();
-        String text = randomPayloadString();
-        Instant createdAt = Instant.now();
-
-        return new Row(id, text, number, createdAt, hash);
-    }
-
-    // Base64 (unpadded) text of MIN_PAYLOAD_LENGTH..MAX_PAYLOAD_LENGTH random bytes.
-    private static String randomPayloadString() {
-        ThreadLocalRandom random = ThreadLocalRandom.current();
-        int span = MAX_PAYLOAD_LENGTH - MIN_PAYLOAD_LENGTH + 1;
-        byte[] raw = new byte[MIN_PAYLOAD_LENGTH + random.nextInt(span)];
-        random.nextBytes(raw);
-        return Base64.getEncoder().withoutPadding().encodeToString(raw);
-    }
-}