Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,7 @@ void GpuThread::run() {
uint64_t scaled_total_inputs = stage.total_inputs * stage.inputs_multiplier;
auto [scaled_input_speed, input_speed_unit] = scale_si(scaled_total_inputs / stage.total_time);
auto [scaled_output_speed, output_speed_unit] = scale_si(stage.total_outputs / stage.total_time);
std::printf("%-20s - %9.3f ms | %7.3f %% | %12" PRIu64 " -> %12" PRIu64
std::printf("%-20s - %9.3f ms | %7.3f %% | %16" PRIu64 " -> %12" PRIu64

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As stated before, the same issue occurs if you just increase the print interval enough, since it then starts to affect the other columns too.

A better approach would be to dynamically change how the columns are padded based on the widest text that will go into the column, imo.

" | 1 in %11.3f | %7.3f %cips | %7.3f %cops\n",
stage.name.c_str(), stage.total_time * 1e3,
stage.total_time / host_total_time * 100.0,
Expand All @@ -1882,7 +1882,7 @@ void GpuThread::run() {
auto [scaled_input_speed, input_speed_unit] = scale_si(total_inputs / host_total_time);
auto [scaled_output_speed, output_speed_unit] = scale_si(total_outputs / host_total_time);
std::printf(
"total - %9.3f ms | %7.3f %% | %12" PRIu64
"total - %9.3f ms | %7.3f %% | %16" PRIu64
" -> %12" PRIu64 " | | %7.3f %cips | %7.3f %cops\n",
host_total_time * 1e3, kernel_total_time / host_total_time * 100.0,
total_inputs, total_outputs, scaled_input_speed, input_speed_unit,
Expand Down
27 changes: 21 additions & 6 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@
#include <cstring>
#include <cinttypes>
#include <cstdio>
#include <csignal>
#include <chrono>
#include <optional>
#include <charconv>
#include <algorithm>
#include <random>

// Process-wide run flag. It starts out true, is cleared by signal_handler(),
// and is polled by the main loop to decide when to begin shutting down.
static std::atomic_bool running{true};

// A signal handler may only touch atomics that are lock-free; accessing a
// lock-based atomic from a handler is undefined behavior. Fail the build
// instead of risking that on an unusual target.
static_assert(std::atomic_bool::is_always_lock_free,
              "running is written from a signal handler and must be lock-free");

// Signal handler that requests a graceful shutdown by clearing `running`.
// The signal number is ignored, so the same handler can serve several signals.
// Only a single relaxed atomic store is performed here; no ordering with other
// memory is requested because the flag is the only thing being communicated.
static void signal_handler(int) {
    running.store(false, std::memory_order_relaxed);
}

#ifdef NO_GPU
constexpr bool no_gpu = true;
#else
Expand Down Expand Up @@ -244,7 +251,10 @@ int main_inner(int argc, char **argv) {
}
#endif

for (size_t i = 0;; i++) {
std::signal(SIGINT, signal_handler);
std::signal(SIGTERM, signal_handler);

for (size_t i = 0; running.load(std::memory_order_relaxed); i++) {
if (threads != 0) {
std::lock_guard lock(cpu_outputs.mutex);
while (!cpu_outputs.queue.empty()) {
Expand All @@ -264,10 +274,20 @@ int main_inner(int argc, char **argv) {
std::this_thread::sleep_for(std::chrono::seconds(1));
}

std::printf("\nShutting down...\n");

#ifndef NO_GPU
for (auto &thread : gpu_threads) {
(*thread).stop();
}
std::printf("Waiting for GPU batches to finish...\n");
for (auto &thread : gpu_threads) {
(*thread).join();
}
{
uint64_t total_seeds_checked = seed_range.pos.load(std::memory_order_relaxed) - start_seed;
std::printf("Start seed: %" PRIi64 ", Total seeds checked: %" PRIu64 "\n", start_seed, total_seeds_checked);
}
#endif
#ifndef NO_CPU
for (auto &thread : cpu_threads) {
Expand All @@ -283,11 +303,6 @@ int main_inner(int argc, char **argv) {
}
#endif

#ifndef NO_GPU
for (auto &thread : gpu_threads) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're going to change this, move the following NO_CPU block into the first NO_CPU block too for consistency.

(*thread).join();
}
#endif
#ifndef NO_CPU
for (auto &thread : cpu_threads) {
(*thread).join();
Expand Down
Loading