From 7668c8934ad927fc29e4927cc0f80c1b740c45cb Mon Sep 17 00:00:00 2001 From: rhaist Date: Thu, 18 Jun 2026 07:09:11 +0000 Subject: [PATCH] Fix LSTM real-time safety (upstream #218) LSTMCell::get_hidden_state() returned an Eigen::VectorXf by value, which heap-allocated on every call. Since the LSTM runs sample-by-sample on the audio thread, this allocated once per inter-layer hop and once for the head on every single sample (e.g. ~48k * num_layers allocations/sec), making the process() hot path not real-time safe. Return a non-owning Eigen::Ref view into the internal state instead, and accept the same type in LSTMCell::process_() so the view binds without copying. Also split the gate matmul into a noalias() product plus a separate bias add so the matrix-vector product evaluates directly into the pre-allocated buffer. Add tools/test/test_lstm_realtime_safe.cpp, modeled on the existing WaveNet/FiLM real-time-safety tests, using the allocation-tracking harness to assert that LSTM::process() performs zero allocations and zero frees across single-layer, multi-layer, multi-channel, large-hidden, and consecutive-call cases. Verified the new tests catch the regression (64 allocs/64 frees over 64 frames before the fix; 0/0 after). 
--- NAM/lstm.cpp | 11 +- NAM/lstm.h | 15 ++- tools/run_tests.cpp | 8 ++ tools/test/test_lstm_realtime_safe.cpp | 146 +++++++++++++++++++++++++ 4 files changed, 173 insertions(+), 7 deletions(-) create mode 100644 tools/test/test_lstm_realtime_safe.cpp diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 32130690..6fb8bfe6 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -28,14 +28,16 @@ nam::lstm::LSTMCell::LSTMCell(const int input_size, const int hidden_size, std:: this->_c[i] = *(weights++); } -void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x) +void nam::lstm::LSTMCell::process_(const Eigen::Ref<const Eigen::VectorXf>& x) { const long hidden_size = this->_get_hidden_size(); const long input_size = this->_get_input_size(); // Assign inputs this->_xh(Eigen::seq(0, input_size - 1)) = x; - // The matmul - this->_ifgo = this->_w * this->_xh + this->_b; + // The matmul. Use noalias() and a separate bias add so Eigen evaluates the + // matrix-vector product directly into the pre-allocated _ifgo without a temporary. + this->_ifgo.noalias() = this->_w * this->_xh; + this->_ifgo += this->_b; // Elementwise updates (apply nonlinearities here) const long i_offset = 0; const long f_offset = hidden_size; @@ -154,7 +156,8 @@ void nam::lstm::LSTM::_process_sample() // Compute output using head weight matrix and bias vector // _output = _head_weight * hidden_state + _head_bias - const Eigen::VectorXf& hidden_state = this->_layers[this->_layers.size() - 1].get_hidden_state(); + // Bind to an Eigen::Ref (a non-owning view) so reading the hidden state does not allocate. 
+ const Eigen::Ref<const Eigen::VectorXf> hidden_state = this->_layers[this->_layers.size() - 1].get_hidden_state(); // Compute matrix-vector product: (out_channels x hidden_size) * (hidden_size) = (out_channels) // Store directly in _output (which is already sized correctly in constructor) diff --git a/NAM/lstm.h b/NAM/lstm.h index 607c7d50..92f341dc 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -24,12 +24,21 @@ class LSTMCell LSTMCell(const int input_size, const int hidden_size, std::vector<float>::iterator& weights); /// \brief Get the current hidden state - /// \return Hidden state vector - Eigen::VectorXf get_hidden_state() const { return this->_xh(Eigen::placeholders::lastN(this->_get_hidden_size())); }; + /// \return A non-owning view of the hidden state (the tail of the concatenated input/hidden vector). + /// + /// Returns an Eigen::Ref rather than a by-value Eigen::VectorXf so that reading the hidden state + /// does not heap-allocate. This is required for real-time safety: process_() is called once per + /// audio sample, and a by-value return would allocate on every layer hop and head evaluation. + Eigen::Ref<const Eigen::VectorXf> get_hidden_state() const + { + return this->_xh.tail(this->_get_hidden_size()); + }; /// \brief Process a single input vector /// \param x Input vector - void process_(const Eigen::VectorXf& x); + /// + /// Accepts an Eigen::Ref so that a hidden-state view from another cell binds without copying. 
+ void process_(const Eigen::Ref<const Eigen::VectorXf>& x); private: // Parameters diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 61c7ba4a..31d0527b 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -28,6 +28,7 @@ #include "test/test_input_buffer_verification.cpp" #include "test/test_linear.cpp" #include "test/test_lstm.cpp" +#include "test/test_lstm_realtime_safe.cpp" #include "test/test_wavenet_configurable_gating.cpp" #include "test/test_noncontiguous_blocks.cpp" #include "test/test_extensible.cpp" @@ -244,6 +245,13 @@ int main() test_lstm::test_lstm_state_evolution(); test_lstm::test_lstm_no_layers(); + // LSTM real-time safety tests (issue #218) + test_lstm_realtime_safe::test_lstm_process_single_layer_realtime_safe(); + test_lstm_realtime_safe::test_lstm_process_multi_layer_realtime_safe(); + test_lstm_realtime_safe::test_lstm_process_multichannel_realtime_safe(); + test_lstm_realtime_safe::test_lstm_process_large_hidden_realtime_safe(); + test_lstm_realtime_safe::test_lstm_process_consecutive_calls_realtime_safe(); + // Gating activations tests test_gating_activations::TestGatingActivation::test_basic_functionality(); test_gating_activations::TestGatingActivation::test_with_custom_activations(); diff --git a/tools/test/test_lstm_realtime_safe.cpp b/tools/test/test_lstm_realtime_safe.cpp new file mode 100644 index 00000000..6b69f7f9 --- /dev/null +++ b/tools/test/test_lstm_realtime_safe.cpp @@ -0,0 +1,146 @@ +// Test to verify LSTM::process is real-time safe (no allocations/frees). +// +// Regression test for upstream issue #218 ("Fix LSTM real-time safety issues"). +// The LSTM processes audio sample-by-sample on the audio thread, so its hot path +// (LSTM::process -> LSTM::_process_sample -> LSTMCell::process_) must not allocate +// or free any memory. The historical offender was LSTMCell::get_hidden_state() +// returning an Eigen::VectorXf by value, which heap-allocated once per layer hop +// (and once for the head) on every single sample. 
+ +#include +#include +#include +#include +#include + +#include "NAM/lstm.h" +#include "allocation_tracking.h" + +namespace test_lstm_realtime_safe +{ +using namespace allocation_tracking; + +// Build a self-consistent weights vector for an LSTM. +// Layout matches nam::lstm::LSTM / LSTMCell construction order: +// Per layer: weight matrix (4*hidden x (in+hidden), row-major), bias (4*hidden), +// initial hidden state (hidden), initial cell state (hidden). +// Head: weight matrix (out_channels x hidden, row-major), bias (out_channels). +static std::vector<float> make_weights(int num_layers, int input_size, int hidden_size, int out_channels) +{ + std::vector<float> weights; + for (int layer = 0; layer < num_layers; layer++) + { + const int layer_input_size = (layer == 0) ? input_size : hidden_size; + const int w_rows = 4 * hidden_size; + const int w_cols = layer_input_size + hidden_size; + for (int i = 0; i < w_rows * w_cols; i++) + weights.push_back(0.1f); // small weights for numerical stability + for (int i = 0; i < 4 * hidden_size; i++) + weights.push_back(0.0f); // bias + for (int i = 0; i < hidden_size; i++) + weights.push_back(0.0f); // initial hidden state + for (int i = 0; i < hidden_size; i++) + weights.push_back(0.0f); // initial cell state + } + for (int out_ch = 0; out_ch < out_channels; out_ch++) + for (int h = 0; h < hidden_size; h++) + weights.push_back(0.1f); // head weight + for (int out_ch = 0; out_ch < out_channels; out_ch++) + weights.push_back(0.0f); // head bias + return weights; +} + +// Core helper: build an LSTM with the given shape, prewarm it, then assert that +// processing a block of audio performs zero allocations and zero frees. 
+static void check_no_allocations(const int in_channels, const int out_channels, const int num_layers, + const int input_size, const int hidden_size, const int num_frames, + const char* test_name) +{ + const double sample_rate = 48000.0; + std::vector<float> weights = make_weights(num_layers, input_size, hidden_size, out_channels); + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, sample_rate); + + // Reset + prewarm before tracking so any one-time allocation happens up front. + lstm.Reset(sample_rate, num_frames); + lstm.prewarm(); + + // Allocate the audio buffers and pointer arrays before tracking starts. + std::vector<std::vector<NAM_SAMPLE>> input_bufs(in_channels, std::vector<NAM_SAMPLE>(num_frames, 0.25f)); + std::vector<std::vector<NAM_SAMPLE>> output_bufs(out_channels, std::vector<NAM_SAMPLE>(num_frames, 0.0f)); + std::vector<NAM_SAMPLE*> input_ptrs(in_channels); + std::vector<NAM_SAMPLE*> output_ptrs(out_channels); + for (int ch = 0; ch < in_channels; ch++) + input_ptrs[ch] = input_bufs[ch].data(); + for (int ch = 0; ch < out_channels; ch++) + output_ptrs[ch] = output_bufs[ch].data(); + + run_allocation_test_no_allocations( + nullptr, // no setup + [&]() { lstm.process(input_ptrs.data(), output_ptrs.data(), num_frames); }, + nullptr, // no teardown + test_name); + + // Sanity: output must be finite. + for (int ch = 0; ch < out_channels; ch++) + for (int i = 0; i < num_frames; i++) + assert(std::isfinite(output_bufs[ch][i])); +} + +// Single-layer, single-channel LSTM is real-time safe. +void test_lstm_process_single_layer_realtime_safe() +{ + check_no_allocations(/*in*/ 1, /*out*/ 1, /*layers*/ 1, /*input_size*/ 1, /*hidden*/ 8, /*frames*/ 64, + "LSTM process (1 layer, hidden=8)"); +} + +// Multi-layer LSTM exercises the inter-layer get_hidden_state() hops, which were +// the primary allocation source before the fix. 
+void test_lstm_process_multi_layer_realtime_safe() +{ + check_no_allocations(/*in*/ 1, /*out*/ 1, /*layers*/ 3, /*input_size*/ 1, /*hidden*/ 16, /*frames*/ 64, + "LSTM process (3 layers, hidden=16)"); +} + +// Multi-channel input/output path. +void test_lstm_process_multichannel_realtime_safe() +{ + check_no_allocations(/*in*/ 2, /*out*/ 2, /*layers*/ 2, /*input_size*/ 2, /*hidden*/ 8, /*frames*/ 32, + "LSTM process (2 layers, 2in/2out)"); +} + +// Larger hidden size, to make sure nothing scales into a per-call allocation. +void test_lstm_process_large_hidden_realtime_safe() +{ + check_no_allocations(/*in*/ 1, /*out*/ 1, /*layers*/ 2, /*input_size*/ 1, /*hidden*/ 64, /*frames*/ 128, + "LSTM process (2 layers, hidden=64)"); +} + +// Several consecutive process() calls (state persists across calls) must remain allocation-free. +void test_lstm_process_consecutive_calls_realtime_safe() +{ + const double sample_rate = 48000.0; + const int in_channels = 1, out_channels = 1, num_layers = 2, input_size = 1, hidden_size = 16, num_frames = 48; + std::vector<float> weights = make_weights(num_layers, input_size, hidden_size, out_channels); + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, sample_rate); + lstm.Reset(sample_rate, num_frames); + lstm.prewarm(); + + std::vector<NAM_SAMPLE> input(num_frames, 0.3f); + std::vector<NAM_SAMPLE> output(num_frames, 0.0f); + NAM_SAMPLE* input_ptrs[] = {input.data()}; + NAM_SAMPLE* output_ptrs[] = {output.data()}; + + run_allocation_test_no_allocations( + nullptr, + [&]() { + for (int call = 0; call < 8; call++) + lstm.process(input_ptrs, output_ptrs, num_frames); + }, + nullptr, + "LSTM process (8 consecutive calls)"); + + for (int i = 0; i < num_frames; i++) + assert(std::isfinite(output[i])); +} + +} // namespace test_lstm_realtime_safe