Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/auto_encoder_kl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ struct AutoEncoderKL : public VAE {
}

ggml_tensor* diffusion_to_vae_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* vae_latents = ggml_dup(work_ctx, latents);
ggml_tensor* vae_latents = ggml_dup_tensor(work_ctx, latents);
if (sd_version_is_flux2(version)) {
int channel_dim = 2;
std::vector<float> latents_mean_vec;
Expand Down Expand Up @@ -840,8 +840,8 @@ struct AutoEncoderKL : public VAE {
return vae_latents;
}

ggml_tensor* vae_to_diffuison_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* diffusion_latents = ggml_dup(work_ctx, latents);
ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* diffusion_latents = ggml_dup_tensor(work_ctx, latents);
if (sd_version_is_flux2(version)) {
int channel_dim = 2;
std::vector<float> latents_mean_vec;
Expand Down
10 changes: 5 additions & 5 deletions src/ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
const bool circular_x,
const bool circular_y,
on_tile_process on_processing,
bool slient = false) {
bool silent = false) {
output = ggml_set_f32(output, 0);

int input_width = (int)input->ne[0];
Expand Down Expand Up @@ -871,7 +871,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
float tile_overlap_factor_y;
sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y, tile_overlap_factor, circular_y);

if (!slient) {
if (!silent) {
LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
}
Expand Down Expand Up @@ -905,7 +905,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
params.mem_buffer = nullptr;
params.no_alloc = false;

if (!slient) {
if (!silent) {
LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
}

Expand All @@ -920,7 +920,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
ggml_tensor* input_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, input_tile_size_x, input_tile_size_y, input->ne[2], input->ne[3]);
ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, output_tile_size_x, output_tile_size_y, output->ne[2], output->ne[3]);
int num_tiles = num_tiles_x * num_tiles_y;
if (!slient) {
if (!silent) {
LOG_DEBUG("processing %i tiles", num_tiles);
pretty_progress(0, num_tiles, 0.0f);
}
Expand Down Expand Up @@ -973,7 +973,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
}
last_x = false;
}
if (!slient) {
if (!silent) {
if (tile_count < num_tiles) {
pretty_progress(num_tiles, num_tiles, last_time);
}
Expand Down
31 changes: 17 additions & 14 deletions src/stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ static void log_sample_cache_summary(const SampleCacheRuntime& runtime, size_t t
class StableDiffusionGGML {
public:
ggml_backend_t backend = nullptr; // general backend
ggml_backend_t cpu_backend = nullptr;
ggml_backend_t clip_backend = nullptr;
ggml_backend_t control_net_backend = nullptr;
ggml_backend_t vae_backend = nullptr;
Expand Down Expand Up @@ -531,14 +532,8 @@ class StableDiffusionGGML {
StableDiffusionGGML() = default;

~StableDiffusionGGML() {
    // Only two backends are owned directly by this object: the shared CPU
    // backend (created lazily by get_cpu_backend()) and the general backend.
    // clip_backend / vae_backend / control_net_backend always alias one of
    // these two, so freeing them separately would double-free.
    if (cpu_backend != backend) {
        ggml_backend_free(cpu_backend);
    }
    ggml_backend_free(backend);
}
Expand Down Expand Up @@ -596,8 +591,16 @@ class StableDiffusionGGML {

if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
cpu_backend = ggml_backend_cpu_init();
backend = cpu_backend;
}
}

ggml_backend_t get_cpu_backend() {
    // Lazily create the CPU backend on first request and cache it, so all
    // components pinned to the CPU (CLIP, VAE, ControlNet) share one instance.
    if (!cpu_backend) {
        cpu_backend = ggml_backend_cpu_init();
    }
    return cpu_backend;
}

std::shared_ptr<RNG> get_rng(rng_type_t rng_type) {
Expand Down Expand Up @@ -804,7 +807,7 @@ class StableDiffusionGGML {
clip_backend = backend;
if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
LOG_INFO("CLIP: Using CPU backend");
clip_backend = ggml_backend_cpu_init();
clip_backend = get_cpu_backend();
}
if (sd_version_is_sd3(version)) {
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend,
Expand Down Expand Up @@ -973,7 +976,7 @@ class StableDiffusionGGML {

if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu(backend)) {
LOG_INFO("VAE Autoencoder: Using CPU backend");
vae_backend = ggml_backend_cpu_init();
vae_backend = get_cpu_backend();
} else {
vae_backend = backend;
}
Expand Down Expand Up @@ -1066,7 +1069,7 @@ class StableDiffusionGGML {
ggml_backend_t controlnet_backend = nullptr;
if (sd_ctx_params->keep_control_net_on_cpu && !ggml_backend_is_cpu(backend)) {
LOG_DEBUG("ControlNet: Using CPU backend");
controlnet_backend = ggml_backend_cpu_init();
controlnet_backend = get_cpu_backend();
} else {
controlnet_backend = backend;
}
Expand Down Expand Up @@ -2441,7 +2444,7 @@ class StableDiffusionGGML {
ggml_tensor* encode_first_stage(ggml_context* work_ctx, ggml_tensor* x) {
    // Encode an image tensor into VAE latent space, then convert the latents
    // into the convention the diffusion model expects. SD1 pix2pix consumes
    // raw VAE latents, so it skips the conversion step.
    // NOTE: the pasted span contained both the misspelled call
    // (vae_to_diffuison_latents) and its corrected duplicate; only the
    // corrected call is kept.
    ggml_tensor* latents = encode_to_vae_latents(work_ctx, x);
    if (version != VERSION_SD1_PIX2PIX) {
        latents = first_stage_model->vae_to_diffusion_latents(work_ctx, latents);
    }
    return latents;
}
Expand Down Expand Up @@ -3780,7 +3783,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
}
});

init_latent = sd_ctx->sd->first_stage_model->vae_to_diffuison_latents(work_ctx, init_latent);
init_latent = sd_ctx->sd->first_stage_model->vae_to_diffusion_latents(work_ctx, init_latent);

int64_t t2 = ggml_time_ms();
LOG_INFO("encode_first_stage completed, taking %" PRId64 " ms", t2 - t1);
Expand Down
4 changes: 2 additions & 2 deletions src/tae.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ struct TinyImageAutoEncoder : public VAE {
return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) {
    // Identity conversion for the tiny autoencoder: no mean/std rescaling is
    // applied, the latents are simply duplicated into work_ctx.
    // (The span held the old misspelled signature plus the corrected one;
    // only the corrected definition is kept.)
    return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

Expand Down Expand Up @@ -633,7 +633,7 @@ struct TinyVideoAutoEncoder : public VAE {
return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) {
    // Identity conversion for the tiny video autoencoder: latents are copied
    // into work_ctx unchanged — no rescaling step is needed here.
    // (The span held the old misspelled signature plus the corrected one;
    // only the corrected definition is kept.)
    return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

Expand Down
4 changes: 2 additions & 2 deletions src/vae.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ struct VAE : public GGMLRunner {

virtual ggml_tensor* vae_output_to_latents(ggml_context* work_ctx, ggml_tensor* vae_output, std::shared_ptr<RNG> rng) = 0;
virtual ggml_tensor* diffusion_to_vae_latents(ggml_context* work_ctx, ggml_tensor* latents) = 0;
virtual ggml_tensor* vae_to_diffuison_latents(ggml_context* work_ctx, ggml_tensor* latents) = 0;
virtual ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) = 0;
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) = 0;
virtual void set_conv2d_scale(float scale) { SD_UNUSED(scale); };
};
Expand Down Expand Up @@ -221,7 +221,7 @@ struct FakeVAE : public VAE {
return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) {
    // FakeVAE performs no latent-space conversion: the tensor is duplicated
    // into work_ctx as-is.
    // (The span held the old misspelled signature plus the corrected one;
    // only the corrected definition is kept.)
    return ggml_ext_dup_and_cpy_tensor(work_ctx, latents);
}

Expand Down
6 changes: 3 additions & 3 deletions src/wan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ namespace WAN {
}

ggml_tensor* diffusion_to_vae_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* vae_latents = ggml_dup(work_ctx, latents);
ggml_tensor* vae_latents = ggml_dup_tensor(work_ctx, latents);
int channel_dim = sd_version_is_wan(version) ? 3 : 2;
std::vector<float> latents_mean_vec;
std::vector<float> latents_std_vec;
Expand Down Expand Up @@ -1191,8 +1191,8 @@ namespace WAN {
return vae_latents;
}

ggml_tensor* vae_to_diffuison_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* diffusion_latents = ggml_dup(work_ctx, latents);
ggml_tensor* vae_to_diffusion_latents(ggml_context* work_ctx, ggml_tensor* latents) {
ggml_tensor* diffusion_latents = ggml_dup_tensor(work_ctx, latents);
int channel_dim = sd_version_is_wan(version) ? 3 : 2;
std::vector<float> latents_mean_vec;
std::vector<float> latents_std_vec;
Expand Down
Loading