From 0933b53fdfaca05611bcb302d047680b501bb871 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 4 May 2026 11:26:20 +0200 Subject: [PATCH 01/43] update: add dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 213e0d57..8e9e71d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "numba>=0.60.0", "numpy>=2.0.0,<2.6.0", "pandas>=2.2.2,<4.0.0", + "panel>=1.8.10", "plotly>=5.0.0,<6.0.0", "scikit-learn>=1.4.2,<1.9.0", "scipy>=1.13.0", From 7fad5e81210d7d13bce33fa741a5ae7e5ce301da Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 4 May 2026 11:26:37 +0200 Subject: [PATCH 02/43] update: add import --- shapash/explainer/smart_explainer.py | 82 ++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 3132bfc4..5a41d79b 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -10,6 +10,8 @@ import numpy as np import pandas as pd +import panel as pn + import shapash.explainer.smart_predictor from shapash.backend import BaseBackend, get_backend_cls_from_name from shapash.backend.shap_backend import get_shap_interaction_values @@ -1806,6 +1808,86 @@ def generate_report( shutil.rmtree(working_dir) raise e + def generate_report_with_panel( + self, + output_file=None, + project_info_file=None, + x_train=None, + y_train=None, + y_test=None, + title_story=None, + title_description=None, + metrics=None, + max_points=200, + display_interaction_plot=False, + nb_top_interactions=5, + ): + """ + Generate an interactive report using Panel to summarize model explainability. + + This method creates a simple interactive report using the Panel library, + allowing users to explore key insights about the model, its predictions, + and feature contributions directly in a Jupyter notebook or Python environment. 
+ + The report includes: + - A title and description section. + - A summary of the model’s predictions and feature contributions. + - Interactive widgets to filter and explore the explanations. + + Parameters + ---------- + output_file : str, optional + Path to save the generated report as an HTML file. + If `None`, the report will be displayed directly in the current environment. + project_info_file : str, optional + Path to a YAML file containing project metadata (not currently used in this method). + x_train : pandas.DataFrame, optional + Training dataset used to fit the model (not currently used in this method). + y_train : pandas.Series or pandas.DataFrame, optional + Target values corresponding to `x_train` (not currently used in this method). + y_test : pandas.Series or pandas.DataFrame, optional + Target values for the test dataset (not currently used in this method). + title_story : str, optional + Title displayed at the top of the report. + title_description : str, optional + Short descriptive text displayed below the main title. + metrics : list of dict, optional + List of metrics to compute and display in the performance section (not currently used in this method). + max_points : int, optional, default=200 + Maximum number of points displayed in contribution plots (not currently used in this method). + display_interaction_plot : bool, optional, default=False + If True, includes interaction plots in the report (not currently used in this method). + nb_top_interactions : int, optional, default=5 + Number of top feature interactions to include in the report (not currently used in this method). + + Returns + ------- + None + Displays the interactive report in the current environment. + + Example + ------- + >>> xpl.generate_report_with_panel( + ... title_story="Model Explainability Report", + ... title_description="Explore predictions and feature contributions interactively." + ... 
) + """ + if title_story is not None: + self.title_story = title_story + if title_description is not None: + self.title_description = title_description + + title = pn.pane.Markdown(f"# {self.title_story}\n\n{self.title_description}") + + summary = self.to_pandas(proba=False, features_to_hide=None, threshold=None, positive=None, max_contrib=None) + summary_panel = pn.widgets.DataFrame(summary, width=800, height=400) + report = pn.Column(title, summary_panel) + + if output_file: + report.save(output_file) + else: + report.show() + def _local_pred(self, index, label=None): """ Compute the model prediction or probability for a single observation. From fe71b68516d63e02febc753ca11d1b1a4c2ab740 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 4 May 2026 11:26:59 +0200 Subject: [PATCH 03/43] fix: pandas object --- tutorial/generate_report/shapash_report_example.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tutorial/generate_report/shapash_report_example.py b/tutorial/generate_report/shapash_report_example.py index 4734a587..0ea80f23 100644 --- a/tutorial/generate_report/shapash_report_example.py +++ b/tutorial/generate_report/shapash_report_example.py @@ -19,7 +19,11 @@ if __name__ == "__main__": house_df, house_dict = data_loading("house_prices") y_df = house_df["SalePrice"] - X_df = house_df[house_df.columns.difference(["SalePrice"])] + X_df = house_df[house_df.columns.difference(["SalePrice"])].copy() + + for col in X_df.columns: + if not pd.api.types.is_numeric_dtype(X_df[col]): + X_df[col] = X_df[col].astype(object) categorical_features = [col for col in X_df.columns if X_df[col].dtype == "object"] @@ -62,3 +66,5 @@ }, ], ) + + xpl.generate_report_with_panel(output_file=os.path.join(cur_dir, "output", "report_with_panel.html"), title_story="House prices report with panel", title_description="This document is a data science report of the kaggle house prices tutorial project. 
It was generated using the Shapash library and Panel.") From a46d82a57f0b160f33c38cd68fabaec7b71579a0 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Tue, 5 May 2026 11:19:12 +0200 Subject: [PATCH 04/43] dev: first demo --- shapash/explainer/smart_explainer.py | 1 - shapash/report/demo.py | 61 ++++ shapash/report/report_config.yml | 139 ++++++++ shapash/report/report_config_extended.yml | 154 +++++++++ shapash/report/report_engine.py | 387 ++++++++++++++++++++++ 5 files changed, 741 insertions(+), 1 deletion(-) create mode 100644 shapash/report/demo.py create mode 100644 shapash/report/report_config.yml create mode 100644 shapash/report/report_config_extended.yml create mode 100644 shapash/report/report_engine.py diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 5a41d79b..bb40e7b5 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd - import panel as pn import shapash.explainer.smart_predictor diff --git a/shapash/report/demo.py b/shapash/report/demo.py new file mode 100644 index 00000000..196f2806 --- /dev/null +++ b/shapash/report/demo.py @@ -0,0 +1,61 @@ +import os +import yaml +from pathlib import Path +from report_engine import ReportBase, generate_report, PALETTE + +# ── Path logic ─────────────────────────────────────────────────────────────── +# This finds the absolute path to the directory containing demo.py +HERE = Path(__file__).resolve().parent + +# Define absolute paths for all files +config_in = HERE / "report_config.yml" +config_out_ext = HERE / "report_config_extended.yml" +report_base_out = HERE / "report_base.html" +report_ext_out = HERE / "report_extended.html" + +# ───────────────────────────────────────────────────────────────────────────── +# Example 1 — use ReportBase as-is +# ───────────────────────────────────────────────────────────────────────────── + +base_report = ReportBase() +# Convert Path 
objects to strings for the engine +generate_report(base_report, str(config_in), str(report_base_out)) +print(f"✅ Saved: {report_base_out}") + + +# ───────────────────────────────────────────────────────────────────────────── +# Example 2 — subclass to add a new block type +# ───────────────────────────────────────────────────────────────────────────── + + +class ExtendedReport(ReportBase): + def block_progress_bar(self, title: str = "", items: list | None = None, color: str = "blue") -> str: + items = items or [] + c = PALETTE.get(color, PALETTE["blue"]) + bars = "" + for item in items: + pct = max(0, min(100, int(item.get("pct", 0)))) + bars += f""" +
+
+ {item.get("label", "")} + {pct}% +
+
+
+
+
+
""" + h2 = f'

{title}

' if title else "" + return f'
{h2}{bars}
' + + def block_pie_chart(self, title: str = "", data: dict | None = None, color: str = "blue") -> str: + return f'

{title}

Tarte à la crème

' + + +extended_report = ExtendedReport() +generate_report(extended_report, str(config_out_ext), str(report_ext_out)) +print(f"✅ Saved: {report_ext_out} (+ block_progress_bar)") diff --git a/shapash/report/report_config.yml b/shapash/report/report_config.yml new file mode 100644 index 00000000..dbe829f4 --- /dev/null +++ b/shapash/report/report_config.yml @@ -0,0 +1,139 @@ +# report_config.yml +# ───────────────────────────────────────────────────────────────────────────── +# Each entry in `sections` maps to a block_ method on the report class. +# `params` are passed as keyword arguments to that method. +# Add, remove, or reorder sections freely — that is the whole point. +# ───────────────────────────────────────────────────────────────────────────── + +sections: + + # ── Page title ────────────────────────────────────────────────────────────── + - type: header + params: + title: "House Prices — Model Report" + subtitle: "Prototype · block-based report engine" + + # ── Top-level callout ──────────────────────────────────────────────────────── + - type: callout + params: + icon: "⚠" + body: > + This is a prototype report. Blocks are defined in Python and + composed here in YAML. Add, remove or reorder sections without + touching any Python code. 
+ color: orange + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Project" + + # ── Project metadata ───────────────────────────────────────────────────────── + - type: key_value + params: + title: "Project metadata" + color: purple + items: + Author: "Alice Martin" + Date: "2024-01-15" + Dataset: "Kaggle — House Prices" + Task: "Regression" + Target: "SalePrice" + + # ── Free-text introduction ──────────────────────────────────────────────────── + - type: text + params: + title: "Objective" + body: > + The goal of this project is to predict the final sale price of residential + homes in Ames, Iowa, using 79 explanatory variables describing almost every + aspect of those homes. + color: blue + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Model" + + # ── Model summary ───────────────────────────────────────────────────────────── + - type: key_value + params: + title: "Model configuration" + color: blue + items: + Class: "RandomForestRegressor" + Library: "scikit-learn 1.4" + n_estimators: 50 + max_depth: "None (unlimited)" + random_state: 1 + + # ── Key metrics as badges ───────────────────────────────────────────────────── + - type: badge_row + params: + title: "Performance snapshot" + badges: + - label: MAE + value: "18 432 $" + color: green + - label: RMSE + value: "27 891 $" + color: blue + - label: R² + value: "0.874" + color: purple + - label: Train size + value: "1 095 rows" + color: gray + - label: Test size + value: "365 rows" + color: gray + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Explainability" + + # ── Top features — plain text block (no chart yet) ──────────────────────────── + - type: text + params: + title: "Top contributing features" + body: > + Based on mean absolute SHAP values computed on the test set, + the three most 
influential features are OverallQual (overall material + and finish quality), GrLivArea (above-grade living area in sq ft), + and TotalBsmtSF (total basement area in sq ft). + color: green + + # ── Feature importance as key/value table ───────────────────────────────────── + - type: key_value + params: + title: "Mean |SHAP| — top 5 features" + color: green + items: + OverallQual: "0.412" + GrLivArea: "0.289" + TotalBsmtSF: "0.174" + GarageCars: "0.121" + YearBuilt: "0.098" + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Notes" + + # ── Custom block example (commented out — shows the extension point) ────────── + # - type: custom + # function: "my_module.render_scatter_plot" + # params: + # x_col: GrLivArea + # y_col: SalePrice + + # ── Closing note ───────────────────────────────────────────────────────────── + - type: callout + params: + icon: "💡" + body: > + To add a new section, define a block_my_section() method in a + subclass of ReportBase — or point a type: custom entry at any + importable Python function. No other changes needed. + color: purple \ No newline at end of file diff --git a/shapash/report/report_config_extended.yml b/shapash/report/report_config_extended.yml new file mode 100644 index 00000000..e711e46f --- /dev/null +++ b/shapash/report/report_config_extended.yml @@ -0,0 +1,154 @@ +# report_config_extended.yml +# ───────────────────────────────────────────────────────────────────────────── +# Extended example with an extra custom block and test pie_chart entry. +# Formatted to match report_config.yml style. 
+# ───────────────────────────────────────────────────────────────────────────── + +sections: + + # ── Page title ────────────────────────────────────────────────────────────── + - type: header + params: + title: "House Prices — Model Report" + subtitle: "Prototype · block-based report engine" + + # ── Top-level callout ──────────────────────────────────────────────────────── + - type: callout + params: + icon: "⚠" + body: > + This is a prototype report. Blocks are defined in Python and + composed here in YAML. Add, remove or reorder sections without + touching any Python code. + color: orange + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Project" + + # ── Project metadata ───────────────────────────────────────────────────────── + - type: key_value + params: + title: "Project metadata" + color: purple + items: + Author: "Alice Martin" + Dataset: "Kaggle — House Prices" + Date: "2024-01-15" + Target: "SalePrice" + Task: "Regression" + + # ── Free-text introduction ─────────────────────────────────────────────────── + - type: text + params: + title: "Objective" + body: > + The goal of this project is to predict the final sale price of residential + homes in Ames, Iowa, using 79 explanatory variables describing almost every + aspect of those homes. 
+ color: blue + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Model" + + # ── Model summary ───────────────────────────────────────────────────────────── + - type: key_value + params: + title: "Model configuration" + color: blue + items: + Class: "RandomForestRegressor" + Library: "scikit-learn 1.4" + max_depth: "None (unlimited)" + n_estimators: 50 + random_state: 1 + + # ── Key metrics as badges ───────────────────────────────────────────────────── + - type: badge_row + params: + title: "Performance snapshot" + badges: + - label: MAE + value: "18 432 $" + color: green + - label: RMSE + value: "27 891 $" + color: blue + - label: R² + value: "0.874" + color: purple + - label: Train size + value: "1 095 rows" + color: gray + - label: Test size + value: "365 rows" + color: gray + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Explainability" + + # ── Top features — plain text block (no chart yet) ────────────────────────── + - type: text + params: + title: "Top contributing features" + body: > + Based on mean absolute SHAP values computed on the test set, the three + most influential features are OverallQual (overall material and finish quality), + GrLivArea (above-grade living area in sq ft), and TotalBsmtSF (total basement + area in sq ft). 
+ color: green + + # ── Feature importance as key/value table ──────────────────────────────────── + - type: key_value + params: + title: "Mean |SHAP| — top 5 features" + color: green + items: + GarageCars: "0.121" + GrLivArea: "0.289" + OverallQual: "0.412" + TotalBsmtSF: "0.174" + YearBuilt: "0.098" + + # ── Divider ────────────────────────────────────────────────────────────────── + - type: divider + params: + label: "Notes" + + # ── Extended block example ─────────────────────────────────────────────────── + - type: progress_bar + params: + title: "SHAP coverage by feature group" + color: green + items: + - label: "Overall quality features" + pct: 85 + - label: "Area & size features" + pct: 70 + - label: "Garage features" + pct: 45 + - label: "Basement features" + pct: 38 + - label: "Year / age features" + pct: 22 + + # ── Closing note ───────────────────────────────────────────────────────────── + - type: callout + params: + icon: "💡" + body: > + To add a new section, define a block_my_section() method in a + subclass of ReportBase — or point a type: custom entry at any + importable Python function. No other changes needed. + color: purple + + # ── Test block (kept as-is) ────────────────────────────────────────────────── + - type: pie_chart + params: + title: "test" + color: purple diff --git a/shapash/report/report_engine.py b/shapash/report/report_engine.py new file mode 100644 index 00000000..1d3ca357 --- /dev/null +++ b/shapash/report/report_engine.py @@ -0,0 +1,387 @@ +""" +report_engine.py — block-based report engine + legacy generation helpers. + +Shapash Theme Version: White background, gold accents, and sidebar navigation. 
+""" + +from __future__ import annotations + +import importlib +import logging +import os +import re +from pathlib import Path + +import pandas as pd +import papermill as pm +import yaml +from nbconvert import HTMLExporter + +from shapash.utils.utils import get_project_root + +logger = logging.getLogger(__name__) + + +# ───────────────────────────────────────────────────────────────────────────── +# Colour palette shared by all built-in blocks (Updated for Shapash Light Theme) +# ───────────────────────────────────────────────────────────────────────────── + +PALETTE = { + "gold": {"bg": "#ffffff", "border": "#ffbb00", "title": "#ccac00", "text": "#333333"}, + "blue": {"bg": "#ffffff", "border": "#2255aa", "title": "#2255aa", "text": "#333333"}, + "gray": {"bg": "#ffffff", "border": "#eeeeee", "title": "#666666", "text": "#666666"}, + "orange": {"bg": "#fff9e6", "border": "#ffbb00", "title": "#cc8833", "text": "#444444"}, +} + + +# ───────────────────────────────────────────────────────────────────────────── +# ReportBase +# ───────────────────────────────────────────────────────────────────────────── + + +class ReportBase: + """ + Base class for block-based HTML reports. + Methods named block_ return HTML strings for specific sections. 
+ """ + + def render_block(self, block_cfg: dict) -> str: + """Dispatch one YAML block entry to the matching block_* method.""" + block_type = block_cfg.get("type", "") + params = block_cfg.get("params", {}) + method = getattr(self, f"block_{block_type}", None) + + if method is None: + if block_type == "custom": + return self._render_custom(block_cfg) + logger.warning("Unknown block type '%s' — skipped.", block_type) + return "" + + try: + return method(**params) + except Exception as exc: + logger.error("Block '%s' raised: %s", block_type, exc) + return _error_html(block_type, exc) + + def _render_custom(self, block_cfg: dict) -> str: + """Call an arbitrary importable function.""" + func_path = block_cfg.get("function", "") + params = block_cfg.get("params", {}) + try: + mod_path, fn_name = func_path.rsplit(".", 1) + fn = getattr(importlib.import_module(mod_path), fn_name) + return fn(self, **params) + except Exception as exc: + logger.error("Custom block '%s' raised: %s", func_path, exc) + return _error_html(func_path, exc) + + # ── Built-in blocks (Shapash Styled) ────────────────────────────────────── + + def block_header(self, title: str = "Report", subtitle: str = "") -> str: + """Large page title with centered text.""" + sub = f'

{subtitle}

' if subtitle else "" + return f'

{title}

{sub}
' + + def block_text( + self, + title: str = "", + body: str = "", + color: str = "gray", + ) -> str: + """Standard section with a title and paragraph.""" + h2 = f'

{title}

' if title else "" + return f'
{h2}

{body}

' + + def block_key_value( + self, + title: str = "", + items: dict | None = None, + color: str = "gold", + ) -> str: + """Two-column key/value metadata table.""" + items = items or {} + rows = "".join(f'{k} :{v}' for k, v in items.items()) + h2 = f'

{title}

' if title else "" + return f'
{h2}{rows}
' + + def block_badge_row( + self, + title: str = "", + badges: list | None = None, + ) -> str: + """A row of metrics or badges.""" + badges = badges or [] + pills = "" + for b in badges: + c = PALETTE.get(b.get("color", "gray"), PALETTE["gray"]) + pills += ( + f'' + f'{b.get("label", "")}' + f'{b.get("value", "")}' + ) + h2 = f'

{title}

' if title else "" + return f'
{h2}
{pills}
' + + def block_callout( + self, + body: str = "", + color: str = "gold", + icon: str = "", + ) -> str: + """The distinct Shapash left-border callout box.""" + return f'

{body}

' + + def block_divider(self, label: str = "") -> str: + """Thin light rule for separating sections.""" + return '
' + + +# ───────────────────────────────────────────────────────────────────────────── +# New declarative pipeline +# ───────────────────────────────────────────────────────────────────────────── + + +def generate_report( + report: ReportBase, + config_file: str, + output_file: str, +) -> None: + """Render a ReportBase instance to an HTML file driven by a YAML config.""" + cfg_path = Path(config_file).resolve() + print(f"Loading config → {cfg_path}") + if not cfg_path.exists(): + raise FileNotFoundError(f"Config not found: {cfg_path}") + + with cfg_path.open() as f: + cfg = yaml.safe_load(f) + + sections = cfg.get("sections") + if not sections: + raise ValueError("YAML config must have a top-level 'sections' list.") + + logger.info("Rendering %d block(s)…", len(sections)) + + rendered_blocks = [] + nav_links = [] + + # Process each block and build the sidebar + for block_cfg in sections: + block_html = report.render_block(block_cfg) + params = block_cfg.get("params", {}) + title = params.get("title") + block_type = block_cfg.get("type") + + # If the block has a title (and isn't the main header), add it to the sidebar + if title and block_type != "header": + section_id = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") + + wrapped_html = f'
{block_html}
' + + nav_links.append(f'{title}') + else: + wrapped_html = f'
{block_html}
' + + rendered_blocks.append(wrapped_html) + + # Join the navigation links into a single string + sidebar_html = "\n".join(nav_links) + + out_path = Path(output_file) + out_path.parent.mkdir(parents=True, exist_ok=True) + + logo_path = Path(__file__).resolve().parents[2] / "docs" / "assets" / "images" / "svg" / "shapash-github.svg" + logo_src = os.path.relpath(logo_path, out_path.parent).replace(os.sep, "/") if logo_path.exists() else "" + + # Pass BOTH the body and the sidebar to the HTML template + out_path.write_text(_html_page("\n".join(rendered_blocks), sidebar_html, logo_src), encoding="utf-8") + logger.info("Report saved → %s", output_file) + + +# ───────────────────────────────────────────────────────────────────────────── +# Legacy pipeline +# ───────────────────────────────────────────────────────────────────────────── + + +def execute_report( + working_dir: str, + explainer: object, + project_info_file: str, + x_train: pd.DataFrame | None = None, + y_train: pd.DataFrame | None = None, + y_test: pd.Series | pd.DataFrame | None = None, + config: dict | None = None, + notebook_path: str | None = None, + kernel_name: str | None = None, +) -> None: + """Run the legacy notebook-based report generation pipeline. + + The function serializes the explainer and optional train/test datasets into + ``working_dir``, then executes the report notebook with Papermill. 
+ """ + if config is None: + config = {} + explainer.save(path=os.path.join(working_dir, "smart_explainer.pickle")) + if x_train is not None: + x_train.to_csv(os.path.join(working_dir, "x_train.csv")) + if y_train is not None: + y_train.to_csv(os.path.join(working_dir, "y_train.csv")) + if y_test is not None: + y_test.to_csv(os.path.join(working_dir, "y_test.csv")) + + root_path = get_project_root() + if not notebook_path: + notebook_path = os.path.join(root_path, "shapash", "report", "base_report.ipynb") + + pm.execute_notebook( + notebook_path, + os.path.join(working_dir, "base_report.ipynb"), + parameters=dict(dir_path=working_dir, project_info_file=project_info_file, config=config), + kernel_name=kernel_name, + ) + + +def export_and_save_report(working_dir: str, output_file: str) -> None: + """Export the executed legacy notebook in ``working_dir`` to an HTML file.""" + root_path = get_project_root() + exporter = HTMLExporter( + exclude_input=True, + extra_template_basedirs=[os.path.join(root_path, "shapash", "report", "template")], + template_name="custom", + exclude_anchor_links=True, + ) + body, _ = exporter.from_filename(filename=os.path.join(working_dir, "base_report.ipynb")) + with open(output_file, "w") as f: + f.write(body) + + +# ───────────────────────────────────────────────────────────────────────────── +# Private helpers (Shapash UI Styling) +# ───────────────────────────────────────────────────────────────────────────── + + +def _error_html(block_id: str, exc: Exception) -> str: + return ( + f'
' + f'

⚠ Block "{block_id}" failed

' + f'
{exc}
' + ) + + +def _html_page(body: str, sidebar_html: str = "", logo_src: str = "") -> str: + brand_html = ( + ( + f'" + ) + if logo_src + else '' + ) + + return f""" + + + + + Shapash Report + + + + +
+ {body} +
+ + + +""" From f3a65763d8080a50a52683bf9d5c8ca4dbd1e9cc Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 09:59:48 +0200 Subject: [PATCH 05/43] delete: remove legacy --- shapash/report/base_report.ipynb | 184 ----------- shapash/report/demo.py | 61 ---- shapash/report/generation.py | 94 ------ .../generate_report/utils/custom_report.ipynb | 290 ------------------ 4 files changed, 629 deletions(-) delete mode 100644 shapash/report/base_report.ipynb delete mode 100644 shapash/report/demo.py delete mode 100644 shapash/report/generation.py delete mode 100644 tutorial/generate_report/utils/custom_report.ipynb diff --git a/shapash/report/base_report.ipynb b/shapash/report/base_report.ipynb deleted file mode 100644 index fa72fd8c..00000000 --- a/shapash/report/base_report.ipynb +++ /dev/null @@ -1,184 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "filled-favorite", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "coordinate-shower", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "dir_path = \"\"\n", - "project_info_file = \"\"\n", - "config = dict()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "atlantic-fever", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\")\n", - "from shapash import SmartExplainer\n", - "from shapash.report.project_report import ProjectReport\n", - "from shapash.report.common import load_saved_df\n", - "\n", - "xpl = SmartExplainer.load(os.path.join(dir_path, \"smart_explainer.pickle\"))\n", - "\n", - "x_train = load_saved_df(os.path.join(dir_path, \"x_train.csv\"))\n", - "y_train = load_saved_df(os.path.join(dir_path, \"y_train.csv\"))\n", - "y_test = load_saved_df(os.path.join(dir_path, \"y_test.csv\"))\n", - "\n", - "report = 
ProjectReport(\n", - " explainer=xpl, project_info_file=project_info_file, x_train=x_train, y_train=y_train, y_test=y_test, config=config\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "altered-medicare", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_title_description()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "specified-vietnamese", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_project_information()" - ] - }, - { - "cell_type": "markdown", - "id": "steady-transfer", - "metadata": {}, - "source": [ - "## Model analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "serial-bulgaria", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_model_analysis()" - ] - }, - { - "cell_type": "markdown", - "id": "beginning-silicon", - "metadata": {}, - "source": [ - "## Dataset analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "planned-mayor", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "report.display_dataset_analysis()" - ] - }, - { - "cell_type": "markdown", - "id": "attempted-bikini", - "metadata": {}, - "source": [ - "## Model explainability" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "secondary-dividend", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "report.display_model_explainability()" - ] - }, - { - "cell_type": "markdown", - "id": "australian-photograph", - "metadata": {}, - "source": [ - "## Model performance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "breeding-techno", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_model_performance()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "arbitrary-baker", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Tags", - "hide_input": false, - 
"kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/shapash/report/demo.py b/shapash/report/demo.py deleted file mode 100644 index 196f2806..00000000 --- a/shapash/report/demo.py +++ /dev/null @@ -1,61 +0,0 @@ -import os -import yaml -from pathlib import Path -from report_engine import ReportBase, generate_report, PALETTE - -# ── Path logic ─────────────────────────────────────────────────────────────── -# This finds the absolute path to the directory containing demo.py -HERE = Path(__file__).resolve().parent - -# Define absolute paths for all files -config_in = HERE / "report_config.yml" -config_out_ext = HERE / "report_config_extended.yml" -report_base_out = HERE / "report_base.html" -report_ext_out = HERE / "report_extended.html" - -# ───────────────────────────────────────────────────────────────────────────── -# Example 1 — use ReportBase as-is -# ───────────────────────────────────────────────────────────────────────────── - -base_report = ReportBase() -# Convert Path objects to strings for the engine -generate_report(base_report, str(config_in), str(report_base_out)) -print(f"✅ Saved: {report_base_out}") - - -# ───────────────────────────────────────────────────────────────────────────── -# Example 2 — subclass to add a new block type -# ───────────────────────────────────────────────────────────────────────────── - - -class ExtendedReport(ReportBase): - def block_progress_bar(self, title: str = "", items: list | None = None, color: str = "blue") -> str: - items = items or [] - c = PALETTE.get(color, PALETTE["blue"]) - bars = "" - for item in items: - pct = max(0, min(100, int(item.get("pct", 
0)))) - bars += f""" -
-
- {item.get("label", "")} - {pct}% -
-
-
-
-
-
""" - h2 = f'

{title}

' if title else "" - return f'
{h2}{bars}
' - - def block_pie_chart(self, title: str = "", data: dict | None = None, color: str = "blue") -> str: - return f'

{title}

Tarte à la crème

' - - -extended_report = ExtendedReport() -generate_report(extended_report, str(config_out_ext), str(report_ext_out)) -print(f"✅ Saved: {report_ext_out} (+ block_progress_bar)") diff --git a/shapash/report/generation.py b/shapash/report/generation.py deleted file mode 100644 index 24d9930f..00000000 --- a/shapash/report/generation.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Report generation helper module. -""" - -import os - -import pandas as pd -import papermill as pm -from nbconvert import HTMLExporter - -from shapash.utils.utils import get_project_root - - -def execute_report( - working_dir: str, - explainer: object, - project_info_file: str, - x_train: pd.DataFrame | None = None, - y_train: pd.DataFrame | None = None, - y_test: pd.Series | pd.DataFrame | None = None, - config: dict | None = None, - notebook_path: str | None = None, - kernel_name: str | None = None, -): - """ - Executes the base_report.ipynb notebook and saves the results in working_dir. - - Parameters - ---------- - working_dir : str - Directory in which will be saved the executed notebook. - explainer : shapash.explainer.smart_explainer.SmartExplainer - Compiled shapash explainer. - project_info_file : str - Path to the file used to display some information about the project in the report. - x_train : pd.DataFrame - DataFrame used for training the model. - y_train : pd.Series or pd.DataFrame - Series of labels in the training set. - y_test : pd.Series or pd.DataFrame - Series of labels in the test set. - config : dict, optional - Report configuration options. - notebook_path : str, optional - Path to the notebook used to generate the report. If None, the Shapash base report - notebook will be used. - kernel_name : str, optional - Name of the kernel used to generate the report. This parameter can be usefull if - you have multiple jupyter kernels and that the method does not use the right kernel - by default. 
- """ - if config is None: - config = {} - explainer.save(path=os.path.join(working_dir, "smart_explainer.pickle")) - if x_train is not None: - x_train.to_csv(os.path.join(working_dir, "x_train.csv")) - if y_train is not None: - y_train.to_csv(os.path.join(working_dir, "y_train.csv")) - if y_test is not None: - y_test.to_csv(os.path.join(working_dir, "y_test.csv")) - root_path = get_project_root() - if notebook_path is None or notebook_path == "": - notebook_path = os.path.join(root_path, "shapash", "report", "base_report.ipynb") - - pm.execute_notebook( - notebook_path, - os.path.join(working_dir, "base_report.ipynb"), - parameters=dict(dir_path=working_dir, project_info_file=project_info_file, config=config), - kernel_name=kernel_name, - ) - - -def export_and_save_report(working_dir: str, output_file: str): - """ - Exports a previously executed notebook and saves it as a static HTML file. - - Parameters - ---------- - working_dir : str - Path to the directory containing the executed notebook. - output_file : str - Path to the html file that will be created. 
- """ - - exporter = HTMLExporter( - exclude_input=True, - extra_template_basedirs=[os.path.join(get_project_root(), "shapash", "report", "template")], - template_name="custom", - exclude_anchor_links=True, - ) - (body, resources) = exporter.from_filename(filename=os.path.join(working_dir, "base_report.ipynb")) - - with open(output_file, "w") as file: - file.write(body) diff --git a/tutorial/generate_report/utils/custom_report.ipynb b/tutorial/generate_report/utils/custom_report.ipynb deleted file mode 100644 index 29838ec0..00000000 --- a/tutorial/generate_report/utils/custom_report.ipynb +++ /dev/null @@ -1,290 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "threatened-gamma", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# These parameter are replaced by papermill during execution but can be used to work interactively on your report\n", - "# You need to use the generate_report once with the parameter working_dir='../working' \n", - "# to use the following values. This way the objects used below are created in the directory.\n", - "dir_path = '../working' \n", - "project_info_file = '../utils/project_info.yml'\n", - "config = dict(\n", - " title_story=\"House prices report\",\n", - " title_description=\"\"\"This document is a data science report of the kaggle house prices tutorial project. 
\n", - " It was generated using the Shapash library.\"\"\",\n", - " metrics=[\n", - " {\n", - " 'path': 'sklearn.metrics.mean_absolute_error',\n", - " 'name': 'Mean absolute error', \n", - " },\n", - " {\n", - " 'path': 'sklearn.metrics.mean_squared_error',\n", - " 'name': 'Mean squared error',\n", - " }\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "taken-tomorrow", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import pandas as pd\n", - "from shapash import SmartExplainer\n", - "from shapash.report.project_report import ProjectReport\n", - "from shapash.report.common import load_saved_df\n", - "\n", - "xpl = SmartExplainer.load(os.path.join(dir_path, 'smart_explainer.pickle'))\n", - "\n", - "x_train = load_saved_df(os.path.join(dir_path, 'x_train.csv'))\n", - "y_train = load_saved_df(os.path.join(dir_path, 'y_train.csv'))\n", - "y_test = load_saved_df(os.path.join(dir_path, 'y_test.csv'))\n", - "\n", - "report = ProjectReport(\n", - " explainer=xpl, \n", - " project_info_file=project_info_file, \n", - " x_train=x_train, \n", - " y_train=y_train,\n", - " y_test=y_test, \n", - " config=config\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "peaceful-frame", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_title_description()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "decreased-philadelphia", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_project_information()" - ] - }, - { - "cell_type": "markdown", - "id": "fourth-confusion", - "metadata": {}, - "source": [ - "## Model information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "union-person", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_model_analysis()" - ] - }, - { - "cell_type": "markdown", - "id": "regional-centre", - "metadata": {}, - "source": [ - "## Dataset analysis" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "rational-breakfast", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_dataset_analysis(multivariate_analysis=False)" - ] - }, - { - "cell_type": "markdown", - "id": "fitted-uncle", - "metadata": {}, - "source": [ - "### Relashionship with target variable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "front-employment", - "metadata": {}, - "outputs": [], - "source": [ - "import seaborn as sns\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "collectible-upgrade", - "metadata": {}, - "outputs": [], - "source": [ - "df_train = report.x_train_pre\n", - "y_train = report.y_train\n", - "df_train['SalePrice'] = y_train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "monthly-reply", - "metadata": {}, - "outputs": [], - "source": [ - "f, ax = plt.subplots(figsize=(8, 6))\n", - "fig = sns.boxplot(x='OverallQual', y=\"SalePrice\", data=df_train)\n", - "fig.axis(ymin=0, ymax=800000)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "subtle-amazon", - "metadata": {}, - "source": [ - "### Relashionship between training variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "packed-vermont", - "metadata": {}, - "outputs": [], - "source": [ - "corr_matrix = df_train.corr()\n", - "f, ax = plt.subplots(figsize=(16, 12))\n", - "sns.heatmap(corr_matrix, vmax=.8, square=True, cmap=\"YlGnBu\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "romance-division", - "metadata": {}, - "source": [ - "## Model explainability" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "accessible-favorite", - "metadata": {}, - "outputs": [], - "source": [ - "# Note : Plotly graphs may not show correctly in notebook but still work in html output file.\n", - "report.display_model_explainability()" - ] - }, - { - "cell_type": "markdown", - "id": 
"unknown-transaction", - "metadata": {}, - "source": [ - "## Model performance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ignored-career", - "metadata": {}, - "outputs": [], - "source": [ - "report.display_model_performance()" - ] - }, - { - "cell_type": "markdown", - "id": "noble-seafood", - "metadata": {}, - "source": [ - "**The graph below represents y_pred vs y_test :**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "filled-challenge", - "metadata": {}, - "outputs": [], - "source": [ - "y_test = report.y_test\n", - "y_pred = report.y_pred\n", - "\n", - "sns.scatterplot(x=y_test, y=y_pred)\n", - "plt.xlabel('y_test')\n", - "plt.ylabel('y_pred')\n", - "plt.title('y_pred vs y_test')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "efficient-badge", - "metadata": {}, - "source": [ - "You can add as many graphs, text, or other cells as you want.\n", - "\n", - "The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "passive-peoples", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Tags", - "hide_input": false, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 1d6e1efbe4c580801a5a37b0269d02ed424130d8 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:00:31 +0200 Subject: [PATCH 06/43] change name --- shapash/report/report_compat.py | 5 + shapash/report/report_engine.py | 387 -------------------------------- 2 files changed, 5 insertions(+), 387 deletions(-) create mode 100644 shapash/report/report_compat.py delete mode 100644 shapash/report/report_engine.py diff --git a/shapash/report/report_compat.py b/shapash/report/report_compat.py new file mode 100644 index 00000000..217f618f --- /dev/null +++ b/shapash/report/report_compat.py @@ -0,0 +1,5 @@ +"""Compatibility exports for the smart_report package.""" + +from shapash.report.smart_report import PALETTE, ReportBase + +__all__ = ["PALETTE", "ReportBase"] diff --git a/shapash/report/report_engine.py b/shapash/report/report_engine.py deleted file mode 100644 index 1d3ca357..00000000 --- a/shapash/report/report_engine.py +++ /dev/null @@ -1,387 +0,0 @@ -""" -report_engine.py — block-based report engine + legacy generation helpers. - -Shapash Theme Version: White background, gold accents, and sidebar navigation. 
-""" - -from __future__ import annotations - -import importlib -import logging -import os -import re -from pathlib import Path - -import pandas as pd -import papermill as pm -import yaml -from nbconvert import HTMLExporter - -from shapash.utils.utils import get_project_root - -logger = logging.getLogger(__name__) - - -# ───────────────────────────────────────────────────────────────────────────── -# Colour palette shared by all built-in blocks (Updated for Shapash Light Theme) -# ───────────────────────────────────────────────────────────────────────────── - -PALETTE = { - "gold": {"bg": "#ffffff", "border": "#ffbb00", "title": "#ccac00", "text": "#333333"}, - "blue": {"bg": "#ffffff", "border": "#2255aa", "title": "#2255aa", "text": "#333333"}, - "gray": {"bg": "#ffffff", "border": "#eeeeee", "title": "#666666", "text": "#666666"}, - "orange": {"bg": "#fff9e6", "border": "#ffbb00", "title": "#cc8833", "text": "#444444"}, -} - - -# ───────────────────────────────────────────────────────────────────────────── -# ReportBase -# ───────────────────────────────────────────────────────────────────────────── - - -class ReportBase: - """ - Base class for block-based HTML reports. - Methods named block_ return HTML strings for specific sections. 
- """ - - def render_block(self, block_cfg: dict) -> str: - """Dispatch one YAML block entry to the matching block_* method.""" - block_type = block_cfg.get("type", "") - params = block_cfg.get("params", {}) - method = getattr(self, f"block_{block_type}", None) - - if method is None: - if block_type == "custom": - return self._render_custom(block_cfg) - logger.warning("Unknown block type '%s' — skipped.", block_type) - return "" - - try: - return method(**params) - except Exception as exc: - logger.error("Block '%s' raised: %s", block_type, exc) - return _error_html(block_type, exc) - - def _render_custom(self, block_cfg: dict) -> str: - """Call an arbitrary importable function.""" - func_path = block_cfg.get("function", "") - params = block_cfg.get("params", {}) - try: - mod_path, fn_name = func_path.rsplit(".", 1) - fn = getattr(importlib.import_module(mod_path), fn_name) - return fn(self, **params) - except Exception as exc: - logger.error("Custom block '%s' raised: %s", func_path, exc) - return _error_html(func_path, exc) - - # ── Built-in blocks (Shapash Styled) ────────────────────────────────────── - - def block_header(self, title: str = "Report", subtitle: str = "") -> str: - """Large page title with centered text.""" - sub = f'

{subtitle}

' if subtitle else "" - return f'

{title}

{sub}
' - - def block_text( - self, - title: str = "", - body: str = "", - color: str = "gray", - ) -> str: - """Standard section with a title and paragraph.""" - h2 = f'

{title}

' if title else "" - return f'
{h2}

{body}

' - - def block_key_value( - self, - title: str = "", - items: dict | None = None, - color: str = "gold", - ) -> str: - """Two-column key/value metadata table.""" - items = items or {} - rows = "".join(f'{k} :{v}' for k, v in items.items()) - h2 = f'

{title}

' if title else "" - return f'
{h2}{rows}
' - - def block_badge_row( - self, - title: str = "", - badges: list | None = None, - ) -> str: - """A row of metrics or badges.""" - badges = badges or [] - pills = "" - for b in badges: - c = PALETTE.get(b.get("color", "gray"), PALETTE["gray"]) - pills += ( - f'' - f'{b.get("label", "")}' - f'{b.get("value", "")}' - ) - h2 = f'

{title}

' if title else "" - return f'
{h2}
{pills}
' - - def block_callout( - self, - body: str = "", - color: str = "gold", - icon: str = "", - ) -> str: - """The distinct Shapash left-border callout box.""" - return f'

{body}

' - - def block_divider(self, label: str = "") -> str: - """Thin light rule for separating sections.""" - return '
' - - -# ───────────────────────────────────────────────────────────────────────────── -# New declarative pipeline -# ───────────────────────────────────────────────────────────────────────────── - - -def generate_report( - report: ReportBase, - config_file: str, - output_file: str, -) -> None: - """Render a ReportBase instance to an HTML file driven by a YAML config.""" - cfg_path = Path(config_file).resolve() - print(f"Loading config → {cfg_path}") - if not cfg_path.exists(): - raise FileNotFoundError(f"Config not found: {cfg_path}") - - with cfg_path.open() as f: - cfg = yaml.safe_load(f) - - sections = cfg.get("sections") - if not sections: - raise ValueError("YAML config must have a top-level 'sections' list.") - - logger.info("Rendering %d block(s)…", len(sections)) - - rendered_blocks = [] - nav_links = [] - - # Process each block and build the sidebar - for block_cfg in sections: - block_html = report.render_block(block_cfg) - params = block_cfg.get("params", {}) - title = params.get("title") - block_type = block_cfg.get("type") - - # If the block has a title (and isn't the main header), add it to the sidebar - if title and block_type != "header": - section_id = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") - - wrapped_html = f'
{block_html}
' - - nav_links.append(f'{title}') - else: - wrapped_html = f'
{block_html}
' - - rendered_blocks.append(wrapped_html) - - # Join the navigation links into a single string - sidebar_html = "\n".join(nav_links) - - out_path = Path(output_file) - out_path.parent.mkdir(parents=True, exist_ok=True) - - logo_path = Path(__file__).resolve().parents[2] / "docs" / "assets" / "images" / "svg" / "shapash-github.svg" - logo_src = os.path.relpath(logo_path, out_path.parent).replace(os.sep, "/") if logo_path.exists() else "" - - # Pass BOTH the body and the sidebar to the HTML template - out_path.write_text(_html_page("\n".join(rendered_blocks), sidebar_html, logo_src), encoding="utf-8") - logger.info("Report saved → %s", output_file) - - -# ───────────────────────────────────────────────────────────────────────────── -# Legacy pipeline -# ───────────────────────────────────────────────────────────────────────────── - - -def execute_report( - working_dir: str, - explainer: object, - project_info_file: str, - x_train: pd.DataFrame | None = None, - y_train: pd.DataFrame | None = None, - y_test: pd.Series | pd.DataFrame | None = None, - config: dict | None = None, - notebook_path: str | None = None, - kernel_name: str | None = None, -) -> None: - """Run the legacy notebook-based report generation pipeline. - - The function serializes the explainer and optional train/test datasets into - ``working_dir``, then executes the report notebook with Papermill. 
- """ - if config is None: - config = {} - explainer.save(path=os.path.join(working_dir, "smart_explainer.pickle")) - if x_train is not None: - x_train.to_csv(os.path.join(working_dir, "x_train.csv")) - if y_train is not None: - y_train.to_csv(os.path.join(working_dir, "y_train.csv")) - if y_test is not None: - y_test.to_csv(os.path.join(working_dir, "y_test.csv")) - - root_path = get_project_root() - if not notebook_path: - notebook_path = os.path.join(root_path, "shapash", "report", "base_report.ipynb") - - pm.execute_notebook( - notebook_path, - os.path.join(working_dir, "base_report.ipynb"), - parameters=dict(dir_path=working_dir, project_info_file=project_info_file, config=config), - kernel_name=kernel_name, - ) - - -def export_and_save_report(working_dir: str, output_file: str) -> None: - """Export the executed legacy notebook in ``working_dir`` to an HTML file.""" - root_path = get_project_root() - exporter = HTMLExporter( - exclude_input=True, - extra_template_basedirs=[os.path.join(root_path, "shapash", "report", "template")], - template_name="custom", - exclude_anchor_links=True, - ) - body, _ = exporter.from_filename(filename=os.path.join(working_dir, "base_report.ipynb")) - with open(output_file, "w") as f: - f.write(body) - - -# ───────────────────────────────────────────────────────────────────────────── -# Private helpers (Shapash UI Styling) -# ───────────────────────────────────────────────────────────────────────────── - - -def _error_html(block_id: str, exc: Exception) -> str: - return ( - f'
' - f'

⚠ Block "{block_id}" failed

' - f'
{exc}
' - ) - - -def _html_page(body: str, sidebar_html: str = "", logo_src: str = "") -> str: - brand_html = ( - ( - f'" - ) - if logo_src - else '' - ) - - return f""" - - - - - Shapash Report - - - - -
- {body} -
- - - -""" From 0b5a7b3cdcc4a6a8b6c654a812ed407b6c268852 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:01:02 +0200 Subject: [PATCH 07/43] re organized code --- shapash/report/smart_report/__init__.py | 6 + shapash/report/smart_report/assets.py | 176 ++++++++++++ shapash/report/smart_report/blocks.py | 276 +++++++++++++++++++ shapash/report/smart_report/core.py | 94 +++++++ shapash/report/smart_report/layout.py | 140 ++++++++++ shapash/report/smart_report/panel_support.py | 55 ++++ shapash/report/smart_report/validation.py | 84 ++++++ 7 files changed, 831 insertions(+) create mode 100644 shapash/report/smart_report/__init__.py create mode 100644 shapash/report/smart_report/assets.py create mode 100644 shapash/report/smart_report/blocks.py create mode 100644 shapash/report/smart_report/core.py create mode 100644 shapash/report/smart_report/layout.py create mode 100644 shapash/report/smart_report/panel_support.py create mode 100644 shapash/report/smart_report/validation.py diff --git a/shapash/report/smart_report/__init__.py b/shapash/report/smart_report/__init__.py new file mode 100644 index 00000000..ac78168a --- /dev/null +++ b/shapash/report/smart_report/__init__.py @@ -0,0 +1,6 @@ +"""Public entry point for block-based HTML smart reports.""" + +from shapash.report.smart_report.blocks import PALETTE +from shapash.report.smart_report.core import ReportBase + +__all__ = ["PALETTE", "ReportBase"] diff --git a/shapash/report/smart_report/assets.py b/shapash/report/smart_report/assets.py new file mode 100644 index 00000000..29bdcbc6 --- /dev/null +++ b/shapash/report/smart_report/assets.py @@ -0,0 +1,176 @@ +"""Static CSS and JavaScript fragments for HTML report rendering.""" + +REPORT_STYLES = """ + :root { --shapash-gold: #ffbb00; --text-main: #333; --text-light: #777; } + *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + html { scroll-behavior: smooth; } + body, .report-shell { + background: #fdfdfd; color: 
var(--text-main); + font-family: 'Helvetica Neue', Arial, sans-serif; + } + .report-shell { + display: flex; min-height: 100vh; width: 100%; + } + .sidebar { + width: 240px; background: #fff; border-right: 1px solid #eee; + position: fixed; height: 100vh; padding: 30px 20px; + overflow-y: auto; + box-shadow: inset -8px 0 12px -12px rgba(0, 0, 0, 0.45); + } + .sidebar-brand { + margin-bottom: 40px; display: flex; align-items: center; gap: 10px; + color: var(--shapash-gold); font-size: 18px; font-weight: bold; + } + .sidebar-brand-logo { display: block; width: 34px; height: 34px; flex: 0 0 auto; } + .sidebar-brand-text { line-height: 1.2; } + .nav-item { + color: var(--text-light); padding: 10px 0; display: block; + text-decoration: none; font-size: 13px; transition: 0.2s; + } + .nav-item:hover { color: var(--text-main); } + .nav-item.active { + color: #551a8b; font-weight: bold; border-left: 3px solid #551a8b; padding-left: 10px; + } + .nav-group { margin-bottom: 2px; } + .nav-group-title { + color: var(--text-main); font-size: 13px; font-weight: 500; + padding: 10px 0; display: block; text-decoration: none; transition: 0.2s; + } + .nav-group-title:hover { color: #551a8b; } + .nav-group-title.active { + color: #551a8b; font-weight: bold; border-left: 3px solid #551a8b; padding-left: 10px; + } + .nav-children { display: block; } + .nav-child { + color: var(--text-light); padding: 6px 0 6px 16px; display: block; + text-decoration: none; font-size: 12px; transition: 0.2s; + } + .nav-child:hover { color: var(--text-main); } + .nav-child.active { + color: #551a8b; font-weight: 600; border-left: 3px solid #551a8b; padding-left: 13px; + } + .container { margin-left: 240px; width: 100%; padding: 60px 80px 60vh; max-width: 1200px; } + .main-header { text-align: center; margin-bottom: 60px; } + .main-header h1 { font-size: 2.4rem; font-weight: 500; color: #000; margin-bottom: 20px; } + .section-title { font-size: 1.6rem; color: #000; margin: 40px 0 20px; font-weight: 700; } + 
.section-block { margin-bottom: 30px; } + .content-block { margin-bottom: 30px; line-height: 1.6; font-size: 14px; } + .shapash-callout { border-left: 4px solid var(--shapash-gold); background: #fff; padding: 15px 25px; margin: 30px 0; color: #333; line-height: 1.6; font-size: 15px; } + .kv-table, table.dataframe { + width: 100%; + border-collapse: separate; + border-spacing: 0; + margin: 12px 0 24px; + background: #fff; + border: 1px solid #ececec; + border-radius: 12px; + overflow: hidden; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.04); + } + .kv-table thead th, table.dataframe thead th { + background: #fafafa; + color: #000; + font-weight: 700; + text-align: left; + border-bottom: 1px solid #ececec; + padding: 12px 16px; + } + .kv-table tbody th, table.dataframe tbody th { + color: #000; + font-weight: 600; + text-align: left; + background: #fcfcfc; + } + .kv-table td, .kv-table th, table.dataframe td, table.dataframe th { + padding: 12px 16px; + border-bottom: 1px solid #f1f1f1; + vertical-align: top; + } + .kv-table tbody tr:last-child td, + .kv-table tbody tr:last-child th, + table.dataframe tbody tr:last-child td, + table.dataframe tbody tr:last-child th { + border-bottom: 0; + } + .kv-table tbody tr:nth-child(even) td, + table.dataframe tbody tr:nth-child(even) td { + background: #fdfdfd; + } + .kv-key { font-weight: 700; width: 220px; color: #000; white-space: nowrap; } + .kv-key-label, .kv-key-sep { white-space: nowrap; } + .kv-val { color: var(--text-main); } + .badge { display: inline-block; padding: 6px 14px; border: 1px solid #eee; border-radius: 4px; font-size: 12px; background: #fff; } + .shapash-divider { border-bottom: 1px solid #eee; margin: 50px 0; } + .scroll-section { scroll-margin-top: 40px; } + .panel-plot { width: 100%; overflow-x: auto; } + .panel-plot .bk-root, .panel-plot .plotly-graph-div { width: 100%; max-width: 100%; } + @media (max-width: 900px) { + .sidebar { display: none; } + .container { margin-left: 0; padding: 30px 40px 40vh; } 
+ } +""" + +REPORT_SCRIPT = """ + +""" diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py new file mode 100644 index 00000000..ec3d3d28 --- /dev/null +++ b/shapash/report/smart_report/blocks.py @@ -0,0 +1,276 @@ +"""Block implementations and report data helpers for smart reports.""" + +from __future__ import annotations + +import pandas as pd + +from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix +from shapash.plots.plot_univariate import plot_distribution +from shapash.report.data_analysis import perform_global_dataframe_analysis +from shapash.report.smart_report.panel_support import render_plotly_pane_html +from shapash.report.smart_report.validation import stats_to_table +from shapash.utils.transform import apply_postprocessing, handle_categorical_missing, inverse_transform +from shapash.utils.utils import compute_sorted_variables_interactions_list_indices + +PALETTE = { + "gold": {"bg": "#ffffff", "border": "#ffbb00", "title": "#ccac00", "text": "#333333"}, + "blue": {"bg": "#ffffff", "border": "#2255aa", "title": "#2255aa", "text": "#333333"}, + "gray": {"bg": "#ffffff", "border": "#eeeeee", "title": "#666666", "text": "#666666"}, + "orange": {"bg": "#fff9e6", "border": "#ffbb00", "title": "#cc8833", "text": "#444444"}, +} + + +class ReportBlockMixin: + """Reusable block rendering and data preparation helpers.""" + + def block_header(self, title: str = "Report", subtitle: str = "") -> str: + sub = f'

{subtitle}

' if subtitle else "" + return f'

{title}

{sub}
' + + def block_text(self, title: str = "", body: str = "", color: str = "gray") -> str: + h2 = f'

{title}

' if title else "" + return f'
{h2}

{body}

' + + def block_key_value(self, title: str = "", items: dict | None = None, color: str = "gold") -> str: + items = items or {} + rows = self._render_key_value_rows(items) + h2 = f'

{title}

' if title else "" + return f'
{h2}{rows}
' + + def block_badge_row(self, title: str = "", badges: list | None = None) -> str: + badges = badges or [] + pills = "" + for badge in badges: + palette = PALETTE.get(badge.get("color", "gray"), PALETTE["gray"]) + pills += ( + f'' + f'{badge.get("label", "")}' + f'{badge.get("value", "")}' + ) + h2 = f'

{title}

' if title else "" + return f'
{h2}
{pills}
' + + def block_callout(self, body: str = "", color: str = "gold", icon: str = "") -> str: + return f'

{body}

' + + def block_divider(self, label: str = "") -> str: + return '
' + + def block_global_analysis(self, title: str = "", color: str = "gray") -> str: + self._require_train_test_data("global_analysis") + test_stats = perform_global_dataframe_analysis(self.x_init) + train_stats = perform_global_dataframe_analysis(self.x_train_pre) if self.x_train_pre is not None else None + stats_table = stats_to_table( + test_stats=test_stats, + train_stats=train_stats, + names=["Prediction dataset", "Training dataset"], + ) + table_html = stats_table.to_html(classes="kv-table", border=0) + return self._wrap_section_content(title, table_html) + + def block_feature_distribution( + self, + feature: str, + title: str = "", + color: str = "blue", + dataset_split: str = "data_train_test", + prediction_label: str = "test", + training_label: str = "train", + width: int = 700, + height: int = 500, + ) -> str: + self._require_train_test_data("feature_distribution") + if feature not in self.df_train_test.columns: + raise ValueError(f"Unknown feature '{feature}' for feature_distribution block.") + + fig = plot_distribution( + df_all=self.df_train_test, + col=feature, + hue=dataset_split, + colors_dict=self._feature_distribution_colors(), + width=width, + height=height, + ) + return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + + def block_correlations_plot( + self, + title: str = "", + color: str = "blue", + max_features: int = 20, + width: int | None = None, + height: int = 500, + ) -> str: + self._require_train_test_data("correlations_plot") + explainer = self._require_explainer("correlations_plot") + resolved_width = width or (900 if len(self.df_train_test["data_train_test"].unique()) > 1 else 500) + fig = explainer.plot.correlations_plot( + self.df_train_test, + optimized=True, + facet_col="data_train_test", + max_features=max_features, + width=resolved_width, + height=height, + ) + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_feature_importance(self, title: str = "", color: 
str = "green", label=None) -> str: + explainer = self._require_explainer("feature_importance") + fig = explainer.plot.features_importance(label=label) + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_contribution_plot( + self, + feature: str, + title: str = "", + color: str = "green", + label=None, + max_points: int | None = None, + ) -> str: + explainer = self._require_explainer("contribution_plot") + fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) + for trace in fig.data: + if trace.type == "bar": + trace.marker.color = "lightgrey" + return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + + def block_interactions_plot( + self, + title: str = "", + color: str = "green", + col1: str | None = None, + col2: str | None = None, + max_points: int | None = None, + ) -> str: + explainer = self._require_explainer("interactions_plot") + feature_one, feature_two = self._resolve_interaction_pair(col1, col2) + fig = explainer.plot.interactions_plot(col1=feature_one, col2=feature_two, max_points=max_points or self.max_points) + resolved_title = title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}" + return self._wrap_section_content(resolved_title, self._plotly_html(fig)) + + def block_target_distribution( + self, + title: str = "", + color: str = "blue", + width: int = 700, + height: int = 500, + ) -> str: + self._require_explainer("target_distribution") + if self.y_test is None or self.y_pred is None: + raise ValueError("target_distribution block requires y_test and predicted values from the explainer.") + + target_name = self.target_name or "target" + df_target = pd.concat( + [ + pd.DataFrame({target_name: self.y_pred}).assign(_dataset="pred"), + pd.DataFrame({target_name: self.y_test}).assign(_dataset="true"), + ] + ).reset_index(drop=True) + fig = plot_distribution( + df_all=df_target, + col=target_name, + 
hue="_dataset", + colors_dict=self._performance_distribution_colors(), + width=width, + height=height, + ) + return self._wrap_section_content(title or "Target distribution", self._plotly_html(fig)) + + def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: + explainer = self._require_explainer("confusion_matrix") + if self.y_test is None or self.y_pred is None: + raise ValueError("confusion_matrix block requires y_test and predicted values from the explainer.") + fig = plot_confusion_matrix(y_true=self.y_test, y_pred=self.y_pred, colors_dict=explainer.colors_dict) + return self._wrap_section_content(title or "Confusion matrix", self._plotly_html(fig)) + + def _preprocess_train_data(self, x_train: pd.DataFrame | None) -> pd.DataFrame | None: + if x_train is None or self.explainer is None: + return x_train + x_train_pre = inverse_transform(x_train, self.explainer.preprocessing) + x_train_pre = handle_categorical_missing(x_train_pre) + if self.explainer.postprocessing: + x_train_pre = apply_postprocessing(x_train_pre, self.explainer.postprocessing) + return x_train_pre + + @staticmethod + def _get_values_and_name(y: pd.DataFrame | pd.Series | list | None, default_name: str) -> tuple[object, str | None]: + if y is None: + return None, None + if isinstance(y, pd.DataFrame): + if len(y.columns) != 1: + raise ValueError("Number of columns found is greater than 1") + return y.values[:, 0], y.columns[0] + if isinstance(y, pd.Series): + return y.values, y.name + if isinstance(y, list): + return y, default_name + raise ValueError(f"Cannot process following type : {type(y)}") + + @staticmethod + def _create_train_test_df(test: pd.DataFrame | None, train: pd.DataFrame | None) -> pd.DataFrame | None: + if (test is not None and "data_train_test" in test.columns) or ( + train is not None and "data_train_test" in train.columns + ): + raise ValueError('"data_train_test" column must be renamed as it is used in ReportBase') + if test is None and train is 
None: + return None + frames = [] + if test is not None: + frames.append(test.assign(data_train_test="test")) + if train is not None: + frames.append(train.assign(data_train_test="train")) + return pd.concat(frames).reset_index(drop=True) + + def _require_explainer(self, block_type: str): + if self.explainer is None: + raise ValueError(f"{block_type} block requires an explainer on the report instance.") + return self.explainer + + def _require_train_test_data(self, block_type: str) -> None: + if self.df_train_test is None: + raise ValueError(f"{block_type} block requires x_train and explainer.x_init data on the report instance.") + + def _resolve_interaction_pair(self, col1: str | None, col2: str | None) -> tuple[str, str]: + if col1 and col2: + return col1, col2 + explainer = self._require_explainer("interactions_plot") + list_ind, _ = explainer.plot._select_indices_interactions_plot(selection=None, max_points=self.max_points) + interaction_values = explainer.get_interaction_values(selection=list_ind) + sorted_indices = compute_sorted_variables_interactions_list_indices(interaction_values) + if not sorted_indices: + raise ValueError("No interaction pair available for interactions_plot block.") + first_idx, second_idx = sorted_indices[0] + return explainer.columns_dict[first_idx], explainer.columns_dict[second_idx] + + def _feature_label(self, feature: str) -> str: + if self.explainer is None: + return feature + return self.explainer.features_dict.get(feature, feature) + + def _feature_distribution_colors(self) -> dict: + explainer = self._require_explainer("feature_distribution") + return explainer.colors_dict["report_feature_distribution"] + + @staticmethod + def _performance_distribution_colors() -> dict: + return {"pred": "#2255aa", "true": "#ffbb00"} + + @staticmethod + def _plotly_html(fig) -> str: + return render_plotly_pane_html(fig) + + @staticmethod + def _render_key_value_rows(items: dict) -> str: + return "".join( + f'{key} :' + f'{value}' + for key, 
value in items.items() + ) + + @staticmethod + def _wrap_section_content(title: str, body_html: str) -> str: + parts = [] + if title: + parts.append(f'

{title}

') + parts.append(body_html) + return f'
{"".join(parts)}
' diff --git a/shapash/report/smart_report/core.py b/shapash/report/smart_report/core.py new file mode 100644 index 00000000..8e340183 --- /dev/null +++ b/shapash/report/smart_report/core.py @@ -0,0 +1,94 @@ +"""Smart report orchestration for block-based HTML reports.""" + +from __future__ import annotations + +import importlib +import logging +from pathlib import Path + +import pandas as pd + +from shapash.report.smart_report.blocks import PALETTE, ReportBlockMixin +from shapash.report.smart_report.layout import build_html_page, render_sections, resolve_logo_src +from shapash.report.smart_report.validation import load_report_config, render_block_error_html + +logger = logging.getLogger(__name__) + + +class ReportBase(ReportBlockMixin): + """Base class for block-based HTML reports.""" + + def __init__( + self, + explainer=None, + x_train: pd.DataFrame | None = None, + y_train: pd.Series | pd.DataFrame | list | None = None, + y_test: pd.Series | pd.DataFrame | list | None = None, + config: dict | None = None, + ): + self.explainer = explainer + self.config = config or {} + self.x_train_init = x_train + self.x_train_pre = self._preprocess_train_data(x_train) + self.x_init = getattr(explainer, "x_init", None) + self.df_train_test = self._create_train_test_df(test=self.x_init, train=self.x_train_pre) + self.y_train, self.target_name_train = self._get_values_and_name(y_train, "target") + self.y_test, self.target_name_test = self._get_values_and_name(y_test, "target") + self.target_name = self.target_name_train or self.target_name_test + self.max_points = self.config.get("max_points", 200) + + if explainer is not None: + if explainer.y_pred is not None: + self.y_pred, _ = self._get_values_and_name(explainer.y_pred, "prediction") + else: + self.y_pred = explainer.model.predict(explainer.x_encoded) + else: + self.y_pred = None + + def generate_report(self, config_file: str, output_file: str) -> None: + """Render a report instance to an HTML file driven by a YAML config.""" 
+ cfg_path = Path(config_file).resolve() + cfg = load_report_config(cfg_path) + print(f"Loading config → {cfg_path}") + + rendered_blocks, sidebar_html = render_sections(self, cfg["sections"]) + + out_path = Path(output_file).resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + body_html = "\n".join(rendered_blocks) + logo_src = resolve_logo_src(out_path.parent) + out_path.write_text(build_html_page(body=body_html, sidebar_html=sidebar_html, logo_src=logo_src), encoding="utf-8") + logger.info("Report saved → %s", output_file) + + def render_block(self, block_cfg: dict) -> str: + """Dispatch one YAML block entry to the matching block_* method.""" + block_type = block_cfg.get("type", "") + params = block_cfg.get("params", {}) + + if block_type == "group": + return "".join(self.render_block(child_cfg) for child_cfg in block_cfg.get("blocks", [])) + + method = getattr(self, f"block_{block_type}", None) + if method is None: + if block_type == "custom": + return self._render_custom(block_cfg) + logger.warning("Unknown block type '%s' — skipped.", block_type) + return "" + + try: + return method(**params) + except Exception as exc: + logger.error("Block '%s' raised: %s", block_type, exc) + return render_block_error_html(block_type, exc) + + def _render_custom(self, block_cfg: dict) -> str: + """Call an arbitrary importable function.""" + func_path = block_cfg.get("function", "") + params = block_cfg.get("params", {}) + try: + mod_path, fn_name = func_path.rsplit(".", 1) + fn = getattr(importlib.import_module(mod_path), fn_name) + return fn(self, **params) + except Exception as exc: + logger.error("Custom block '%s' raised: %s", func_path, exc) + return render_block_error_html(func_path, exc) diff --git a/shapash/report/smart_report/layout.py b/shapash/report/smart_report/layout.py new file mode 100644 index 00000000..16e258b6 --- /dev/null +++ b/shapash/report/smart_report/layout.py @@ -0,0 +1,140 @@ +"""HTML layout and section rendering helpers for smart 
reports.""" + +from __future__ import annotations + +import os +import re +from pathlib import Path + +from shapash.report.smart_report.assets import REPORT_SCRIPT, REPORT_STYLES +from shapash.report.smart_report.panel_support import panel_resource_tags + + +def resolve_logo_src(base_dir: Path | None) -> str: + """Resolve the relative path to the bundled Shapash logo.""" + if base_dir is None: + return "" + logo_path = Path(__file__).resolve().parents[3] / "docs" / "assets" / "images" / "svg" / "shapash-github.svg" + return os.path.relpath(logo_path, base_dir).replace(os.sep, "/") if logo_path.exists() else "" + + +def block_title(block_cfg: dict) -> str: + """Return the configured title for a block, if any.""" + return block_cfg.get("params", {}).get("title", "") or "" + + +def section_id(title: str) -> str: + """Create a stable HTML id for a block title.""" + return re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") + + +def wrap_section(block_html: str, html_id: str) -> str: + """Wrap rendered HTML in a scroll-trackable section tag.""" + return f'
{block_html}
' + + +def build_nav_link(title: str, html_id: str, extra_class: str = "") -> str: + """Build a sidebar navigation link.""" + classes = " ".join(part for part in ["nav-item", extra_class] if part) + return f'{title}' + + +def render_block_section(report, block_cfg: dict) -> tuple[str, str | None]: + """Render one non-group block and optionally wrap it as a scroll section.""" + block_html = report.render_block(block_cfg) + title = block_title(block_cfg) + if title and block_cfg.get("type") != "header": + html_id = section_id(title) + return wrap_section(block_html, html_id), html_id + return block_html, None + + +def render_group_section(report, block_cfg: dict) -> tuple[list[str], str | None]: + """Render a grouped section with a parent nav item and nested children.""" + rendered_children = [] + child_nav_links = [] + + for child_cfg in block_cfg.get("blocks", []): + child_html, child_section_id = render_block_section(report, child_cfg) + rendered_children.append(child_html) + if child_section_id: + child_nav_links.append(build_nav_link(block_title(child_cfg), child_section_id, extra_class="nav-child")) + + group_title = block_title(block_cfg) + if not group_title: + return rendered_children, None + + group_id = section_id(group_title) + nav_html = ( + '" + ) + return [wrap_section("", group_id), *rendered_children], nav_html + + +def render_sections(report, sections: list[dict]) -> tuple[list[str], str]: + """Render all configured sections and build the sidebar navigation HTML.""" + rendered_blocks: list[str] = [] + nav_links: list[str] = [] + + for block_cfg in sections: + if block_cfg.get("type") == "group": + group_blocks, group_nav = render_group_section(report, block_cfg) + rendered_blocks.extend(group_blocks) + if group_nav: + nav_links.append(group_nav) + continue + + block_html, html_id = render_block_section(report, block_cfg) + rendered_blocks.append(block_html) + if html_id: + nav_links.append(build_nav_link(block_title(block_cfg), html_id)) + + return 
rendered_blocks, "\n".join(nav_links) + + +def build_sidebar_fragment(sidebar_html: str = "", logo_src: str = "") -> str: + """Render the sidebar brand and navigation markup.""" + brand_html = ( + ( + f'" + ) + if logo_src + else '' + ) + return f"{brand_html}{sidebar_html}" + + +def build_report_fragment(body: str, sidebar_html: str = "", logo_src: str = "") -> str: + """Compose the styled report shell body fragment.""" + return f""" + +
+ +
+ {body} +
+
+ {REPORT_SCRIPT} +""" + + +def build_html_page(body: str, sidebar_html: str = "", logo_src: str = "") -> str: + """Compose the full HTML page for a rendered report.""" + return f""" + + + + + Shapash Report + {panel_resource_tags()} + + + {build_report_fragment(body, sidebar_html, logo_src)} + +""" diff --git a/shapash/report/smart_report/panel_support.py b/shapash/report/smart_report/panel_support.py new file mode 100644 index 00000000..9c8a1ecc --- /dev/null +++ b/shapash/report/smart_report/panel_support.py @@ -0,0 +1,55 @@ +"""Panel helpers for standalone smart report HTML rendering.""" + +from __future__ import annotations + +import re +from functools import lru_cache + +import panel as pn +from panel.io.resources import CDN_DIST, Resources + + +def render_plotly_pane_html(fig) -> str: + """Render a Plotly figure as a standalone Panel fragment.""" + _enable_panel_plotly() + pane = pn.pane.Plotly(fig, config={"responsive": True}) + bundle = pane._repr_mimebundle_() + data = bundle[0] if isinstance(bundle, tuple) else bundle + html = data.get("text/html") + if not html: + raise ValueError("Panel Plotly pane did not return HTML output.") + return f'
{html}
' + + +@lru_cache(maxsize=1) +def panel_resource_tags() -> str: + """Return the CSS and JS tags required to hydrate Panel panes.""" + _enable_panel_plotly() + resources = Resources(mode="cdn") + css_html = _normalize_panel_css(resources.render_css()) + js_html = resources.render_js() if callable(resources.render_js) else resources.render_js + js_html = _ensure_panel_runtime(js_html) + return "\n".join(part for part in [css_html, js_html] if part) + + +def _enable_panel_plotly() -> None: + pn.extension("plotly") + + +def _normalize_panel_css(css_html: str) -> str: + return re.sub( + r'href="static/extensions/panel/([^"?]+)(?:\?v=[^"]+)?', + lambda match: f'href="{CDN_DIST}{match.group(1)}', + css_html, + ) + + +def _ensure_panel_runtime(js_html: str) -> str: + if "panel.min.js" in js_html: + return js_html + + panel_tag = f'' + marker = '' + if marker in js_html: + return js_html.replace(marker, f"{panel_tag}\n\n{marker}") + return f"{js_html}\n{panel_tag}" \ No newline at end of file diff --git a/shapash/report/smart_report/validation.py b/shapash/report/smart_report/validation.py new file mode 100644 index 00000000..b8e4495c --- /dev/null +++ b/shapash/report/smart_report/validation.py @@ -0,0 +1,84 @@ +"""Validation and lightweight utility helpers for HTML reports.""" + +from __future__ import annotations + +from pathlib import Path + +import pandas as pd +import yaml + + +def load_report_config(cfg_path: Path) -> dict: + """Load and validate a report YAML configuration file.""" + if not cfg_path.exists(): + raise FileNotFoundError(f"Config not found: {cfg_path}") + + try: + with cfg_path.open(encoding="utf-8") as file: + cfg = yaml.safe_load(file) + except yaml.YAMLError as exc: + raise ValueError(f"Invalid YAML syntax in '{cfg_path}': {exc}") from exc + + validate_report_schema(cfg, cfg_path) + return cfg + + +def validate_report_schema(cfg: object, cfg_path: Path) -> None: + """Validate the minimal schema expected by the report renderer.""" + if not 
isinstance(cfg, dict): + raise ValueError(f"Invalid YAML structure in '{cfg_path}': top-level content must be a mapping.") + + sections = cfg.get("sections") + if not isinstance(sections, list) or not sections: + raise ValueError(f"Invalid YAML structure in '{cfg_path}': 'sections' must be a non-empty list.") + + for idx, block in enumerate(sections, start=1): + _validate_block(block, idx, cfg_path) + + +def _validate_block(block: object, idx: int, cfg_path: Path, parent: str = "sections") -> None: + if not isinstance(block, dict): + raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}] must be a mapping.") + + block_type = block.get("type") + if not isinstance(block_type, str) or not block_type.strip(): + raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].type must be a non-empty string.") + + params = block.get("params", {}) + if not isinstance(params, dict): + raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].params must be a mapping.") + + if block_type == "custom": + function_path = block.get("function") + if not isinstance(function_path, str) or not function_path.strip(): + raise ValueError( + f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].function is required for custom blocks." + ) + + if block_type == "group": + child_blocks = block.get("blocks", []) + if not isinstance(child_blocks, list): + raise ValueError( + f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].blocks must be a list for group blocks." + ) + for child_idx, child_block in enumerate(child_blocks, start=1): + _validate_block(child_block, child_idx, cfg_path, parent=f"{parent}[{idx}].blocks") + + +def render_block_error_html(block_id: str, exc: Exception) -> str: + """Render a consistent HTML error box for block failures.""" + return ( + f'
' + f'

⚠ Block "{block_id}" failed

' + f'
{exc}
' + ) + + +def stats_to_table(test_stats: dict, names: list[str], train_stats: dict | None = None) -> pd.DataFrame: + """Build a stats table and drop columns that are entirely missing.""" + if train_stats is not None: + stats_table = pd.DataFrame({names[1]: pd.Series(train_stats), names[0]: pd.Series(test_stats)}) + else: + stats_table = pd.DataFrame({names[0]: pd.Series(test_stats)}) + + return stats_table.dropna(axis=1, how="all") From 0338608749ed2ff29df2a9df0e450b0b40b4e4b6 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:01:19 +0200 Subject: [PATCH 08/43] create test --- .../report/test_smart_report_panel.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/unit_tests/report/test_smart_report_panel.py diff --git a/tests/unit_tests/report/test_smart_report_panel.py b/tests/unit_tests/report/test_smart_report_panel.py new file mode 100644 index 00000000..c64c8c68 --- /dev/null +++ b/tests/unit_tests/report/test_smart_report_panel.py @@ -0,0 +1,33 @@ +import unittest + +import plotly.graph_objects as go + +from shapash.report.smart_report.layout import build_html_page +from shapash.report.smart_report.panel_support import panel_resource_tags, render_plotly_pane_html + + +class TestSmartReportPanel(unittest.TestCase): + def test_panel_resource_tags_include_panel_dependencies(self): + tags = panel_resource_tags() + + self.assertIn("cdn.holoviz.org/panel", tags) + self.assertIn("panel.min.js", tags) + self.assertIn("cdn.bokeh.org", tags) + self.assertIn("plotly", tags) + + def test_render_plotly_pane_html_returns_panel_fragment(self): + fig = go.Figure(go.Scatter(x=[1, 2], y=[3, 4])) + + html = render_plotly_pane_html(fig) + + self.assertIn('class="panel-plot"', html) + self.assertIn("data-root-id=", html) + self.assertIn("panel.models.plotly.PlotlyPlot", html) + + def test_build_html_page_includes_panel_resources(self): + html = build_html_page(body="
Body
") + + self.assertIn("cdn.holoviz.org/panel", html) + self.assertIn("panel.min.js", html) + self.assertIn("cdn.bokeh.org", html) + self.assertNotIn("cdn.plot.ly", html) From f282183d8d9e9a4908e3579c23d5f2ea4ffa0dc7 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:01:30 +0200 Subject: [PATCH 09/43] move to correct place --- tutorial/generate_report/demo.py | 169 ++++++++++++++++++ tutorial/generate_report/report_config_v1.yml | 81 +++++++++ 2 files changed, 250 insertions(+) create mode 100644 tutorial/generate_report/demo.py create mode 100644 tutorial/generate_report/report_config_v1.yml diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py new file mode 100644 index 00000000..5879ca41 --- /dev/null +++ b/tutorial/generate_report/demo.py @@ -0,0 +1,169 @@ +import importlib +import sys +from pathlib import Path + +import pandas as pd +import plotly.express as px +import yaml +from category_encoders import OrdinalEncoder +from sklearn.ensemble import RandomForestRegressor +from sklearn.model_selection import train_test_split + +HERE = Path(__file__).resolve().parent +REPO_ROOT = HERE.parents[1] + +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from shapash import SmartExplainer +from shapash.data.data_loader import data_loading +from shapash.report.smart_report import ReportBase + +CONFIG_V1 = HERE / "report_config_v1.yml" +OUTPUT_V1 = HERE / "output" / "report_v1.html" +PROJECT_INFO_FILE = HERE / "utils" / "project_info.yml" + + +class NotebookParityReport(ReportBase): + """Report with custom blocks that mirror sections from the legacy notebook report.""" + + def block_project_information(self, title: str = "Project information", color: str = "gray"): + project_info_path = self.config.get("project_info_file") + if not project_info_path: + raise ValueError("project_information block requires config['project_info_file'].") + + with open(project_info_path, encoding="utf-8") as f: + 
project_info = yaml.safe_load(f) or {} + + sections_html = [] + for section_name, section_values in project_info.items(): + if not isinstance(section_values, dict): + continue + if section_name.strip().lower() == "model training": + continue + + rows = self._render_key_value_rows(section_values) + sections_html.append( + f'

{section_name}

' + f'{rows}
' + ) + + return self._wrap_section_content(title, "".join(sections_html)) + + def block_model_analysis(self, title: str = "Model information", color: str = "blue"): + explainer = self._require_explainer("model_analysis") + model = explainer.model + model_name = type(model).__name__ + details = { + "Model class": model_name, + "Task": getattr(explainer, "_case", "regression"), + "Feature count": len(explainer.x_init.columns), + "Prediction sample size": len(explainer.x_init), + "Training sample size": len(self.x_train_init) if self.x_train_init is not None else "n/a", + } + rows = self._render_key_value_rows(details) + return self._wrap_section_content(title, f'{rows}
') + + def block_relationship_target( + self, + title: str = "Relationship with target variable", + feature: str = "OverallQual", + color: str = "blue", + max_y: int | None = None, + ): + self._require_train_test_data("relationship_target") + if self.x_train_pre is None or self.y_train is None: + raise ValueError("relationship_target block requires both training features and y_train.") + if feature not in self.x_train_pre.columns: + raise ValueError(f"Unknown feature '{feature}' for relationship_target block.") + + target_name = self.target_name_train or "target" + df_train = self.x_train_pre.copy() + df_train[target_name] = self.y_train + + fig = px.box(df_train, x=feature, y=target_name) + if max_y is not None: + fig.update_yaxes(range=[0, max_y]) + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_training_correlations( + self, + title: str = "Relationship between training variables", + color: str = "blue", + max_features: int = 30, + ): + if self.x_train_pre is None: + raise ValueError("training_correlations block requires x_train.") + + numeric_train = self.x_train_pre.select_dtypes(include="number") + corr = numeric_train.corr(numeric_only=True) + if max_features > 0 and corr.shape[0] > max_features: + corr = corr.iloc[:max_features, :max_features] + + fig = px.imshow(corr, color_continuous_scale="YlGnBu", zmin=-1, zmax=1, aspect="auto") + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_performance_metrics( + self, + title: str = "Model performance", + color: str = "orange", + metrics: list | None = None, + ): + if self.y_test is None or self.y_pred is None: + raise ValueError("performance_metrics block requires y_test and y_pred.") + + metric_items = [] + metrics = metrics or [] + for metric_cfg in metrics: + metric_path = metric_cfg.get("path") + metric_name = metric_cfg.get("name", metric_path) + if not metric_path: + continue + module_path, fn_name = metric_path.rsplit(".", 1) + metric_fn = 
getattr(importlib.import_module(module_path), fn_name) + value = metric_fn(self.y_test, self.y_pred) + metric_items.append({"label": metric_name, "value": f"{value:,.2f}", "color": color}) + + return self.block_badge_row(title=title, badges=metric_items) + + def block_pred_vs_true(self, title: str = "y_pred vs y_test", color: str = "orange"): + if self.y_test is None or self.y_pred is None: + raise ValueError("pred_vs_true block requires y_test and y_pred.") + + scatter_df = pd.DataFrame({"y_test": self.y_test, "y_pred": self.y_pred}) + fig = px.scatter(scatter_df, x="y_test", y="y_pred") + return self._wrap_section_content(title, self._plotly_html(fig)) + + +def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Series, pd.Series]: + """Build the same House Prices explainer used in report tutorials.""" + house_df, house_dict = data_loading("house_prices") + y_df = house_df["SalePrice"] + X_df = house_df[house_df.columns.difference(["SalePrice"])] + + categorical_features = list(X_df.select_dtypes(include=["object", "string", "category"]).columns) + encoder = OrdinalEncoder(cols=categorical_features, handle_unknown="ignore", return_df=True).fit(X_df) + X_encoded = encoder.transform(X_df) + + Xtrain, Xtest, ytrain, ytest = train_test_split(X_encoded, y_df, train_size=0.75, random_state=1) + regressor = RandomForestRegressor(n_estimators=50, random_state=1).fit(Xtrain, ytrain) + + y_pred = pd.DataFrame(regressor.predict(Xtest), columns=["pred"], index=Xtest.index) + + xpl = SmartExplainer(model=regressor, preprocessing=encoder, features_dict=house_dict) + xpl.compile(x=Xtest, y_pred=y_pred, y_target=ytest) + return xpl, Xtrain, ytrain, ytest + + +if __name__ == "__main__": + xpl, Xtrain, ytrain, ytest = build_house_prices_explainer() + + report = NotebookParityReport( + explainer=xpl, + x_train=Xtrain, + y_train=ytrain, + y_test=ytest, + config={"project_info_file": str(PROJECT_INFO_FILE)}, + ) + 
report.generate_report(config_file=str(CONFIG_V1), output_file=str(OUTPUT_V1)) + print(f"Saved notebook-parity report: {OUTPUT_V1}") \ No newline at end of file diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml new file mode 100644 index 00000000..09e7c681 --- /dev/null +++ b/tutorial/generate_report/report_config_v1.yml @@ -0,0 +1,81 @@ +# report_config_v1.yml +# Notebook-parity configuration intended to mirror the legacy papermill report sections. + +sections: + - type: header + params: + title: "House prices report" + subtitle: > + This document is a data science report of the kaggle house prices tutorial project. + It was generated using the Shapash library. + + - type: project_information + params: + title: "Project information" + color: gray + + - type: model_analysis + params: + title: "Model information" + color: blue + + - type: key_value + params: + title: "Model Training" + color: blue + items: + Used Algorithm: "We used a RandomForestRegressor algorithm (scikit-learn) but this model could be challenged with other interesting models such as XGBRegressor, Neural Networks, ..." + Parameters Choice: "We did not perform any hyperparameter optimisation and chose to use n_estimators=50. 
Future works should be planned to perform gridsearch optimizations" + Metrics: "Mean Squared Error metric" + Validation Strategy: "We splitted our data into train (75%) and test (25%)" + Path To Script: "https://github.com/MAIF/shapash/tree/master/tutorial/" + + - type: global_analysis + params: + title: "Dataset analysis" + color: blue + + - type: relationship_target + params: + title: "Relashionship with target variable" + feature: "OverallQual" + color: blue + max_y: 800000 + + - type: training_correlations + params: + title: "Relashionship between training variables" + color: blue + max_features: 30 + + - type: feature_importance + params: + title: "Model explainability" + color: gold + + - type: performance_metrics + params: + title: "Model performance" + color: orange + metrics: + - path: "sklearn.metrics.mean_absolute_error" + name: "Mean absolute error" + - path: "sklearn.metrics.mean_squared_error" + name: "Mean squared error" + + - type: text + params: + body: "The graph below represents y_pred vs y_test :" + color: gray + + - type: pred_vs_true + params: + title: "" + color: orange + + - type: callout + params: + body: > + You can add as many graphs, text, or other cells as you want. + The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file. 
+ color: gray \ No newline at end of file From e386e62fd9a30223707c63295b248735c39ec893 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:17:29 +0200 Subject: [PATCH 10/43] ruff mod --- shapash/report/report_config.yml | 2 +- shapash/report/smart_report/blocks.py | 4 +++- shapash/report/smart_report/core.py | 6 ++++-- shapash/report/smart_report/panel_support.py | 2 +- tutorial/generate_report/demo.py | 2 +- tutorial/generate_report/report_config_v1.yml | 2 +- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/shapash/report/report_config.yml b/shapash/report/report_config.yml index dbe829f4..5393e4d3 100644 --- a/shapash/report/report_config.yml +++ b/shapash/report/report_config.yml @@ -136,4 +136,4 @@ sections: To add a new section, define a block_my_section() method in a subclass of ReportBase — or point a type: custom entry at any importable Python function. No other changes needed. - color: purple \ No newline at end of file + color: purple diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index ec3d3d28..610a3e96 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -144,7 +144,9 @@ def block_interactions_plot( ) -> str: explainer = self._require_explainer("interactions_plot") feature_one, feature_two = self._resolve_interaction_pair(col1, col2) - fig = explainer.plot.interactions_plot(col1=feature_one, col2=feature_two, max_points=max_points or self.max_points) + fig = explainer.plot.interactions_plot( + col1=feature_one, col2=feature_two, max_points=max_points or self.max_points + ) resolved_title = title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}" return self._wrap_section_content(resolved_title, self._plotly_html(fig)) diff --git a/shapash/report/smart_report/core.py b/shapash/report/smart_report/core.py index 8e340183..4e99ee8e 100644 --- a/shapash/report/smart_report/core.py +++ 
b/shapash/report/smart_report/core.py @@ -8,7 +8,7 @@ import pandas as pd -from shapash.report.smart_report.blocks import PALETTE, ReportBlockMixin +from shapash.report.smart_report.blocks import ReportBlockMixin from shapash.report.smart_report.layout import build_html_page, render_sections, resolve_logo_src from shapash.report.smart_report.validation import load_report_config, render_block_error_html @@ -57,7 +57,9 @@ def generate_report(self, config_file: str, output_file: str) -> None: out_path.parent.mkdir(parents=True, exist_ok=True) body_html = "\n".join(rendered_blocks) logo_src = resolve_logo_src(out_path.parent) - out_path.write_text(build_html_page(body=body_html, sidebar_html=sidebar_html, logo_src=logo_src), encoding="utf-8") + out_path.write_text( + build_html_page(body=body_html, sidebar_html=sidebar_html, logo_src=logo_src), encoding="utf-8" + ) logger.info("Report saved → %s", output_file) def render_block(self, block_cfg: dict) -> str: diff --git a/shapash/report/smart_report/panel_support.py b/shapash/report/smart_report/panel_support.py index 9c8a1ecc..4eb9a902 100644 --- a/shapash/report/smart_report/panel_support.py +++ b/shapash/report/smart_report/panel_support.py @@ -52,4 +52,4 @@ def _ensure_panel_runtime(js_html: str) -> str: marker = '' if marker in js_html: return js_html.replace(marker, f"{panel_tag}\n\n{marker}") - return f"{js_html}\n{panel_tag}" \ No newline at end of file + return f"{js_html}\n{panel_tag}" diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py index 5879ca41..44ad593f 100644 --- a/tutorial/generate_report/demo.py +++ b/tutorial/generate_report/demo.py @@ -166,4 +166,4 @@ def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Ser config={"project_info_file": str(PROJECT_INFO_FILE)}, ) report.generate_report(config_file=str(CONFIG_V1), output_file=str(OUTPUT_V1)) - print(f"Saved notebook-parity report: {OUTPUT_V1}") \ No newline at end of file + print(f"Saved 
notebook-parity report: {OUTPUT_V1}") diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 09e7c681..6ac930ca 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -78,4 +78,4 @@ sections: body: > You can add as many graphs, text, or other cells as you want. The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file. - color: gray \ No newline at end of file + color: gray From ce7feb8fc8db45080fc87f9a7965cd2a5a28d99e Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 10:31:57 +0200 Subject: [PATCH 11/43] adding docstring --- shapash/report/smart_report/blocks.py | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index 610a3e96..f2defdd6 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -24,20 +24,24 @@ class ReportBlockMixin: """Reusable block rendering and data preparation helpers.""" def block_header(self, title: str = "Report", subtitle: str = "") -> str: + """Return the HTML for the report header and its optional subtitle callout.""" sub = f'

{subtitle}

' if subtitle else "" return f'

{title}

{sub}
' def block_text(self, title: str = "", body: str = "", color: str = "gray") -> str: + """Return the HTML for a text section with an optional title.""" h2 = f'

{title}

' if title else "" return f'
{h2}

{body}

' def block_key_value(self, title: str = "", items: dict | None = None, color: str = "gold") -> str: + """Return the HTML for a table of key-value pairs.""" items = items or {} rows = self._render_key_value_rows(items) h2 = f'

{title}

' if title else "" return f'
{h2}{rows}
' def block_badge_row(self, title: str = "", badges: list | None = None) -> str: + """Return the HTML for a row of badge-style metrics.""" badges = badges or [] pills = "" for badge in badges: @@ -51,12 +55,19 @@ def block_badge_row(self, title: str = "", badges: list | None = None) -> str: return f'
{h2}
{pills}
' def block_callout(self, body: str = "", color: str = "gold", icon: str = "") -> str: + """Return the HTML for a highlighted callout paragraph.""" return f'

{body}

' def block_divider(self, label: str = "") -> str: + """Return the HTML for a visual divider between report sections.""" return '
' def block_global_analysis(self, title: str = "", color: str = "gray") -> str: + """Return the HTML for the global dataset statistics comparison table. + + Requires prediction data on the report instance and includes training + data statistics when training data is available. + """ self._require_train_test_data("global_analysis") test_stats = perform_global_dataframe_analysis(self.x_init) train_stats = perform_global_dataframe_analysis(self.x_train_pre) if self.x_train_pre is not None else None @@ -79,6 +90,11 @@ def block_feature_distribution( width: int = 700, height: int = 500, ) -> str: + """Return the HTML for a feature distribution plot across dataset splits. + + The feature must be present in the prepared train/test dataframe stored + on the report instance. + """ self._require_train_test_data("feature_distribution") if feature not in self.df_train_test.columns: raise ValueError(f"Unknown feature '{feature}' for feature_distribution block.") @@ -101,6 +117,11 @@ def block_correlations_plot( width: int | None = None, height: int = 500, ) -> str: + """Return the HTML for the explainer correlation plot. + + When both training and prediction datasets are available, the plot is + faceted by dataset split. 
+ """ self._require_train_test_data("correlations_plot") explainer = self._require_explainer("correlations_plot") resolved_width = width or (900 if len(self.df_train_test["data_train_test"].unique()) > 1 else 500) @@ -115,6 +136,7 @@ def block_correlations_plot( return self._wrap_section_content(title, self._plotly_html(fig)) def block_feature_importance(self, title: str = "", color: str = "green", label=None) -> str: + """Return the HTML for the explainer feature-importance plot.""" explainer = self._require_explainer("feature_importance") fig = explainer.plot.features_importance(label=label) return self._wrap_section_content(title, self._plotly_html(fig)) @@ -127,6 +149,11 @@ def block_contribution_plot( label=None, max_points: int | None = None, ) -> str: + """Return the HTML for a feature contribution plot. + + Requires an explainer with contribution values and uses the configured + maximum point count when no explicit limit is provided. + """ explainer = self._require_explainer("contribution_plot") fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) for trace in fig.data: @@ -142,6 +169,11 @@ def block_interactions_plot( col2: str | None = None, max_points: int | None = None, ) -> str: + """Return the HTML for an interaction plot between two features. + + If no feature pair is provided, the strongest available interaction is + selected from the explainer output. + """ explainer = self._require_explainer("interactions_plot") feature_one, feature_two = self._resolve_interaction_pair(col1, col2) fig = explainer.plot.interactions_plot( @@ -157,6 +189,11 @@ def block_target_distribution( width: int = 700, height: int = 500, ) -> str: + """Return the HTML for the true-versus-predicted target distribution plot. + + Requires both ground-truth targets and predicted values on the report + instance. 
+ """ self._require_explainer("target_distribution") if self.y_test is None or self.y_pred is None: raise ValueError("target_distribution block requires y_test and predicted values from the explainer.") @@ -179,6 +216,11 @@ def block_target_distribution( return self._wrap_section_content(title or "Target distribution", self._plotly_html(fig)) def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: + """Return the HTML for a classification confusion matrix. + + Requires both ground-truth labels and predicted labels on the report + instance. + """ explainer = self._require_explainer("confusion_matrix") if self.y_test is None or self.y_pred is None: raise ValueError("confusion_matrix block requires y_test and predicted values from the explainer.") From 3e5fb8bbcf453f39077fb39e6b4c4a1f3e8bfe99 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 11:34:36 +0200 Subject: [PATCH 12/43] using bokeh instead of plotly --- shapash/report/smart_report/assets.py | 2 +- shapash/report/smart_report/blocks.py | 511 ++++++++++++++++-- shapash/report/smart_report/panel_support.py | 18 +- tutorial/generate_report/demo.py | 156 +++++- tutorial/generate_report/report_config_v1.yml | 11 + 5 files changed, 643 insertions(+), 55 deletions(-) diff --git a/shapash/report/smart_report/assets.py b/shapash/report/smart_report/assets.py index 29bdcbc6..3867b522 100644 --- a/shapash/report/smart_report/assets.py +++ b/shapash/report/smart_report/assets.py @@ -103,7 +103,7 @@ .shapash-divider { border-bottom: 1px solid #eee; margin: 50px 0; } .scroll-section { scroll-margin-top: 40px; } .panel-plot { width: 100%; overflow-x: auto; } - .panel-plot .bk-root, .panel-plot .plotly-graph-div { width: 100%; max-width: 100%; } + .panel-plot .bk-root { width: 100%; max-width: 100%; } @media (max-width: 900px) { .sidebar { display: none; } .container { margin-left: 0; padding: 30px 40px 40vh; } diff --git a/shapash/report/smart_report/blocks.py 
b/shapash/report/smart_report/blocks.py index f2defdd6..80510fd2 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -2,12 +2,17 @@ from __future__ import annotations +import numpy as np import pandas as pd +from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, HoverTool, LinearColorMapper, TabPanel, Tabs +from bokeh.palettes import RdYlBu11, YlOrRd9 +from bokeh.plotting import figure +from bokeh.transform import dodge, jitter +from pandas.api.types import is_numeric_dtype +from sklearn.metrics import confusion_matrix -from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix -from shapash.plots.plot_univariate import plot_distribution from shapash.report.data_analysis import perform_global_dataframe_analysis -from shapash.report.smart_report.panel_support import render_plotly_pane_html +from shapash.report.smart_report.panel_support import render_bokeh_pane_html from shapash.report.smart_report.validation import stats_to_table from shapash.utils.transform import apply_postprocessing, handle_categorical_missing, inverse_transform from shapash.utils.utils import compute_sorted_variables_interactions_list_indices @@ -99,15 +104,15 @@ def block_feature_distribution( if feature not in self.df_train_test.columns: raise ValueError(f"Unknown feature '{feature}' for feature_distribution block.") - fig = plot_distribution( - df_all=self.df_train_test, - col=feature, - hue=dataset_split, - colors_dict=self._feature_distribution_colors(), + fig = self._feature_distribution_bokeh( + feature=feature, + dataset_split=dataset_split, + prediction_label=prediction_label, + training_label=training_label, width=width, height=height, ) - return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + return self._wrap_section_content(title or self._feature_label(feature), self._bokeh_html(fig)) def block_correlations_plot( self, @@ -123,23 +128,22 @@ def block_correlations_plot( 
faceted by dataset split. """ self._require_train_test_data("correlations_plot") - explainer = self._require_explainer("correlations_plot") resolved_width = width or (900 if len(self.df_train_test["data_train_test"].unique()) > 1 else 500) - fig = explainer.plot.correlations_plot( - self.df_train_test, - optimized=True, - facet_col="data_train_test", + fig = self._correlations_bokeh( + df=self.df_train_test, + split_col="data_train_test", max_features=max_features, width=resolved_width, height=height, + title=title, ) - return self._wrap_section_content(title, self._plotly_html(fig)) + return self._wrap_section_content("", self._bokeh_html(fig)) def block_feature_importance(self, title: str = "", color: str = "green", label=None) -> str: """Return the HTML for the explainer feature-importance plot.""" explainer = self._require_explainer("feature_importance") - fig = explainer.plot.features_importance(label=label) - return self._wrap_section_content(title, self._plotly_html(fig)) + fig = self._feature_importance_bokeh(explainer=explainer, title=title) + return self._wrap_section_content("", self._bokeh_html(fig)) def block_contribution_plot( self, @@ -155,11 +159,13 @@ def block_contribution_plot( maximum point count when no explicit limit is provided. 
""" explainer = self._require_explainer("contribution_plot") - fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) - for trace in fig.data: - if trace.type == "bar": - trace.marker.color = "lightgrey" - return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + fig = self._contribution_bokeh( + explainer=explainer, + feature=feature, + max_points=max_points or self.max_points, + title=title or self._feature_label(feature), + ) + return self._wrap_section_content("", self._bokeh_html(fig)) def block_interactions_plot( self, @@ -176,11 +182,14 @@ def block_interactions_plot( """ explainer = self._require_explainer("interactions_plot") feature_one, feature_two = self._resolve_interaction_pair(col1, col2) - fig = explainer.plot.interactions_plot( - col1=feature_one, col2=feature_two, max_points=max_points or self.max_points + fig = self._interactions_bokeh( + explainer=explainer, + col1=feature_one, + col2=feature_two, + max_points=max_points or self.max_points, + title=title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}", ) - resolved_title = title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}" - return self._wrap_section_content(resolved_title, self._plotly_html(fig)) + return self._wrap_section_content("", self._bokeh_html(fig)) def block_target_distribution( self, @@ -205,15 +214,14 @@ def block_target_distribution( pd.DataFrame({target_name: self.y_test}).assign(_dataset="true"), ] ).reset_index(drop=True) - fig = plot_distribution( - df_all=df_target, - col=target_name, - hue="_dataset", - colors_dict=self._performance_distribution_colors(), + fig = self._target_distribution_bokeh( + df_target=df_target, + target_name=target_name, width=width, height=height, + title=title or "Target distribution", ) - return self._wrap_section_content(title or "Target distribution", self._plotly_html(fig)) + return 
self._wrap_section_content("", self._bokeh_html(fig)) def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: """Return the HTML for a classification confusion matrix. @@ -224,8 +232,12 @@ def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: explainer = self._require_explainer("confusion_matrix") if self.y_test is None or self.y_pred is None: raise ValueError("confusion_matrix block requires y_test and predicted values from the explainer.") - fig = plot_confusion_matrix(y_true=self.y_test, y_pred=self.y_pred, colors_dict=explainer.colors_dict) - return self._wrap_section_content(title or "Confusion matrix", self._plotly_html(fig)) + fig = self._confusion_matrix_bokeh( + y_true=self.y_test, + y_pred=self.y_pred, + title=title or "Confusion matrix", + ) + return self._wrap_section_content("", self._bokeh_html(fig)) def _preprocess_train_data(self, x_train: pd.DataFrame | None) -> pd.DataFrame | None: if x_train is None or self.explainer is None: @@ -295,13 +307,440 @@ def _feature_distribution_colors(self) -> dict: explainer = self._require_explainer("feature_distribution") return explainer.colors_dict["report_feature_distribution"] + @staticmethod + def _palette_color(index: int, fallback: list[str]) -> str: + return fallback[index % len(fallback)] + + def _resolve_split_colors(self, split_values: list[str], colors_dict: dict | None = None) -> dict[str, str]: + base = colors_dict or {} + fallback = ["#2255aa", "#ffbb00", "#666666", "#44aa99", "#cc6677"] + return { + split: base.get(split, self._palette_color(idx, fallback)) + for idx, split in enumerate(split_values) + } + + def _feature_distribution_bokeh( + self, + feature: str, + dataset_split: str, + prediction_label: str, + training_label: str, + width: int, + height: int, + ): + df_plot = self.df_train_test[[feature, dataset_split]].dropna().copy() + if df_plot.empty: + raise ValueError(f"No data available for feature '{feature}' after dropping missing 
values.") + + label_map = {"test": prediction_label, "train": training_label} + split_values = [str(val) for val in df_plot[dataset_split].dropna().unique().tolist()] + split_colors = self._resolve_split_colors(split_values, self._feature_distribution_colors()) + + tools = "pan,wheel_zoom,box_zoom,reset,save" + title = self._feature_label(feature) + if is_numeric_dtype(df_plot[feature]): + bins = min(max(10, int(np.sqrt(len(df_plot)))), 40) + p = figure(width=width, height=height, title=title, tools=tools) + p.yaxis.axis_label = "Count" + p.xaxis.axis_label = title + + for split in split_values: + subset = df_plot[df_plot[dataset_split].astype(str) == split][feature].astype(float) + if subset.empty: + continue + hist, edges = np.histogram(subset, bins=bins) + source = ColumnDataSource( + data={ + "left": edges[:-1], + "right": edges[1:], + "top": hist, + "split": [label_map.get(split, split)] * len(hist), + } + ) + renderer = p.quad( + source=source, + top="top", + bottom=0, + left="left", + right="right", + fill_color=split_colors[split], + line_color=split_colors[split], + fill_alpha=0.35, + legend_label=label_map.get(split, split), + muted_alpha=0.1, + ) + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[ + ("Split", "@split"), + ("Bin", "@left{0.000} - @right{0.000}"), + ("Count", "@top"), + ], + ) + ) + + p.legend.click_policy = "mute" + return p + + categories = ( + df_plot[feature] + .astype(str) + .value_counts() + .head(15) + .index.tolist() + ) + filtered = df_plot[df_plot[feature].astype(str).isin(categories)].copy() + p = figure(x_range=categories, width=width, height=height, title=title, tools=tools) + p.yaxis.axis_label = "Count" + p.xaxis.axis_label = title + + n_splits = max(len(split_values), 1) + bar_width = 0.8 / n_splits + start = -0.4 + (bar_width / 2) + + for idx, split in enumerate(split_values): + subset = filtered[filtered[dataset_split].astype(str) == split] + counts = 
subset[feature].astype(str).value_counts().reindex(categories, fill_value=0) + source = ColumnDataSource( + data={ + "category": categories, + "count": counts.values, + "split": [label_map.get(split, split)] * len(categories), + } + ) + renderer = p.vbar( + x=dodge("category", start + idx * bar_width, range=p.x_range), + top="count", + width=bar_width, + source=source, + color=split_colors[split], + line_color=split_colors[split], + fill_alpha=0.8, + legend_label=label_map.get(split, split), + muted_alpha=0.1, + ) + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[ + ("Split", "@split"), + ("Category", "@category"), + ("Count", "@count"), + ], + ) + ) + + p.xaxis.major_label_orientation = 0.8 + p.legend.click_policy = "mute" + return p + + def _correlation_heatmap_figure(self, corr: pd.DataFrame, title: str, width: int, height: int): + corr = corr.fillna(0.0) + labels = list(corr.columns) + corr_long = corr.stack().rename("corr").reset_index().rename(columns={"level_0": "y", "level_1": "x"}) + source = ColumnDataSource(corr_long) + mapper = LinearColorMapper(palette=list(reversed(RdYlBu11)), low=-1, high=1) + + p = figure( + title=title, + x_range=labels, + y_range=list(reversed(labels)), + width=width, + height=height, + tools="pan,wheel_zoom,box_zoom,reset,save", + ) + renderer = p.rect( + x="x", + y="y", + width=1, + height=1, + source=source, + line_color=None, + fill_color={"field": "corr", "transform": mapper}, + ) + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[("Feature X", "@x"), ("Feature Y", "@y"), ("Correlation", "@corr{0.000}")], + ) + ) + p.xaxis.major_label_orientation = 0.9 + p.grid.grid_line_color = None + p.add_layout( + ColorBar(color_mapper=mapper, ticker=BasicTicker(desired_num_ticks=7), label_standoff=8, location=(0, 0)), + "right", + ) + return p + + def _correlations_bokeh( + self, + df: pd.DataFrame, + split_col: str, + max_features: int, + width: int, + height: int, + title: str, + ): + numeric_cols = [col for 
col in df.select_dtypes(include="number").columns if col != split_col] + if not numeric_cols: + raise ValueError("No numeric feature available to compute correlations.") + + if max_features > 0: + numeric_cols = numeric_cols[:max_features] + + split_values = [str(v) for v in df[split_col].dropna().unique().tolist()] if split_col in df.columns else [] + if not split_values: + corr = df[numeric_cols].corr(numeric_only=True) + return self._correlation_heatmap_figure(corr, title=title or "Correlations", width=width, height=height) + + panels = [] + for split in split_values: + subset = df[df[split_col].astype(str) == split][numeric_cols] + corr = subset.corr(numeric_only=True) + panel_title = f"{title} - {split}" if title else f"Correlations - {split}" + panels.append(TabPanel(child=self._correlation_heatmap_figure(corr, panel_title, width, height), title=split)) + return Tabs(tabs=panels) + + def _feature_importance_bokeh(self, explainer, title: str): + if getattr(explainer, "features_imp", None) is None: + explainer.compute_features_import() + features_imp = explainer.features_imp + if isinstance(features_imp, list): + if not features_imp: + raise ValueError("features_imp is empty.") + features_imp = features_imp[0] + + ordered = features_imp.sort_values(ascending=False).head(20) + labels = [explainer.features_dict.get(name, name) for name in ordered.index.tolist()] + source = ColumnDataSource(data={"feature": list(reversed(labels)), "importance": list(reversed(ordered.values))}) + p = figure( + title=title or "Feature importance", + y_range=list(reversed(labels)), + width=900, + height=560, + tools="pan,wheel_zoom,box_zoom,reset,save", + ) + renderer = p.hbar(y="feature", right="importance", height=0.7, source=source, color="#ffbb00") + p.xaxis.axis_label = "Importance" + p.yaxis.axis_label = "Feature" + p.grid.grid_line_alpha = 0.25 + p.add_tools(HoverTool(renderers=[renderer], tooltips=[("Feature", "@feature"), ("Importance", "@importance{0.00}")])) + return p + + 
@staticmethod + def _select_contrib_frame(contributions, label=None): + if isinstance(contributions, list): + if not contributions: + raise ValueError("Contributions list is empty.") + if isinstance(label, int) and 0 <= label < len(contributions): + return contributions[label] + return contributions[0] + return contributions + + def _contribution_bokeh(self, explainer, feature: str, max_points: int, title: str): + contrib_df = self._select_contrib_frame(explainer.contributions) + if feature not in contrib_df.columns or feature not in explainer.x_init.columns: + raise ValueError(f"Unknown feature '{feature}' for contribution_plot block.") + + plot_df = pd.DataFrame({ + "feature_value": explainer.x_init[feature], + "contribution": contrib_df[feature], + }).dropna() + if max_points and len(plot_df) > max_points: + plot_df = plot_df.sample(n=max_points, random_state=0) + + tools = "pan,wheel_zoom,box_zoom,reset,save" + if is_numeric_dtype(plot_df["feature_value"]): + source = ColumnDataSource(plot_df) + p = figure(title=title, width=900, height=500, tools=tools) + renderer = p.scatter("feature_value", "contribution", source=source, size=6, alpha=0.55, color="#777777") + p.xaxis.axis_label = self._feature_label(feature) + p.yaxis.axis_label = "Contribution" + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[(self._feature_label(feature), "@feature_value"), ("Contribution", "@contribution{0.0000}")], + ) + ) + return p + + plot_df["feature_value"] = plot_df["feature_value"].astype(str) + top = plot_df["feature_value"].value_counts().head(20).index.tolist() + plot_df = plot_df[plot_df["feature_value"].isin(top)] + source = ColumnDataSource(plot_df) + p = figure(title=title, x_range=top, width=900, height=500, tools=tools) + renderer = p.scatter( + x=jitter("feature_value", width=0.35, range=p.x_range), + y="contribution", + source=source, + size=6, + alpha=0.5, + color="#777777", + ) + p.xaxis.major_label_orientation = 0.8 + p.xaxis.axis_label = 
self._feature_label(feature) + p.yaxis.axis_label = "Contribution" + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[(self._feature_label(feature), "@feature_value"), ("Contribution", "@contribution{0.0000}")], + ) + ) + return p + + def _interactions_bokeh(self, explainer, col1: str, col2: str, max_points: int, title: str): + if col1 not in explainer.x_init.columns or col2 not in explainer.x_init.columns: + raise ValueError(f"Unknown interaction pair '{col1}', '{col2}'.") + + plot_df = explainer.x_init[[col1, col2]].dropna().copy() + if max_points and len(plot_df) > max_points: + plot_df = plot_df.sample(n=max_points, random_state=0) + + source = ColumnDataSource(plot_df.rename(columns={col1: "x", col2: "y"})) + p = figure(title=title, width=900, height=500, tools="pan,wheel_zoom,box_zoom,reset,save") + renderer = p.scatter("x", "y", source=source, size=6, alpha=0.55, color="#2255aa") + p.xaxis.axis_label = self._feature_label(col1) + p.yaxis.axis_label = self._feature_label(col2) + p.add_tools(HoverTool(renderers=[renderer], tooltips=[(self._feature_label(col1), "@x"), (self._feature_label(col2), "@y")])) + return p + + def _target_distribution_bokeh(self, df_target: pd.DataFrame, target_name: str, width: int, height: int, title: str): + split_col = "_dataset" + split_values = [str(v) for v in df_target[split_col].dropna().unique().tolist()] + label_map = {"pred": "pred", "true": "true"} + split_colors = self._resolve_split_colors(split_values, self._performance_distribution_colors()) + tools = "pan,wheel_zoom,box_zoom,reset,save" + + if is_numeric_dtype(df_target[target_name]): + bins = min(max(10, int(np.sqrt(len(df_target)))), 40) + p = figure(width=width, height=height, title=title, tools=tools) + p.yaxis.axis_label = "Count" + p.xaxis.axis_label = target_name + for split in split_values: + subset = df_target[df_target[split_col].astype(str) == split][target_name].astype(float) + if subset.empty: + continue + hist, edges = np.histogram(subset, 
bins=bins) + source = ColumnDataSource( + data={ + "left": edges[:-1], + "right": edges[1:], + "top": hist, + "split": [label_map.get(split, split)] * len(hist), + } + ) + renderer = p.quad( + source=source, + top="top", + bottom=0, + left="left", + right="right", + fill_color=split_colors[split], + line_color=split_colors[split], + fill_alpha=0.35, + legend_label=label_map.get(split, split), + muted_alpha=0.1, + ) + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[("Split", "@split"), ("Bin", "@left{0.000} - @right{0.000}"), ("Count", "@top")], + ) + ) + p.legend.click_policy = "mute" + return p + + categories = df_target[target_name].astype(str).value_counts().head(15).index.tolist() + filtered = df_target[df_target[target_name].astype(str).isin(categories)].copy() + p = figure(x_range=categories, width=width, height=height, title=title, tools=tools) + p.yaxis.axis_label = "Count" + p.xaxis.axis_label = target_name + + n_splits = max(len(split_values), 1) + bar_width = 0.8 / n_splits + start = -0.4 + (bar_width / 2) + for idx, split in enumerate(split_values): + subset = filtered[filtered[split_col].astype(str) == split] + counts = subset[target_name].astype(str).value_counts().reindex(categories, fill_value=0) + source = ColumnDataSource( + data={ + "category": categories, + "count": counts.values, + "split": [label_map.get(split, split)] * len(categories), + } + ) + renderer = p.vbar( + x=dodge("category", start + idx * bar_width, range=p.x_range), + top="count", + width=bar_width, + source=source, + color=split_colors[split], + line_color=split_colors[split], + fill_alpha=0.8, + legend_label=label_map.get(split, split), + muted_alpha=0.1, + ) + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[("Split", "@split"), ("Category", "@category"), ("Count", "@count")], + ) + ) + p.xaxis.major_label_orientation = 0.8 + p.legend.click_policy = "mute" + return p + + def _confusion_matrix_bokeh(self, y_true, y_pred, title: str): + true_series = 
pd.Series(y_true).astype(str) + pred_series = pd.Series(y_pred).astype(str) + classes = sorted(set(true_series.unique()) | set(pred_series.unique())) + cm = confusion_matrix(true_series, pred_series, labels=classes) + cm_df = ( + pd.DataFrame(cm, index=classes, columns=classes) + .stack() + .rename("count") + .reset_index() + .rename(columns={"level_0": "true", "level_1": "pred"}) + ) + source = ColumnDataSource(cm_df) + + mapper = LinearColorMapper(palette=YlOrRd9, low=float(cm.min()), high=float(max(cm.max(), 1))) + p = figure( + title=title, + x_range=classes, + y_range=list(reversed(classes)), + width=750, + height=650, + tools="pan,wheel_zoom,box_zoom,reset,save", + ) + renderer = p.rect( + x="pred", + y="true", + width=1, + height=1, + source=source, + line_color="white", + fill_color={"field": "count", "transform": mapper}, + ) + p.text(x="pred", y="true", text="count", source=source, text_align="center", text_baseline="middle") + p.xaxis.axis_label = "Predicted" + p.yaxis.axis_label = "True" + p.xaxis.major_label_orientation = 0.8 + p.add_tools(HoverTool(renderers=[renderer], tooltips=[("True", "@true"), ("Predicted", "@pred"), ("Count", "@count")])) + p.add_layout( + ColorBar(color_mapper=mapper, ticker=BasicTicker(desired_num_ticks=6), label_standoff=8, location=(0, 0)), + "right", + ) + return p + @staticmethod def _performance_distribution_colors() -> dict: return {"pred": "#2255aa", "true": "#ffbb00"} @staticmethod - def _plotly_html(fig) -> str: - return render_plotly_pane_html(fig) + def _bokeh_html(fig) -> str: + return render_bokeh_pane_html(fig) @staticmethod def _render_key_value_rows(items: dict) -> str: diff --git a/shapash/report/smart_report/panel_support.py b/shapash/report/smart_report/panel_support.py index 4eb9a902..4309ff57 100644 --- a/shapash/report/smart_report/panel_support.py +++ b/shapash/report/smart_report/panel_support.py @@ -3,28 +3,26 @@ from __future__ import annotations import re -from functools import lru_cache import panel 
as pn from panel.io.resources import CDN_DIST, Resources -def render_plotly_pane_html(fig) -> str: - """Render a Plotly figure as a standalone Panel fragment.""" - _enable_panel_plotly() - pane = pn.pane.Plotly(fig, config={"responsive": True}) +def render_bokeh_pane_html(fig) -> str: + """Render a Bokeh figure as a standalone Panel fragment.""" + _enable_panel_extensions() + pane = pn.pane.Bokeh(fig) bundle = pane._repr_mimebundle_() data = bundle[0] if isinstance(bundle, tuple) else bundle html = data.get("text/html") if not html: - raise ValueError("Panel Plotly pane did not return HTML output.") + raise ValueError("Panel Bokeh pane did not return HTML output.") return f'
{html}
' -@lru_cache(maxsize=1) def panel_resource_tags() -> str: """Return the CSS and JS tags required to hydrate Panel panes.""" - _enable_panel_plotly() + _enable_panel_extensions() resources = Resources(mode="cdn") css_html = _normalize_panel_css(resources.render_css()) js_html = resources.render_js() if callable(resources.render_js) else resources.render_js @@ -32,8 +30,8 @@ def panel_resource_tags() -> str: return "\n".join(part for part in [css_html, js_html] if part) -def _enable_panel_plotly() -> None: - pn.extension("plotly") +def _enable_panel_extensions() -> None: + pn.extension() def _normalize_panel_css(css_html: str) -> str: diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py index 44ad593f..c1fe4174 100644 --- a/tutorial/generate_report/demo.py +++ b/tutorial/generate_report/demo.py @@ -3,8 +3,10 @@ from pathlib import Path import pandas as pd -import plotly.express as px import yaml +from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, HoverTool, LinearColorMapper, Span +from bokeh.palettes import RdYlBu11 +from bokeh.plotting import figure from category_encoders import OrdinalEncoder from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import train_test_split @@ -81,10 +83,57 @@ def block_relationship_target( df_train = self.x_train_pre.copy() df_train[target_name] = self.y_train - fig = px.box(df_train, x=feature, y=target_name) + grouped = df_train.groupby(feature)[target_name] + q1 = grouped.quantile(0.25) + q2 = grouped.quantile(0.5) + q3 = grouped.quantile(0.75) + iqr = q3 - q1 + upper = (q3 + 1.5 * iqr).clip(upper=grouped.max()) + lower = (q1 - 1.5 * iqr).clip(lower=grouped.min()) + + cats = [str(c) for c in q1.index.tolist()] + source = ColumnDataSource( + data={ + "cat": cats, + "q1": q1.values, + "q2": q2.values, + "q3": q3.values, + "upper": upper.values, + "lower": lower.values, + } + ) + + p = figure( + title=title, + x_range=cats, + width=900, + height=500, + 
tools="pan,wheel_zoom,box_zoom,reset,save", + ) + p.segment("cat", "upper", "cat", "q3", source=source, line_color="#444444") + p.segment("cat", "lower", "cat", "q1", source=source, line_color="#444444") + p.vbar("cat", 0.7, "q2", "q3", source=source, fill_color="#9ecae1", line_color="#2b8cbe") + p.vbar("cat", 0.7, "q1", "q2", source=source, fill_color="#fdd0a2", line_color="#d95f0e") + p.rect("cat", "lower", 0.2, 0.001, source=source, line_color="#444444") + p.rect("cat", "upper", 0.2, 0.001, source=source, line_color="#444444") + p.add_tools( + HoverTool( + tooltips=[ + (feature, "@cat"), + ("Q1", "@q1{0,0.00}"), + ("Median", "@q2{0,0.00}"), + ("Q3", "@q3{0,0.00}"), + ("Lower", "@lower{0,0.00}"), + ("Upper", "@upper{0,0.00}"), + ] + ) + ) + p.xaxis.major_label_orientation = 0.8 + p.xaxis.axis_label = feature + p.yaxis.axis_label = target_name if max_y is not None: - fig.update_yaxes(range=[0, max_y]) - return self._wrap_section_content(title, self._plotly_html(fig)) + p.y_range.end = max_y + return self._wrap_section_content("", self._bokeh_html(p)) def block_training_correlations( self, @@ -100,8 +149,54 @@ def block_training_correlations( if max_features > 0 and corr.shape[0] > max_features: corr = corr.iloc[:max_features, :max_features] - fig = px.imshow(corr, color_continuous_scale="YlGnBu", zmin=-1, zmax=1, aspect="auto") - return self._wrap_section_content(title, self._plotly_html(fig)) + corr = corr.fillna(0.0) + x_labels = list(corr.columns) + y_labels = list(corr.index) + corr_long = ( + corr.stack() + .rename("corr") + .reset_index() + .rename(columns={"level_0": "y", "level_1": "x"}) + ) + source = ColumnDataSource(corr_long) + + color_mapper = LinearColorMapper(palette=list(reversed(RdYlBu11)), low=-1, high=1) + p = figure( + title=title, + x_range=x_labels, + y_range=list(reversed(y_labels)), + width=950, + height=650, + tools="pan,wheel_zoom,box_zoom,reset,save", + toolbar_location="right", + ) + renderer = p.rect( + x="x", + y="y", + width=1, + 
height=1, + source=source, + line_color=None, + fill_color={"field": "corr", "transform": color_mapper}, + ) + + p.add_tools( + HoverTool( + renderers=[renderer], + tooltips=[("Feature X", "@x"), ("Feature Y", "@y"), ("Correlation", "@corr{0.000}")], + ) + ) + p.xaxis.major_label_orientation = 0.9 + p.grid.grid_line_color = None + + color_bar = ColorBar( + color_mapper=color_mapper, + ticker=BasicTicker(desired_num_ticks=7), + label_standoff=8, + location=(0, 0), + ) + p.add_layout(color_bar, "right") + return self._wrap_section_content("", self._bokeh_html(p)) def block_performance_metrics( self, @@ -126,13 +221,58 @@ def block_performance_metrics( return self.block_badge_row(title=title, badges=metric_items) + def block_feature_importance(self, title: str = "Model explainability", color: str = "gold", label=None): + explainer = self._require_explainer("feature_importance") + if getattr(explainer, "features_imp", None) is None: + explainer.compute_features_import() + + features_imp = explainer.features_imp + if isinstance(features_imp, list): + if not features_imp: + raise ValueError("features_imp is empty.") + features_imp = features_imp[0] + + top_n = 20 + ordered = features_imp.sort_values(ascending=False).head(top_n) + display_names = [explainer.features_dict.get(name, name) for name in ordered.index.tolist()] + source = ColumnDataSource( + data={ + "feature": list(reversed(display_names)), + "importance": list(reversed(ordered.values.tolist())), + } + ) + + p = figure( + title=title, + y_range=list(reversed(display_names)), + width=900, + height=560, + tools="pan,wheel_zoom,box_zoom,reset,save", + ) + renderer = p.hbar(y="feature", right="importance", height=0.7, source=source, color="#ffbb00") + p.xaxis.axis_label = "Importance" + p.yaxis.axis_label = "Feature" + p.grid.grid_line_alpha = 0.25 + p.add_tools(HoverTool(renderers=[renderer], tooltips=[("Feature", "@feature"), ("Importance", "@importance{0.00}")])) + return self._wrap_section_content("", 
self._bokeh_html(p)) + def block_pred_vs_true(self, title: str = "y_pred vs y_test", color: str = "orange"): if self.y_test is None or self.y_pred is None: raise ValueError("pred_vs_true block requires y_test and y_pred.") scatter_df = pd.DataFrame({"y_test": self.y_test, "y_pred": self.y_pred}) - fig = px.scatter(scatter_df, x="y_test", y="y_pred") - return self._wrap_section_content(title, self._plotly_html(fig)) + source = ColumnDataSource(scatter_df) + min_v = float(min(scatter_df["y_test"].min(), scatter_df["y_pred"].min())) + max_v = float(max(scatter_df["y_test"].max(), scatter_df["y_pred"].max())) + p = figure(title=title, width=900, height=500, tools="pan,wheel_zoom,box_zoom,reset,save") + p.scatter("y_test", "y_pred", source=source, size=7, alpha=0.6, color="#2255aa") + ref_line = Span(location=0, dimension="width") + p.renderers.append(ref_line) + p.line([min_v, max_v], [min_v, max_v], line_dash="dashed", color="#777777", line_width=2) + p.xaxis.axis_label = "y_test" + p.yaxis.axis_label = "y_pred" + p.add_tools(HoverTool(tooltips=[("y_test", "@y_test{0,0.00}"), ("y_pred", "@y_pred{0,0.00}")])) + return self._wrap_section_content("", self._bokeh_html(p)) def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Series, pd.Series]: diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 6ac930ca..3622a80a 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -79,3 +79,14 @@ sections: You can add as many graphs, text, or other cells as you want. The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file. 
color: gray + + - type: feature_distribution + params: + title: "Feature distribution" + feature: "OverallQual" + color: blue + dataset_split: "data_train_test" + prediction_label: "test" + training_label: "train" + width: 900 + height: 500 From 607263be9abd9cb0d303ef0b6a1b9468e2f399e7 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Thu, 7 May 2026 11:56:23 +0200 Subject: [PATCH 13/43] Revert "using bokeh instead of ploty" This reverts commit 3e5fb8bbcf453f39077fb39e6b4c4a1f3e8bfe99. --- shapash/report/smart_report/assets.py | 2 +- shapash/report/smart_report/blocks.py | 511 ++---------------- shapash/report/smart_report/panel_support.py | 18 +- tutorial/generate_report/demo.py | 156 +----- tutorial/generate_report/report_config_v1.yml | 11 - 5 files changed, 55 insertions(+), 643 deletions(-) diff --git a/shapash/report/smart_report/assets.py b/shapash/report/smart_report/assets.py index 3867b522..29bdcbc6 100644 --- a/shapash/report/smart_report/assets.py +++ b/shapash/report/smart_report/assets.py @@ -103,7 +103,7 @@ .shapash-divider { border-bottom: 1px solid #eee; margin: 50px 0; } .scroll-section { scroll-margin-top: 40px; } .panel-plot { width: 100%; overflow-x: auto; } - .panel-plot .bk-root { width: 100%; max-width: 100%; } + .panel-plot .bk-root, .panel-plot .plotly-graph-div { width: 100%; max-width: 100%; } @media (max-width: 900px) { .sidebar { display: none; } .container { margin-left: 0; padding: 30px 40px 40vh; } diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index 80510fd2..f2defdd6 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -2,17 +2,12 @@ from __future__ import annotations -import numpy as np import pandas as pd -from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, HoverTool, LinearColorMapper, TabPanel, Tabs -from bokeh.palettes import RdYlBu11, YlOrRd9 -from bokeh.plotting import figure -from bokeh.transform import dodge, jitter 
-from pandas.api.types import is_numeric_dtype -from sklearn.metrics import confusion_matrix +from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix +from shapash.plots.plot_univariate import plot_distribution from shapash.report.data_analysis import perform_global_dataframe_analysis -from shapash.report.smart_report.panel_support import render_bokeh_pane_html +from shapash.report.smart_report.panel_support import render_plotly_pane_html from shapash.report.smart_report.validation import stats_to_table from shapash.utils.transform import apply_postprocessing, handle_categorical_missing, inverse_transform from shapash.utils.utils import compute_sorted_variables_interactions_list_indices @@ -104,15 +99,15 @@ def block_feature_distribution( if feature not in self.df_train_test.columns: raise ValueError(f"Unknown feature '{feature}' for feature_distribution block.") - fig = self._feature_distribution_bokeh( - feature=feature, - dataset_split=dataset_split, - prediction_label=prediction_label, - training_label=training_label, + fig = plot_distribution( + df_all=self.df_train_test, + col=feature, + hue=dataset_split, + colors_dict=self._feature_distribution_colors(), width=width, height=height, ) - return self._wrap_section_content(title or self._feature_label(feature), self._bokeh_html(fig)) + return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) def block_correlations_plot( self, @@ -128,22 +123,23 @@ def block_correlations_plot( faceted by dataset split. 
""" self._require_train_test_data("correlations_plot") + explainer = self._require_explainer("correlations_plot") resolved_width = width or (900 if len(self.df_train_test["data_train_test"].unique()) > 1 else 500) - fig = self._correlations_bokeh( - df=self.df_train_test, - split_col="data_train_test", + fig = explainer.plot.correlations_plot( + self.df_train_test, + optimized=True, + facet_col="data_train_test", max_features=max_features, width=resolved_width, height=height, - title=title, ) - return self._wrap_section_content("", self._bokeh_html(fig)) + return self._wrap_section_content(title, self._plotly_html(fig)) def block_feature_importance(self, title: str = "", color: str = "green", label=None) -> str: """Return the HTML for the explainer feature-importance plot.""" explainer = self._require_explainer("feature_importance") - fig = self._feature_importance_bokeh(explainer=explainer, title=title) - return self._wrap_section_content("", self._bokeh_html(fig)) + fig = explainer.plot.features_importance(label=label) + return self._wrap_section_content(title, self._plotly_html(fig)) def block_contribution_plot( self, @@ -159,13 +155,11 @@ def block_contribution_plot( maximum point count when no explicit limit is provided. 
""" explainer = self._require_explainer("contribution_plot") - fig = self._contribution_bokeh( - explainer=explainer, - feature=feature, - max_points=max_points or self.max_points, - title=title or self._feature_label(feature), - ) - return self._wrap_section_content("", self._bokeh_html(fig)) + fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) + for trace in fig.data: + if trace.type == "bar": + trace.marker.color = "lightgrey" + return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) def block_interactions_plot( self, @@ -182,14 +176,11 @@ def block_interactions_plot( """ explainer = self._require_explainer("interactions_plot") feature_one, feature_two = self._resolve_interaction_pair(col1, col2) - fig = self._interactions_bokeh( - explainer=explainer, - col1=feature_one, - col2=feature_two, - max_points=max_points or self.max_points, - title=title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}", + fig = explainer.plot.interactions_plot( + col1=feature_one, col2=feature_two, max_points=max_points or self.max_points ) - return self._wrap_section_content("", self._bokeh_html(fig)) + resolved_title = title or f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}" + return self._wrap_section_content(resolved_title, self._plotly_html(fig)) def block_target_distribution( self, @@ -214,14 +205,15 @@ def block_target_distribution( pd.DataFrame({target_name: self.y_test}).assign(_dataset="true"), ] ).reset_index(drop=True) - fig = self._target_distribution_bokeh( - df_target=df_target, - target_name=target_name, + fig = plot_distribution( + df_all=df_target, + col=target_name, + hue="_dataset", + colors_dict=self._performance_distribution_colors(), width=width, height=height, - title=title or "Target distribution", ) - return self._wrap_section_content("", self._bokeh_html(fig)) + return self._wrap_section_content(title or "Target 
distribution", self._plotly_html(fig)) def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: """Return the HTML for a classification confusion matrix. @@ -232,12 +224,8 @@ def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: explainer = self._require_explainer("confusion_matrix") if self.y_test is None or self.y_pred is None: raise ValueError("confusion_matrix block requires y_test and predicted values from the explainer.") - fig = self._confusion_matrix_bokeh( - y_true=self.y_test, - y_pred=self.y_pred, - title=title or "Confusion matrix", - ) - return self._wrap_section_content("", self._bokeh_html(fig)) + fig = plot_confusion_matrix(y_true=self.y_test, y_pred=self.y_pred, colors_dict=explainer.colors_dict) + return self._wrap_section_content(title or "Confusion matrix", self._plotly_html(fig)) def _preprocess_train_data(self, x_train: pd.DataFrame | None) -> pd.DataFrame | None: if x_train is None or self.explainer is None: @@ -307,440 +295,13 @@ def _feature_distribution_colors(self) -> dict: explainer = self._require_explainer("feature_distribution") return explainer.colors_dict["report_feature_distribution"] - @staticmethod - def _palette_color(index: int, fallback: list[str]) -> str: - return fallback[index % len(fallback)] - - def _resolve_split_colors(self, split_values: list[str], colors_dict: dict | None = None) -> dict[str, str]: - base = colors_dict or {} - fallback = ["#2255aa", "#ffbb00", "#666666", "#44aa99", "#cc6677"] - return { - split: base.get(split, self._palette_color(idx, fallback)) - for idx, split in enumerate(split_values) - } - - def _feature_distribution_bokeh( - self, - feature: str, - dataset_split: str, - prediction_label: str, - training_label: str, - width: int, - height: int, - ): - df_plot = self.df_train_test[[feature, dataset_split]].dropna().copy() - if df_plot.empty: - raise ValueError(f"No data available for feature '{feature}' after dropping missing values.") - - 
label_map = {"test": prediction_label, "train": training_label} - split_values = [str(val) for val in df_plot[dataset_split].dropna().unique().tolist()] - split_colors = self._resolve_split_colors(split_values, self._feature_distribution_colors()) - - tools = "pan,wheel_zoom,box_zoom,reset,save" - title = self._feature_label(feature) - if is_numeric_dtype(df_plot[feature]): - bins = min(max(10, int(np.sqrt(len(df_plot)))), 40) - p = figure(width=width, height=height, title=title, tools=tools) - p.yaxis.axis_label = "Count" - p.xaxis.axis_label = title - - for split in split_values: - subset = df_plot[df_plot[dataset_split].astype(str) == split][feature].astype(float) - if subset.empty: - continue - hist, edges = np.histogram(subset, bins=bins) - source = ColumnDataSource( - data={ - "left": edges[:-1], - "right": edges[1:], - "top": hist, - "split": [label_map.get(split, split)] * len(hist), - } - ) - renderer = p.quad( - source=source, - top="top", - bottom=0, - left="left", - right="right", - fill_color=split_colors[split], - line_color=split_colors[split], - fill_alpha=0.35, - legend_label=label_map.get(split, split), - muted_alpha=0.1, - ) - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[ - ("Split", "@split"), - ("Bin", "@left{0.000} - @right{0.000}"), - ("Count", "@top"), - ], - ) - ) - - p.legend.click_policy = "mute" - return p - - categories = ( - df_plot[feature] - .astype(str) - .value_counts() - .head(15) - .index.tolist() - ) - filtered = df_plot[df_plot[feature].astype(str).isin(categories)].copy() - p = figure(x_range=categories, width=width, height=height, title=title, tools=tools) - p.yaxis.axis_label = "Count" - p.xaxis.axis_label = title - - n_splits = max(len(split_values), 1) - bar_width = 0.8 / n_splits - start = -0.4 + (bar_width / 2) - - for idx, split in enumerate(split_values): - subset = filtered[filtered[dataset_split].astype(str) == split] - counts = subset[feature].astype(str).value_counts().reindex(categories, 
fill_value=0) - source = ColumnDataSource( - data={ - "category": categories, - "count": counts.values, - "split": [label_map.get(split, split)] * len(categories), - } - ) - renderer = p.vbar( - x=dodge("category", start + idx * bar_width, range=p.x_range), - top="count", - width=bar_width, - source=source, - color=split_colors[split], - line_color=split_colors[split], - fill_alpha=0.8, - legend_label=label_map.get(split, split), - muted_alpha=0.1, - ) - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[ - ("Split", "@split"), - ("Category", "@category"), - ("Count", "@count"), - ], - ) - ) - - p.xaxis.major_label_orientation = 0.8 - p.legend.click_policy = "mute" - return p - - def _correlation_heatmap_figure(self, corr: pd.DataFrame, title: str, width: int, height: int): - corr = corr.fillna(0.0) - labels = list(corr.columns) - corr_long = corr.stack().rename("corr").reset_index().rename(columns={"level_0": "y", "level_1": "x"}) - source = ColumnDataSource(corr_long) - mapper = LinearColorMapper(palette=list(reversed(RdYlBu11)), low=-1, high=1) - - p = figure( - title=title, - x_range=labels, - y_range=list(reversed(labels)), - width=width, - height=height, - tools="pan,wheel_zoom,box_zoom,reset,save", - ) - renderer = p.rect( - x="x", - y="y", - width=1, - height=1, - source=source, - line_color=None, - fill_color={"field": "corr", "transform": mapper}, - ) - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[("Feature X", "@x"), ("Feature Y", "@y"), ("Correlation", "@corr{0.000}")], - ) - ) - p.xaxis.major_label_orientation = 0.9 - p.grid.grid_line_color = None - p.add_layout( - ColorBar(color_mapper=mapper, ticker=BasicTicker(desired_num_ticks=7), label_standoff=8, location=(0, 0)), - "right", - ) - return p - - def _correlations_bokeh( - self, - df: pd.DataFrame, - split_col: str, - max_features: int, - width: int, - height: int, - title: str, - ): - numeric_cols = [col for col in df.select_dtypes(include="number").columns if col != 
split_col] - if not numeric_cols: - raise ValueError("No numeric feature available to compute correlations.") - - if max_features > 0: - numeric_cols = numeric_cols[:max_features] - - split_values = [str(v) for v in df[split_col].dropna().unique().tolist()] if split_col in df.columns else [] - if not split_values: - corr = df[numeric_cols].corr(numeric_only=True) - return self._correlation_heatmap_figure(corr, title=title or "Correlations", width=width, height=height) - - panels = [] - for split in split_values: - subset = df[df[split_col].astype(str) == split][numeric_cols] - corr = subset.corr(numeric_only=True) - panel_title = f"{title} - {split}" if title else f"Correlations - {split}" - panels.append(TabPanel(child=self._correlation_heatmap_figure(corr, panel_title, width, height), title=split)) - return Tabs(tabs=panels) - - def _feature_importance_bokeh(self, explainer, title: str): - if getattr(explainer, "features_imp", None) is None: - explainer.compute_features_import() - features_imp = explainer.features_imp - if isinstance(features_imp, list): - if not features_imp: - raise ValueError("features_imp is empty.") - features_imp = features_imp[0] - - ordered = features_imp.sort_values(ascending=False).head(20) - labels = [explainer.features_dict.get(name, name) for name in ordered.index.tolist()] - source = ColumnDataSource(data={"feature": list(reversed(labels)), "importance": list(reversed(ordered.values))}) - p = figure( - title=title or "Feature importance", - y_range=list(reversed(labels)), - width=900, - height=560, - tools="pan,wheel_zoom,box_zoom,reset,save", - ) - renderer = p.hbar(y="feature", right="importance", height=0.7, source=source, color="#ffbb00") - p.xaxis.axis_label = "Importance" - p.yaxis.axis_label = "Feature" - p.grid.grid_line_alpha = 0.25 - p.add_tools(HoverTool(renderers=[renderer], tooltips=[("Feature", "@feature"), ("Importance", "@importance{0.00}")])) - return p - - @staticmethod - def _select_contrib_frame(contributions, 
label=None): - if isinstance(contributions, list): - if not contributions: - raise ValueError("Contributions list is empty.") - if isinstance(label, int) and 0 <= label < len(contributions): - return contributions[label] - return contributions[0] - return contributions - - def _contribution_bokeh(self, explainer, feature: str, max_points: int, title: str): - contrib_df = self._select_contrib_frame(explainer.contributions) - if feature not in contrib_df.columns or feature not in explainer.x_init.columns: - raise ValueError(f"Unknown feature '{feature}' for contribution_plot block.") - - plot_df = pd.DataFrame({ - "feature_value": explainer.x_init[feature], - "contribution": contrib_df[feature], - }).dropna() - if max_points and len(plot_df) > max_points: - plot_df = plot_df.sample(n=max_points, random_state=0) - - tools = "pan,wheel_zoom,box_zoom,reset,save" - if is_numeric_dtype(plot_df["feature_value"]): - source = ColumnDataSource(plot_df) - p = figure(title=title, width=900, height=500, tools=tools) - renderer = p.scatter("feature_value", "contribution", source=source, size=6, alpha=0.55, color="#777777") - p.xaxis.axis_label = self._feature_label(feature) - p.yaxis.axis_label = "Contribution" - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[(self._feature_label(feature), "@feature_value"), ("Contribution", "@contribution{0.0000}")], - ) - ) - return p - - plot_df["feature_value"] = plot_df["feature_value"].astype(str) - top = plot_df["feature_value"].value_counts().head(20).index.tolist() - plot_df = plot_df[plot_df["feature_value"].isin(top)] - source = ColumnDataSource(plot_df) - p = figure(title=title, x_range=top, width=900, height=500, tools=tools) - renderer = p.scatter( - x=jitter("feature_value", width=0.35, range=p.x_range), - y="contribution", - source=source, - size=6, - alpha=0.5, - color="#777777", - ) - p.xaxis.major_label_orientation = 0.8 - p.xaxis.axis_label = self._feature_label(feature) - p.yaxis.axis_label = "Contribution" - 
p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[(self._feature_label(feature), "@feature_value"), ("Contribution", "@contribution{0.0000}")], - ) - ) - return p - - def _interactions_bokeh(self, explainer, col1: str, col2: str, max_points: int, title: str): - if col1 not in explainer.x_init.columns or col2 not in explainer.x_init.columns: - raise ValueError(f"Unknown interaction pair '{col1}', '{col2}'.") - - plot_df = explainer.x_init[[col1, col2]].dropna().copy() - if max_points and len(plot_df) > max_points: - plot_df = plot_df.sample(n=max_points, random_state=0) - - source = ColumnDataSource(plot_df.rename(columns={col1: "x", col2: "y"})) - p = figure(title=title, width=900, height=500, tools="pan,wheel_zoom,box_zoom,reset,save") - renderer = p.scatter("x", "y", source=source, size=6, alpha=0.55, color="#2255aa") - p.xaxis.axis_label = self._feature_label(col1) - p.yaxis.axis_label = self._feature_label(col2) - p.add_tools(HoverTool(renderers=[renderer], tooltips=[(self._feature_label(col1), "@x"), (self._feature_label(col2), "@y")])) - return p - - def _target_distribution_bokeh(self, df_target: pd.DataFrame, target_name: str, width: int, height: int, title: str): - split_col = "_dataset" - split_values = [str(v) for v in df_target[split_col].dropna().unique().tolist()] - label_map = {"pred": "pred", "true": "true"} - split_colors = self._resolve_split_colors(split_values, self._performance_distribution_colors()) - tools = "pan,wheel_zoom,box_zoom,reset,save" - - if is_numeric_dtype(df_target[target_name]): - bins = min(max(10, int(np.sqrt(len(df_target)))), 40) - p = figure(width=width, height=height, title=title, tools=tools) - p.yaxis.axis_label = "Count" - p.xaxis.axis_label = target_name - for split in split_values: - subset = df_target[df_target[split_col].astype(str) == split][target_name].astype(float) - if subset.empty: - continue - hist, edges = np.histogram(subset, bins=bins) - source = ColumnDataSource( - data={ - "left": edges[:-1], 
- "right": edges[1:], - "top": hist, - "split": [label_map.get(split, split)] * len(hist), - } - ) - renderer = p.quad( - source=source, - top="top", - bottom=0, - left="left", - right="right", - fill_color=split_colors[split], - line_color=split_colors[split], - fill_alpha=0.35, - legend_label=label_map.get(split, split), - muted_alpha=0.1, - ) - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[("Split", "@split"), ("Bin", "@left{0.000} - @right{0.000}"), ("Count", "@top")], - ) - ) - p.legend.click_policy = "mute" - return p - - categories = df_target[target_name].astype(str).value_counts().head(15).index.tolist() - filtered = df_target[df_target[target_name].astype(str).isin(categories)].copy() - p = figure(x_range=categories, width=width, height=height, title=title, tools=tools) - p.yaxis.axis_label = "Count" - p.xaxis.axis_label = target_name - - n_splits = max(len(split_values), 1) - bar_width = 0.8 / n_splits - start = -0.4 + (bar_width / 2) - for idx, split in enumerate(split_values): - subset = filtered[filtered[split_col].astype(str) == split] - counts = subset[target_name].astype(str).value_counts().reindex(categories, fill_value=0) - source = ColumnDataSource( - data={ - "category": categories, - "count": counts.values, - "split": [label_map.get(split, split)] * len(categories), - } - ) - renderer = p.vbar( - x=dodge("category", start + idx * bar_width, range=p.x_range), - top="count", - width=bar_width, - source=source, - color=split_colors[split], - line_color=split_colors[split], - fill_alpha=0.8, - legend_label=label_map.get(split, split), - muted_alpha=0.1, - ) - p.add_tools( - HoverTool( - renderers=[renderer], - tooltips=[("Split", "@split"), ("Category", "@category"), ("Count", "@count")], - ) - ) - p.xaxis.major_label_orientation = 0.8 - p.legend.click_policy = "mute" - return p - - def _confusion_matrix_bokeh(self, y_true, y_pred, title: str): - true_series = pd.Series(y_true).astype(str) - pred_series = 
pd.Series(y_pred).astype(str) - classes = sorted(set(true_series.unique()) | set(pred_series.unique())) - cm = confusion_matrix(true_series, pred_series, labels=classes) - cm_df = ( - pd.DataFrame(cm, index=classes, columns=classes) - .stack() - .rename("count") - .reset_index() - .rename(columns={"level_0": "true", "level_1": "pred"}) - ) - source = ColumnDataSource(cm_df) - - mapper = LinearColorMapper(palette=YlOrRd9, low=float(cm.min()), high=float(max(cm.max(), 1))) - p = figure( - title=title, - x_range=classes, - y_range=list(reversed(classes)), - width=750, - height=650, - tools="pan,wheel_zoom,box_zoom,reset,save", - ) - renderer = p.rect( - x="pred", - y="true", - width=1, - height=1, - source=source, - line_color="white", - fill_color={"field": "count", "transform": mapper}, - ) - p.text(x="pred", y="true", text="count", source=source, text_align="center", text_baseline="middle") - p.xaxis.axis_label = "Predicted" - p.yaxis.axis_label = "True" - p.xaxis.major_label_orientation = 0.8 - p.add_tools(HoverTool(renderers=[renderer], tooltips=[("True", "@true"), ("Predicted", "@pred"), ("Count", "@count")])) - p.add_layout( - ColorBar(color_mapper=mapper, ticker=BasicTicker(desired_num_ticks=6), label_standoff=8, location=(0, 0)), - "right", - ) - return p - @staticmethod def _performance_distribution_colors() -> dict: return {"pred": "#2255aa", "true": "#ffbb00"} @staticmethod - def _bokeh_html(fig) -> str: - return render_bokeh_pane_html(fig) + def _plotly_html(fig) -> str: + return render_plotly_pane_html(fig) @staticmethod def _render_key_value_rows(items: dict) -> str: diff --git a/shapash/report/smart_report/panel_support.py b/shapash/report/smart_report/panel_support.py index 4309ff57..4eb9a902 100644 --- a/shapash/report/smart_report/panel_support.py +++ b/shapash/report/smart_report/panel_support.py @@ -3,26 +3,28 @@ from __future__ import annotations import re +from functools import lru_cache import panel as pn from panel.io.resources import 
CDN_DIST, Resources -def render_bokeh_pane_html(fig) -> str: - """Render a Bokeh figure as a standalone Panel fragment.""" - _enable_panel_extensions() - pane = pn.pane.Bokeh(fig) +def render_plotly_pane_html(fig) -> str: + """Render a Plotly figure as a standalone Panel fragment.""" + _enable_panel_plotly() + pane = pn.pane.Plotly(fig, config={"responsive": True}) bundle = pane._repr_mimebundle_() data = bundle[0] if isinstance(bundle, tuple) else bundle html = data.get("text/html") if not html: - raise ValueError("Panel Bokeh pane did not return HTML output.") + raise ValueError("Panel Plotly pane did not return HTML output.") return f'
{html}
' +@lru_cache(maxsize=1) def panel_resource_tags() -> str: """Return the CSS and JS tags required to hydrate Panel panes.""" - _enable_panel_extensions() + _enable_panel_plotly() resources = Resources(mode="cdn") css_html = _normalize_panel_css(resources.render_css()) js_html = resources.render_js() if callable(resources.render_js) else resources.render_js @@ -30,8 +32,8 @@ def panel_resource_tags() -> str: return "\n".join(part for part in [css_html, js_html] if part) -def _enable_panel_extensions() -> None: - pn.extension() +def _enable_panel_plotly() -> None: + pn.extension("plotly") def _normalize_panel_css(css_html: str) -> str: diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py index c1fe4174..44ad593f 100644 --- a/tutorial/generate_report/demo.py +++ b/tutorial/generate_report/demo.py @@ -3,10 +3,8 @@ from pathlib import Path import pandas as pd +import plotly.express as px import yaml -from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, HoverTool, LinearColorMapper, Span -from bokeh.palettes import RdYlBu11 -from bokeh.plotting import figure from category_encoders import OrdinalEncoder from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import train_test_split @@ -83,57 +81,10 @@ def block_relationship_target( df_train = self.x_train_pre.copy() df_train[target_name] = self.y_train - grouped = df_train.groupby(feature)[target_name] - q1 = grouped.quantile(0.25) - q2 = grouped.quantile(0.5) - q3 = grouped.quantile(0.75) - iqr = q3 - q1 - upper = (q3 + 1.5 * iqr).clip(upper=grouped.max()) - lower = (q1 - 1.5 * iqr).clip(lower=grouped.min()) - - cats = [str(c) for c in q1.index.tolist()] - source = ColumnDataSource( - data={ - "cat": cats, - "q1": q1.values, - "q2": q2.values, - "q3": q3.values, - "upper": upper.values, - "lower": lower.values, - } - ) - - p = figure( - title=title, - x_range=cats, - width=900, - height=500, - tools="pan,wheel_zoom,box_zoom,reset,save", - ) - 
p.segment("cat", "upper", "cat", "q3", source=source, line_color="#444444") - p.segment("cat", "lower", "cat", "q1", source=source, line_color="#444444") - p.vbar("cat", 0.7, "q2", "q3", source=source, fill_color="#9ecae1", line_color="#2b8cbe") - p.vbar("cat", 0.7, "q1", "q2", source=source, fill_color="#fdd0a2", line_color="#d95f0e") - p.rect("cat", "lower", 0.2, 0.001, source=source, line_color="#444444") - p.rect("cat", "upper", 0.2, 0.001, source=source, line_color="#444444") - p.add_tools( - HoverTool( - tooltips=[ - (feature, "@cat"), - ("Q1", "@q1{0,0.00}"), - ("Median", "@q2{0,0.00}"), - ("Q3", "@q3{0,0.00}"), - ("Lower", "@lower{0,0.00}"), - ("Upper", "@upper{0,0.00}"), - ] - ) - ) - p.xaxis.major_label_orientation = 0.8 - p.xaxis.axis_label = feature - p.yaxis.axis_label = target_name + fig = px.box(df_train, x=feature, y=target_name) if max_y is not None: - p.y_range.end = max_y - return self._wrap_section_content("", self._bokeh_html(p)) + fig.update_yaxes(range=[0, max_y]) + return self._wrap_section_content(title, self._plotly_html(fig)) def block_training_correlations( self, @@ -149,54 +100,8 @@ def block_training_correlations( if max_features > 0 and corr.shape[0] > max_features: corr = corr.iloc[:max_features, :max_features] - corr = corr.fillna(0.0) - x_labels = list(corr.columns) - y_labels = list(corr.index) - corr_long = ( - corr.stack() - .rename("corr") - .reset_index() - .rename(columns={"level_0": "y", "level_1": "x"}) - ) - source = ColumnDataSource(corr_long) - - color_mapper = LinearColorMapper(palette=list(reversed(RdYlBu11)), low=-1, high=1) - p = figure( - title=title, - x_range=x_labels, - y_range=list(reversed(y_labels)), - width=950, - height=650, - tools="pan,wheel_zoom,box_zoom,reset,save", - toolbar_location="right", - ) - renderer = p.rect( - x="x", - y="y", - width=1, - height=1, - source=source, - line_color=None, - fill_color={"field": "corr", "transform": color_mapper}, - ) - - p.add_tools( - HoverTool( - 
renderers=[renderer], - tooltips=[("Feature X", "@x"), ("Feature Y", "@y"), ("Correlation", "@corr{0.000}")], - ) - ) - p.xaxis.major_label_orientation = 0.9 - p.grid.grid_line_color = None - - color_bar = ColorBar( - color_mapper=color_mapper, - ticker=BasicTicker(desired_num_ticks=7), - label_standoff=8, - location=(0, 0), - ) - p.add_layout(color_bar, "right") - return self._wrap_section_content("", self._bokeh_html(p)) + fig = px.imshow(corr, color_continuous_scale="YlGnBu", zmin=-1, zmax=1, aspect="auto") + return self._wrap_section_content(title, self._plotly_html(fig)) def block_performance_metrics( self, @@ -221,58 +126,13 @@ def block_performance_metrics( return self.block_badge_row(title=title, badges=metric_items) - def block_feature_importance(self, title: str = "Model explainability", color: str = "gold", label=None): - explainer = self._require_explainer("feature_importance") - if getattr(explainer, "features_imp", None) is None: - explainer.compute_features_import() - - features_imp = explainer.features_imp - if isinstance(features_imp, list): - if not features_imp: - raise ValueError("features_imp is empty.") - features_imp = features_imp[0] - - top_n = 20 - ordered = features_imp.sort_values(ascending=False).head(top_n) - display_names = [explainer.features_dict.get(name, name) for name in ordered.index.tolist()] - source = ColumnDataSource( - data={ - "feature": list(reversed(display_names)), - "importance": list(reversed(ordered.values.tolist())), - } - ) - - p = figure( - title=title, - y_range=list(reversed(display_names)), - width=900, - height=560, - tools="pan,wheel_zoom,box_zoom,reset,save", - ) - renderer = p.hbar(y="feature", right="importance", height=0.7, source=source, color="#ffbb00") - p.xaxis.axis_label = "Importance" - p.yaxis.axis_label = "Feature" - p.grid.grid_line_alpha = 0.25 - p.add_tools(HoverTool(renderers=[renderer], tooltips=[("Feature", "@feature"), ("Importance", "@importance{0.00}")])) - return 
self._wrap_section_content("", self._bokeh_html(p)) - def block_pred_vs_true(self, title: str = "y_pred vs y_test", color: str = "orange"): if self.y_test is None or self.y_pred is None: raise ValueError("pred_vs_true block requires y_test and y_pred.") scatter_df = pd.DataFrame({"y_test": self.y_test, "y_pred": self.y_pred}) - source = ColumnDataSource(scatter_df) - min_v = float(min(scatter_df["y_test"].min(), scatter_df["y_pred"].min())) - max_v = float(max(scatter_df["y_test"].max(), scatter_df["y_pred"].max())) - p = figure(title=title, width=900, height=500, tools="pan,wheel_zoom,box_zoom,reset,save") - p.scatter("y_test", "y_pred", source=source, size=7, alpha=0.6, color="#2255aa") - ref_line = Span(location=0, dimension="width") - p.renderers.append(ref_line) - p.line([min_v, max_v], [min_v, max_v], line_dash="dashed", color="#777777", line_width=2) - p.xaxis.axis_label = "y_test" - p.yaxis.axis_label = "y_pred" - p.add_tools(HoverTool(tooltips=[("y_test", "@y_test{0,0.00}"), ("y_pred", "@y_pred{0,0.00}")])) - return self._wrap_section_content("", self._bokeh_html(p)) + fig = px.scatter(scatter_df, x="y_test", y="y_pred") + return self._wrap_section_content(title, self._plotly_html(fig)) def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Series, pd.Series]: diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 3622a80a..6ac930ca 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -79,14 +79,3 @@ sections: You can add as many graphs, text, or other cells as you want. The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file. 
color: gray - - - type: feature_distribution - params: - title: "Feature distribution" - feature: "OverallQual" - color: blue - dataset_split: "data_train_test" - prediction_label: "test" - training_label: "train" - width: 900 - height: 500 From 9829803bc64e10ee577e4dd4e1cc9686daf23a06 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 11 May 2026 10:32:02 +0200 Subject: [PATCH 14/43] v2 report working --- shapash/explainer/smart_explainer.py | 200 ++++++------ shapash/report/smart_report/assets.py | 178 +---------- shapash/report/smart_report/blocks.py | 131 ++++++++ shapash/report/smart_report/report_script.js | 60 ++++ shapash/report/smart_report/report_styles.css | 285 ++++++++++++++++++ .../test_report_generation.py | 131 +++----- .../explainer/test_smart_explainer.py | 9 +- tutorial/generate_report/demo.py | 116 +------ tutorial/generate_report/report_config_v1.yml | 3 +- 9 files changed, 616 insertions(+), 497 deletions(-) create mode 100644 shapash/report/smart_report/report_script.js create mode 100644 shapash/report/smart_report/report_styles.css diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index bb40e7b5..8a184365 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -6,17 +6,17 @@ import logging import shutil import tempfile +from pathlib import Path import numpy as np import pandas as pd -import panel as pn +import yaml import shapash.explainer.smart_predictor from shapash.backend import BaseBackend, get_backend_cls_from_name from shapash.backend.shap_backend import get_shap_interaction_values from shapash.manipulation.select_lines import keep_right_contributions from shapash.manipulation.summarize import create_grouped_features_values -from shapash.report import check_report_requirements from shapash.style.style_utils import colors_loading, select_palette from shapash.utils.check import ( check_additional_data, @@ -1671,9 +1671,11 @@ def generate_report( 
Generate an interactive HTML report summarizing the model and its explainability. This method produces a comprehensive HTML report containing visual and textual - insights about the project, dataset, and model performance. - It leverages a predefined or custom Jupyter notebook template to analyze - the model, generate plots, compute metrics, and export the final report. + insights about the project, dataset, and model performance using the + smart_report block-based HTML renderer. + + A report configuration is provided through a YAML file. If no YAML file is + specified, a default configuration is generated automatically. A project information YAML file is required to describe key project details (e.g., model name, author, date, context). @@ -1705,14 +1707,14 @@ def generate_report( Example: `metrics=[{'name': 'F1 score', 'path': 'sklearn.metrics.f1_score'}]` working_dir : str, optional - Directory used to temporarily store generated files (e.g., notebook, outputs). + Directory used to temporarily store generated files (e.g., report config). If `None`, a temporary directory is automatically created and deleted after report generation. notebook_path : str, optional - Path to a custom notebook used as a template for generating the report. - If `None`, the default Shapash report notebook is used. + Path to a custom YAML configuration file used to generate the report. + If `None`, a default YAML configuration is generated. kernel_name : str, optional - Name of the Jupyter kernel to use for report execution. - Useful when multiple kernels are available and the default one is incorrect. + Deprecated parameter kept for backward compatibility. + Ignored by the smart_report implementation. max_points : int, optional, default=200 Maximum number of points displayed in contribution plots. 
display_interaction_plot : bool, optional, default=False @@ -1735,7 +1737,7 @@ def generate_report( Notes ----- - - The method internally executes a notebook that generates the report content. + - The method renders the report from block definitions in a YAML configuration. - Temporary files are automatically cleaned up unless a custom `working_dir` is provided. - Interaction plots can be disabled to optimize runtime performance. @@ -1757,11 +1759,13 @@ def generate_report( ... nb_top_interactions=5, ... ) """ - check_report_requirements() + from shapash.report.smart_report import ReportBase + if x_train is not None: x_train = handle_categorical_missing(x_train) - # Avoid Import Errors with requirements specific to the Shapash Report - from shapash.report.generation import execute_report, export_and_save_report + + if kernel_name is not None: + logging.warning("'kernel_name' is ignored by the smart report implementation.") rm_working_dir = False if not working_dir: @@ -1775,29 +1779,81 @@ def generate_report( ) try: - execute_report( - working_dir=working_dir, + config = { + "max_points": max_points, + "display_interaction_plot": display_interaction_plot, + "nb_top_interactions": nb_top_interactions, + } + + report = ReportBase( explainer=self, - project_info_file=project_info_file, x_train=x_train, y_train=y_train, y_test=y_test, - config={ - k: v - for k, v in dict( - title_story=title_story, - title_description=title_description, - metrics=metrics, - max_points=max_points, - display_interaction_plot=display_interaction_plot, - nb_top_interactions=nb_top_interactions, - ).items() - if v is not None - }, - notebook_path=notebook_path, - kernel_name=kernel_name, + config=config, ) - export_and_save_report(working_dir=working_dir, output_file=output_file) + + if notebook_path is not None: + config_file = Path(notebook_path) + else: + config_file = Path(working_dir) / "report_config.yml" + sections = [ + { + "type": "header", + "params": { + "title": title_story or 
self.title_story or "Shapash report", + "subtitle": title_description or "", + }, + }, + { + "type": "project_information", + "params": { + "title": "Project information", + "color": "gray", + "project_info_file": project_info_file, + }, + }, + {"type": "model_analysis", "params": {"title": "Model information", "color": "blue"}}, + {"type": "global_analysis", "params": {"title": "Dataset analysis", "color": "blue"}}, + {"type": "feature_importance", "params": {"title": "Model explainability", "color": "gold"}}, + ] + + if metrics: + sections.append( + { + "type": "performance_metrics", + "params": { + "title": "Model performance", + "color": "orange", + "metrics": metrics, + }, + } + ) + + if y_test is not None: + if self._case == "classification": + sections.append( + {"type": "confusion_matrix", "params": {"title": "Confusion matrix", "color": "orange"}} + ) + else: + sections.append({"type": "pred_vs_true", "params": {"title": "", "color": "orange"}}) + + if display_interaction_plot: + sections.append( + { + "type": "interactions_plot", + "params": { + "title": "Top interactions", + "color": "green", + "max_points": max_points, + }, + } + ) + + with config_file.open("w", encoding="utf-8") as cfg_stream: + yaml.safe_dump({"sections": sections}, cfg_stream, sort_keys=False, allow_unicode=True) + + report.generate_report(config_file=str(config_file), output_file=output_file) if rm_working_dir: shutil.rmtree(working_dir) @@ -1807,86 +1863,6 @@ def generate_report( shutil.rmtree(working_dir) raise e - def generate_report_with_panel( - self, - output_file=None, - project_info_file=None, - x_train=None, - y_train=None, - y_test=None, - title_story=None, - title_description=None, - metrics=None, - max_points=200, - display_interaction_plot=False, - nb_top_interactions=5, - ): - """ - Generate an interactive report using Panel to summarize model explainability. 
- - This method creates a simple interactive report using the Panel library, - allowing users to explore key insights about the model, its predictions, - and feature contributions directly in a Jupyter notebook or Python environment. - - The report includes: - - A title and description section. - - A summary of the model’s predictions and feature contributions. - - Interactive widgets to filter and explore the explanations. - - Parameters - ---------- - output_file : str, optional - Path to save the generated report as an HTML file. - If `None`, the report will be displayed directly in the current environment. - project_info_file : str, optional - Path to a YAML file containing project metadata (not currently used in this method). - x_train : pandas.DataFrame, optional - Training dataset used to fit the model (not currently used in this method). - y_train : pandas.Series or pandas.DataFrame, optional - Target values corresponding to `x_train` (not currently used in this method). - y_test : pandas.Series or pandas.DataFrame, optional - Target values for the test dataset (not currently used in this method). - title_story : str, optional - Title displayed at the top of the report. - title_description : str, optional - Short descriptive text displayed below the main title. - metrics : list of dict, optional - List of metrics to compute and display in the performance section (not currently used in this method). - max_points : int, optional, default=200 - Maximum number of points displayed in contribution plots (not currently used in this method). - display_interaction_plot : bool, optional, default=False - If True, includes interaction plots in the report (not currently used in this method). - nb_top_interactions : int, optional, default=5 - Number of top feature interactions to include in the report (not currently used in this method). - - Returns - ------- - None - Displays the interactive report in the current environment. 
- - Example - ------- - >>> xpl.generate_raport_with_panel( - ... title_story="Model Explainability Report", - ... title_description="Explore predictions and feature contributions interactively." - ... ) - """ - if title_story is not None: - self.title_story = title_story - if title_description is not None: - self.title_description = title_description - - title = pn.pane.Markdown(f"# {self.title_story}\n\n{self.title_description}") - - summary = self.to_pandas(proba=False, features_to_hide=None, threshold=None, positive=None, max_contrib=None) - summary_panel = pn.widgets.DataFrame(summary, width=800, height=400) - report = pn.Column(title, summary_panel) - - if output_file: - report.save(output_file) - else: - report.show() - def _local_pred(self, index, label=None): """ Compute the model prediction or probability for a single observation. diff --git a/shapash/report/smart_report/assets.py b/shapash/report/smart_report/assets.py index 29bdcbc6..d3b8fb6e 100644 --- a/shapash/report/smart_report/assets.py +++ b/shapash/report/smart_report/assets.py @@ -1,176 +1,10 @@ """Static CSS and JavaScript fragments for HTML report rendering.""" -REPORT_STYLES = """ - :root { --shapash-gold: #ffbb00; --text-main: #333; --text-light: #777; } - *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } - html { scroll-behavior: smooth; } - body, .report-shell { - background: #fdfdfd; color: var(--text-main); - font-family: 'Helvetica Neue', Arial, sans-serif; - } - .report-shell { - display: flex; min-height: 100vh; width: 100%; - } - .sidebar { - width: 240px; background: #fff; border-right: 1px solid #eee; - position: fixed; height: 100vh; padding: 30px 20px; - overflow-y: auto; - box-shadow: inset -8px 0 12px -12px rgba(0, 0, 0, 0.45); - } - .sidebar-brand { - margin-bottom: 40px; display: flex; align-items: center; gap: 10px; - color: var(--shapash-gold); font-size: 18px; font-weight: bold; - } - .sidebar-brand-logo { display: block; width: 34px; height: 34px; 
flex: 0 0 auto; } - .sidebar-brand-text { line-height: 1.2; } - .nav-item { - color: var(--text-light); padding: 10px 0; display: block; - text-decoration: none; font-size: 13px; transition: 0.2s; - } - .nav-item:hover { color: var(--text-main); } - .nav-item.active { - color: #551a8b; font-weight: bold; border-left: 3px solid #551a8b; padding-left: 10px; - } - .nav-group { margin-bottom: 2px; } - .nav-group-title { - color: var(--text-main); font-size: 13px; font-weight: 500; - padding: 10px 0; display: block; text-decoration: none; transition: 0.2s; - } - .nav-group-title:hover { color: #551a8b; } - .nav-group-title.active { - color: #551a8b; font-weight: bold; border-left: 3px solid #551a8b; padding-left: 10px; - } - .nav-children { display: block; } - .nav-child { - color: var(--text-light); padding: 6px 0 6px 16px; display: block; - text-decoration: none; font-size: 12px; transition: 0.2s; - } - .nav-child:hover { color: var(--text-main); } - .nav-child.active { - color: #551a8b; font-weight: 600; border-left: 3px solid #551a8b; padding-left: 13px; - } - .container { margin-left: 240px; width: 100%; padding: 60px 80px 60vh; max-width: 1200px; } - .main-header { text-align: center; margin-bottom: 60px; } - .main-header h1 { font-size: 2.4rem; font-weight: 500; color: #000; margin-bottom: 20px; } - .section-title { font-size: 1.6rem; color: #000; margin: 40px 0 20px; font-weight: 700; } - .section-block { margin-bottom: 30px; } - .content-block { margin-bottom: 30px; line-height: 1.6; font-size: 14px; } - .shapash-callout { border-left: 4px solid var(--shapash-gold); background: #fff; padding: 15px 25px; margin: 30px 0; color: #333; line-height: 1.6; font-size: 15px; } - .kv-table, table.dataframe { - width: 100%; - border-collapse: separate; - border-spacing: 0; - margin: 12px 0 24px; - background: #fff; - border: 1px solid #ececec; - border-radius: 12px; - overflow: hidden; - box-shadow: 0 8px 24px rgba(0, 0, 0, 0.04); - } - .kv-table thead th, table.dataframe 
thead th { - background: #fafafa; - color: #000; - font-weight: 700; - text-align: left; - border-bottom: 1px solid #ececec; - padding: 12px 16px; - } - .kv-table tbody th, table.dataframe tbody th { - color: #000; - font-weight: 600; - text-align: left; - background: #fcfcfc; - } - .kv-table td, .kv-table th, table.dataframe td, table.dataframe th { - padding: 12px 16px; - border-bottom: 1px solid #f1f1f1; - vertical-align: top; - } - .kv-table tbody tr:last-child td, - .kv-table tbody tr:last-child th, - table.dataframe tbody tr:last-child td, - table.dataframe tbody tr:last-child th { - border-bottom: 0; - } - .kv-table tbody tr:nth-child(even) td, - table.dataframe tbody tr:nth-child(even) td { - background: #fdfdfd; - } - .kv-key { font-weight: 700; width: 220px; color: #000; white-space: nowrap; } - .kv-key-label, .kv-key-sep { white-space: nowrap; } - .kv-val { color: var(--text-main); } - .badge { display: inline-block; padding: 6px 14px; border: 1px solid #eee; border-radius: 4px; font-size: 12px; background: #fff; } - .shapash-divider { border-bottom: 1px solid #eee; margin: 50px 0; } - .scroll-section { scroll-margin-top: 40px; } - .panel-plot { width: 100%; overflow-x: auto; } - .panel-plot .bk-root, .panel-plot .plotly-graph-div { width: 100%; max-width: 100%; } - @media (max-width: 900px) { - .sidebar { display: none; } - .container { margin-left: 0; padding: 30px 40px 40vh; } - } -""" +from pathlib import Path -REPORT_SCRIPT = """ - -""" +REPORT_STYLES = _STYLE_FILE.read_text(encoding="utf-8") +REPORT_SCRIPT = f"" diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index f2defdd6..8f8c5c80 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -2,7 +2,12 @@ from __future__ import annotations +import importlib +from pathlib import Path + import pandas as pd +import plotly.express as px +import yaml from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix 
from shapash.plots.plot_univariate import plot_distribution @@ -33,6 +38,44 @@ def block_text(self, title: str = "", body: str = "", color: str = "gray") -> st h2 = f'

{title}

' if title else "" return f'
{h2}

{body}

' + def block_project_information( + self, + title: str = "Project information", + color: str = "gray", + project_info_file: str = "", + exclude_sections: list[str] | None = None, + ) -> str: + """Return project information loaded from an external YAML file.""" + if not project_info_file: + raise ValueError("project_information block requires the 'project_info_file' parameter.") + + config_path = Path(project_info_file).expanduser() + if not config_path.is_absolute(): + config_path = Path.cwd() / config_path + config_path = config_path.resolve() + if not config_path.exists(): + raise ValueError(f"project_information file not found: {config_path}") + + with config_path.open(encoding="utf-8") as stream: + project_info = yaml.safe_load(stream) or {} + if not isinstance(project_info, dict): + raise ValueError("project_information YAML must define a top-level mapping.") + + excluded = {name.strip().lower() for name in (exclude_sections or ["model training"]) if isinstance(name, str)} + sections_html = [] + for section_name, section_values in project_info.items(): + if not isinstance(section_values, dict): + continue + if section_name.strip().lower() in excluded: + continue + rows = self._render_key_value_rows(section_values) + sections_html.append( + f'

{section_name}

' + f'{rows}
' + ) + + return self._wrap_section_content(title, "".join(sections_html)) + def block_key_value(self, title: str = "", items: dict | None = None, color: str = "gold") -> str: """Return the HTML for a table of key-value pairs.""" items = items or {} @@ -79,6 +122,94 @@ def block_global_analysis(self, title: str = "", color: str = "gray") -> str: table_html = stats_table.to_html(classes="kv-table", border=0) return self._wrap_section_content(title, table_html) + def block_model_analysis(self, title: str = "Model information", color: str = "blue") -> str: + """Return basic metadata about the fitted model and compiled explainer inputs.""" + explainer = self._require_explainer("model_analysis") + model = explainer.model + details = { + "Model class": type(model).__name__, + "Task": getattr(explainer, "_case", "regression"), + "Feature count": len(explainer.x_init.columns), + "Prediction sample size": len(explainer.x_init), + "Training sample size": len(self.x_train_init) if self.x_train_init is not None else "n/a", + } + rows = self._render_key_value_rows(details) + return self._wrap_section_content(title, f'{rows}
') + + def block_relationship_target( + self, + title: str = "Relationship with target variable", + feature: str = "OverallQual", + color: str = "blue", + max_y: int | None = None, + ) -> str: + """Return a feature/target relationship plot on training data.""" + self._require_train_test_data("relationship_target") + if self.x_train_pre is None or self.y_train is None: + raise ValueError("relationship_target block requires both training features and y_train.") + if feature not in self.x_train_pre.columns: + raise ValueError(f"Unknown feature '{feature}' for relationship_target block.") + + target_name = self.target_name_train or "target" + df_train = self.x_train_pre.copy() + df_train[target_name] = self.y_train + + fig = px.box(df_train, x=feature, y=target_name) + if max_y is not None: + fig.update_yaxes(range=[0, max_y]) + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_training_correlations( + self, + title: str = "Relationship between training variables", + color: str = "blue", + max_features: int = 30, + ) -> str: + """Return training-only correlation heatmap (legacy notebook block name).""" + if self.x_train_pre is None: + raise ValueError("training_correlations block requires x_train.") + + numeric_train = self.x_train_pre.select_dtypes(include="number") + corr = numeric_train.corr(numeric_only=True) + if max_features > 0 and corr.shape[0] > max_features: + corr = corr.iloc[:max_features, :max_features] + + fig = px.imshow(corr, color_continuous_scale="YlGnBu", zmin=-1, zmax=1, aspect="auto") + return self._wrap_section_content(title, self._plotly_html(fig)) + + def block_performance_metrics( + self, + title: str = "Model performance", + color: str = "orange", + metrics: list | None = None, + ) -> str: + """Return a badge row with configured evaluation metrics computed on y_test/y_pred.""" + if self.y_test is None or self.y_pred is None: + raise ValueError("performance_metrics block requires y_test and y_pred.") + + 
metric_items = [] + metrics = metrics or [] + for metric_cfg in metrics: + metric_path = metric_cfg.get("path") + metric_name = metric_cfg.get("name", metric_path) + if not metric_path: + continue + module_path, fn_name = metric_path.rsplit(".", 1) + metric_fn = getattr(importlib.import_module(module_path), fn_name) + value = metric_fn(self.y_test, self.y_pred) + metric_items.append({"label": metric_name, "value": f"{value:,.2f}", "color": color}) + + return self.block_badge_row(title=title, badges=metric_items) + + def block_pred_vs_true(self, title: str = "y_pred vs y_test", color: str = "orange") -> str: + """Return a scatter plot of predictions versus true target values.""" + if self.y_test is None or self.y_pred is None: + raise ValueError("pred_vs_true block requires y_test and y_pred.") + + scatter_df = pd.DataFrame({"y_test": self.y_test, "y_pred": self.y_pred}) + fig = px.scatter(scatter_df, x="y_test", y="y_pred") + return self._wrap_section_content(title, self._plotly_html(fig)) + def block_feature_distribution( self, feature: str, diff --git a/shapash/report/smart_report/report_script.js b/shapash/report/smart_report/report_script.js new file mode 100644 index 00000000..d0d72441 --- /dev/null +++ b/shapash/report/smart_report/report_script.js @@ -0,0 +1,60 @@ +document.addEventListener('DOMContentLoaded', function () { + const sections = document.querySelectorAll('.scroll-section[id]'); + const navItems = document.querySelectorAll('.nav-item:not(.nav-group-title):not(.nav-child)'); + const navChildren = document.querySelectorAll('.nav-child'); + const navGroupTitles = document.querySelectorAll('.nav-group-title'); + + function clearActive() { + navItems.forEach(el => el.classList.remove('active')); + navChildren.forEach(el => el.classList.remove('active')); + navGroupTitles.forEach(el => el.classList.remove('active')); + } + + function onScroll() { + let currentId = ''; + sections.forEach(section => { + if (window.scrollY >= (section.offsetTop - 150)) { 
+ currentId = section.getAttribute('id'); + } + }); + if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight - 5) { + if (sections.length > 0) { + currentId = sections[sections.length - 1].getAttribute('id'); + } + } + + clearActive(); + + navItems.forEach(item => { + if (item.getAttribute('href') === '#' + currentId) { + item.classList.add('active'); + } + }); + + let childMatched = false; + navChildren.forEach(child => { + if (child.getAttribute('href') === '#' + currentId) { + child.classList.add('active'); + childMatched = true; + const group = child.closest('.nav-group'); + if (group) { + const parentTitle = group.querySelector('.nav-group-title'); + if (parentTitle) { + parentTitle.classList.add('active'); + } + } + } + }); + + if (!childMatched) { + navGroupTitles.forEach(title => { + if (title.getAttribute('href') === '#' + currentId) { + title.classList.add('active'); + } + }); + } + } + + window.addEventListener('scroll', onScroll); + onScroll(); +}); diff --git a/shapash/report/smart_report/report_styles.css b/shapash/report/smart_report/report_styles.css new file mode 100644 index 00000000..c17838fb --- /dev/null +++ b/shapash/report/smart_report/report_styles.css @@ -0,0 +1,285 @@ +:root { + --shapash-gold: #ffbb00; + --text-main: #333; + --text-light: #777; +} + +*, +*::before, +*::after { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +html { + scroll-behavior: smooth; +} + +body, +.report-shell { + background: #fdfdfd; + color: var(--text-main); + font-family: 'Helvetica Neue', Arial, sans-serif; +} + +.report-shell { + display: flex; + min-height: 100vh; + width: 100%; +} + +.sidebar { + width: 240px; + background: #fff; + border-right: 1px solid #eee; + position: fixed; + height: 100vh; + padding: 30px 20px; + overflow-y: auto; + box-shadow: inset -8px 0 12px -12px rgba(0, 0, 0, 0.45); +} + +.sidebar-brand { + margin-bottom: 40px; + display: flex; + align-items: center; + gap: 10px; + color: var(--shapash-gold); + 
font-size: 18px; + font-weight: bold; +} + +.sidebar-brand-logo { + display: block; + width: 34px; + height: 34px; + flex: 0 0 auto; +} + +.sidebar-brand-text { + line-height: 1.2; +} + +.nav-item { + color: var(--text-light); + padding: 10px 0; + display: block; + text-decoration: none; + font-size: 13px; + transition: 0.2s; +} + +.nav-item:hover { + color: var(--text-main); +} + +.nav-item.active { + color: #551a8b; + font-weight: bold; + border-left: 3px solid #551a8b; + padding-left: 10px; +} + +.nav-group { + margin-bottom: 2px; +} + +.nav-group-title { + color: var(--text-main); + font-size: 13px; + font-weight: 500; + padding: 10px 0; + display: block; + text-decoration: none; + transition: 0.2s; +} + +.nav-group-title:hover { + color: #551a8b; +} + +.nav-group-title.active { + color: #551a8b; + font-weight: bold; + border-left: 3px solid #551a8b; + padding-left: 10px; +} + +.nav-children { + display: block; +} + +.nav-child { + color: var(--text-light); + padding: 6px 0 6px 16px; + display: block; + text-decoration: none; + font-size: 12px; + transition: 0.2s; +} + +.nav-child:hover { + color: var(--text-main); +} + +.nav-child.active { + color: #551a8b; + font-weight: 600; + border-left: 3px solid #551a8b; + padding-left: 13px; +} + +.container { + margin-left: 240px; + width: 100%; + padding: 60px 80px 60vh; + max-width: 1200px; +} + +.main-header { + text-align: center; + margin-bottom: 60px; +} + +.main-header h1 { + font-size: 2.4rem; + font-weight: 500; + color: #000; + margin-bottom: 20px; +} + +.section-title { + font-size: 1.6rem; + color: #000; + margin: 40px 0 20px; + font-weight: 700; +} + +.section-block { + margin-bottom: 30px; +} + +.content-block { + margin-bottom: 30px; + line-height: 1.6; + font-size: 14px; +} + +.shapash-callout { + border-left: 4px solid var(--shapash-gold); + background: #fff; + padding: 15px 25px; + margin: 30px 0; + color: #333; + line-height: 1.6; + font-size: 15px; +} + +.kv-table, +table.dataframe { + width: 100%; 
+ border-collapse: separate; + border-spacing: 0; + margin: 12px 0 24px; + background: #fff; + border: 1px solid #ececec; + border-radius: 12px; + overflow: hidden; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.04); +} + +.kv-table thead th, +table.dataframe thead th { + background: #fafafa; + color: #000; + font-weight: 700; + text-align: left; + border-bottom: 1px solid #ececec; + padding: 12px 16px; +} + +.kv-table tbody th, +table.dataframe tbody th { + color: #000; + font-weight: 600; + text-align: left; + background: #fcfcfc; +} + +.kv-table td, +.kv-table th, +table.dataframe td, +table.dataframe th { + padding: 12px 16px; + border-bottom: 1px solid #f1f1f1; + vertical-align: top; +} + +.kv-table tbody tr:last-child td, +.kv-table tbody tr:last-child th, +table.dataframe tbody tr:last-child td, +table.dataframe tbody tr:last-child th { + border-bottom: 0; +} + +.kv-table tbody tr:nth-child(even) td, +table.dataframe tbody tr:nth-child(even) td { + background: #fdfdfd; +} + +.kv-key { + font-weight: 700; + width: 220px; + color: #000; + white-space: nowrap; +} + +.kv-key-label, +.kv-key-sep { + white-space: nowrap; +} + +.kv-val { + color: var(--text-main); +} + +.badge { + display: inline-block; + padding: 6px 14px; + border: 1px solid #eee; + border-radius: 4px; + font-size: 12px; + background: #fff; +} + +.shapash-divider { + border-bottom: 1px solid #eee; + margin: 50px 0; +} + +.scroll-section { + scroll-margin-top: 40px; +} + +.panel-plot { + width: 100%; + overflow-x: auto; +} + +.panel-plot .bk-root, +.panel-plot .plotly-graph-div { + width: 100%; + max-width: 100%; +} + +@media (max-width: 900px) { + .sidebar { + display: none; + } + + .container { + margin-left: 0; + padding: 30px 40px 40vh; + } +} \ No newline at end of file diff --git a/tests/integration_tests/test_report_generation.py b/tests/integration_tests/test_report_generation.py index 5c54aa2e..5c493945 100644 --- a/tests/integration_tests/test_report_generation.py +++ 
b/tests/integration_tests/test_report_generation.py @@ -2,15 +2,16 @@ import shutil import tempfile import unittest +from pathlib import Path import catboost as cb import category_encoders as ce import numpy as np import pandas as pd +import yaml from category_encoders import OrdinalEncoder from shapash import SmartExplainer -from shapash.report.generation import execute_report, export_and_save_report current_path = os.path.dirname(os.path.abspath(__file__)) @@ -32,122 +33,68 @@ def setUp(self): self.xpl.compile(x=df_encoded[["x1", "x2", "x3", "x4"]]) self.df = df_encoded - def test_execute_report_1(self): - tmp_dir_path = tempfile.mkdtemp() - - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, - project_info_file=os.path.join(current_path, "../data/metadata.yaml"), - config=None, - notebook_path=None, - ) - assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle")) - assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb")) - - shutil.rmtree(tmp_dir_path) - - def test_execute_report_2(self): - tmp_dir_path = tempfile.mkdtemp() - - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, - project_info_file=os.path.join(current_path, "../data/metadata.yaml"), - x_train=self.df[["x1", "x2", "x3", "x4"]], - config=None, - notebook_path=None, - ) - assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle")) - assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb")) - - shutil.rmtree(tmp_dir_path) - - def test_execute_report_3(self): - tmp_dir_path = tempfile.mkdtemp() - - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, - project_info_file=os.path.join(current_path, "../data/metadata.yaml"), - x_train=self.df[["x1", "x2", "x3", "x4"]], - y_test=self.df["y"], - config=None, - notebook_path=None, - ) - assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv")) - assert 
os.path.exists(os.path.join(tmp_dir_path, "y_test.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle")) - assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb")) - - shutil.rmtree(tmp_dir_path) - - def test_execute_report_4(self): - tmp_dir_path = tempfile.mkdtemp() - - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, - project_info_file=os.path.join(current_path, "../data/metadata.yaml"), - x_train=self.df[["x1", "x2", "x3", "x4"]], - y_train=self.df["y"], - y_test=self.df["y"], - config=None, - notebook_path=None, - ) - assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "y_test.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "y_train.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle")) - assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb")) - - shutil.rmtree(tmp_dir_path) - - def test_execute_report_5(self): + def test_generate_report_default_config(self): tmp_dir_path = tempfile.mkdtemp() + outfile = os.path.join(tmp_dir_path, "report.html") self.xpl.palette_name = "eurybia" - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, + self.xpl.generate_report( + output_file=outfile, project_info_file=os.path.join(current_path, "../data/metadata.yaml"), x_train=self.df[["x1", "x2", "x3", "x4"]], y_train=self.df["y"], y_test=self.df["y"], - notebook_path=None, + working_dir=tmp_dir_path, ) self.xpl.palette_name = "default" - assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "y_test.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "y_train.csv")) - assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle")) - assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb")) + assert os.path.exists(outfile) + assert os.path.exists(os.path.join(tmp_dir_path, "report_config.yml")) 
shutil.rmtree(tmp_dir_path) - def test_generate_report_1(self): + def test_generate_report_with_custom_yaml_config(self): tmp_dir_path = tempfile.mkdtemp() - outfile = os.path.join(tmp_dir_path, "report.html") + cfg_path = Path(tmp_dir_path) / "custom_report_config.yml" + outfile = str(Path(tmp_dir_path) / "report_custom.html") + + config = { + "sections": [ + { + "type": "header", + "params": {"title": "Integration report", "subtitle": "custom yaml"}, + }, + { + "type": "project_information", + "params": { + "title": "Project information", + "project_info_file": os.path.join(current_path, "../data/metadata.yaml"), + }, + }, + ] + } + with cfg_path.open("w", encoding="utf-8") as stream: + yaml.safe_dump(config, stream, sort_keys=False, allow_unicode=True) self.xpl.generate_report( output_file=outfile, project_info_file=os.path.join(current_path, "../data/metadata.yaml"), + notebook_path=str(cfg_path), ) assert os.path.exists(outfile) shutil.rmtree(tmp_dir_path) - def test_export_and_save_report_1(self): + def test_generate_report_interactions_enabled(self): tmp_dir_path = tempfile.mkdtemp() + outfile = os.path.join(tmp_dir_path, "report_interactions.html") - execute_report( - working_dir=tmp_dir_path, - explainer=self.xpl, + self.xpl.generate_report( + output_file=outfile, project_info_file=os.path.join(current_path, "../data/metadata.yaml"), + x_train=self.df[["x1", "x2", "x3", "x4"]], + display_interaction_plot=True, + working_dir=tmp_dir_path, ) - - outfile = os.path.join(tmp_dir_path, "report.html") - export_and_save_report(working_dir=tmp_dir_path, output_file=outfile) assert os.path.exists(outfile) + shutil.rmtree(tmp_dir_path) diff --git a/tests/unit_tests/explainer/test_smart_explainer.py b/tests/unit_tests/explainer/test_smart_explainer.py index c5736e1f..e7496d57 100644 --- a/tests/unit_tests/explainer/test_smart_explainer.py +++ b/tests/unit_tests/explainer/test_smart_explainer.py @@ -1108,9 +1108,8 @@ def test_run_app_2(self, mock_get_host_name, 
mock_custom_thread, mock_smartapp): xpl.run_app() assert xpl.y_target is not None - @patch("shapash.report.generation.export_and_save_report") - @patch("shapash.report.generation.execute_report") - def test_generate_report(self, mock_execute_report, mock_export_and_save_report): + @patch("shapash.report.smart_report.ReportBase") + def test_generate_report(self, mock_report_base): """ Test generate report method """ @@ -1123,8 +1122,8 @@ def test_generate_report(self, mock_execute_report, mock_export_and_save_report) xpl = SmartExplainer(clf) xpl.compile(x=df[["x1", "x2"]]) xpl.generate_report(output_file="test", project_info_file="test") - mock_execute_report.assert_called_once() - mock_export_and_save_report.assert_called_once() + mock_report_base.assert_called_once() + mock_report_base.return_value.generate_report.assert_called_once() def test_compute_features_stability_1(self): df = pd.DataFrame(np.random.randint(1, 100, size=(15, 4)), columns=list("ABCD")) diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py index 44ad593f..8157fb3e 100644 --- a/tutorial/generate_report/demo.py +++ b/tutorial/generate_report/demo.py @@ -1,10 +1,7 @@ -import importlib import sys from pathlib import Path import pandas as pd -import plotly.express as px -import yaml from category_encoders import OrdinalEncoder from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import train_test_split @@ -24,117 +21,6 @@ PROJECT_INFO_FILE = HERE / "utils" / "project_info.yml" -class NotebookParityReport(ReportBase): - """Report with custom blocks that mirror sections from the legacy notebook report.""" - - def block_project_information(self, title: str = "Project information", color: str = "gray"): - project_info_path = self.config.get("project_info_file") - if not project_info_path: - raise ValueError("project_information block requires config['project_info_file'].") - - with open(project_info_path, encoding="utf-8") as f: - project_info = 
yaml.safe_load(f) or {} - - sections_html = [] - for section_name, section_values in project_info.items(): - if not isinstance(section_values, dict): - continue - if section_name.strip().lower() == "model training": - continue - - rows = self._render_key_value_rows(section_values) - sections_html.append( - f'

{section_name}

' - f'{rows}
' - ) - - return self._wrap_section_content(title, "".join(sections_html)) - - def block_model_analysis(self, title: str = "Model information", color: str = "blue"): - explainer = self._require_explainer("model_analysis") - model = explainer.model - model_name = type(model).__name__ - details = { - "Model class": model_name, - "Task": getattr(explainer, "_case", "regression"), - "Feature count": len(explainer.x_init.columns), - "Prediction sample size": len(explainer.x_init), - "Training sample size": len(self.x_train_init) if self.x_train_init is not None else "n/a", - } - rows = self._render_key_value_rows(details) - return self._wrap_section_content(title, f'{rows}
') - - def block_relationship_target( - self, - title: str = "Relationship with target variable", - feature: str = "OverallQual", - color: str = "blue", - max_y: int | None = None, - ): - self._require_train_test_data("relationship_target") - if self.x_train_pre is None or self.y_train is None: - raise ValueError("relationship_target block requires both training features and y_train.") - if feature not in self.x_train_pre.columns: - raise ValueError(f"Unknown feature '{feature}' for relationship_target block.") - - target_name = self.target_name_train or "target" - df_train = self.x_train_pre.copy() - df_train[target_name] = self.y_train - - fig = px.box(df_train, x=feature, y=target_name) - if max_y is not None: - fig.update_yaxes(range=[0, max_y]) - return self._wrap_section_content(title, self._plotly_html(fig)) - - def block_training_correlations( - self, - title: str = "Relationship between training variables", - color: str = "blue", - max_features: int = 30, - ): - if self.x_train_pre is None: - raise ValueError("training_correlations block requires x_train.") - - numeric_train = self.x_train_pre.select_dtypes(include="number") - corr = numeric_train.corr(numeric_only=True) - if max_features > 0 and corr.shape[0] > max_features: - corr = corr.iloc[:max_features, :max_features] - - fig = px.imshow(corr, color_continuous_scale="YlGnBu", zmin=-1, zmax=1, aspect="auto") - return self._wrap_section_content(title, self._plotly_html(fig)) - - def block_performance_metrics( - self, - title: str = "Model performance", - color: str = "orange", - metrics: list | None = None, - ): - if self.y_test is None or self.y_pred is None: - raise ValueError("performance_metrics block requires y_test and y_pred.") - - metric_items = [] - metrics = metrics or [] - for metric_cfg in metrics: - metric_path = metric_cfg.get("path") - metric_name = metric_cfg.get("name", metric_path) - if not metric_path: - continue - module_path, fn_name = metric_path.rsplit(".", 1) - metric_fn = 
getattr(importlib.import_module(module_path), fn_name) - value = metric_fn(self.y_test, self.y_pred) - metric_items.append({"label": metric_name, "value": f"{value:,.2f}", "color": color}) - - return self.block_badge_row(title=title, badges=metric_items) - - def block_pred_vs_true(self, title: str = "y_pred vs y_test", color: str = "orange"): - if self.y_test is None or self.y_pred is None: - raise ValueError("pred_vs_true block requires y_test and y_pred.") - - scatter_df = pd.DataFrame({"y_test": self.y_test, "y_pred": self.y_pred}) - fig = px.scatter(scatter_df, x="y_test", y="y_pred") - return self._wrap_section_content(title, self._plotly_html(fig)) - - def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Series, pd.Series]: """Build the same House Prices explainer used in report tutorials.""" house_df, house_dict = data_loading("house_prices") @@ -158,7 +44,7 @@ def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Ser if __name__ == "__main__": xpl, Xtrain, ytrain, ytest = build_house_prices_explainer() - report = NotebookParityReport( + report = ReportBase( explainer=xpl, x_train=Xtrain, y_train=ytrain, diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 6ac930ca..809bc2f5 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -13,6 +13,7 @@ sections: params: title: "Project information" color: gray + project_info_file: "tutorial/generate_report/utils/project_info.yml" - type: model_analysis params: @@ -42,7 +43,7 @@ sections: color: blue max_y: 800000 - - type: training_correlations + - type: correlations_plot params: title: "Relashionship between training variables" color: blue From 2d85c85394a45948022973c6bee284a9edf6e39d Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 11 May 2026 10:32:44 +0200 Subject: [PATCH 15/43] ruff mod --- shapash/report/smart_report/report_styles.css | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapash/report/smart_report/report_styles.css b/shapash/report/smart_report/report_styles.css index c17838fb..fe62f203 100644 --- a/shapash/report/smart_report/report_styles.css +++ b/shapash/report/smart_report/report_styles.css @@ -282,4 +282,4 @@ table.dataframe tbody tr:nth-child(even) td { margin-left: 0; padding: 30px 40px 40vh; } -} \ No newline at end of file +} From 4ebacf290652a7d03260aa47e2090ca7e99ed82c Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 11 May 2026 13:17:08 +0200 Subject: [PATCH 16/43] removing unecessary files and adding blocks --- shapash/explainer/smart_predictor.py | 34 ++++ shapash/plots/plot_compacity.py | 7 +- shapash/plots/plot_evaluation_metrics.py | 8 +- shapash/report/report_config.yml | 139 ---------------- shapash/report/report_config_extended.yml | 154 ------------------ shapash/report/smart_report/blocks.py | 79 +++++++-- shapash/report/smart_report/report_styles.css | 61 ++++++- tutorial/generate_report/report_config_v1.yml | 11 -- .../generate_report/shapash_report_example.py | 28 ++-- 9 files changed, 179 insertions(+), 342 deletions(-) delete mode 100644 shapash/report/report_config.yml delete mode 100644 shapash/report/report_config_extended.yml diff --git a/shapash/explainer/smart_predictor.py b/shapash/explainer/smart_predictor.py index d854df28..089cf1ab 100644 --- a/shapash/explainer/smart_predictor.py +++ b/shapash/explainer/smart_predictor.py @@ -344,6 +344,40 @@ def check_dataset_features(self, x): x = x[features_order] assert all(column in self.features_types.keys() for column in x.columns) + for feature in x.columns: + expected_dtype = self.features_types[feature] + if str(x[feature].dtypes) == expected_dtype: + continue + + try: + if expected_dtype.startswith("int") or expected_dtype.startswith("uint"): + if not pd.api.types.is_integer_dtype(x[feature].dtypes): + raise ValueError + x[feature] = x[feature].astype(expected_dtype) + elif 
expected_dtype.startswith("float"): + if not pd.api.types.is_float_dtype(x[feature].dtypes): + raise ValueError + x[feature] = x[feature].astype(expected_dtype) + elif expected_dtype == "bool": + if not pd.api.types.is_bool_dtype(x[feature].dtypes): + raise ValueError + x[feature] = x[feature].astype(expected_dtype) + elif expected_dtype in ["object", "string", "str"]: + if not ( + pd.api.types.is_object_dtype(x[feature].dtypes) + or pd.api.types.is_string_dtype(x[feature].dtypes) + ): + raise ValueError + if expected_dtype != "str": + x[feature] = x[feature].astype(expected_dtype) + except Exception: + raise ValueError( + """ + Types of features in x doesn't match with the expected one in features_types. + x input must be initial dataset without preprocessing applied. + """ + ) + if not all([str(x[feature].dtypes) == self.features_types[feature] for feature in x.columns]): raise ValueError( """ diff --git a/shapash/plots/plot_compacity.py b/shapash/plots/plot_compacity.py index 7ad34f91..7cb869a4 100644 --- a/shapash/plots/plot_compacity.py +++ b/shapash/plots/plot_compacity.py @@ -1,3 +1,4 @@ +import numpy as np from plotly import graph_objs as go from plotly.offline import plot from plotly.subplots import make_subplots @@ -70,9 +71,13 @@ def plot_compacity( fig.update_annotations(font=style_dict["dict_title_compacity"]["font"]) # First plot: number of features required for a given approximation + features_needed_plot = np.asarray(features_needed) + if np.issubdtype(features_needed_plot.dtype, np.integer): + features_needed_plot = features_needed_plot.astype(np.int64) + fig.add_trace( go.Histogram( - x=features_needed, + x=features_needed_plot, histnorm="percent", cumulative={"enabled": True}, name="", diff --git a/shapash/plots/plot_evaluation_metrics.py b/shapash/plots/plot_evaluation_metrics.py index e8b8aece..462a8d6e 100644 --- a/shapash/plots/plot_evaluation_metrics.py +++ b/shapash/plots/plot_evaluation_metrics.py @@ -241,9 +241,13 @@ def 
_prediction_classification_plot( subtitle = f"Response: {label_value}" # Plot distribution + violin_x = df_pred["target"].values.flatten() + if np.issubdtype(np.asarray(violin_x).dtype, np.integer): + violin_x = np.asarray(violin_x, dtype=np.int64) + fig.add_trace( go.Violin( - x=df_pred["target"].values.flatten(), + x=violin_x, y=df_pred["proba_values"].values.flatten(), points=False, legendgroup="M", @@ -405,6 +409,8 @@ def _prediction_regression_plot(y_target, y_pred, prediction_error, list_ind, st y_target = y_target_tmp y_target_values = y_target.values.flatten() + if np.issubdtype(np.asarray(y_target_values).dtype, np.integer): + y_target_values = np.asarray(y_target_values, dtype=np.int64) y_pred = y_pred.loc[y_target.index] prediction_error = np.array(prediction_error.loc[y_target.index]) diff --git a/shapash/report/report_config.yml b/shapash/report/report_config.yml deleted file mode 100644 index 5393e4d3..00000000 --- a/shapash/report/report_config.yml +++ /dev/null @@ -1,139 +0,0 @@ -# report_config.yml -# ───────────────────────────────────────────────────────────────────────────── -# Each entry in `sections` maps to a block_ method on the report class. -# `params` are passed as keyword arguments to that method. -# Add, remove, or reorder sections freely — that is the whole point. -# ───────────────────────────────────────────────────────────────────────────── - -sections: - - # ── Page title ────────────────────────────────────────────────────────────── - - type: header - params: - title: "House Prices — Model Report" - subtitle: "Prototype · block-based report engine" - - # ── Top-level callout ──────────────────────────────────────────────────────── - - type: callout - params: - icon: "⚠" - body: > - This is a prototype report. Blocks are defined in Python and - composed here in YAML. Add, remove or reorder sections without - touching any Python code. 
- color: orange - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Project" - - # ── Project metadata ───────────────────────────────────────────────────────── - - type: key_value - params: - title: "Project metadata" - color: purple - items: - Author: "Alice Martin" - Date: "2024-01-15" - Dataset: "Kaggle — House Prices" - Task: "Regression" - Target: "SalePrice" - - # ── Free-text introduction ──────────────────────────────────────────────────── - - type: text - params: - title: "Objective" - body: > - The goal of this project is to predict the final sale price of residential - homes in Ames, Iowa, using 79 explanatory variables describing almost every - aspect of those homes. - color: blue - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Model" - - # ── Model summary ───────────────────────────────────────────────────────────── - - type: key_value - params: - title: "Model configuration" - color: blue - items: - Class: "RandomForestRegressor" - Library: "scikit-learn 1.4" - n_estimators: 50 - max_depth: "None (unlimited)" - random_state: 1 - - # ── Key metrics as badges ───────────────────────────────────────────────────── - - type: badge_row - params: - title: "Performance snapshot" - badges: - - label: MAE - value: "18 432 $" - color: green - - label: RMSE - value: "27 891 $" - color: blue - - label: R² - value: "0.874" - color: purple - - label: Train size - value: "1 095 rows" - color: gray - - label: Test size - value: "365 rows" - color: gray - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Explainability" - - # ── Top features — plain text block (no chart yet) ──────────────────────────── - - type: text - params: - title: "Top contributing features" - body: > - Based on mean absolute SHAP values computed on the test set, - the three most 
influential features are OverallQual (overall material - and finish quality), GrLivArea (above-grade living area in sq ft), - and TotalBsmtSF (total basement area in sq ft). - color: green - - # ── Feature importance as key/value table ───────────────────────────────────── - - type: key_value - params: - title: "Mean |SHAP| — top 5 features" - color: green - items: - OverallQual: "0.412" - GrLivArea: "0.289" - TotalBsmtSF: "0.174" - GarageCars: "0.121" - YearBuilt: "0.098" - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Notes" - - # ── Custom block example (commented out — shows the extension point) ────────── - # - type: custom - # function: "my_module.render_scatter_plot" - # params: - # x_col: GrLivArea - # y_col: SalePrice - - # ── Closing note ───────────────────────────────────────────────────────────── - - type: callout - params: - icon: "💡" - body: > - To add a new section, define a block_my_section() method in a - subclass of ReportBase — or point a type: custom entry at any - importable Python function. No other changes needed. - color: purple diff --git a/shapash/report/report_config_extended.yml b/shapash/report/report_config_extended.yml deleted file mode 100644 index e711e46f..00000000 --- a/shapash/report/report_config_extended.yml +++ /dev/null @@ -1,154 +0,0 @@ -# report_config_extended.yml -# ───────────────────────────────────────────────────────────────────────────── -# Extended example with an extra custom block and test pie_chart entry. -# Formatted to match report_config.yml style. 
-# ───────────────────────────────────────────────────────────────────────────── - -sections: - - # ── Page title ────────────────────────────────────────────────────────────── - - type: header - params: - title: "House Prices — Model Report" - subtitle: "Prototype · block-based report engine" - - # ── Top-level callout ──────────────────────────────────────────────────────── - - type: callout - params: - icon: "⚠" - body: > - This is a prototype report. Blocks are defined in Python and - composed here in YAML. Add, remove or reorder sections without - touching any Python code. - color: orange - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Project" - - # ── Project metadata ───────────────────────────────────────────────────────── - - type: key_value - params: - title: "Project metadata" - color: purple - items: - Author: "Alice Martin" - Dataset: "Kaggle — House Prices" - Date: "2024-01-15" - Target: "SalePrice" - Task: "Regression" - - # ── Free-text introduction ─────────────────────────────────────────────────── - - type: text - params: - title: "Objective" - body: > - The goal of this project is to predict the final sale price of residential - homes in Ames, Iowa, using 79 explanatory variables describing almost every - aspect of those homes. 
- color: blue - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Model" - - # ── Model summary ───────────────────────────────────────────────────────────── - - type: key_value - params: - title: "Model configuration" - color: blue - items: - Class: "RandomForestRegressor" - Library: "scikit-learn 1.4" - max_depth: "None (unlimited)" - n_estimators: 50 - random_state: 1 - - # ── Key metrics as badges ───────────────────────────────────────────────────── - - type: badge_row - params: - title: "Performance snapshot" - badges: - - label: MAE - value: "18 432 $" - color: green - - label: RMSE - value: "27 891 $" - color: blue - - label: R² - value: "0.874" - color: purple - - label: Train size - value: "1 095 rows" - color: gray - - label: Test size - value: "365 rows" - color: gray - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Explainability" - - # ── Top features — plain text block (no chart yet) ────────────────────────── - - type: text - params: - title: "Top contributing features" - body: > - Based on mean absolute SHAP values computed on the test set, the three - most influential features are OverallQual (overall material and finish quality), - GrLivArea (above-grade living area in sq ft), and TotalBsmtSF (total basement - area in sq ft). 
- color: green - - # ── Feature importance as key/value table ──────────────────────────────────── - - type: key_value - params: - title: "Mean |SHAP| — top 5 features" - color: green - items: - GarageCars: "0.121" - GrLivArea: "0.289" - OverallQual: "0.412" - TotalBsmtSF: "0.174" - YearBuilt: "0.098" - - # ── Divider ────────────────────────────────────────────────────────────────── - - type: divider - params: - label: "Notes" - - # ── Extended block example ─────────────────────────────────────────────────── - - type: progress_bar - params: - title: "SHAP coverage by feature group" - color: green - items: - - label: "Overall quality features" - pct: 85 - - label: "Area & size features" - pct: 70 - - label: "Garage features" - pct: 45 - - label: "Basement features" - pct: 38 - - label: "Year / age features" - pct: 22 - - # ── Closing note ───────────────────────────────────────────────────────────── - - type: callout - params: - icon: "💡" - body: > - To add a new section, define a block_my_section() method in a - subclass of ReportBase — or point a type: custom entry at any - importable Python function. No other changes needed. 
- color: purple - - # ── Test block (kept as-is) ────────────────────────────────────────────────── - - type: pie_chart - params: - title: "test" - color: purple diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index 8f8c5c80..74a6949b 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -3,6 +3,7 @@ from __future__ import annotations import importlib +import importlib.metadata from pathlib import Path import pandas as pd @@ -43,7 +44,6 @@ def block_project_information( title: str = "Project information", color: str = "gray", project_info_file: str = "", - exclude_sections: list[str] | None = None, ) -> str: """Return project information loaded from an external YAML file.""" if not project_info_file: @@ -61,13 +61,10 @@ def block_project_information( if not isinstance(project_info, dict): raise ValueError("project_information YAML must define a top-level mapping.") - excluded = {name.strip().lower() for name in (exclude_sections or ["model training"]) if isinstance(name, str)} sections_html = [] for section_name, section_values in project_info.items(): if not isinstance(section_values, dict): continue - if section_name.strip().lower() in excluded: - continue rows = self._render_key_value_rows(section_values) sections_html.append( f'

{section_name}

' @@ -123,18 +120,72 @@ def block_global_analysis(self, title: str = "", color: str = "gray") -> str: return self._wrap_section_content(title, table_html) def block_model_analysis(self, title: str = "Model information", color: str = "blue") -> str: - """Return basic metadata about the fitted model and compiled explainer inputs.""" + """Return model metadata and parameters in a notebook-parity layout.""" explainer = self._require_explainer("model_analysis") model = explainer.model - details = { - "Model class": type(model).__name__, - "Task": getattr(explainer, "_case", "regression"), - "Feature count": len(explainer.x_init.columns), - "Prediction sample size": len(explainer.x_init), - "Training sample size": len(self.x_train_init) if self.x_train_init is not None else "n/a", - } - rows = self._render_key_value_rows(details) - return self._wrap_section_content(title, f'{rows}
') + + model_module = model.__class__.__module__ + model_package = model_module.split(".")[0] + package_name = "scikit-learn" if model_package == "sklearn" else model_package + try: + library_version = importlib.metadata.version(package_name) + except importlib.metadata.PackageNotFoundError: + library_version = f"not found for {model_package}" + + model_params = getattr(model, "__dict__", {}) + params_items = list(model_params.items()) + split_idx = len(params_items) // 2 + + def _truncate(value, max_len): + text = str(value) + return text if len(text) <= max_len else text[: max_len - 3] + "..." + + def _render_param_rows(items): + return "".join( + ( + "" + f'{_truncate(key, 50)}' + f'{_truncate(val, 300)}' + "" + ) + for key, val in items + ) + + table_header = ( + "" + 'Parameter key' + 'Parameter value' + "" + ) + + table_left = ( + '' + f"{table_header}" + f"{_render_param_rows(params_items[:split_idx])}" + "
" + ) + table_right = ( + '' + f"{table_header}" + f"{_render_param_rows(params_items[split_idx:])}" + "
" + ) + + content = ( + '
' + f'

Model used : {model.__class__.__name__}

' + f'

Library : {model_module}

' + f'

Library version : {library_version}

' + '

Model parameters :

' + "
" + '
' + f'
{table_left}
' + f'
{table_right}
' + "
" + "
" + ) + + return self._wrap_section_content(title, content) def block_relationship_target( self, diff --git a/shapash/report/smart_report/report_styles.css b/shapash/report/smart_report/report_styles.css index fe62f203..70bcb895 100644 --- a/shapash/report/smart_report/report_styles.css +++ b/shapash/report/smart_report/report_styles.css @@ -1,9 +1,11 @@ +/* Theme tokens */ :root { --shapash-gold: #ffbb00; --text-main: #333; --text-light: #777; } +/* Reset */ *, *::before, *::after { @@ -16,11 +18,12 @@ html { scroll-behavior: smooth; } +/* Page shell */ body, .report-shell { background: #fdfdfd; color: var(--text-main); - font-family: 'Helvetica Neue', Arial, sans-serif; + font-family: "Helvetica Neue", Arial, sans-serif; } .report-shell { @@ -29,6 +32,7 @@ body, width: 100%; } +/* Sidebar */ .sidebar { width: 240px; background: #fff; @@ -61,6 +65,7 @@ body, line-height: 1.2; } +/* Navigation */ .nav-item { color: var(--text-light); padding: 10px 0; @@ -130,6 +135,7 @@ body, padding-left: 13px; } +/* Main content */ .container { margin-left: 240px; width: 100%; @@ -166,6 +172,7 @@ body, font-size: 14px; } +/* Callouts and separators */ .shapash-callout { border-left: 4px solid var(--shapash-gold); background: #fff; @@ -176,6 +183,12 @@ body, font-size: 15px; } +.shapash-divider { + border-bottom: 1px solid #eee; + margin: 50px 0; +} + +/* Generic key/value and dataframe tables */ .kv-table, table.dataframe { width: 100%; @@ -244,6 +257,7 @@ table.dataframe tbody tr:nth-child(even) td { color: var(--text-main); } +/* Badges */ .badge { display: inline-block; padding: 6px 14px; @@ -253,11 +267,28 @@ table.dataframe tbody tr:nth-child(even) td { background: #fff; } -.shapash-divider { - border-bottom: 1px solid #eee; - margin: 50px 0; +/* Model analysis specific layout */ +.model-analysis-meta { + margin-bottom: 10px; +} + +.model-analysis-line { + margin-bottom: 12px; +} + +.model-analysis-tables { + display: flex; + gap: 20px; + flex-wrap: wrap; + align-items: 
flex-start; } +.model-analysis-table-col { + flex: 0 1 calc(50% - 10px); + min-width: 0; +} + +/* Plot wrappers */ .scroll-section { scroll-margin-top: 40px; } @@ -273,6 +304,17 @@ table.dataframe tbody tr:nth-child(even) td { max-width: 100%; } +/* Responsive adjustments */ +@media (max-width: 1200px) { + .model-analysis-tables { + gap: 16px; + } + + .model-analysis-table-col { + flex: 0 1 calc(50% - 8px); + } +} + @media (max-width: 900px) { .sidebar { display: none; @@ -283,3 +325,14 @@ table.dataframe tbody tr:nth-child(even) td { padding: 30px 40px 40vh; } } + +@media (max-width: 760px) { + .model-analysis-tables { + gap: 14px; + } + + .model-analysis-table-col { + flex: 1 1 100%; + min-width: 0; + } +} diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 809bc2f5..43479214 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -20,17 +20,6 @@ sections: title: "Model information" color: blue - - type: key_value - params: - title: "Model Training" - color: blue - items: - Used Algorithm: "We used a RandomForestRegressor algorithm (scikit-learn) but this model could be challenged with other interesting models such as XGBRegressor, Neural Networks, ..." - Parameters Choice: "We did not perform any hyperparameter optimisation and chose to use n_estimators=50. 
Future works should be planned to perform gridsearch optimizations" - Metrics: "Mean Squared Error metric" - Validation Strategy: "We splitted our data into train (75%) and test (25%)" - Path To Script: "https://github.com/MAIF/shapash/tree/master/tutorial/" - - type: global_analysis params: title: "Dataset analysis" diff --git a/tutorial/generate_report/shapash_report_example.py b/tutorial/generate_report/shapash_report_example.py index 0ea80f23..192ce922 100644 --- a/tutorial/generate_report/shapash_report_example.py +++ b/tutorial/generate_report/shapash_report_example.py @@ -1,7 +1,11 @@ """ -This script can be used to generate the report example. -For more information, please refer to the tutorial 'tuto-shapash-report01.ipynb' -that generates the same report. +Generate the report example with the new smart_report implementation. + +The report layout is driven by the YAML file `report_config_v1.yml` and rendered +through `SmartExplainer.generate_report`. + +For more information, please refer to the tutorial +`tuto-shapash-report01.ipynb` that generates the same report. """ import os import sys @@ -46,25 +50,13 @@ ) xpl.compile(x=Xtest, y_pred=y_pred, y_target=ytest) + report_config_file = os.path.join(cur_dir, "report_config_v1.yml") + xpl.generate_report( output_file=os.path.join(cur_dir, "output", "report.html"), project_info_file=os.path.join(cur_dir, "utils", "project_info.yml"), x_train=Xtrain, y_train=ytrain, y_test=ytest, - title_story="House prices report", - title_description="""This document is a data science report of the kaggle house prices tutorial project. 
- It was generated using the Shapash library.""", - metrics=[ - { - "path": "sklearn.metrics.mean_absolute_error", - "name": "Mean absolute error", - }, - { - "path": "sklearn.metrics.mean_squared_error", - "name": "Mean squared error", - }, - ], + notebook_path=report_config_file, ) - - xpl.generate_report_with_panel(output_file=os.path.join(cur_dir, "output", "report_with_panel.html"), title_story="House prices report with panel", title_description="This document is a data science report of the kaggle house prices tutorial project. It was generated using the Shapash library and Panel.") From a64ffb8c4f990e1f38180ec2f5d93ea177fb1ab8 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 11 May 2026 15:37:49 +0200 Subject: [PATCH 17/43] default report good --- shapash/report/smart_report/assets.py | 10 - shapash/report/smart_report/blocks.py | 277 +++++++++++++++++- shapash/report/smart_report/layout.py | 59 ++-- shapash/report/smart_report/report_styles.css | 87 +++++- tutorial/generate_report/demo.py | 55 ---- tutorial/generate_report/report_config_v1.yml | 103 ++++--- 6 files changed, 460 insertions(+), 131 deletions(-) delete mode 100644 shapash/report/smart_report/assets.py delete mode 100644 tutorial/generate_report/demo.py diff --git a/shapash/report/smart_report/assets.py b/shapash/report/smart_report/assets.py deleted file mode 100644 index d3b8fb6e..00000000 --- a/shapash/report/smart_report/assets.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Static CSS and JavaScript fragments for HTML report rendering.""" - -from pathlib import Path - -_ASSETS_DIR = Path(__file__).resolve().parent -_STYLE_FILE = _ASSETS_DIR / "report_styles.css" -_SCRIPT_FILE = _ASSETS_DIR / "report_script.js" - -REPORT_STYLES = _STYLE_FILE.read_text(encoding="utf-8") -REPORT_SCRIPT = f"" diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index 74a6949b..f8887545 100644 --- a/shapash/report/smart_report/blocks.py +++ 
b/shapash/report/smart_report/blocks.py @@ -5,6 +5,7 @@ import importlib import importlib.metadata from pathlib import Path +from uuid import uuid4 import pandas as pd import plotly.express as px @@ -44,6 +45,7 @@ def block_project_information( title: str = "Project information", color: str = "gray", project_info_file: str = "", + section_name: str | None = None, ) -> str: """Return project information loaded from an external YAML file.""" if not project_info_file: @@ -61,13 +63,18 @@ def block_project_information( if not isinstance(project_info, dict): raise ValueError("project_information YAML must define a top-level mapping.") + if section_name is not None: + if section_name not in project_info: + raise ValueError(f"Unknown project_information section: {section_name}") + project_info = {section_name: project_info[section_name]} + sections_html = [] - for section_name, section_values in project_info.items(): + for current_section_name, section_values in project_info.items(): if not isinstance(section_values, dict): continue rows = self._render_key_value_rows(section_values) sections_html.append( - f'

{section_name}

' + f'

{current_section_name}

' f'{rows}
' ) @@ -325,23 +332,90 @@ def block_feature_importance(self, title: str = "", color: str = "green", label= def block_contribution_plot( self, - feature: str, + feature: str | None = None, title: str = "", color: str = "green", label=None, max_points: int | None = None, + include_all_features: bool = False, + group_id: str = "contribution", ) -> str: """Return the HTML for a feature contribution plot. Requires an explainer with contribution values and uses the configured maximum point count when no explicit limit is provided. + + When include_all_features is True, renders a dropdown and one plot per + feature so users can navigate contribution plots in-place. Plot type + selection (violin vs scatter) is delegated to explainer.plot.contribution_plot. """ explainer = self._require_explainer("contribution_plot") - fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) - for trace in fig.data: - if trace.type == "bar": - trace.marker.color = "lightgrey" - return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + + if not include_all_features: + if feature is None: + raise ValueError("contribution_plot block requires 'feature' when include_all_features=False.") + fig = explainer.plot.contribution_plot(feature, label=label, max_points=max_points or self.max_points) + for trace in fig.data: + if trace.type == "bar": + trace.marker.color = "lightgrey" + return self._wrap_section_content(title or self._feature_label(feature), self._plotly_html(fig)) + + if getattr(explainer, "x_init", None) is None: + raise ValueError("contribution_plot block with include_all_features=True requires explainer.x_init.") + + feature_names = list(explainer.x_init.columns) + if not feature_names: + return self._wrap_section_content(title, '

No feature available.

') + + sorted_features = sorted( + feature_names, + key=lambda current_feature: (str(self._feature_label(current_feature)).lower(), str(current_feature)), + ) + + instance_id = uuid4().hex[:8] + selector_id = f"{group_id}-selector-{instance_id}" + feature_panels = [] + feature_options = [] + + for idx, feature_name in enumerate(sorted_features): + fig = explainer.plot.contribution_plot(feature_name, label=label, max_points=max_points or self.max_points) + for trace in fig.data: + if trace.type == "bar": + trace.marker.color = "lightgrey" + + feature_id = f"{group_id}-feature-{idx}-{instance_id}" + feature_label = self._feature_label(feature_name) + feature_options.append(f'') + feature_panels.append( + f'' + ) + + controls_html = ( + '
' + f'' + f'' + '
' + ) + script_html = ( + '' + ) + + resolved_title = title or "Features contribution plots" + return self._wrap_section_content(resolved_title, f'{controls_html}{"".join(feature_panels)}{script_html}') def block_interactions_plot( self, @@ -397,6 +471,79 @@ def block_target_distribution( ) return self._wrap_section_content(title or "Target distribution", self._plotly_html(fig)) + def block_target_analysis( + self, + title: str = "Target analysis", + show_train: bool = True, + width: int = 700, + height: int = 500, + ) -> str: + """Return a univariate-style analysis block focused only on the target variable.""" + from shapash.report.common import compute_col_types, series_dtype + from shapash.report.data_analysis import perform_univariate_dataframe_analysis + + if self.y_test is None: + raise ValueError("target_analysis block requires y_test.") + + target_name = self.target_name or "target" + y_test_series = pd.Series(self.y_test, name=target_name) + y_train_series = pd.Series(self.y_train, name=target_name) if self.y_train is not None and show_train else None + + analysis_source = pd.DataFrame({target_name: y_test_series}) + if y_train_series is not None: + analysis_source = pd.concat([analysis_source, pd.DataFrame({target_name: y_train_series})], ignore_index=True) + + col_types = compute_col_types(analysis_source) + test_stats = perform_univariate_dataframe_analysis(pd.DataFrame({target_name: y_test_series}), col_types=col_types) + train_stats = ( + perform_univariate_dataframe_analysis(pd.DataFrame({target_name: y_train_series}), col_types=col_types) + if y_train_series is not None + else None + ) + + names = ["Prediction dataset", "Training dataset"] + target_stats = stats_to_table( + test_stats=test_stats[target_name], + train_stats=train_stats[target_name] if train_stats is not None else None, + names=names, + ) + + distribution_df = pd.concat( + [ + pd.DataFrame({target_name: y_test_series}).assign(data_train_test="test"), + pd.DataFrame({target_name: 
y_train_series}).assign(data_train_test="train") + if y_train_series is not None + else pd.DataFrame(columns=[target_name, "data_train_test"]), + ], + ignore_index=True, + ) + fig = plot_distribution( + df_all=distribution_df, + col=target_name, + hue="data_train_test", + colors_dict=self._feature_distribution_colors(), + width=width, + height=height, + ) + + dtype_label = str(series_dtype(y_test_series)) + target_header = ( + '
' + f'

{target_name} ' + f'({dtype_label})

' + '
' + ) + panel_html = ( + '
' + '
' + f'
{target_stats.to_html(classes="kv-table", border=0)}
' + f'
{self._plotly_html(fig)}
' + '
' + '
' + ) + + return self._wrap_section_content(title, f'{target_header}{panel_html}') + def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: """Return the HTML for a classification confusion matrix. @@ -409,6 +556,120 @@ def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: fig = plot_confusion_matrix(y_true=self.y_test, y_pred=self.y_pred, colors_dict=explainer.colors_dict) return self._wrap_section_content(title or "Confusion matrix", self._plotly_html(fig)) + def block_univariate_analysis( + self, + title: str = "Univariate analysis", + show_train: bool = True, + group_id: str = "univariate", + ) -> str: + """Return the HTML for a univariate analysis of all features. + + For each feature, renders a distribution plot and a summary statistics + table. When training data is available and show_train is True, statistics + are shown for both prediction and training datasets side by side. + + Parameters + ---------- + title : str + Section title displayed above the analysis. + show_train : bool + Whether to include training data alongside prediction data. + group_id : str + HTML identifier prefix used to namespace the dropdown and feature panels. 
+ """ + from shapash.report.common import compute_col_types, series_dtype + from shapash.report.data_analysis import perform_univariate_dataframe_analysis + + self._require_train_test_data("univariate_analysis") + explainer = self._require_explainer("univariate_analysis") + + df = self.df_train_test + col_splitter = "data_train_test" + names = ["Prediction dataset", "Training dataset"] + + col_types = compute_col_types(df) + n_splits = df[col_splitter].nunique() + + test_stats = perform_univariate_dataframe_analysis( + df.loc[df[col_splitter] == "test"], col_types=col_types + ) + train_stats = ( + perform_univariate_dataframe_analysis( + df.loc[df[col_splitter] == "train"], col_types=col_types + ) + if n_splits > 1 and show_train + else None + ) + + list_cols_labels = sorted( + explainer.features_dict.get(col, col) + for col in df.drop(col_splitter, axis=1).columns + ) + + feature_panels = [] + feature_options = [] + instance_id = uuid4().hex[:8] + selector_id = f"{group_id}-selector-{instance_id}" + + for idx, col_label in enumerate(list_cols_labels): + col = explainer.inv_features_dict.get(col_label, col_label) + if col not in test_stats: + continue + + fig = plot_distribution( + df_all=df, + col=col, + hue=col_splitter, + colors_dict=self._feature_distribution_colors(), + ) + col_stats = stats_to_table( + test_stats=test_stats[col], + train_stats=train_stats[col] if train_stats is not None else None, + names=names, + ) + + feature_id = f"{group_id}-feature-{idx}-{instance_id}" + dtype_label = str(series_dtype(df[col])) + + feature_options.append( + f'' + ) + feature_panels.append( + f'' + ) + + if not feature_panels: + return self._wrap_section_content(title, '

No feature available.

') + + controls_html = ( + '
' + f'' + f'' + '
' + ) + script_html = ( + '' + ) + + return self._wrap_section_content(title, f'{controls_html}{"".join(feature_panels)}{script_html}') + def _preprocess_train_data(self, x_train: pd.DataFrame | None) -> pd.DataFrame | None: if x_train is None or self.explainer is None: return x_train diff --git a/shapash/report/smart_report/layout.py b/shapash/report/smart_report/layout.py index 16e258b6..ec19881d 100644 --- a/shapash/report/smart_report/layout.py +++ b/shapash/report/smart_report/layout.py @@ -6,9 +6,15 @@ import re from pathlib import Path -from shapash.report.smart_report.assets import REPORT_SCRIPT, REPORT_STYLES from shapash.report.smart_report.panel_support import panel_resource_tags +_ASSETS_DIR = Path(__file__).resolve().parent +_STYLE_FILE = _ASSETS_DIR / "report_styles.css" +_SCRIPT_FILE = _ASSETS_DIR / "report_script.js" + +REPORT_STYLES = _STYLE_FILE.read_text(encoding="utf-8") +REPORT_SCRIPT = f"" + def resolve_logo_src(base_dir: Path | None) -> str: """Resolve the relative path to the bundled Shapash logo.""" @@ -49,29 +55,42 @@ def render_block_section(report, block_cfg: dict) -> tuple[str, str | None]: return block_html, None -def render_group_section(report, block_cfg: dict) -> tuple[list[str], str | None]: - """Render a grouped section with a parent nav item and nested children.""" - rendered_children = [] - child_nav_links = [] +def render_section_tree(report, block_cfg: dict, depth: int = 0) -> tuple[list[str], list[str]]: + """Render one block tree (block or nested group) and its navigation entries.""" + if block_cfg.get("type") != "group": + block_html, html_id = render_block_section(report, block_cfg) + nav_links: list[str] = [] + if html_id: + extra_class = f"nav-child nav-level-{depth}" if depth > 0 else "" + nav_links.append(build_nav_link(block_title(block_cfg), html_id, extra_class=extra_class)) + return [block_html], nav_links + rendered_blocks: list[str] = [] + child_nav_links: list[str] = [] for child_cfg in block_cfg.get("blocks", 
[]): - child_html, child_section_id = render_block_section(report, child_cfg) - rendered_children.append(child_html) - if child_section_id: - child_nav_links.append(build_nav_link(block_title(child_cfg), child_section_id, extra_class="nav-child")) + child_blocks, child_nav = render_section_tree(report, child_cfg, depth=depth + 1) + rendered_blocks.extend(child_blocks) + child_nav_links.extend(child_nav) group_title = block_title(block_cfg) if not group_title: - return rendered_children, None + return rendered_blocks, child_nav_links group_id = section_id(group_title) + if depth == 0: + title_class = "nav-group-title" + children_class = "nav-children" + else: + title_class = f"nav-child nav-group-title nav-level-{depth}" + children_class = f"nav-children nav-level-{depth + 1}" + nav_html = ( '" ) - return [wrap_section("", group_id), *rendered_children], nav_html + return [wrap_section("", group_id), *rendered_blocks], [nav_html] def render_sections(report, sections: list[dict]) -> tuple[list[str], str]: @@ -80,17 +99,9 @@ def render_sections(report, sections: list[dict]) -> tuple[list[str], str]: nav_links: list[str] = [] for block_cfg in sections: - if block_cfg.get("type") == "group": - group_blocks, group_nav = render_group_section(report, block_cfg) - rendered_blocks.extend(group_blocks) - if group_nav: - nav_links.append(group_nav) - continue - - block_html, html_id = render_block_section(report, block_cfg) - rendered_blocks.append(block_html) - if html_id: - nav_links.append(build_nav_link(block_title(block_cfg), html_id)) + block_tree, block_nav = render_section_tree(report, block_cfg) + rendered_blocks.extend(block_tree) + nav_links.extend(block_nav) return rendered_blocks, "\n".join(nav_links) diff --git a/shapash/report/smart_report/report_styles.css b/shapash/report/smart_report/report_styles.css index 70bcb895..3f1a3557 100644 --- a/shapash/report/smart_report/report_styles.css +++ b/shapash/report/smart_report/report_styles.css @@ -135,6 +135,30 @@ 
body, padding-left: 13px; } +.nav-level-2 { + padding-left: 28px; +} + +.nav-level-2.active { + padding-left: 25px; +} + +.nav-level-3 { + padding-left: 40px; +} + +.nav-level-3.active { + padding-left: 37px; +} + +.nav-level-4 { + padding-left: 52px; +} + +.nav-level-4.active { + padding-left: 49px; +} + /* Main content */ .container { margin-left: 240px; @@ -288,6 +312,57 @@ table.dataframe tbody tr:nth-child(even) td { min-width: 0; } +/* Univariate analysis picker */ +.univariate-picker { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 10px; + margin: 8px 0 18px; +} + +.univariate-picker label { + font-size: 13px; + font-weight: 600; + color: var(--text-main); +} + +.univariate-select { + min-width: 260px; + max-width: 100%; + padding: 8px 10px; + border: 1px solid #d8d8d8; + border-radius: 8px; + background: #fff; + color: var(--text-main); + font-size: 13px; +} + +.univariate-feature-panel { + margin-top: 4px; +} + +.analysis-side-by-side { + display: flex; + gap: 20px; + align-items: flex-start; + flex-wrap: wrap; +} + +.analysis-side-table { + flex: 1 1 360px; + min-width: 320px; +} + +.analysis-side-plot { + flex: 1 1 480px; + min-width: 320px; +} + +.analysis-side-table .kv-table { + margin-top: 0; +} + /* Plot wrappers */ .scroll-section { scroll-margin-top: 40px; @@ -313,6 +388,10 @@ table.dataframe tbody tr:nth-child(even) td { .model-analysis-table-col { flex: 0 1 calc(50% - 8px); } + + .analysis-side-by-side { + gap: 16px; + } } @media (max-width: 900px) { @@ -335,4 +414,10 @@ table.dataframe tbody tr:nth-child(even) td { flex: 1 1 100%; min-width: 0; } -} + + .analysis-side-table, + .analysis-side-plot { + flex: 1 1 100%; + min-width: 0; + } +} \ No newline at end of file diff --git a/tutorial/generate_report/demo.py b/tutorial/generate_report/demo.py deleted file mode 100644 index 8157fb3e..00000000 --- a/tutorial/generate_report/demo.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -from pathlib import Path - -import pandas as pd 
-from category_encoders import OrdinalEncoder -from sklearn.ensemble import RandomForestRegressor -from sklearn.model_selection import train_test_split - -HERE = Path(__file__).resolve().parent -REPO_ROOT = HERE.parents[1] - -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) - -from shapash import SmartExplainer -from shapash.data.data_loader import data_loading -from shapash.report.smart_report import ReportBase - -CONFIG_V1 = HERE / "report_config_v1.yml" -OUTPUT_V1 = HERE / "output" / "report_v1.html" -PROJECT_INFO_FILE = HERE / "utils" / "project_info.yml" - - -def build_house_prices_explainer() -> tuple[SmartExplainer, pd.DataFrame, pd.Series, pd.Series]: - """Build the same House Prices explainer used in report tutorials.""" - house_df, house_dict = data_loading("house_prices") - y_df = house_df["SalePrice"] - X_df = house_df[house_df.columns.difference(["SalePrice"])] - - categorical_features = list(X_df.select_dtypes(include=["object", "string", "category"]).columns) - encoder = OrdinalEncoder(cols=categorical_features, handle_unknown="ignore", return_df=True).fit(X_df) - X_encoded = encoder.transform(X_df) - - Xtrain, Xtest, ytrain, ytest = train_test_split(X_encoded, y_df, train_size=0.75, random_state=1) - regressor = RandomForestRegressor(n_estimators=50, random_state=1).fit(Xtrain, ytrain) - - y_pred = pd.DataFrame(regressor.predict(Xtest), columns=["pred"], index=Xtest.index) - - xpl = SmartExplainer(model=regressor, preprocessing=encoder, features_dict=house_dict) - xpl.compile(x=Xtest, y_pred=y_pred, y_target=ytest) - return xpl, Xtrain, ytrain, ytest - - -if __name__ == "__main__": - xpl, Xtrain, ytrain, ytest = build_house_prices_explainer() - - report = ReportBase( - explainer=xpl, - x_train=Xtrain, - y_train=ytrain, - y_test=ytest, - config={"project_info_file": str(PROJECT_INFO_FILE)}, - ) - report.generate_report(config_file=str(CONFIG_V1), output_file=str(OUTPUT_V1)) - print(f"Saved notebook-parity report: {OUTPUT_V1}") 
diff --git a/tutorial/generate_report/report_config_v1.yml b/tutorial/generate_report/report_config_v1.yml index 43479214..cd6b62e8 100644 --- a/tutorial/generate_report/report_config_v1.yml +++ b/tutorial/generate_report/report_config_v1.yml @@ -11,57 +11,94 @@ sections: - type: project_information params: - title: "Project information" + title: "General Information" color: gray project_info_file: "tutorial/generate_report/utils/project_info.yml" + section_name: "General information" - - type: model_analysis + - type: project_information params: - title: "Model information" - color: blue + title: "Dataset Information" + color: gray + project_info_file: "tutorial/generate_report/utils/project_info.yml" + section_name: "Dataset information" - - type: global_analysis + - type: project_information params: - title: "Dataset analysis" - color: blue + title: "Data Preparation" + color: gray + project_info_file: "tutorial/generate_report/utils/project_info.yml" + section_name: "Data Preparation" - - type: relationship_target + - type: project_information params: - title: "Relashionship with target variable" - feature: "OverallQual" - color: blue - max_y: 800000 + title: "Model Training" + color: gray + project_info_file: "tutorial/generate_report/utils/project_info.yml" + section_name: "Model training" - - type: correlations_plot + - type: model_analysis params: - title: "Relashionship between training variables" + title: "Model analysis" color: blue - max_features: 30 - - type: feature_importance + - type: group params: - title: "Model explainability" - color: gold + title: "Dataset analysis" + blocks: + - type: global_analysis + params: + title: "Global analysis" + color: blue - - type: performance_metrics - params: - title: "Model performance" - color: orange - metrics: - - path: "sklearn.metrics.mean_absolute_error" - name: "Mean absolute error" - - path: "sklearn.metrics.mean_squared_error" - name: "Mean squared error" + - type: univariate_analysis + params: + title: 
"Univariate analysis" + + - type: target_analysis + params: + title: "Target analysis" + show_train: true - - type: text + - type: correlations_plot + params: + title: "Multivariate analysis" + color: blue + max_features: 30 + + - type: group params: - body: "The graph below represents y_pred vs y_test :" - color: gray + title: "Model explainability" + blocks: + - type: feature_importance + params: + title: "Global feature importance plot" + color: gold - - type: pred_vs_true + - type: contribution_plot + params: + title: "Features contribution plots" + include_all_features: true + color: gold + + - type: group params: - title: "" - color: orange + title: "Model performance" + blocks: + - type: target_distribution + params: + title: "Univariate analysis of target variable" + color: orange + + - type: performance_metrics + params: + title: "Metrics" + color: orange + metrics: + - path: "sklearn.metrics.mean_absolute_error" + name: "Mean absolute error" + - path: "sklearn.metrics.mean_squared_error" + name: "Mean squared error" - type: callout params: From 1202bccb26baa260d1d4899bf02ef8ed58aed036 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Mon, 11 May 2026 15:39:08 +0200 Subject: [PATCH 18/43] ruff mod --- shapash/report/smart_report/blocks.py | 61 +++++++++---------- shapash/report/smart_report/report_styles.css | 2 +- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index f8887545..1f69633c 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -389,8 +389,8 @@ def block_contribution_plot( feature_panels.append( f'' + f"{self._plotly_html(fig)}" + "
" ) controls_html = ( @@ -400,18 +400,18 @@ def block_contribution_plot( '
' ) script_html = ( - '' + "})();" + "" ) resolved_title = title or "Features contribution plots" @@ -491,10 +491,14 @@ def block_target_analysis( analysis_source = pd.DataFrame({target_name: y_test_series}) if y_train_series is not None: - analysis_source = pd.concat([analysis_source, pd.DataFrame({target_name: y_train_series})], ignore_index=True) + analysis_source = pd.concat( + [analysis_source, pd.DataFrame({target_name: y_train_series})], ignore_index=True + ) col_types = compute_col_types(analysis_source) - test_stats = perform_univariate_dataframe_analysis(pd.DataFrame({target_name: y_test_series}), col_types=col_types) + test_stats = perform_univariate_dataframe_analysis( + pd.DataFrame({target_name: y_test_series}), col_types=col_types + ) train_stats = ( perform_univariate_dataframe_analysis(pd.DataFrame({target_name: y_train_series}), col_types=col_types) if y_train_series is not None @@ -529,9 +533,9 @@ def block_target_analysis( dtype_label = str(series_dtype(y_test_series)) target_header = ( '
' - f'

{target_name} ' + f"

{target_name} " f'({dtype_label})

' - '
' + "" ) panel_html = ( '
' @@ -542,7 +546,7 @@ def block_target_analysis( '
' ) - return self._wrap_section_content(title, f'{target_header}{panel_html}') + return self._wrap_section_content(title, f"{target_header}{panel_html}") def block_confusion_matrix(self, title: str = "", color: str = "orange") -> str: """Return the HTML for a classification confusion matrix. @@ -590,20 +594,15 @@ def block_univariate_analysis( col_types = compute_col_types(df) n_splits = df[col_splitter].nunique() - test_stats = perform_univariate_dataframe_analysis( - df.loc[df[col_splitter] == "test"], col_types=col_types - ) + test_stats = perform_univariate_dataframe_analysis(df.loc[df[col_splitter] == "test"], col_types=col_types) train_stats = ( - perform_univariate_dataframe_analysis( - df.loc[df[col_splitter] == "train"], col_types=col_types - ) + perform_univariate_dataframe_analysis(df.loc[df[col_splitter] == "train"], col_types=col_types) if n_splits > 1 and show_train else None ) list_cols_labels = sorted( - explainer.features_dict.get(col, col) - for col in df.drop(col_splitter, axis=1).columns + explainer.features_dict.get(col, col) for col in df.drop(col_splitter, axis=1).columns ) feature_panels = [] @@ -631,9 +630,7 @@ def block_univariate_analysis( feature_id = f"{group_id}-feature-{idx}-{instance_id}" dtype_label = str(series_dtype(df[col])) - feature_options.append( - f'' - ) + feature_options.append(f'') feature_panels.append( f'' ) script_html = ( - '' + "})();" + "" ) return self._wrap_section_content(title, f'{controls_html}{"".join(feature_panels)}{script_html}') diff --git a/shapash/report/smart_report/report_styles.css b/shapash/report/smart_report/report_styles.css index 3f1a3557..f65dc20d 100644 --- a/shapash/report/smart_report/report_styles.css +++ b/shapash/report/smart_report/report_styles.css @@ -420,4 +420,4 @@ table.dataframe tbody tr:nth-child(even) td { flex: 1 1 100%; min-width: 0; } -} \ No newline at end of file +} From 41dda074af5b3caa79724eeb5458f449ea42e161 Mon Sep 17 00:00:00 2001 From: Caetano Godinat Date: Wed, 13 May 
2026 14:11:44 +0200 Subject: [PATCH 19/43] stable version --- shapash/explainer/smart_explainer.py | 10 ++--- shapash/report/smart_report/blocks.py | 40 +++---------------- shapash/report/smart_report/panel_support.py | 10 +++-- shapash/report/smart_report/report_script.js | 20 ++++++++++ .../test_report_generation.py | 2 +- .../default_report.yml} | 28 ++----------- .../project_information.yml} | 2 +- .../generate_report/shapash_report_example.py | 12 +++--- .../tuto-shapash-report01.ipynb | 10 ++--- 9 files changed, 55 insertions(+), 79 deletions(-) rename tutorial/generate_report/{report_config_v1.yml => config/default_report.yml} (72%) rename tutorial/generate_report/{utils/project_info.yml => config/project_information.yml} (99%) diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 8a184365..d5798b66 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -1661,7 +1661,7 @@ def generate_report( title_description=None, metrics=None, working_dir=None, - notebook_path=None, + yaml_path=None, kernel_name=None, max_points=200, display_interaction_plot=False, @@ -1709,7 +1709,7 @@ def generate_report( working_dir : str, optional Directory used to temporarily store generated files (e.g., report config). If `None`, a temporary directory is automatically created and deleted after report generation. - notebook_path : str, optional + yaml_path : str, optional Path to a custom YAML configuration file used to generate the report. If `None`, a default YAML configuration is generated. kernel_name : str, optional @@ -1745,7 +1745,7 @@ def generate_report( ------- >>> xpl.generate_report( ... output_file="report.html", - ... project_info_file="utils/project_info.yml", + ... project_info_file="config/project_information.yml", ... x_train=x_train, ... y_train=y_train, ... 
y_test=y_test, @@ -1793,8 +1793,8 @@ def generate_report( config=config, ) - if notebook_path is not None: - config_file = Path(notebook_path) + if yaml_path is not None: + config_file = Path(yaml_path) else: config_file = Path(working_dir) / "report_config.yml" sections = [ diff --git a/shapash/report/smart_report/blocks.py b/shapash/report/smart_report/blocks.py index 1f69633c..bd2acb51 100644 --- a/shapash/report/smart_report/blocks.py +++ b/shapash/report/smart_report/blocks.py @@ -388,7 +388,7 @@ def block_contribution_plot( feature_options.append(f'') feature_panels.append( f'" ) @@ -396,26 +396,12 @@ def block_contribution_plot( controls_html = ( '
' f'' - f'' + f'' '
' ) - script_html = ( - "" - ) resolved_title = title or "Features contribution plots" - return self._wrap_section_content(resolved_title, f'{controls_html}{"".join(feature_panels)}{script_html}') + return self._wrap_section_content(resolved_title, f'{controls_html}{"".join(feature_panels)}') def block_interactions_plot( self, @@ -633,7 +619,7 @@ def block_univariate_analysis( feature_options.append(f'') feature_panels.append( f'