diff --git a/MANIFEST.in b/MANIFEST.in
index c6b2b0ad..6fc3e078 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,4 @@
recursive-include shapash/webapp/assets *
-recursive-include shapash/report/html *
-recursive-include shapash/report/template *
include LICENSE
include README.md
-include shapash/report/base_report.ipynb
diff --git a/README.md b/README.md
index 8e07854a..ab1750c1 100644
--- a/README.md
+++ b/README.md
@@ -198,8 +198,8 @@ app = xpl.run_app()
[Live Demo Shapash-Monitor](https://shapash-demo.ossbymaif.fr/)
- Step 4: Generate the Shapash Report
- > This step allows to generate a standalone html report of your project using the different splits
- of your dataset and also the metrics you used:
+ > This step generates a standalone HTML report from a block-based layout.
+ You can optionally provide a YAML file to customize report sections and blocks.
```python
xpl.generate_report(
@@ -208,6 +208,7 @@ xpl.generate_report(
x_train=xtrain,
y_train=ytrain,
y_test=ytest,
+ yaml_path="path/to/report_config.yml", # Optional: custom block configuration
title_story="House prices report",
title_description="""This document is a data science report of the kaggle house prices tutorial project.
It was generated using the Shapash library.""",
diff --git a/docs/assets/images/logos/shapash-fond-clair.png b/docs/assets/images/logos/shapash-fond-clair.png
new file mode 100644
index 00000000..6300ec34
Binary files /dev/null and b/docs/assets/images/logos/shapash-fond-clair.png differ
diff --git a/docs/overview.rst b/docs/overview.rst
index 64fc9755..5fe44a0f 100644
--- a/docs/overview.rst
+++ b/docs/overview.rst
@@ -90,8 +90,8 @@ The 4 steps to display results:
app = xpl.run_app()
- Step 4: Generate the Shapash Report
- > This step allows to generate a standalone html report of your project using the different splits
- of your dataset and also the metrics you used:
+ > This step generates a standalone HTML report from a configurable block-based layout.
+ > You can provide a YAML configuration file to customize sections and blocks.
.. code:: ipython
@@ -101,6 +101,7 @@ The 4 steps to display results:
x_train=Xtrain,
y_train=ytrain,
y_test=ytest,
+ yaml_path='path/to/report_config.yml', # Optional: custom block configuration
title_story="House prices report",
title_description="""This document is a data science report of the kaggle house prices tutorial project.
It was generated using the Shapash library.""",
diff --git a/pyproject.toml b/pyproject.toml
index 213e0d57..5add615f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
"numba>=0.60.0",
"numpy>=2.0.0,<2.6.0",
"pandas>=2.2.2,<4.0.0",
+ "panel>=1.8.10",
"plotly>=5.0.0,<6.0.0",
"scikit-learn>=1.4.2,<1.9.0",
"scipy>=1.13.0",
@@ -147,8 +148,6 @@ exclude = ["tests/*", "*.ipynb"]
"shapash/plots/plot_scatter_prediction.py" = ["PLW0127", "PLW3301"]
"shapash/report/__init__.py" = ["B904"]
"shapash/report/plots.py" = ["A002"]
-"shapash/report/visualisation.py" = ["UP031"]
-"shapash/report/project_report.py" = ["S101", "S701"]
"shapash/utils/columntransformer_backend.py" = ["PLW0127"]
"shapash/utils/explanation_metrics.py" = ["S101"]
"shapash/utils/io.py" = ["S301"]
diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py
index 3132bfc4..3472584f 100644
--- a/shapash/explainer/smart_explainer.py
+++ b/shapash/explainer/smart_explainer.py
@@ -6,6 +6,7 @@
import logging
import shutil
import tempfile
+from pathlib import Path
import numpy as np
import pandas as pd
@@ -15,7 +16,6 @@
from shapash.backend.shap_backend import get_shap_interaction_values
from shapash.manipulation.select_lines import keep_right_contributions
from shapash.manipulation.summarize import create_grouped_features_values
-from shapash.report import check_report_requirements
from shapash.style.style_utils import colors_loading, select_palette
from shapash.utils.check import (
check_additional_data,
@@ -1660,19 +1660,21 @@ def generate_report(
title_description=None,
metrics=None,
working_dir=None,
- notebook_path=None,
- kernel_name=None,
+ yaml_path=None,
max_points=200,
display_interaction_plot=False,
nb_top_interactions=5,
+ block_instance=None,
):
"""
Generate an interactive HTML report summarizing the model and its explainability.
This method produces a comprehensive HTML report containing visual and textual
- insights about the project, dataset, and model performance.
- It leverages a predefined or custom Jupyter notebook template to analyze
- the model, generate plots, compute metrics, and export the final report.
+ insights about the project, dataset, and model performance using the
+ smart_report block-based HTML renderer.
+
+ The report layout is read from a YAML configuration file. If no YAML file is
+ specified, the default configuration packaged in `shapash/report/default_report.yml` is used.
A project information YAML file is required to describe key project details
(e.g., model name, author, date, context).
@@ -1704,14 +1706,11 @@ def generate_report(
Example:
`metrics=[{'name': 'F1 score', 'path': 'sklearn.metrics.f1_score'}]`
working_dir : str, optional
- Directory used to temporarily store generated files (e.g., notebook, outputs).
+ Directory used to temporarily store generated files (e.g., report config).
If `None`, a temporary directory is automatically created and deleted after report generation.
- notebook_path : str, optional
- Path to a custom notebook used as a template for generating the report.
- If `None`, the default Shapash report notebook is used.
- kernel_name : str, optional
- Name of the Jupyter kernel to use for report execution.
- Useful when multiple kernels are available and the default one is incorrect.
+ yaml_path : str, optional
+ Path to a custom YAML configuration file used to generate the report.
+ If `None`, the default configuration packaged in `shapash/report/default_report.yml` is used.
max_points : int, optional, default=200
Maximum number of points displayed in contribution plots.
display_interaction_plot : bool, optional, default=False
@@ -1719,6 +1718,9 @@ def generate_report(
(Note: this can increase computation time.)
nb_top_interactions : int, optional, default=5
Number of top feature interactions to include in the report.
+ block_instance : object, optional
+ Optional custom block object used to resolve block methods during report generation.
+ It should implement methods whose names start with `block_`, one for each block entry used in the YAML file.
Returns
-------
@@ -1734,7 +1736,7 @@ def generate_report(
Notes
-----
- - The method internally executes a notebook that generates the report content.
+ - The method renders the report from block definitions in a YAML configuration.
- Temporary files are automatically cleaned up unless a custom `working_dir` is provided.
- Interaction plots can be disabled to optimize runtime performance.
@@ -1742,7 +1744,7 @@ def generate_report(
-------
>>> xpl.generate_report(
... output_file="report.html",
- ... project_info_file="utils/project_info.yml",
+ ... project_info_file="config/project_information.yml",
... x_train=x_train,
... y_train=y_train,
... y_test=y_test,
@@ -1756,11 +1758,11 @@ def generate_report(
... nb_top_interactions=5,
... )
"""
- check_report_requirements()
+ from shapash.report.blocks import ReportBlockMixin
+ from shapash.report.core import generate_report as generate_smart_report
+
if x_train is not None:
x_train = handle_categorical_missing(x_train)
- # Avoid Import Errors with requirements specific to the Shapash Report
- from shapash.report.generation import execute_report, export_and_save_report
rm_working_dir = False
if not working_dir:
@@ -1774,29 +1776,38 @@ def generate_report(
)
try:
- execute_report(
- working_dir=working_dir,
- explainer=self,
- project_info_file=project_info_file,
- x_train=x_train,
- y_train=y_train,
- y_test=y_test,
- config={
- k: v
- for k, v in dict(
- title_story=title_story,
- title_description=title_description,
- metrics=metrics,
- max_points=max_points,
- display_interaction_plot=display_interaction_plot,
- nb_top_interactions=nb_top_interactions,
- ).items()
- if v is not None
- },
- notebook_path=notebook_path,
- kernel_name=kernel_name,
- )
- export_and_save_report(working_dir=working_dir, output_file=output_file)
+ config = {
+ "max_points": max_points,
+ "display_interaction_plot": display_interaction_plot,
+ "nb_top_interactions": nb_top_interactions,
+ }
+
+ if block_instance is None:
+ report_runtime = ReportBlockMixin(
+ explainer=self,
+ x_train=x_train,
+ y_train=y_train,
+ y_test=y_test,
+ config=config,
+ )
+ else:
+ report_runtime = block_instance
+ ReportBlockMixin.__init__(
+ report_runtime,
+ explainer=self,
+ x_train=x_train,
+ y_train=y_train,
+ y_test=y_test,
+ config=config,
+ )
+
+ if yaml_path is not None:
+ config_file = Path(yaml_path)
+ else:
+ yaml_path = Path(__file__).resolve().parent.parent / "report" / "default_report.yml"
+ config_file = yaml_path
+
+ generate_smart_report(runtime=report_runtime, config_file=str(config_file), output_file=output_file)
if rm_working_dir:
shutil.rmtree(working_dir)
diff --git a/shapash/explainer/smart_predictor.py b/shapash/explainer/smart_predictor.py
index d854df28..089cf1ab 100644
--- a/shapash/explainer/smart_predictor.py
+++ b/shapash/explainer/smart_predictor.py
@@ -344,6 +344,40 @@ def check_dataset_features(self, x):
x = x[features_order]
assert all(column in self.features_types.keys() for column in x.columns)
+ for feature in x.columns:
+ expected_dtype = self.features_types[feature]
+ if str(x[feature].dtypes) == expected_dtype:
+ continue
+
+ try:
+ if expected_dtype.startswith("int") or expected_dtype.startswith("uint"):
+ if not pd.api.types.is_integer_dtype(x[feature].dtypes):
+ raise ValueError
+ x[feature] = x[feature].astype(expected_dtype)
+ elif expected_dtype.startswith("float"):
+ if not pd.api.types.is_float_dtype(x[feature].dtypes):
+ raise ValueError
+ x[feature] = x[feature].astype(expected_dtype)
+ elif expected_dtype == "bool":
+ if not pd.api.types.is_bool_dtype(x[feature].dtypes):
+ raise ValueError
+ x[feature] = x[feature].astype(expected_dtype)
+ elif expected_dtype in ["object", "string", "str"]:
+ if not (
+ pd.api.types.is_object_dtype(x[feature].dtypes)
+ or pd.api.types.is_string_dtype(x[feature].dtypes)
+ ):
+ raise ValueError
+ if expected_dtype != "str":
+ x[feature] = x[feature].astype(expected_dtype)
+ except Exception:
+ raise ValueError(
+ """
+ Types of features in x doesn't match with the expected one in features_types.
+ x input must be initial dataset without preprocessing applied.
+ """
+ )
+
if not all([str(x[feature].dtypes) == self.features_types[feature] for feature in x.columns]):
raise ValueError(
"""
diff --git a/shapash/plots/plot_compacity.py b/shapash/plots/plot_compacity.py
index 7ad34f91..7cb869a4 100644
--- a/shapash/plots/plot_compacity.py
+++ b/shapash/plots/plot_compacity.py
@@ -1,3 +1,4 @@
+import numpy as np
from plotly import graph_objs as go
from plotly.offline import plot
from plotly.subplots import make_subplots
@@ -70,9 +71,13 @@ def plot_compacity(
fig.update_annotations(font=style_dict["dict_title_compacity"]["font"])
# First plot: number of features required for a given approximation
+ features_needed_plot = np.asarray(features_needed)
+ if np.issubdtype(features_needed_plot.dtype, np.integer):
+ features_needed_plot = features_needed_plot.astype(np.int64)
+
fig.add_trace(
go.Histogram(
- x=features_needed,
+ x=features_needed_plot,
histnorm="percent",
cumulative={"enabled": True},
name="",
diff --git a/shapash/plots/plot_evaluation_metrics.py b/shapash/plots/plot_evaluation_metrics.py
index e8b8aece..462a8d6e 100644
--- a/shapash/plots/plot_evaluation_metrics.py
+++ b/shapash/plots/plot_evaluation_metrics.py
@@ -241,9 +241,13 @@ def _prediction_classification_plot(
subtitle = f"Response: {label_value} "
# Plot distribution
+ violin_x = df_pred["target"].values.flatten()
+ if np.issubdtype(np.asarray(violin_x).dtype, np.integer):
+ violin_x = np.asarray(violin_x, dtype=np.int64)
+
fig.add_trace(
go.Violin(
- x=df_pred["target"].values.flatten(),
+ x=violin_x,
y=df_pred["proba_values"].values.flatten(),
points=False,
legendgroup="M",
@@ -405,6 +409,8 @@ def _prediction_regression_plot(y_target, y_pred, prediction_error, list_ind, st
y_target = y_target_tmp
y_target_values = y_target.values.flatten()
+ if np.issubdtype(np.asarray(y_target_values).dtype, np.integer):
+ y_target_values = np.asarray(y_target_values, dtype=np.int64)
y_pred = y_pred.loc[y_target.index]
prediction_error = np.array(prediction_error.loc[y_target.index])
diff --git a/shapash/report/base_report.ipynb b/shapash/report/base_report.ipynb
deleted file mode 100644
index fa72fd8c..00000000
--- a/shapash/report/base_report.ipynb
+++ /dev/null
@@ -1,184 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "filled-favorite",
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "coordinate-shower",
- "metadata": {
- "tags": [
- "parameters"
- ]
- },
- "outputs": [],
- "source": [
- "dir_path = \"\"\n",
- "project_info_file = \"\"\n",
- "config = dict()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "atlantic-fever",
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import warnings\n",
- "\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "from shapash import SmartExplainer\n",
- "from shapash.report.project_report import ProjectReport\n",
- "from shapash.report.common import load_saved_df\n",
- "\n",
- "xpl = SmartExplainer.load(os.path.join(dir_path, \"smart_explainer.pickle\"))\n",
- "\n",
- "x_train = load_saved_df(os.path.join(dir_path, \"x_train.csv\"))\n",
- "y_train = load_saved_df(os.path.join(dir_path, \"y_train.csv\"))\n",
- "y_test = load_saved_df(os.path.join(dir_path, \"y_test.csv\"))\n",
- "\n",
- "report = ProjectReport(\n",
- " explainer=xpl, project_info_file=project_info_file, x_train=x_train, y_train=y_train, y_test=y_test, config=config\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "altered-medicare",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_title_description()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "specified-vietnamese",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_project_information()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "steady-transfer",
- "metadata": {},
- "source": [
- "## Model analysis"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "serial-bulgaria",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_model_analysis()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "beginning-silicon",
- "metadata": {},
- "source": [
- "## Dataset analysis"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "planned-mayor",
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "report.display_dataset_analysis()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "attempted-bikini",
- "metadata": {},
- "source": [
- "## Model explainability"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "secondary-dividend",
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "report.display_model_explainability()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "australian-photograph",
- "metadata": {},
- "source": [
- "## Model performance"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "breeding-techno",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_model_performance()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "arbitrary-baker",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "celltoolbar": "Tags",
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/shapash/report/blocks.py b/shapash/report/blocks.py
new file mode 100644
index 00000000..78c0900e
--- /dev/null
+++ b/shapash/report/blocks.py
@@ -0,0 +1,1131 @@
+"""Block implementations and report data helpers for smart reports."""
+
+from __future__ import annotations
+
+import importlib
+import importlib.metadata
+import inspect
+from functools import wraps
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+import panel as pn
+import yaml
+
+from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix
+from shapash.plots.plot_univariate import plot_distribution
+from shapash.report.common import compute_col_types, series_dtype
+from shapash.report.data_analysis import perform_global_dataframe_analysis, perform_univariate_dataframe_analysis
+from shapash.report.panel_support import make_plotly_pane
+from shapash.report.validation import stats_to_table
+from shapash.utils.transform import apply_postprocessing, handle_categorical_missing, inverse_transform
+from shapash.utils.utils import compute_sorted_variables_interactions_list_indices
+
+PALETTE = {
+ "gold": {"bg": "#ffffff", "border": "#f4c000", "title": "#f4c000", "text": "#343736"},
+ "blue": {"bg": "#ffffff", "border": "#2255aa", "title": "#2255aa", "text": "#343736"},
+ "gray": {"bg": "#ffffff", "border": "#eeeeee", "title": "#666666", "text": "#666666"},
+ "orange": {"bg": "#fff9e6", "border": "#f4c000", "title": "#cc8833", "text": "#444444"},
+}
+
+TARGET_DISTRIBUTION_COLORS = {"pred": "#2255aa", "true": "#f4c000"}
+
+
+def _dedupe_css_classes(*class_groups: Any) -> list[str]:
+    """Flatten several CSS class groups into one duplicate-free list.
+
+    Each group may be a single class name (``str``) or an iterable of names.
+    Falsy groups and falsy names are ignored, and the first occurrence of a
+    name keeps its position in the result.
+    """
+    unique: list[str] = []
+    for group in class_groups:
+        if not group:
+            continue
+        # A bare string is one class name, not an iterable of characters.
+        names = [group] if isinstance(group, str) else list(group)
+        for name in names:
+            if not name or name in unique:
+                continue
+            unique.append(name)
+    return unique
+
+
+def _add_css_classes(viewable: pn.viewable.Viewable, *classes: str) -> pn.viewable.Viewable:
+    """Append CSS classes to a viewable without duplicating existing ones.
+
+    The viewable's current ``css_classes`` (if any) come first, followed by the
+    new ``classes``. The attribute is only assigned when the merged list is
+    non-empty. The same viewable object is returned.
+    """
+    combined = _dedupe_css_classes(getattr(viewable, "css_classes", None), classes)
+    if not combined:
+        return viewable
+    viewable.css_classes = combined
+    return viewable
+
+
+def _auto_style_viewable(viewable: Any, method_name: str | None = None) -> Any:
+    """Apply the report's default CSS classes to a Panel object, by type.
+
+    Parameters
+    ----------
+    viewable : Any
+        Panel object returned by a block method.
+    method_name : str or None, default=None
+        Name of the block method that produced ``viewable``. It selects the
+        special handling for ``block_badge_row`` and ``block_project_information``
+        and is included in the error message.
+
+    Returns
+    -------
+    Any
+        The same ``viewable`` object, styled in place where applicable.
+
+    Raises
+    ------
+    TypeError
+        If ``viewable`` is not one of the supported Panel types.
+    """
+    if isinstance(viewable, pn.pane.Markdown):
+        return _add_css_classes(viewable, "content-block")
+
+    if isinstance(viewable, pn.pane.DataFrame):
+        classes = ["kv-table"]
+        # Tables sized to their content get an extra class.
+        if getattr(viewable, "width_policy", None) == "min":
+            classes.append("fit-content-table")
+        return _add_css_classes(viewable, *classes)
+
+    # Plots, select widgets and param-bound panes are passed through unchanged.
+    if isinstance(viewable, pn.pane.Plotly):
+        return viewable
+
+    if isinstance(viewable, pn.widgets.Select):
+        return viewable
+
+    # Looked up with getattr so a missing attribute on pn.param is tolerated.
+    param_function_type = getattr(pn.param, "ParamFunction", None)
+    if param_function_type is not None and isinstance(viewable, param_function_type):
+        return viewable
+
+    param_method_type = getattr(pn.param, "ParamMethod", None)
+    if param_method_type is not None and isinstance(viewable, param_method_type):
+        return viewable
+
+    if isinstance(viewable, pn.Row):
+        # Rows are only inspected for badge rows; other rows are returned as-is
+        # and their children are not visited.
+        if method_name == "block_badge_row":
+            for child in getattr(viewable, "objects", []):
+                if isinstance(child, pn.pane.Markdown):
+                    _add_css_classes(child, "badge-pill")
+        return viewable
+
+    if isinstance(viewable, pn.Column):
+        if method_name == "block_project_information":
+            # The outer column is the grid; each nested column is one card.
+            _add_css_classes(viewable, "project-info-grid")
+            for child in getattr(viewable, "objects", []):
+                if isinstance(child, pn.Column):
+                    _add_css_classes(child, "project-info-card")
+                    for grandchild in getattr(child, "objects", []):
+                        _auto_style_viewable(grandchild, method_name=method_name)
+                else:
+                    _auto_style_viewable(child, method_name=method_name)
+            return viewable
+
+        # Generic columns: style every child recursively.
+        for child in getattr(viewable, "objects", []):
+            _auto_style_viewable(child, method_name=method_name)
+        return viewable
+
+    method_info = f" in '{method_name}'" if method_name else ""
+    allowed_types = "Markdown, DataFrame, Plotly, Select, ParamFunction, ParamMethod, Row, Column"
+    raise TypeError(
+        f"Unsupported Panel object type returned{method_info}: {type(viewable).__name__}. "
+        f"Allowed Panel return types: {allowed_types}."
+    )
+
+
+def block(method):
+    """Decorate a block method so its output is wrapped in a report section.
+
+    The wrapped method may return a single item, a list of items, or a
+    ``(title, items)`` tuple. When a tuple of length two is returned, its first
+    element replaces the ``title`` argument resolved from the call. ``None``
+    items are dropped, every other item goes through ``self._coerce_viewable``
+    and ``_auto_style_viewable``, and the result is a ``pn.Column`` carrying the
+    ``section-block`` CSS class, preceded by a heading when the title is truthy.
+    """
+    method_signature = inspect.signature(method)
+
+    @wraps(method)
+    def wrapped(self, *args, **kwargs):
+        # Resolve the effective ``title`` argument (explicit or default) up front.
+        call_args = method_signature.bind_partial(self, *args, **kwargs)
+        call_args.apply_defaults()
+        heading = call_args.arguments.get("title", "")
+        payload = method(self, *args, **kwargs)
+
+        if isinstance(payload, tuple) and len(payload) == 2:
+            heading, payload = payload
+        if not isinstance(payload, list):
+            payload = [payload]
+
+        section: list[pn.viewable.Viewable] = []
+        if heading:
+            # Blocks rendered inside a group use a smaller heading level.
+            if getattr(self, "_inside_group", False):
+                level = "###"
+            else:
+                level = "#"
+            heading_pane = pn.pane.Markdown(f"{level} {heading}")
+            section.append(_add_css_classes(heading_pane, "section-title"))
+        for entry in payload:
+            if entry is None:
+                continue
+            section.append(_auto_style_viewable(self._coerce_viewable(entry), method_name=method.__name__))
+        return pn.Column(*section, css_classes=["section-block"], sizing_mode="stretch_width")
+
+    return wrapped
+
+
+class ReportBlockMixin:
+ """Base mixin providing built-in and user-extensible smart report blocks."""
+
+    def __init__(
+        self,
+        explainer=None,
+        x_train: pd.DataFrame | None = None,
+        y_train: pd.Series | pd.DataFrame | list | None = None,
+        y_test: pd.Series | pd.DataFrame | list | None = None,
+        config: dict | None = None,
+    ) -> None:
+        """Store the explainer and datasets used by the report blocks.
+
+        Parameters
+        ----------
+        explainer : object, optional
+            Explainer whose ``x_init``, ``y_pred``, ``model`` and ``x_encoded``
+            attributes are read here.
+        x_train : pd.DataFrame or None, default=None
+            Training features. Kept as given in ``x_train_init`` and passed
+            through ``self._preprocess_train_data`` for ``x_train_pre``.
+        y_train : pd.Series, pd.DataFrame, list or None, default=None
+            Training target.
+        y_test : pd.Series, pd.DataFrame, list or None, default=None
+            Test target.
+        config : dict or None, default=None
+            Report options; an empty dict is used when ``None``.
+        """
+        self.explainer = explainer
+        self.config = {} if config is None else config
+        self.x_train_init = x_train
+        self.x_train_pre = self._preprocess_train_data(x_train)
+        # x_init is None when no explainer is given or it lacks the attribute.
+        self.x_init = getattr(explainer, "x_init", None)
+        self.df_train_test = self._create_train_test_df(test=self.x_init, train=self.x_train_pre)
+        self.y_train, self.target_name_train = self._get_values_and_name(y_train, "target")
+        self.y_test, self.target_name_test = self._get_values_and_name(y_test, "target")
+        # Prefer the target name found on the training target, else the test one.
+        self.target_name = self.target_name_train if self.target_name_train is not None else self.target_name_test
+        self.max_points = self.config.get("max_points", 200)
+        # Read by the ``block`` decorator to pick the heading level.
+        self._inside_group = False
+
+        if explainer is not None:
+            if explainer.y_pred is not None:
+                self.y_pred, _ = self._get_values_and_name(explainer.y_pred, "prediction")
+            else:
+                # No stored predictions: compute them from the encoded features.
+                self.y_pred = explainer.model.predict(explainer.x_encoded)
+        else:
+            self.y_pred = None
+
+    def block_header(self, title: str = "Report", subtitle: str = "") -> pn.Column:
+        """Build the header shown at the top of the report.
+
+        Parameters
+        ----------
+        title : str, default="Report"
+            Text rendered as a first-level Markdown heading.
+        subtitle : str, default=""
+            Markdown text rendered as a callout under the heading. Skipped
+            when empty.
+
+        Returns
+        -------
+        pn.Column
+            Column holding the heading and, when given, the subtitle callout.
+
+        Examples
+        --------
+        >>> runtime.block_header(title="Model report", subtitle="Summary for Q2")
+        """
+        heading = pn.pane.Markdown(f"# {title}", css_classes=["main-header"])
+        if not subtitle:
+            return pn.Column(heading, sizing_mode="stretch_width")
+        callout = pn.pane.Markdown(subtitle, css_classes=["shapash-callout"])
+        return pn.Column(heading, callout, sizing_mode="stretch_width")
+
+    @block
+    def block_text(self, title: str = "", body: str = "") -> pn.Column:
+        """Build a free-text Markdown section.
+
+        Parameters
+        ----------
+        title : str, default=""
+            Section title; no heading is added when empty.
+        body : str, default=""
+            Markdown text of the section. Nothing is rendered when empty.
+
+        Returns
+        -------
+        pn.Column
+            Section column produced by the ``block`` decorator around the text.
+
+        Examples
+        --------
+        >>> runtime.block_text(title="Context", body="This report compares train and test data.")
+        """
+        return [pn.pane.Markdown(body)] if body else []
+
+    @block
+    def block_project_information(
+        self,
+        title: str = "Project information",
+        project_info_file: str = "",
+        section_name: str | None = None,
+    ) -> pn.Column:
+        """Display the content of a project information YAML file.
+
+        Each top-level key whose value is a mapping becomes one card made of a
+        heading and a two-column ``Key`` / ``Value`` table. Top-level entries
+        that are not mappings are skipped.
+
+        Parameters
+        ----------
+        title : str, default="Project information"
+            Section title displayed above the cards.
+        project_info_file : str, default=""
+            Path to the YAML file. ``~`` is expanded and relative paths are
+            resolved against the current working directory.
+        section_name : str or None, default=None
+            When given, only this top-level section is rendered.
+
+        Returns
+        -------
+        pn.Column
+            Section column containing the grid of cards, or a placeholder
+            message when no section can be rendered.
+
+        Raises
+        ------
+        ValueError
+            If ``project_info_file`` is empty or does not exist, if the YAML
+            root is not a mapping, or if ``section_name`` is not a key of it.
+
+        Examples
+        --------
+        >>> runtime.block_project_information(project_info_file="project_info.yaml")
+        """
+        if not project_info_file:
+            raise ValueError("project_information block requires the 'project_info_file' parameter.")
+
+        info_path = Path(project_info_file).expanduser()
+        if not info_path.is_absolute():
+            info_path = Path.cwd() / info_path
+        info_path = info_path.resolve()
+        if not info_path.exists():
+            raise ValueError(f"project_information file not found: {info_path}")
+
+        with info_path.open(encoding="utf-8") as handle:
+            loaded = yaml.safe_load(handle)
+        # An empty YAML document loads as None; treat it as "no sections".
+        sections = {} if loaded is None else loaded
+        if not isinstance(sections, dict):
+            raise ValueError("project_information YAML must define a top-level mapping.")
+
+        if section_name is not None:
+            if section_name not in sections:
+                raise ValueError(f"Unknown project_information section: {section_name}")
+            sections = {section_name: sections[section_name]}
+
+        cards: list[pn.viewable.Viewable] = [
+            pn.Column(
+                pn.pane.Markdown(f"### {name}"),
+                pn.pane.DataFrame(
+                    pd.DataFrame({"Key": list(entries), "Value": [str(item) for item in entries.values()]}),
+                    index=False,
+                    sizing_mode="stretch_width",
+                ),
+                sizing_mode="stretch_width",
+            )
+            for name, entries in sections.items()
+            if isinstance(entries, dict)
+        ]
+        if not cards:
+            cards = [pn.pane.Markdown("No project information available.")]
+
+        return [pn.Column(*cards, css_classes=["project-info-grid"], sizing_mode="stretch_width")]
+
+    @block
+    def block_badge_row(self, title: str = "", badges: list | None = None) -> pn.Column:
+        """Build a horizontal row of ``label: value`` badges.
+
+        Parameters
+        ----------
+        title : str, default=""
+            Section title; no heading is added when empty.
+        badges : list or None, default=None
+            Dictionaries read with the keys ``label``, ``value`` and ``color``.
+            A colour that is not a key of ``PALETTE`` falls back to ``"gray"``.
+
+        Returns
+        -------
+        pn.Column
+            Section column containing one row with a Markdown pane per badge.
+
+        Examples
+        --------
+        >>> runtime.block_badge_row(badges=[{"label": "AUC", "value": "0.89", "color": "blue"}])
+        """
+        entries = [] if badges is None else badges
+        pills: list[pn.viewable.Viewable] = []
+        for entry in entries:
+            requested = entry.get("color", "gray")
+            color_name = requested if requested in PALETTE else "gray"
+            text = f"**{entry.get('label', '')}**: {entry.get('value', '')}"
+            pills.append(pn.pane.Markdown(text, css_classes=[f"badge-pill-{color_name}"]))
+
+        badge_row = pn.Row(*pills, sizing_mode="stretch_width")
+        return [badge_row]
+
+    def block_callout(self, body: str = "") -> pn.Column:
+        """Build a highlighted message box.
+
+        Parameters
+        ----------
+        body : str, default=""
+            Markdown text shown inside the callout.
+
+        Returns
+        -------
+        pn.Column
+            Column holding a single Markdown pane with the ``shapash-callout``
+            CSS class.
+
+        Examples
+        --------
+        >>> runtime.block_callout(body="Use this report for decision support only.")
+        """
+        callout = pn.pane.Markdown(body, css_classes=["shapash-callout"])
+        return pn.Column(callout, sizing_mode="stretch_width")
+
+    @block
+    def block_global_analysis(self, title: str = "") -> pn.Column:
+        """Build a table comparing global statistics of both datasets.
+
+        Statistics are computed on ``self.x_init`` (labelled "Prediction
+        dataset") and, when available, on ``self.x_train_pre`` (labelled
+        "Training dataset").
+
+        Parameters
+        ----------
+        title : str, default=""
+            Section title; no heading is added when empty.
+
+        Returns
+        -------
+        pn.Column
+            Section column containing the statistics table.
+
+        Examples
+        --------
+        >>> runtime.block_global_analysis(title="Global dataset comparison")
+        """
+        self._require_train_test_data("global_analysis")
+        prediction_summary = perform_global_dataframe_analysis(self.x_init)
+        if self.x_train_pre is not None:
+            training_summary = perform_global_dataframe_analysis(self.x_train_pre)
+        else:
+            training_summary = None
+        comparison = stats_to_table(
+            test_stats=prediction_summary,
+            train_stats=training_summary,
+            names=["Prediction dataset", "Training dataset"],
+        )
+        table_pane = pn.pane.DataFrame(comparison, sizing_mode="stretch_width", css_classes=["kv-table"])
+        return [table_pane]
+
+    @block
+    def block_model_analysis(self, title: str = "Model information") -> pn.Column:
+        """Render model metadata and parameter tables.
+
+        The summary shows the model class name, the module it comes from and the
+        installed version of its top-level package. The model's instance
+        attributes (``model.__dict__``) are listed in two side-by-side tables,
+        with long keys and values truncated.
+
+        Parameters
+        ----------
+        title : str, default="Model information"
+            Section title displayed above model details.
+
+        Returns
+        -------
+        pn.Column
+            Panel column containing model identity details and parameter tables.
+
+        Examples
+        --------
+        >>> runtime.block_model_analysis()
+        """
+        explainer = self._require_explainer("model_analysis")
+        model = explainer.model
+
+        model_module = model.__class__.__module__
+        model_package = model_module.split(".")[0]
+        # The importable package "sklearn" is distributed as "scikit-learn".
+        package_name = "scikit-learn" if model_package == "sklearn" else model_package
+        try:
+            library_version = importlib.metadata.version(package_name)
+        except importlib.metadata.PackageNotFoundError:
+            library_version = f"not found for {model_package}"
+
+        model_params = getattr(model, "__dict__", {})
+        params_items = list(model_params.items())
+        # Split the attributes into two halves so they fit in two columns.
+        split_idx = len(params_items) // 2
+
+        def _truncate(value: Any, max_len: int) -> str:
+            text = str(value)
+            return text if len(text) <= max_len else text[: max_len - 3] + "..."
+
+        def _params_table(items: list) -> pd.DataFrame:
+            return pd.DataFrame(
+                {
+                    "Parameter key": [_truncate(key, 50) for key, _ in items],
+                    "Parameter value": [_truncate(val, 300) for _, val in items],
+                }
+            )
+
+        left_df = _params_table(params_items[:split_idx])
+        right_df = _params_table(params_items[split_idx:])
+
+        # Markdown folds a single newline into the same paragraph, so the
+        # fields would run together on one line. Two trailing spaces before
+        # the newline force a hard line break between them.
+        summary = "  \n".join(
+            [
+                f"**Model used**: {model.__class__.__name__}",
+                f"**Library**: {model_module}",
+                f"**Library version**: {library_version}",
+                "**Model parameters**",
+            ]
+        )
+
+        content: list[pn.viewable.Viewable] = [
+            pn.pane.Markdown(summary),
+            pn.Row(
+                pn.pane.DataFrame(left_df, sizing_mode="stretch_width"),
+                pn.Spacer(width=24),
+                pn.pane.DataFrame(right_df, sizing_mode="stretch_width"),
+                sizing_mode="stretch_width",
+            ),
+        ]
+
+        return content
+
+    def block_performance_metrics(
+        self,
+        title: str = "Model performance",
+        color: str = "orange",
+        metrics: list | None = None,
+    ) -> pn.Column:
+        """Compute and render selected evaluation metrics as badges.
+
+        Every metric function is imported from its dotted path and called as
+        ``metric(self.y_test, self.y_pred)``. The result is formatted with two
+        decimals and shown through ``block_badge_row``.
+
+        Parameters
+        ----------
+        title : str, default="Model performance"
+            Section title displayed above metric badges.
+        color : str, default="orange"
+            Badge color name used for rendered metric pills.
+        metrics : list or None, default=None
+            Dictionaries with a ``path`` key (dotted import path of the metric
+            function) and an optional ``name`` key (display label, defaults to
+            the path). Entries without a ``path`` are skipped.
+
+        Returns
+        -------
+        pn.Column
+            Panel column containing computed metric badges.
+
+        Raises
+        ------
+        ValueError
+            If ``y_test`` or ``y_pred`` is missing, or if a metric ``path`` is
+            not a dotted ``module.function`` import path.
+
+        Examples
+        --------
+        >>> runtime.block_performance_metrics(metrics=[{"path": "sklearn.metrics.accuracy_score"}])
+        """
+        if self.y_test is None or self.y_pred is None:
+            raise ValueError("performance_metrics block requires y_test and y_pred.")
+
+        metric_items = []
+        if metrics is None:
+            metrics = []
+        for metric_cfg in metrics:
+            metric_path = metric_cfg.get("path")
+            metric_name = metric_cfg.get("name", metric_path)
+            if not metric_path:
+                continue
+            # Without a dot there is no module part to import. Fail with a
+            # message naming the path rather than an opaque unpacking error.
+            if "." not in metric_path:
+                raise ValueError(
+                    f"Invalid metric path '{metric_path}': expected a dotted import path "
+                    "such as 'sklearn.metrics.accuracy_score'."
+                )
+            module_path, fn_name = metric_path.rsplit(".", 1)
+            metric_fn = getattr(importlib.import_module(module_path), fn_name)
+            value = metric_fn(self.y_test, self.y_pred)
+            metric_items.append({"label": metric_name, "value": f"{value:,.2f}", "color": color})
+
+        return self.block_badge_row(title=title, badges=metric_items)
+
+    @block
+    def block_feature_distribution(
+        self,
+        feature: str,
+        title: str | None = "",
+        dataset_split: str = "data_train_test",
+        width: int = 700,
+        height: int = 500,
+    ) -> pn.Column:
+        """Render feature distribution by dataset split.
+
+        Parameters
+        ----------
+        feature : str
+            Feature name to visualize. Must be a column of the train/test frame.
+        title : str or None, default=""
+            Custom section title. Passing ``None`` explicitly selects the
+            feature's display label; any string (including the default empty
+            string) is returned unchanged.
+        dataset_split : str, default="data_train_test"
+            Column used as hue to separate train/test distributions.
+        width : int, default=700
+            Plot width in pixels.
+        height : int, default=500
+            Plot height in pixels.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type. The method body itself returns a
+            ``(title, [plot_pane])`` pair; presumably the ``@block`` decorator
+            builds the final column from it -- confirm against the decorator.
+
+        Raises
+        ------
+        ValueError
+            If train/test data is unavailable or ``feature`` is not one of its columns.
+
+        Examples
+        --------
+        >>> runtime.block_feature_distribution(feature="age")
+        """
+        self._require_train_test_data("feature_distribution")
+        if feature not in self.df_train_test.columns:
+            raise ValueError(f"Unknown feature '{feature}' for feature_distribution block.")
+
+        fig = plot_distribution(
+            df_all=self.df_train_test,
+            col=feature,
+            hue=dataset_split,
+            colors_dict=self._feature_distribution_colors(),
+            width=width,
+            height=height,
+        )
+        resolved_title = self._feature_label(feature) if title is None else title
+        return resolved_title, [self._plotly_pane(fig)]
+
+    @block
+    def block_correlations_plot(
+        self,
+        title: str = "",
+        max_features: int = 20,
+        width: int | None = None,
+        height: int = 500,
+    ) -> pn.Column:
+        """Render the feature correlation matrix, faceted by dataset split.
+
+        Parameters
+        ----------
+        title : str, default=""
+            Optional section title. Not read inside the method body.
+        max_features : int, default=20
+            Maximum number of features included in the matrix.
+        width : int or None, default=None
+            Explicit plot width. When None, 900 is used if the train/test frame
+            holds more than one split value, otherwise 500.
+        height : int, default=500
+            Plot height in pixels.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type; the body returns a one-element list holding
+            the Plotly pane (presumably wrapped by ``@block`` -- confirm).
+
+        Examples
+        --------
+        >>> runtime.block_correlations_plot(max_features=15)
+        """
+        self._require_train_test_data("correlations_plot")
+        explainer = self._require_explainer("correlations_plot")
+        if width is not None:
+            resolved_width = width
+        else:
+            split_values = self.df_train_test["data_train_test"].unique()
+            resolved_width = 900 if len(split_values) > 1 else 500
+        fig = explainer.plot.correlations_plot(
+            self.df_train_test,
+            optimized=True,
+            facet_col="data_train_test",
+            max_features=max_features,
+            width=resolved_width,
+            height=height,
+        )
+        return [self._plotly_pane(fig)]
+
+    @block
+    def block_feature_importance(self, title: str = "", label=None) -> pn.Column:
+        """Render the global feature-importance figure.
+
+        Parameters
+        ----------
+        title : str, default=""
+            Optional section title. Not read inside the method body.
+        label : Any, default=None
+            Optional class/target label forwarded to the explainer's
+            ``features_importance`` plot.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type; the body returns a one-element list holding
+            the Plotly pane (presumably wrapped by ``@block`` -- confirm).
+
+        Examples
+        --------
+        >>> runtime.block_feature_importance()
+        """
+        plotter = self._require_explainer("feature_importance").plot
+        importance_fig = plotter.features_importance(label=label)
+        return [self._plotly_pane(importance_fig)]
+
+    @block
+    def block_contribution_plot(
+        self,
+        feature: str | None = None,
+        title: str | None = "",
+        label=None,
+        max_points: int | None = None,
+        include_all_features: bool = False,
+    ) -> pn.Column:
+        """Render feature contribution plots.
+
+        Parameters
+        ----------
+        feature : str or None, default=None
+            Feature name for single-feature mode. Required unless
+            ``include_all_features`` is True.
+        title : str or None, default=""
+            Section title. Passing ``None`` explicitly selects a default title
+            (the feature label in single-feature mode, "Features contribution
+            plots" otherwise); any string is returned unchanged.
+        label : Any, default=None
+            Optional class/target label.
+        max_points : int or None, default=None
+            Maximum number of points used by the plotting backend. Falls back to
+            the runtime's ``max_points`` when None.
+        include_all_features : bool, default=False
+            If True, create an interactive selector over contribution plots for
+            every column of ``explainer.x_init``.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type. The body returns ``(title, content)`` or, when
+            no feature is available, a bare content list; presumably the
+            ``@block`` decorator builds the final column -- confirm.
+
+        Raises
+        ------
+        ValueError
+            If no explainer is attached, if ``feature`` is missing in
+            single-feature mode, or if ``explainer.x_init`` is missing in
+            all-features mode.
+
+        Examples
+        --------
+        >>> runtime.block_contribution_plot(feature="age")
+        >>> runtime.block_contribution_plot(include_all_features=True)
+        """
+        explainer = self._require_explainer("contribution_plot")
+
+        def _contribution_figure(feature_name):
+            # Single place for the max_points fallback and the grey bar styling,
+            # shared by the single-feature and the all-features modes.
+            effective_max_points = self.max_points if max_points is None else max_points
+            fig = explainer.plot.contribution_plot(feature_name, label=label, max_points=effective_max_points)
+            for trace in fig.data:
+                if trace.type == "bar":
+                    trace.marker.color = "lightgrey"
+            return fig
+
+        if not include_all_features:
+            if feature is None:
+                raise ValueError("contribution_plot block requires 'feature' when include_all_features=False.")
+            fig = _contribution_figure(feature)
+            resolved_title = self._feature_label(feature) if title is None else title
+            return resolved_title, [self._plotly_pane(fig)]
+
+        if getattr(explainer, "x_init", None) is None:
+            raise ValueError("contribution_plot block with include_all_features=True requires explainer.x_init.")
+
+        feature_names = list(explainer.x_init.columns)
+        if not feature_names:
+            return [pn.pane.Markdown("No feature available.")]
+
+        # Sort by display label (case-insensitive), then by raw name for stability.
+        sorted_features = sorted(
+            feature_names,
+            key=lambda current_feature: (str(self._feature_label(current_feature)).lower(), str(current_feature)),
+        )
+
+        feature_panels: dict[str, pn.viewable.Viewable] = {}
+        for feature_name in sorted_features:
+            fig = _contribution_figure(feature_name)
+
+            # Two features may share a display label; suffix duplicates so every
+            # selector option stays unique.
+            base_label = str(self._feature_label(feature_name))
+            label_text = base_label
+            suffix = 2
+            while label_text in feature_panels:
+                label_text = f"{base_label} ({suffix})"
+                suffix += 1
+            feature_panels[label_text] = self._plotly_pane(fig)
+
+        feature_select = pn.widgets.Select(
+            name="Feature",
+            options=list(feature_panels.keys()),
+            value=next(iter(feature_panels)),
+            sizing_mode="stretch_width",
+        )
+        selected_panel = pn.panel(pn.bind(lambda selected: feature_panels[selected], feature_select))
+
+        resolved_title = "Features contribution plots" if title is None else title
+        return resolved_title, [feature_select, selected_panel]
+
+    @block
+    def block_interactions_plot(
+        self,
+        title: str | None = "",
+        col1: str | None = None,
+        col2: str | None = None,
+        max_points: int | None = None,
+    ) -> pn.Column:
+        """Render an interactions plot between two features.
+
+        Parameters
+        ----------
+        title : str or None, default=""
+            Section title. Passing ``None`` explicitly selects
+            "<label of first feature> / <label of second feature>"; any string is
+            returned unchanged.
+        col1 : str or None, default=None
+            First feature.
+        col2 : str or None, default=None
+            Second feature. Unless both ``col1`` and ``col2`` are given, a
+            default interaction pair is resolved instead.
+        max_points : int or None, default=None
+            Maximum number of points used by the plotting backend. Falls back to
+            the runtime's ``max_points`` when None.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type. The body returns a ``(title, [plot_pane])``
+            pair; presumably the ``@block`` decorator builds the final column
+            -- confirm against the decorator.
+
+        Examples
+        --------
+        >>> runtime.block_interactions_plot(col1="age", col2="income")
+        """
+        explainer = self._require_explainer("interactions_plot")
+        feature_one, feature_two = self._resolve_interaction_pair(col1, col2)
+        effective_max_points = self.max_points if max_points is None else max_points
+        fig = explainer.plot.interactions_plot(col1=feature_one, col2=feature_two, max_points=effective_max_points)
+        if title is None:
+            resolved_title = f"{self._feature_label(feature_one)} / {self._feature_label(feature_two)}"
+        else:
+            resolved_title = title
+        return resolved_title, [self._plotly_pane(fig)]
+
+    @block
+    def block_target_distribution(
+        self,
+        title: str | None = "",
+        width: int = 700,
+        height: int = 500,
+    ) -> pn.Column:
+        """Render prediction-versus-true target distribution.
+
+        Parameters
+        ----------
+        title : str or None, default=""
+            Section title. Passing ``None`` explicitly selects
+            "Target distribution"; any string is returned unchanged.
+        width : int, default=700
+            Plot width in pixels.
+        height : int, default=500
+            Plot height in pixels.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type. The body returns a ``(title, [plot_pane])``
+            pair; presumably the ``@block`` decorator builds the final column
+            -- confirm against the decorator.
+
+        Raises
+        ------
+        ValueError
+            If no explainer is attached, or if ``y_test`` or ``y_pred`` is missing.
+
+        Examples
+        --------
+        >>> runtime.block_target_distribution()
+        """
+        self._require_explainer("target_distribution")
+        if self.y_test is None or self.y_pred is None:
+            raise ValueError("target_distribution block requires y_test and predicted values from the explainer.")
+
+        target_name = "target" if self.target_name is None else self.target_name
+        # Stack predictions and ground truth in one frame, tagged by "_dataset",
+        # so both distributions are drawn on the same figure.
+        df_target = pd.concat(
+            [
+                pd.DataFrame({target_name: self.y_pred}).assign(_dataset="pred"),
+                pd.DataFrame({target_name: self.y_test}).assign(_dataset="true"),
+            ]
+        ).reset_index(drop=True)
+        fig = plot_distribution(
+            df_all=df_target,
+            col=target_name,
+            hue="_dataset",
+            colors_dict=TARGET_DISTRIBUTION_COLORS,
+            width=width,
+            height=height,
+        )
+        resolved_title = "Target distribution" if title is None else title
+        return resolved_title, [self._plotly_pane(fig)]
+
+ @block
+ def block_target_analysis(
+ self,
+ title: str = "Target analysis",
+ show_train: bool = True,
+ width: int = 700,
+ height: int = 500,
+ ) -> pn.Column:
+ """Render target statistics and target distribution analysis.
+
+ Parameters
+ ----------
+ title : str, default="Target analysis"
+ Section title displayed above target analysis elements.
+ Not read inside the method body.
+ show_train : bool, default=True
+ Whether training target information is included.
+ Only has an effect when ``y_train`` is available.
+ width : int, default=700
+ Plot width in pixels.
+ height : int, default=500
+ Plot height in pixels.
+
+ Returns
+ -------
+ pn.Column
+ Panel column combining a target statistics table and distribution plot.
+ The body itself returns a list of Panel objects; presumably the
+ ``@block`` decorator wraps it -- confirm against the decorator.
+
+ Raises
+ ------
+ ValueError
+ If ``y_test`` is missing.
+
+ Examples
+ --------
+ >>> runtime.block_target_analysis(show_train=False)
+ """
+ if self.y_test is None:
+ raise ValueError("target_analysis block requires y_test.")
+
+ if self.target_name is None:
+ target_name = "target"
+ else:
+ target_name = self.target_name
+ y_test_series = pd.Series(self.y_test, name=target_name)
+ y_train_series = pd.Series(self.y_train, name=target_name) if self.y_train is not None and show_train else None
+
+ # Column types are computed once on test (and optional train) targets
+ # together, then reused for both per-split statistics below.
+ analysis_source = pd.DataFrame({target_name: y_test_series})
+ if y_train_series is not None:
+ analysis_source = pd.concat(
+ [analysis_source, pd.DataFrame({target_name: y_train_series})], ignore_index=True
+ )
+
+ col_types = compute_col_types(analysis_source)
+ test_stats = perform_univariate_dataframe_analysis(
+ pd.DataFrame({target_name: y_test_series}), col_types=col_types
+ )
+ train_stats = (
+ perform_univariate_dataframe_analysis(pd.DataFrame({target_name: y_train_series}), col_types=col_types)
+ if y_train_series is not None
+ else None
+ )
+
+ names = ["Prediction dataset", "Training dataset"]
+ target_stats = stats_to_table(
+ test_stats=test_stats[target_name],
+ train_stats=train_stats[target_name] if train_stats is not None else None,
+ names=names,
+ )
+
+ # Frame for the plot: one row per target value, tagged "test" or "train".
+ distribution_frames = [pd.DataFrame({target_name: y_test_series}).assign(data_train_test="test")]
+ if y_train_series is not None:
+ distribution_frames.append(pd.DataFrame({target_name: y_train_series}).assign(data_train_test="train"))
+ distribution_df = pd.concat(distribution_frames, ignore_index=True)
+
+ fig = plot_distribution(
+ df_all=distribution_df,
+ col=target_name,
+ hue="data_train_test",
+ colors_dict=self._feature_distribution_colors(),
+ width=width,
+ height=height,
+ )
+ # Re-anchor the figure title at the bottom centre and trade top margin
+ # for bottom margin, keeping the other title/margin settings.
+ fig.update_layout(
+ title={
+ **fig.layout.title.to_plotly_json(),
+ "x": 0.5,
+ "xanchor": "center",
+ "y": 0.0,
+ "yanchor": "bottom",
+ },
+ margin={**fig.layout.margin.to_plotly_json(), "t": 10, "b": 100},
+ )
+
+ dtype_label = str(series_dtype(y_test_series))
+ content = [
+ pn.pane.Markdown(f"**{target_name}** ({dtype_label})"),
+ pn.Row(
+ pn.pane.DataFrame(target_stats, width_policy="min"),
+ self._plotly_pane(fig),
+ sizing_mode="stretch_width",
+ ),
+ ]
+ return content
+
+    @block
+    def block_confusion_matrix(self, title: str | None = "") -> pn.Column:
+        """Render confusion matrix for classification predictions.
+
+        Parameters
+        ----------
+        title : str or None, default=""
+            Section title. Passing ``None`` explicitly selects
+            "Confusion matrix"; any string is returned unchanged.
+
+        Returns
+        -------
+        pn.Column
+            Annotated return type. The body returns a ``(title, [plot_pane])``
+            pair; presumably the ``@block`` decorator builds the final column
+            -- confirm against the decorator.
+
+        Raises
+        ------
+        ValueError
+            If no explainer is attached, or if ``y_test`` or ``y_pred`` is missing.
+
+        Examples
+        --------
+        >>> runtime.block_confusion_matrix()
+        """
+        explainer = self._require_explainer("confusion_matrix")
+        if self.y_test is None or self.y_pred is None:
+            raise ValueError("confusion_matrix block requires y_test and predicted values from the explainer.")
+        fig = plot_confusion_matrix(y_true=self.y_test, y_pred=self.y_pred, colors_dict=explainer.colors_dict)
+        resolved_title = "Confusion matrix" if title is None else title
+        return resolved_title, [self._plotly_pane(fig)]
+
+ @block
+ def block_univariate_analysis(
+ self,
+ title: str = "Univariate analysis",
+ show_train: bool = True,
+ ) -> pn.Column:
+ """Render per-feature univariate analysis with interactive selection.
+
+ Parameters
+ ----------
+ title : str, default="Univariate analysis"
+ Section title displayed above selector and analysis panel.
+ Not read inside the method body.
+ show_train : bool, default=True
+ Whether train statistics are shown alongside prediction statistics.
+ Train statistics also require more than one split in the data.
+
+ Returns
+ -------
+ pn.Column
+ Panel column with a feature selector and corresponding stats/plot content.
+ The body itself returns a list of Panel objects; presumably the
+ ``@block`` decorator wraps it -- confirm against the decorator.
+
+ Examples
+ --------
+ >>> runtime.block_univariate_analysis()
+ """
+ self._require_train_test_data("univariate_analysis")
+ explainer = self._require_explainer("univariate_analysis")
+
+ df = self.df_train_test
+ col_splitter = "data_train_test"
+ names = ["Prediction dataset", "Training dataset"]
+
+ col_types = compute_col_types(df)
+ n_splits = df[col_splitter].nunique()
+
+ test_stats = perform_univariate_dataframe_analysis(df.loc[df[col_splitter] == "test"], col_types=col_types)
+ # Train statistics are only computed when a second split exists and the caller asked for them.
+ train_stats = (
+ perform_univariate_dataframe_analysis(df.loc[df[col_splitter] == "train"], col_types=col_types)
+ if n_splits > 1 and show_train
+ else None
+ )
+
+ # Display labels (falling back to the raw column name), sorted for the selector.
+ list_cols_labels = sorted(
+ explainer.features_dict.get(col, col) for col in df.drop(col_splitter, axis=1).columns
+ )
+ feature_panels: dict[str, pn.viewable.Viewable] = {}
+
+ for col_label in list_cols_labels:
+ # Map the display label back to the column name; skip columns without test statistics.
+ col = explainer.inv_features_dict.get(col_label, col_label)
+ if col not in test_stats:
+ continue
+
+ fig = plot_distribution(
+ df_all=df,
+ col=col,
+ hue=col_splitter,
+ colors_dict=self._feature_distribution_colors(),
+ )
+ # Re-anchor the figure title at the bottom centre and trade top margin for bottom margin.
+ fig.update_layout(
+ title={
+ **fig.layout.title.to_plotly_json(),
+ "x": 0.5,
+ "xanchor": "center",
+ "y": 0.0,
+ "yanchor": "bottom",
+ },
+ margin={**fig.layout.margin.to_plotly_json(), "t": 10, "b": 100},
+ )
+ col_stats = stats_to_table(
+ test_stats=test_stats[col],
+ train_stats=train_stats[col] if train_stats is not None else None,
+ names=names,
+ )
+ dtype_label = str(series_dtype(df[col]))
+ tab_body = pn.Column(
+ pn.pane.Markdown(f"**{col_label}** ({dtype_label})"),
+ pn.Row(
+ pn.pane.DataFrame(col_stats, width_policy="min"),
+ self._plotly_pane(fig),
+ sizing_mode="stretch_width",
+ ),
+ sizing_mode="stretch_width",
+ )
+
+ # Keep selector options unique: a repeated label gets a " (2)", " (3)", ... suffix.
+ base_label = str(col_label)
+ label_text = base_label
+ suffix = 2
+ while label_text in feature_panels:
+ label_text = f"{base_label} ({suffix})"
+ suffix += 1
+ feature_panels[label_text] = tab_body
+
+ if len(feature_panels) == 0:
+ return [pn.pane.Markdown("No feature available.")]
+
+ feature_select = pn.widgets.Select(
+ name="Feature",
+ options=list(feature_panels.keys()),
+ value=next(iter(feature_panels)),
+ sizing_mode="stretch_width",
+ )
+ # The displayed panel follows the selector value.
+ selected_panel = pn.panel(pn.bind(lambda selected: feature_panels[selected], feature_select))
+
+ return [feature_select, selected_panel]
+
+    def _preprocess_train_data(self, x_train: pd.DataFrame | None) -> pd.DataFrame | None:
+        """Bring training features back to their pre-encoding, post-processed form.
+
+        Returns ``x_train`` untouched when it is None or no explainer is attached.
+        Otherwise the explainer's preprocessing is inverted, missing categorical
+        values are handled, and the explainer's postprocessing (if any) is applied.
+        """
+        if x_train is None or self.explainer is None:
+            return x_train
+        restored = handle_categorical_missing(inverse_transform(x_train, self.explainer.preprocessing))
+        if not self.explainer.postprocessing:
+            return restored
+        return apply_postprocessing(restored, self.explainer.postprocessing)
+
+    @staticmethod
+    def _get_values_and_name(y: pd.DataFrame | pd.Series | list | None, default_name: str) -> tuple[object, str | None]:
+        """Split a target container into its raw values and its name.
+
+        Parameters
+        ----------
+        y : pd.DataFrame, pd.Series, list or None
+            Target values. A DataFrame must hold exactly one column.
+        default_name : str
+            Name returned for list inputs, which carry no name of their own.
+
+        Returns
+        -------
+        tuple
+            ``(values, name)``: ``(None, None)`` for None, the single column's
+            values and label for a DataFrame, ``y.values`` and ``y.name`` for a
+            Series, and the list itself with ``default_name`` for a list.
+
+        Raises
+        ------
+        ValueError
+            If a DataFrame does not have exactly one column, or if ``y`` has an
+            unsupported type.
+        """
+        if y is None:
+            return None, None
+        if isinstance(y, pd.DataFrame):
+            n_columns = len(y.columns)
+            if n_columns != 1:
+                # Covers empty frames too, which the previous wording
+                # ("greater than 1") misreported.
+                raise ValueError(f"Expected exactly one column, found {n_columns}")
+            return y.values[:, 0], y.columns[0]
+        if isinstance(y, pd.Series):
+            return y.values, y.name
+        if isinstance(y, list):
+            return y, default_name
+        raise ValueError(f"Cannot process following type : {type(y)}")
+
+    @staticmethod
+    def _create_train_test_df(test: pd.DataFrame | None, train: pd.DataFrame | None) -> pd.DataFrame | None:
+        """Stack test and train frames, tagging rows in a ``data_train_test`` column.
+
+        Test rows come first and are tagged "test"; train rows are tagged
+        "train". Returns None when both inputs are None. Raises ValueError when
+        either input already has a ``data_train_test`` column.
+        """
+        for frame in (test, train):
+            if frame is not None and "data_train_test" in frame.columns:
+                raise ValueError('"data_train_test" column must be renamed as it is reserved by smart report runtime')
+        tagged = []
+        if test is not None:
+            tagged.append(test.assign(data_train_test="test"))
+        if train is not None:
+            tagged.append(train.assign(data_train_test="train"))
+        if not tagged:
+            return None
+        return pd.concat(tagged).reset_index(drop=True)
+
+    def _require_explainer(self, block_type: str):
+        """Return the attached explainer, or raise ValueError naming ``block_type``."""
+        if self.explainer is not None:
+            return self.explainer
+        raise ValueError(f"{block_type} block requires an explainer on the report instance.")
+
+    def _require_train_test_data(self, block_type: str) -> None:
+        """Raise ValueError naming ``block_type`` when no train/test frame is available."""
+        if self.df_train_test is not None:
+            return
+        raise ValueError(f"{block_type} block requires x_train and explainer.x_init data on the report instance.")
+
+ def _resolve_interaction_pair(self, col1: str | None, col2: str | None) -> tuple[str, str]:
+ """Return the two features to plot in the interactions block.
+
+ When both ``col1`` and ``col2`` are truthy they are returned as given.
+ Otherwise (including when only one of them is set) both are ignored and
+ the first pair of the sorted interaction list is used, mapped to column
+ names through ``explainer.columns_dict``.
+
+ Raises ValueError when no explainer is attached or no pair is available.
+ """
+ if col1 and col2:
+ return col1, col2
+ explainer = self._require_explainer("interactions_plot")
+ # Sampling uses the runtime-level ``max_points``.
+ list_ind, _ = explainer.plot._select_indices_interactions_plot(selection=None, max_points=self.max_points)
+ interaction_values = explainer.get_interaction_values(selection=list_ind)
+ # NOTE(review): presumably sorted by decreasing interaction strength -- confirm in the helper.
+ sorted_indices = compute_sorted_variables_interactions_list_indices(interaction_values)
+ if not sorted_indices:
+ raise ValueError("No interaction pair available for interactions_plot block.")
+ first_idx, second_idx = sorted_indices[0]
+ return explainer.columns_dict[first_idx], explainer.columns_dict[second_idx]
+
+    def _feature_label(self, feature: str) -> str:
+        """Return the explainer's display label for ``feature``, or ``feature`` itself.
+
+        The raw name is returned when no explainer is attached or when the
+        explainer's ``features_dict`` has no entry for it.
+        """
+        if self.explainer is not None:
+            return self.explainer.features_dict.get(feature, feature)
+        return feature
+
+    def _feature_distribution_colors(self) -> dict:
+        """Return the explainer's ``report_feature_distribution`` color mapping.
+
+        Raises ValueError (via ``_require_explainer``) when no explainer is attached.
+        """
+        palette = self._require_explainer("feature_distribution").colors_dict
+        return palette["report_feature_distribution"]
+
+ @staticmethod
+ def _plotly_pane(fig) -> pn.pane.Plotly:
+ """Wrap a figure in a Plotly pane by delegating to ``make_plotly_pane``."""
+ return make_plotly_pane(fig)
+
+    @staticmethod
+    def _coerce_viewable(item: Any) -> pn.viewable.Viewable:
+        """Turn a block return item into a Panel viewable.
+
+        Strings become Markdown panes, Panel viewables pass through unchanged,
+        and anything else raises TypeError.
+        """
+        if isinstance(item, str):
+            return pn.pane.Markdown(item)
+        if not isinstance(item, pn.viewable.Viewable):
+            raise TypeError(
+                f"Unsupported block return type: {type(item).__name__}. "
+                "Blocks must return Panel objects (Markdown, DataFrame, Plotly, Select, Row, Column) or strings."
+            )
+        return item
diff --git a/shapash/report/core.py b/shapash/report/core.py
new file mode 100644
index 00000000..5ddec267
--- /dev/null
+++ b/shapash/report/core.py
@@ -0,0 +1,215 @@
+"""Smart report orchestration for block-based HTML reports."""
+
+from __future__ import annotations
+
+import base64
+import html
+import importlib
+import logging
+import re
+from pathlib import Path
+
+import panel as pn
+
+from shapash.report.panel_support import apply_report_css, report_js_text
+from shapash.report.validation import load_report_config, render_block_error
+
+logger = logging.getLogger(__name__)
+
+
+def generate_report(runtime, config_file: str, output_file: str) -> None:
+    """Render a Panel report to an HTML file driven by a YAML config.
+
+    Parameters
+    ----------
+    runtime
+        Report runtime exposing the ``block_*`` methods. A ``render_block``
+        attribute is set on it so blocks can render nested block configs.
+    config_file : str
+        Path to the YAML report configuration.
+    output_file : str
+        Path of the HTML file to write; missing parent directories are created.
+    """
+    cfg_path = Path(config_file).resolve()
+    cfg = load_report_config(cfg_path)
+    # Library code reports progress through the module logger, not stdout.
+    logger.info("Loading config → %s", cfg_path)
+
+    _assign_section_ids(cfg["sections"])
+
+    runtime.render_block = lambda block_cfg: render_block(runtime, block_cfg)
+    rendered_blocks = [render_block(runtime, block_cfg) for block_cfg in cfg["sections"]]
+    nav_bar = build_navigation_bar(cfg["sections"])
+
+    out_path = Path(output_file).resolve()
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    apply_report_css()
+    report_content = pn.Column(
+        *[block for block in rendered_blocks if block is not None],
+        css_classes=["report-content"],
+        sizing_mode="stretch_width",
+    )
+    report_layout = pn.Row(
+        pn.Column(nav_bar, css_classes=["report-sidebar"], width=300, sizing_mode="fixed"),
+        report_content,
+        css_classes=["main-report"],
+        sizing_mode="stretch_width",
+    )
+    # NOTE(review): the HTML literal below is empty in this copy of the patch; it
+    # presumably carried the report script markup (see ``report_js_text``) -- confirm.
+    report_layout.append(pn.pane.HTML(f"", sizing_mode="stretch_width"))
+    report_layout.save(str(out_path), embed=True, resources="cdn")
+    logger.info("Report saved → %s", output_file)
+
+
+def render_block(runtime, block_cfg: dict):
+    """Dispatch one YAML block entry to the matching ``block_*`` method.
+
+    ``group`` blocks render their children recursively (with
+    ``runtime._inside_group`` set for the duration) under an optional "##"
+    title. Other types call ``runtime.block_<type>(**params)``; ``custom``
+    falls back to ``_render_custom`` when the runtime has no such method.
+    Every rendered block, including custom blocks and error placeholders, is
+    wrapped with its section anchor so navigation links always have a target.
+
+    Returns
+    -------
+    pn.Column or None
+        The anchored block, or None for an unknown block type (a warning is logged).
+    """
+    block_type = block_cfg.get("type", "")
+    # A key present with a null value yields None; treat it like a missing key.
+    params = block_cfg.get("params") or {}
+    section_id = block_cfg.get("_section_id")
+
+    if block_type == "group":
+        previous_inside_group = getattr(runtime, "_inside_group", False)
+        runtime._inside_group = True
+        try:
+            children = [render_block(runtime, child_cfg) for child_cfg in block_cfg.get("blocks") or []]
+        finally:
+            # Restore the flag even if a child raised, so nesting state never leaks.
+            runtime._inside_group = previous_inside_group
+        children = [child for child in children if child is not None]
+        group_title = params.get("title", "")
+        if group_title:
+            children.insert(0, pn.pane.Markdown(f"## {group_title}", css_classes=["group-title"]))
+        return _wrap_section_anchor(pn.Column(*children, sizing_mode="stretch_width"), section_id)
+
+    method = getattr(runtime, f"block_{block_type}", None)
+    if method is None:
+        if block_type == "custom":
+            return _wrap_section_anchor(_render_custom(runtime, block_cfg), section_id)
+        logger.warning("Unknown block type '%s' — skipped.", block_type)
+        return None
+
+    try:
+        result = method(**params)
+        if isinstance(result, str):
+            result = pn.pane.Markdown(result)
+        elif not isinstance(result, pn.viewable.Viewable):
+            result = pn.panel(result)
+        return _wrap_section_anchor(result, section_id)
+    except Exception as exc:
+        # Best effort: one failing block must not abort the whole report, but
+        # the traceback is kept in the log for diagnosis.
+        logger.exception("Block '%s' raised: %s", block_type, exc)
+        return _wrap_section_anchor(render_block_error(block_type, exc), section_id)
+
+
+def _render_custom(runtime, block_cfg: dict):
+    """Call an arbitrary importable function named by the block's ``function`` key.
+
+    The function is called as ``fn(runtime, **params)``. Viewables are returned
+    as is, strings become Markdown panes and anything else goes through
+    ``pn.panel``. Any failure is logged and rendered as an error block.
+    """
+    func_path = block_cfg.get("function") or ""
+    # A key present with a null value yields None; treat it like a missing key.
+    params = block_cfg.get("params") or {}
+    try:
+        mod_path, separator, fn_name = func_path.rpartition(".")
+        if not separator or not mod_path or not fn_name:
+            raise ValueError(f"custom block requires 'function' as a dotted import path, got {func_path!r}.")
+        # NOTE: this imports and runs whatever callable the config names; only
+        # load report configurations from trusted sources.
+        fn = getattr(importlib.import_module(mod_path), fn_name)
+        result = fn(runtime, **params)
+        if isinstance(result, pn.viewable.Viewable):
+            return result
+        if isinstance(result, str):
+            return pn.pane.Markdown(result)
+        return pn.panel(result)
+    except Exception as exc:
+        # Best effort, with the traceback kept in the log for diagnosis.
+        logger.exception("Custom block '%s' raised: %s", func_path, exc)
+        return render_block_error(func_path, exc)
+
+
+def _slugify(text: str) -> str:
+    """Return a stable slug for navigation anchor IDs.
+
+    The text is lower-cased, every run of characters outside ``a-z0-9`` becomes
+    a single hyphen, and leading/trailing hyphens are dropped.
+    """
+    hyphenated = re.sub(r"[^a-z0-9]+", "-", text.lower())
+    return hyphenated.strip("-")
+
+
+def _block_label(block_cfg: dict) -> str:
+    """Resolve a human-readable block label for the navigation bar.
+
+    A non-blank string ``params.title`` wins (stripped). Otherwise the block
+    type is title-cased with underscores turned into spaces, and "Section" is
+    used when that yields an empty label.
+    """
+    params = block_cfg.get("params", {})
+    title = params.get("title") if isinstance(params, dict) else ""
+    if isinstance(title, str):
+        stripped_title = title.strip()
+        if stripped_title:
+            return stripped_title
+    type_label = str(block_cfg.get("type", "section")).replace("_", " ").strip().title()
+    return type_label or "Section"
+
+
+def _assign_section_ids(blocks: list[dict], used: set[str] | None = None, prefix: str = "section") -> None:
+    """Assign unique anchor IDs to all blocks (including group children).
+
+    Each block dict gets a ``_section_id`` key: the slug of its label, or
+    ``"<prefix>-<position>"`` when the slug is empty, with "-2", "-3", ...
+    appended until the ID is unused. ``used`` is shared across recursion so
+    IDs are unique over the whole tree.
+    """
+    used_ids = set() if used is None else used
+    for position, block in enumerate(blocks, start=1):
+        base = _slugify(_block_label(block)) or f"{prefix}-{position}"
+        candidate, counter = base, 2
+        while candidate in used_ids:
+            candidate = f"{base}-{counter}"
+            counter += 1
+        used_ids.add(candidate)
+        block["_section_id"] = candidate
+        if block.get("type") != "group":
+            continue
+        children = block.get("blocks", [])
+        if isinstance(children, list):
+            _assign_section_ids(children, used=used_ids, prefix=f"{candidate}-item")
+
+
+def _wrap_section_anchor(content: pn.viewable.Viewable, section_id: str | None) -> pn.Column:
+ """Wrap one rendered block with an in-page anchor target.
+
+ The result is always a ``scroll-section`` column; the anchor pane is only
+ prepended when ``section_id`` is truthy.
+ """
+ if not section_id:
+ return pn.Column(content, css_classes=["scroll-section"], sizing_mode="stretch_width")
+ # NOTE(review): the anchor markup literal below looks truncated in this copy
+ # of the patch (HTML apparently stripped) -- confirm against the real file.
+ anchor = pn.pane.HTML(f'
', sizing_mode="stretch_width")
+ return pn.Column(anchor, content, css_classes=["scroll-section"], sizing_mode="stretch_width")
+
+
+def build_navigation_bar(blocks: list[dict]) -> pn.pane.HTML:
+ """Build a sticky in-page navigation bar using Panel HTML pane.
+
+ Labels and section IDs are HTML-escaped before being interpolated. Group
+ blocks contribute one entry plus one entry per child block.
+
+ NOTE(review): several HTML string literals in this function appear
+ stripped/truncated in this copy of the patch -- confirm against the real file.
+ """
+ items_html: list[str] = []
+ # Counts every listed entry (groups, their children, plain blocks); drives nav_scale below.
+ item_count = 0
+ for block in blocks:
+ block_type = block.get("type")
+ label = html.escape(_block_label(block))
+ section_id = html.escape(str(block.get("_section_id", "")))
+ if block_type == "group":
+ item_count += 1
+ children_links: list[str] = []
+ for child in block.get("blocks", []):
+ child_label = html.escape(_block_label(child))
+ child_id = html.escape(str(child.get("_section_id", "")))
+ item_count += 1
+ children_links.append(f'{child_label} ')
+ items_html.append(
+ "".join(
+ [
+ '',
+ f'
{label} ',
+ '
',
+ *children_links,
+ "
",
+ "
",
+ ]
+ )
+ )
+ continue
+
+ item_count += 1
+ items_html.append(f'{label} ')
+
+ # The logo is read from the package's "style" directory and base64-encoded.
+ logo_path = Path(__file__).resolve().parent.parent / "style" / "shapash-fond-clair.png"
+ logo_data = base64.b64encode(logo_path.read_bytes()).decode("ascii")
+ logo_html = '' f'
' "
"
+
+ # Scale is 1.0 up to 24 entries, then shrinks as 24 / item_count, never below 0.62.
+ nav_scale = max(0.62, min(1.0, 24 / max(1, item_count)))
+ nav_html = "".join(
+ [
+ f'',
+ logo_html,
+ '',
+ 'You are here ',
+ 'Top of report ',
+ "
",
+ *items_html,
+ " ",
+ ]
+ )
+ return pn.pane.HTML(nav_html, sizing_mode="stretch_width")
diff --git a/shapash/report/default_report.yml b/shapash/report/default_report.yml
new file mode 100644
index 00000000..fb3854e2
--- /dev/null
+++ b/shapash/report/default_report.yml
@@ -0,0 +1,77 @@
+# default_report.yml
+# Default smart report configuration based on block sections.
+
+sections:
+ - type: header
+ params:
+ title: "House prices report"
+ subtitle: >
+ This document is a data science report of the kaggle house prices tutorial project.
+ It was generated using the Shapash library.
+
+ # NOTE(review): project_info_file is a relative path into the tutorial tree;
+ # presumably resolved against the working directory -- confirm it exists for
+ # installed packages, or override it in a custom config.
+ - type: project_information
+ params:
+ title: "Project information"
+ project_info_file: "tutorial/generate_report/config/project_information.yml"
+
+ - type: model_analysis
+ params:
+ title: "Model analysis"
+
+ - type: group
+ params:
+ title: "Dataset analysis"
+ blocks:
+ - type: global_analysis
+ params:
+ title: "Global analysis"
+
+ - type: univariate_analysis
+ params:
+ title: "Univariate analysis"
+
+ - type: target_analysis
+ params:
+ title: "Target analysis"
+ show_train: true
+
+ - type: correlations_plot
+ params:
+ title: "Multivariate analysis"
+ max_features: 20
+
+ - type: group
+ params:
+ title: "Model explainability"
+ blocks:
+ - type: feature_importance
+ params:
+ title: "Global feature importance plot"
+
+ - type: contribution_plot
+ params:
+ title: "Features contribution plots"
+ include_all_features: true
+
+ - type: group
+ params:
+ title: "Model performance"
+ blocks:
+ - type: target_distribution
+ params:
+ title: "Univariate analysis of target variable"
+
+ - type: performance_metrics
+ params:
+ title: "Metrics"
+ metrics:
+ - path: "sklearn.metrics.mean_absolute_error"
+ name: "Mean absolute error"
+ - path: "sklearn.metrics.mean_squared_error"
+ name: "Mean squared error"
+
+ - type: callout
+ params:
+ body: >
+ You can add as many blocks, charts, and text sections as you want.
+ The generated HTML renders report content only (no source code).
diff --git a/shapash/report/generation.py b/shapash/report/generation.py
deleted file mode 100644
index 24d9930f..00000000
--- a/shapash/report/generation.py
+++ /dev/null
@@ -1,94 +0,0 @@
-"""
-Report generation helper module.
-"""
-
-import os
-
-import pandas as pd
-import papermill as pm
-from nbconvert import HTMLExporter
-
-from shapash.utils.utils import get_project_root
-
-
-def execute_report(
- working_dir: str,
- explainer: object,
- project_info_file: str,
- x_train: pd.DataFrame | None = None,
- y_train: pd.DataFrame | None = None,
- y_test: pd.Series | pd.DataFrame | None = None,
- config: dict | None = None,
- notebook_path: str | None = None,
- kernel_name: str | None = None,
-):
- """
- Executes the base_report.ipynb notebook and saves the results in working_dir.
-
- Parameters
- ----------
- working_dir : str
- Directory in which will be saved the executed notebook.
- explainer : shapash.explainer.smart_explainer.SmartExplainer
- Compiled shapash explainer.
- project_info_file : str
- Path to the file used to display some information about the project in the report.
- x_train : pd.DataFrame
- DataFrame used for training the model.
- y_train : pd.Series or pd.DataFrame
- Series of labels in the training set.
- y_test : pd.Series or pd.DataFrame
- Series of labels in the test set.
- config : dict, optional
- Report configuration options.
- notebook_path : str, optional
- Path to the notebook used to generate the report. If None, the Shapash base report
- notebook will be used.
- kernel_name : str, optional
- Name of the kernel used to generate the report. This parameter can be usefull if
- you have multiple jupyter kernels and that the method does not use the right kernel
- by default.
- """
- if config is None:
- config = {}
- explainer.save(path=os.path.join(working_dir, "smart_explainer.pickle"))
- if x_train is not None:
- x_train.to_csv(os.path.join(working_dir, "x_train.csv"))
- if y_train is not None:
- y_train.to_csv(os.path.join(working_dir, "y_train.csv"))
- if y_test is not None:
- y_test.to_csv(os.path.join(working_dir, "y_test.csv"))
- root_path = get_project_root()
- if notebook_path is None or notebook_path == "":
- notebook_path = os.path.join(root_path, "shapash", "report", "base_report.ipynb")
-
- pm.execute_notebook(
- notebook_path,
- os.path.join(working_dir, "base_report.ipynb"),
- parameters=dict(dir_path=working_dir, project_info_file=project_info_file, config=config),
- kernel_name=kernel_name,
- )
-
-
-def export_and_save_report(working_dir: str, output_file: str):
- """
- Exports a previously executed notebook and saves it as a static HTML file.
-
- Parameters
- ----------
- working_dir : str
- Path to the directory containing the executed notebook.
- output_file : str
- Path to the html file that will be created.
- """
-
- exporter = HTMLExporter(
- exclude_input=True,
- extra_template_basedirs=[os.path.join(get_project_root(), "shapash", "report", "template")],
- template_name="custom",
- exclude_anchor_links=True,
- )
- (body, resources) = exporter.from_filename(filename=os.path.join(working_dir, "base_report.ipynb"))
-
- with open(output_file, "w") as file:
- file.write(body)
diff --git a/shapash/report/html/double_table.html b/shapash/report/html/double_table.html
deleted file mode 100644
index a4acd513..00000000
--- a/shapash/report/html/double_table.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
- {% with columns=columns1, rows=rows1 %}
- {% include "table_two_columns.html" %}
- {% endwith %}
-
-
- {% with columns=columns2, rows=rows2 %}
- {% include "table_two_columns.html" %}
- {% endwith %}
-
-
diff --git a/shapash/report/html/dropdown.html b/shapash/report/html/dropdown.html
deleted file mode 100644
index 201da706..00000000
--- a/shapash/report/html/dropdown.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
diff --git a/shapash/report/html/explainability.html b/shapash/report/html/explainability.html
deleted file mode 100644
index a490a45e..00000000
--- a/shapash/report/html/explainability.html
+++ /dev/null
@@ -1,65 +0,0 @@
-{% if labels|length > 1 %}
-{% with menuId='dropdownMenuLabel', menuText='Response', values=labels, menuDivVisible='explain-all' %}
-{% include "dropdown.html" %}
-{% endwith %}
-{% else %}
-{% endif %}
-Global feature importance plot
-{% for label in labels %}
-
- {{ label['feature_importance_plot'] }}
-
-{% endfor %}
-Features contribution plots
-{% for label in labels %}
-
- {% with menuId='dropdownMenu2', menuText='Feature', values=label['features'],
- menuDivVisible='explain-contrib-'~label['index'] %}
- {% include "dropdown.html" %}
- {% endwith %}
- {% for col in label['features'] %}
-
-
{{ col['name'] }} - {{ col['type'] }}
- {% if col['name'] != col['description'] %}
-
{{ col['description'] }}
- {% else %}
- {% endif %}
- {{ col['plot'] }}
-
- {% endfor %}
-
-{% endfor %}
-{% set has_interaction = false %}
-{% for label in labels %}
-{% if label['features_interaction']|length > 0 %}
-{% set has_interaction = true %}
-{% endif %}
-{% endfor %}
-
-{% if has_interaction %}
-Features Top Interaction plots
-{% for label in labels %}
-{% if label['features_interaction']|length > 0 %}
-
- {% with menuId='dropdownMenu3', menuText='Interactions', values=label['features_interaction'],
- menuDivVisible='explain-contrib-interaction-'~label['index'] %}
- {% include "dropdown.html" %}
- {% endwith %}
- {% for col in label['features_interaction'] %}
-
-
{{ col['name'] }} - {{ col['type'] }}
- {% if col['name'] != col['description'] %}
-
{{ col['description'] }}
- {% endif %}
- {{ col['plot'] }}
-
- {% endfor %}
-
-{% endif %}
-{% endfor %}
-{% endif %}
diff --git a/shapash/report/html/table_two_columns.html b/shapash/report/html/table_two_columns.html
deleted file mode 100644
index 962641da..00000000
--- a/shapash/report/html/table_two_columns.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
- {% if columns %}
-
- {% for col in columns %}
- {{ col }}
- {% endfor %}
-
- {% endif %}
-
-
- {% for row in rows %}
-
- {{ row['name'] }}
- {{ row['value'] }}
-
- {% endfor %}
-
-
diff --git a/shapash/report/html/univariate.html b/shapash/report/html/univariate.html
deleted file mode 100644
index b29da33f..00000000
--- a/shapash/report/html/univariate.html
+++ /dev/null
@@ -1,18 +0,0 @@
-{% if features|length > 1 %}
-{% with menuId='dropdownMenu1', menuText='Feature', values=features, menuDivVisible=groupId %}
- {% include "dropdown.html" %}
-{% endwith %}
-{% endif %}
-{% for col in features %}
-
-
{{ col['name'] }} - {{ col['type'] }}
- {% if col['name'] != col['description'] and col['description']|length %}
-
{{ col['description'] }}
- {% else %}
- {% endif %}
-
-
{{ col['table'] }}
-
{{ col['image'] }}
-
-
-{% endfor %}
diff --git a/shapash/report/panel_support.py b/shapash/report/panel_support.py
new file mode 100644
index 00000000..050f7d9b
--- /dev/null
+++ b/shapash/report/panel_support.py
@@ -0,0 +1,42 @@
+"""Panel helpers for smart report rendering."""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from pathlib import Path
+
+import panel as pn
+
+
+@lru_cache(maxsize=1)
+def _enable_panel_plotly() -> None:
+ """Load Panel's Plotly extension by calling ``pn.extension("plotly")``.
+
+ The function takes no arguments, so ``lru_cache`` memoizes its single result:
+ after the first successful call, later calls return the cached ``None``
+ without invoking ``pn.extension`` again.
+ """
+ pn.extension("plotly")
+
+
+@lru_cache(maxsize=1)
+def report_css_text() -> str:
+ """Load report CSS once for Panel report export."""
+ css_path = Path(__file__).resolve().parent / "report_styles.css"
+ return css_path.read_text(encoding="utf-8")
+
+
+@lru_cache(maxsize=1)
+def report_js_text() -> str:
+ """Load report JavaScript once for Panel report export."""
+ js_path = Path(__file__).resolve().parent / "report_script.js"
+ return js_path.read_text(encoding="utf-8")
+
+
+def apply_report_css() -> None:
+ """Register smart-report CSS in Panel global configuration."""
+ _enable_panel_plotly()
+ css = report_css_text()
+ if css not in pn.config.raw_css:
+ pn.config.raw_css.append(css)
+
+
+def make_plotly_pane(fig) -> pn.pane.Plotly:
+ """Build a responsive Plotly pane for report blocks."""
+ _enable_panel_plotly()
+ return pn.pane.Plotly(fig, config={"responsive": True}, sizing_mode="stretch_width")
diff --git a/shapash/report/project_report.py b/shapash/report/project_report.py
deleted file mode 100644
index 78521ccf..00000000
--- a/shapash/report/project_report.py
+++ /dev/null
@@ -1,580 +0,0 @@
-import importlib.metadata
-import logging
-import os
-import sys
-from datetime import date
-from numbers import Number
-
-import jinja2
-import numpy as np
-import pandas as pd
-import plotly
-
-from shapash import SmartExplainer
-from shapash.plots.plot_evaluation_metrics import plot_confusion_matrix
-from shapash.plots.plot_univariate import plot_distribution
-from shapash.report.common import compute_col_types, display_value, get_callable, series_dtype
-from shapash.report.data_analysis import perform_global_dataframe_analysis, perform_univariate_dataframe_analysis
-from shapash.report.visualisation import (
- print_css_style,
- print_html,
- print_javascript_misc,
- print_md,
-)
-from shapash.utils.io import load_yml
-from shapash.utils.transform import apply_postprocessing, handle_categorical_missing, inverse_transform
-from shapash.utils.utils import compute_sorted_variables_interactions_list_indices, get_project_root, truncate_str
-from shapash.webapp.utils.utils import round_to_k
-
-logging.basicConfig(level=logging.INFO)
-
-template_loader = jinja2.FileSystemLoader(searchpath=os.path.join(get_project_root(), "shapash", "report", "html"))
-template_env = jinja2.Environment(loader=template_loader)
-
-
-class ProjectReport:
- """
- The ProjectReport class allows to generate general information about a
- Data Science project.
- It analyzes the data and the model used in order to provide interesting
- insights that can be shared with non technical person.
-
- Parameters
- ----------
- explainer : shapash.explainer.smart_explainer.SmartExplainer
- A shapash SmartExplainer object that has already be compiled.
- project_info_file : str
- Path to the yml file containing information about the project (author, description, ...).
- config : dict, optional
- Contains configuration options for the report.
-
- Attributes
- ----------
- explainer : shapash.explainer.smart_explainer.SmartExplainer
- A shapash SmartExplainer object that has already be compiled.
- metadata : dict
- Information about the project (author, description, ...).
- x_train : pd.DataFrame
- DataFrame used for training the model.
- y_train : pd.Series or pd.DataFrame
- Series of labels in the train set.
- y_test : pd.Series or pd.DataFrame
- Series of labels in the test set.
- config : dict, optional
- Configuration options for the report.
-
- """
-
- def __init__(
- self,
- explainer: SmartExplainer,
- project_info_file: str,
- x_train: pd.DataFrame | None = None,
- y_train: pd.DataFrame | None = None,
- y_test: pd.DataFrame | None = None,
- config: dict | None = None,
- ):
- self.explainer = explainer
- self.metadata = load_yml(path=project_info_file)
- self.x_train_init = x_train
- if x_train is not None:
- x_train_pre = inverse_transform(x_train, self.explainer.preprocessing)
- self.x_train_pre = handle_categorical_missing(x_train_pre)
-
- if self.explainer.postprocessing:
- self.x_train_pre = apply_postprocessing(self.x_train_pre, self.explainer.postprocessing)
- else:
- self.x_train_pre = None
- self.x_init = self.explainer.x_init
- self.config = config if config is not None else dict()
- self.col_names = list(self.explainer.columns_dict.values())
- self.df_train_test = self._create_train_test_df(test=self.x_init, train=self.x_train_pre)
- if self.explainer.y_pred is not None:
- self.y_pred = np.array(self.explainer.y_pred.T)[0]
- else:
- self.y_pred = self.explainer.model.predict(self.explainer.x_encoded)
- self.y_test, target_name_test = self._get_values_and_name(y_test, "target")
- self.y_train, target_name_train = self._get_values_and_name(y_train, "target")
- self.target_name = target_name_train or target_name_test
-
- if "max_points" in self.config.keys():
- self.max_points = config["max_points"]
- else:
- self.max_points = 200
-
- if "display_interaction_plot" in self.config.keys():
- self.display_interaction_plot = config["display_interaction_plot"]
- else:
- self.display_interaction_plot = False
-
- if "nb_top_interactions" in self.config.keys():
- self.nb_top_interactions = config["nb_top_interactions"]
- else:
- self.nb_top_interactions = 5
-
- if "title_story" in self.config.keys():
- self.title_story = config["title_story"]
- elif self.explainer.title_story != "":
- self.title_story = self.explainer.title_story
- else:
- self.title_story = "Shapash report"
- self.title_description = self.config["title_description"] if "title_description" in self.config.keys() else ""
-
- print_css_style()
- print_javascript_misc()
-
- if "metrics" in self.config.keys():
- if not isinstance(self.config["metrics"], list) or not isinstance(self.config["metrics"][0], dict):
- raise ValueError("The metrics parameter expects a list of dict.")
- for metric in self.config["metrics"]:
- for key in metric:
- if key not in ["path", "name", "use_proba_values"]:
- raise ValueError(f"Unknown key : {key}. Key should be in ['path', 'name', 'use_proba_values']")
- if key == "use_proba_values" and not isinstance(metric["use_proba_values"], bool):
- raise ValueError('"use_proba_values" metric key expects a boolean value.')
-
- @staticmethod
- def _get_values_and_name(
- y: pd.DataFrame | pd.Series | list | None, default_name: str
- ) -> tuple[list, str] | tuple[None, None]:
- """
- Extracts vales and column name from a Pandas Series, DataFrame, or assign a default
- name if y is a list of values.
-
- Parameters
- ----------
- y : list or pd.Series or pd.DataFrame
- Column we want to extract the name and values
- default_name :
- Name assigned if no name was found for y
-
- Returns
- -------
- values : list
- list of values of y
- name : str
- name of y
- """
- if y is None:
- return None, None
- elif isinstance(y, pd.DataFrame):
- assert len(y.columns) == 1, "Number of columns found is greater than 1"
- name = y.columns[0]
- values = y.values[:, 0]
- elif isinstance(y, pd.Series):
- name = y.name
- values = y.values
- elif isinstance(y, list):
- name = default_name
- values = y
- else:
- raise ValueError(f"Cannot process following type : {type(y)}")
- return values, name
-
- @staticmethod
- def _create_train_test_df(test: pd.DataFrame | None, train: pd.DataFrame | None) -> pd.DataFrame | None:
- """
- Creates a DataFrame that contains train and test dataset with the column 'data_train_test'
- allowing to distinguish the values.
-
- Parameters
- ----------
- test : pd.DataFrame, optional
- test dataframe
- train : pd.DataFrame, optional
- train dataframe
-
- Returns
- -------
- pd.DataFrame
- The concatenation of train and test as a dataframe containing train and test values with
- a new 'data_train_test' column allowing to distinguish the values.
- """
- if (test is not None and "data_train_test" in test.columns) or (
- train is not None and "data_train_test" in train.columns
- ):
- raise ValueError('"data_train_test" column must be renamed as it is used in ProjectReport')
- if test is None and train is None:
- return None
- return pd.concat(
- [
- test.assign(data_train_test="test") if test is not None else None,
- train.assign(data_train_test="train") if train is not None else None,
- ]
- ).reset_index(drop=True)
-
- def display_title_description(self):
- """
- Displays title of the report and its description if defined.
- """
- print_html(f"""{self.title_story}
""")
- if self.title_description != "":
- print_html(f'{self.title_description} ')
-
- def display_project_information(self):
- """
- Displays general information about the project as defined in the metdata file.
- """
- for section in self.metadata.keys():
- print_md(f"## {section.title()}")
- for k, v in self.metadata[section].items():
- if k.lower() == "date" and v.lower() == "auto":
- print_md(f"**{k.title()}** : {date.today()}")
- else:
- print_md(f"**{k.title()}** : {v}")
- print_md("---")
-
- def display_model_analysis(self):
- """
- Displays information about the model used : class name, library name, library version,
- model parameters, ...
- """
- print_md(f"**Model used :** {self.explainer.model.__class__.__name__}")
-
- print_md(f"**Library :** {self.explainer.model.__class__.__module__}")
-
- for _, module in sorted(sys.modules.items()):
- if not hasattr(module, "__name__"):
- continue
-
- module_name = module.__name__.split(".")[0]
- expected_name = self.explainer.model.__class__.__module__.split(".")[0]
-
- if expected_name == module_name:
- try:
- package_name = "scikit-learn" if module_name == "sklearn" else module_name
- version = importlib.metadata.version(package_name)
- print_md(f"**Library version :** {version}")
- except importlib.metadata.PackageNotFoundError:
- print_md(f"**Library version :** not found for {module_name}")
- break
-
- print_md("**Model parameters :** ")
- model_params = self.explainer.model.__dict__
- table_template = template_env.get_template("double_table.html")
- print_html(
- table_template.render(
- columns1=["Parameter key", "Parameter value"],
- rows1=[
- {"name": truncate_str(str(k), 50), "value": truncate_str(str(v), 300)}
- for k, v in list(model_params.items())[: len(model_params) // 2 :]
- ], # Getting half of the parameters
- columns2=["Parameter key", "Parameter value"],
- rows2=[
- {"name": truncate_str(str(k), 50), "value": truncate_str(str(v), 300)}
- for k, v in list(model_params.items())[len(model_params) // 2 :]
- ], # Getting 2nd half of the parameters
- )
- )
- print_md("---")
-
- def display_dataset_analysis(
- self,
- global_analysis: bool = True,
- univariate_analysis: bool = True,
- target_analysis: bool = True,
- multivariate_analysis: bool = True,
- ):
- """
- This method performs and displays an exploration of the data given.
- It allows to compare train and test values for each part of the analysis.
-
- The parameters of the method allow to filter which part to display or not.
-
- Parameters
- ----------
- global_analysis : bool
- Whether or not to display the global analysis part.
- univariate_analysis : bool
- Whether or not to display the univariate analysis part.
- target_analysis : bool
- Whether or not to display the target analysis part that plots
- the distribution of the target variable.
- multivariate_analysis : bool
- Whether or not to display the multivariate analysis part
- """
- if global_analysis:
- print_md("### Global analysis")
- self._display_dataset_analysis_global()
-
- if univariate_analysis:
- print_md("### Univariate analysis")
- self._perform_and_display_analysis_univariate(
- df=self.df_train_test,
- col_splitter="data_train_test",
- split_values=["test", "train"],
- names=["Prediction dataset", "Training dataset"],
- group_id="univariate",
- )
- if target_analysis:
- df_target = self._create_train_test_df(
- test=(
- pd.DataFrame({self.target_name: self.y_test}, index=range(len(self.y_test)))
- if self.y_test is not None
- else None
- ),
- train=(
- pd.DataFrame({self.target_name: self.y_train}, index=range(len(self.y_train)))
- if self.y_train is not None
- else None
- ),
- )
- if df_target is not None:
- if target_analysis:
- print_md("### Target analysis")
- self._perform_and_display_analysis_univariate(
- df=df_target,
- col_splitter="data_train_test",
- split_values=["test", "train"],
- names=["Prediction dataset", "Training dataset"],
- group_id="target",
- )
- if multivariate_analysis:
- print_md("### Multivariate analysis")
- fig_corr = self.explainer.plot.correlations_plot(
- self.df_train_test,
- optimized=True,
- facet_col="data_train_test",
- max_features=20,
- width=900 if len(self.df_train_test["data_train_test"].unique()) > 1 else 500,
- height=500,
- )
- print_html(plotly.io.to_html(fig_corr))
- print_md("---")
-
- def _display_dataset_analysis_global(self):
- df_stats_global = self._stats_to_table(
- test_stats=perform_global_dataframe_analysis(self.x_init),
- train_stats=perform_global_dataframe_analysis(self.x_train_pre),
- names=["Prediction dataset", "Training dataset"],
- )
- print_html(df_stats_global.to_html(classes="greyGridTable"))
-
- def _perform_and_display_analysis_univariate(
- self, df: pd.DataFrame, col_splitter: str, split_values: list, names: list, group_id: str
- ):
- col_types = compute_col_types(df)
- n_splits = df[col_splitter].nunique()
- inv_columns_dict = {v: k for k, v in self.explainer.columns_dict.items()}
- test_stats_univariate = perform_univariate_dataframe_analysis(
- df.loc[df[col_splitter] == split_values[0]], col_types=col_types
- )
- if n_splits > 1:
- train_stats_univariate = perform_univariate_dataframe_analysis(
- df.loc[df[col_splitter] == split_values[1]], col_types=col_types
- )
-
- univariate_template = template_env.get_template("univariate.html")
- univariate_features_desc = list()
- list_cols_labels = [
- self.explainer.features_dict.get(col, col) for col in df.drop(col_splitter, axis=1).columns.to_list()
- ]
- for col_label in sorted(list_cols_labels):
- col = self.explainer.inv_features_dict.get(col_label, col_label)
- fig = plot_distribution(
- df_all=df,
- col=col,
- hue=col_splitter,
- colors_dict=self.explainer.colors_dict["report_feature_distribution"],
- )
- df_col_stats = self._stats_to_table(
- test_stats=test_stats_univariate[col],
- train_stats=train_stats_univariate[col] if n_splits > 1 else None,
- names=names,
- )
-
- univariate_features_desc.append(
- {
- "feature_index": int(inv_columns_dict.get(col, 0)),
- "name": col,
- "type": str(series_dtype(df[col])),
- "description": col_label,
- "table": df_col_stats.to_html(classes="greyGridTable"),
- "image": plotly.io.to_html(fig, include_plotlyjs=False, full_html=False),
- }
- )
- print_html(univariate_template.render(features=univariate_features_desc, groupId=group_id))
-
- @staticmethod
- def _stats_to_table(
- test_stats: dict,
- names: list,
- train_stats: dict | None = None,
- ) -> pd.DataFrame:
- if train_stats is not None:
- return pd.DataFrame({names[1]: pd.Series(train_stats), names[0]: pd.Series(test_stats)})
- else:
- return pd.DataFrame({names[0]: pd.Series(test_stats)})
-
- def display_model_explainability(self):
- """
- Displays explainability of the model as computed in SmartPlotter object
- """
- print_md("*Note : the explainability graphs were generated using the test set only.*")
- explainability_template = template_env.get_template("explainability.html")
- inv_columns_dict = {v: k for k, v in self.explainer.columns_dict.items()}
- explain_data = list()
- multiclass = True if (self.explainer._classes and len(self.explainer._classes) > 2) else False
- c_list = self.explainer._classes if multiclass else [1] # list just used for multiclass
- for index_label, label in enumerate(c_list): # Iterating over all labels in multiclass case
- label_value = self.explainer.check_label_name(label)[2] if multiclass else ""
-
- # Feature Importance
- fig_features_importance = self.explainer.plot.features_importance(label=label)
-
- # Contribution Plot
- explain_contrib_data = list()
- list_cols_labels = [self.explainer.features_dict.get(col, col) for col in self.col_names]
- for feature_label in sorted(list_cols_labels):
- feature = self.explainer.inv_features_dict.get(feature_label, feature_label)
- fig = self.explainer.plot.contribution_plot(feature, label=label, max_points=self.max_points)
- # Apparently matkers are not supported during conversion into html
- for el in fig.data:
- if el.type == "bar":
- el.marker.color = "lightgrey"
- explain_contrib_data.append(
- {
- "feature_index": int(inv_columns_dict[feature]),
- "name": feature,
- "description": self.explainer.features_dict[feature],
- "plot": plotly.io.to_html(fig, include_plotlyjs=False, full_html=False),
- }
- )
-
- # Interaction Plot
- explain_contrib_data_interaction = list()
- if self.display_interaction_plot:
- list_ind, _ = self.explainer.plot._select_indices_interactions_plot(
- selection=None, max_points=self.max_points
- )
- interaction_values = self.explainer.get_interaction_values(selection=list_ind)
- sorted_top_features_indices = compute_sorted_variables_interactions_list_indices(interaction_values)
- indices_to_plot = sorted_top_features_indices[: self.nb_top_interactions]
-
- for i, ids in enumerate(indices_to_plot):
- id0, id1 = ids
-
- fig_one_interaction = self.explainer.plot.interactions_plot(
- col1=self.explainer.columns_dict[id0],
- col2=self.explainer.columns_dict[id1],
- max_points=self.max_points,
- )
-
- explain_contrib_data_interaction.append(
- {
- "feature_index": i,
- "name": self.explainer.columns_dict[id0] + " / " + self.explainer.columns_dict[id1],
- "description": self.explainer.features_dict[self.explainer.columns_dict[id0]]
- + " / "
- + self.explainer.features_dict[self.explainer.columns_dict[id1]],
- "plot": plotly.io.to_html(fig_one_interaction, include_plotlyjs=False, full_html=False),
- }
- )
-
- # Aggregating the data
- explain_data.append(
- {
- "index": index_label,
- "name": label_value,
- "feature_importance_plot": plotly.io.to_html(
- fig_features_importance, include_plotlyjs=False, full_html=False
- ),
- "features": explain_contrib_data,
- "features_interaction": explain_contrib_data_interaction,
- }
- )
- print_html(explainability_template.render(labels=explain_data))
- print_md("---")
-
- def display_model_performance(self):
- """
- Displays the performance of the model. The metrics are computed using the config dict.
-
- Metrics should be given as a list of dict. Each dict contains they following keys :
- 'path' (path to the metric function, ex: 'sklearn.metrics.mean_absolute_error'),
- 'name' (optional, name of the metric as displayed in the report),
- and 'use_proba_values' (optional, possible values are False (default) or True
- if the metric uses proba values instead of predicted values).
-
- For example :
- config['metrics'] = [
- {
- 'path': 'sklearn.metrics.mean_squared_error',
- 'name': 'Mean absolute error', # Optional : name that will be displayed next to the metric
- 'y_pred': 'predicted_values' # Optional
- },
- {
- 'path': 'Scoring_AP.utils.lift10', # Custom function path
- 'name': 'Lift10',
- 'y_pred': 'proba_values' # Use proba values instead of predicted values
- }
- ]
- """
- if self.y_test is None:
- logging.info("No labels given for test set. Skipping model performance part")
- return
-
- print_md("### Univariate analysis of target variable")
- df = pd.concat(
- [
- pd.DataFrame({self.target_name: self.y_pred}).assign(_dataset="pred"),
- (
- pd.DataFrame({self.target_name: self.y_test}).assign(_dataset="true")
- if self.y_test is not None
- else None
- ),
- ]
- ).reset_index(drop=True)
- self._perform_and_display_analysis_univariate(
- df=df,
- col_splitter="_dataset",
- split_values=["pred", "true"],
- names=["Prediction values", "True values"],
- group_id="target-distribution",
- )
-
- if "metrics" not in self.config.keys():
- logging.info("No 'metrics' key found in report config dict. Skipping model performance part.")
- return
- print_md("### Metrics")
-
- for metric in self.config["metrics"]:
- if "name" not in metric.keys():
- metric["name"] = metric["path"]
-
- if (
- metric["path"] in ["confusion_matrix", "sklearn.metrics.confusion_matrix"]
- or metric["name"] == "confusion_matrix"
- ):
- print_md(f"**{metric['name']} :**")
- fig = plot_confusion_matrix(
- y_true=self.y_test, y_pred=self.y_pred, colors_dict=self.explainer.colors_dict
- )
- print_html(plotly.io.to_html(fig, include_plotlyjs=False, full_html=False))
- else:
- try:
- metric_fn = get_callable(path=metric["path"])
- # Look if we should use proba values instead of predicted values
- if "use_proba_values" in metric.keys() and metric["use_proba_values"] is True:
- y_pred = self.explainer.proba_values
- else:
- y_pred = self.y_pred
- res = metric_fn(self.y_test, y_pred)
- except Exception as e:
- logging.info(f"Could not compute following metric : {metric['path']}. \n{e}")
- continue
- if isinstance(res, Number):
- res = display_value(round_to_k(res, 3))
- print_md(f"**{metric['name']} :** {res}")
- elif isinstance(res, list | tuple | np.ndarray):
- print_md(f"**{metric['name']} :**")
- print_html(pd.DataFrame(res).to_html(classes="greyGridTable"))
- elif isinstance(res, str):
- print_md(f"**{metric['name']} :**")
- print_html(f"{res} ")
- else:
- logging.info(
- f"Could not compute following metric : {metric['path']}. \n"
- f"Result of type {res} cannot be displayed"
- )
- print_md("---")
diff --git a/shapash/report/report_script.js b/shapash/report/report_script.js
new file mode 100644
index 00000000..b96e9c78
--- /dev/null
+++ b/shapash/report/report_script.js
@@ -0,0 +1,188 @@
+function initReportInteractions() {
+ let scrollFrame = null;
+ const boundScrollRoots = new WeakSet();
+
+ function collectRoots() {
+ const roots = [document];
+ const pending = [document];
+ const seen = new WeakSet();
+ seen.add(document);
+
+ while (pending.length > 0) {
+ const currentRoot = pending.pop();
+ currentRoot.querySelectorAll('*').forEach(element => {
+ if (element.shadowRoot && !seen.has(element.shadowRoot)) {
+ seen.add(element.shadowRoot);
+ roots.push(element.shadowRoot);
+ pending.push(element.shadowRoot);
+ }
+ });
+ }
+
+ return roots;
+ }
+
+ function queryAllRoots(selector) {
+ return collectRoots().flatMap(root => Array.from(root.querySelectorAll(selector)));
+ }
+
+ function queryByIdAcrossRoots(id) {
+ for (const root of collectRoots()) {
+ if (typeof root.getElementById === 'function') {
+ const match = root.getElementById(id);
+ if (match) {
+ return match;
+ }
+ }
+ }
+ return null;
+ }
+
+ function clearActive(navItems, navChildren, navGroupTitles) {
+ navItems.forEach(element => element.classList.remove('active'));
+ navChildren.forEach(element => element.classList.remove('active'));
+ navGroupTitles.forEach(element => element.classList.remove('active'));
+ }
+
+ function bindScrollListeners() {
+ collectRoots().forEach(root => {
+ if (!boundScrollRoots.has(root)) {
+ root.addEventListener('scroll', queueScrollUpdate, true);
+ boundScrollRoots.add(root);
+ }
+ });
+ }
+
+ function bindPanelSelectors() {
+ queryAllRoots('.js-panel-select[data-panel-group]').forEach(select => {
+ if (select.dataset.reportBound === 'true') {
+ return;
+ }
+
+ function updatePanels() {
+ const panelGroup = select.getAttribute('data-panel-group');
+ const panels = queryAllRoots(`.section-block[data-panel-group="${panelGroup}"]`);
+
+ panels.forEach(panel => {
+ panel.style.display = 'none';
+ });
+
+ const selectedPanel = queryByIdAcrossRoots(select.value);
+ if (selectedPanel) {
+ selectedPanel.style.display = 'block';
+ }
+
+ queueScrollUpdate();
+ }
+
+ select.addEventListener('change', updatePanels);
+ select.dataset.reportBound = 'true';
+ updatePanels();
+ });
+ }
+
+ function onScroll() {
+ const sections = queryAllRoots('.scroll-anchor[id]');
+ const navItems = queryAllRoots('.nav-item:not(.nav-group-title):not(.nav-child)');
+ const navChildren = queryAllRoots('.nav-child');
+ const navGroupTitles = queryAllRoots('.nav-group-title');
+ const navCurrentValue = queryAllRoots('.nav-current-value')[0] || null;
+ const sectionPositions = sections
+ .map(section => ({
+ section,
+ top: section.getBoundingClientRect().top,
+ }))
+ .sort((left, right) => left.top - right.top);
+ let currentId = '';
+
+ sectionPositions.forEach(({ section, top }) => {
+ if (top <= 120) {
+ currentId = section.getAttribute('id');
+ }
+ });
+
+ if (!currentId && sectionPositions.length > 0) {
+ const firstVisibleSection = sectionPositions.find(({ top }) => top > 0);
+ currentId = firstVisibleSection ? firstVisibleSection.section.getAttribute('id') : '';
+ }
+
+ clearActive(navItems, navChildren, navGroupTitles);
+ let matchedLabel = 'Top of report';
+
+ navItems.forEach(item => {
+ if (item.getAttribute('href') === '#' + currentId) {
+ item.classList.add('active');
+ matchedLabel = item.textContent.trim();
+ }
+ });
+
+ let childMatched = false;
+ navChildren.forEach(child => {
+ if (child.getAttribute('href') === '#' + currentId) {
+ child.classList.add('active');
+ childMatched = true;
+ matchedLabel = child.textContent.trim();
+ const group = child.closest('.nav-group');
+ if (group) {
+ const parentTitle = group.querySelector('.nav-group-title');
+ if (parentTitle) {
+ parentTitle.classList.add('active');
+ }
+ }
+ }
+ });
+
+ if (!childMatched) {
+ navGroupTitles.forEach(title => {
+ if (title.getAttribute('href') === '#' + currentId) {
+ title.classList.add('active');
+ matchedLabel = title.textContent.trim();
+ }
+ });
+ }
+
+ if (navCurrentValue) {
+ navCurrentValue.textContent = matchedLabel || 'Top of report';
+ }
+ }
+
+ function queueScrollUpdate() {
+ if (scrollFrame !== null) {
+ return;
+ }
+
+ scrollFrame = window.requestAnimationFrame(() => {
+ scrollFrame = null;
+ bindScrollListeners();
+ bindPanelSelectors();
+ onScroll();
+ });
+ }
+
+ window.addEventListener('resize', queueScrollUpdate);
+ window.addEventListener('hashchange', queueScrollUpdate);
+ queueScrollUpdate();
+
+ let attempts = 0;
+ function refreshUntilReady() {
+ queueScrollUpdate();
+ attempts += 1;
+ if (attempts >= 120) {
+ return;
+ }
+
+ const hasNavigation = queryAllRoots('.nav-item').length > 0;
+ const hasSections = queryAllRoots('.scroll-anchor[id]').length > 0;
+ if (!hasNavigation || !hasSections) {
+ window.requestAnimationFrame(refreshUntilReady);
+ }
+ }
+
+ refreshUntilReady();
+}
+
+if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', initReportInteractions);
+} else {
+ initReportInteractions();
+}
diff --git a/shapash/report/report_styles.css b/shapash/report/report_styles.css
new file mode 100644
index 00000000..df1c4093
--- /dev/null
+++ b/shapash/report/report_styles.css
@@ -0,0 +1,275 @@
+:root {
+ --shapash-yellow: #f4c000;
+ --shapash-black: #343736;
+}
+
+.main-report {
+ padding: 24px 32px;
+ align-items: flex-start;
+ gap: 20px;
+ overflow: visible !important;
+}
+
+/* Sidebar column: sticks 16px below the top of the viewport while scrolling.
+   Its height is capped at the viewport height minus 32px and overflow is clipped. */
+.report-sidebar {
+ align-self: flex-start;
+ position: sticky;
+ top: 16px;
+ z-index: 30;
+ max-height: calc(100vh - 32px);
+ overflow: hidden;
+}
+
+.report-content {
+ flex: 1 1 auto;
+ min-width: 0;
+}
+
+/* Generic key/value and dataframe tables */
+.kv-table,
+table.dataframe {
+ width: 100%;
+ border-collapse: separate;
+ border-spacing: 0;
+ margin: 12px 0 24px;
+ background: #fff;
+ border: 1px solid #ececec;
+ border-radius: 12px;
+ overflow: hidden;
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.04);
+}
+
+.kv-table th,
+.kv-table td,
+table.dataframe th,
+table.dataframe td {
+ text-align: center;
+ vertical-align: middle;
+}
+
+.shapash-callout {
+ padding: 14px 20px;
+ border-left: 4px solid var(--shapash-yellow);
+}
+
+.badge-pill {
+ border: 1px solid #eeeeee;
+ border-radius: 999px;
+ padding: 6px 12px;
+ display: inline-block;
+}
+
+.badge-pill-gold {
+ border-color: var(--shapash-yellow);
+}
+
+.badge-pill-blue {
+ border-color: #2255aa;
+}
+
+.badge-pill-gray {
+ border-color: #eeeeee;
+}
+
+.badge-pill-orange {
+ border-color: var(--shapash-yellow);
+}
+
+.project-info-grid {
+ display: grid !important;
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ gap: 16px;
+ align-items: stretch;
+}
+
+.project-info-card {
+ display: flex;
+ flex-direction: column;
+ height: 100%;
+ min-width: 0;
+}
+
+.project-info-card .kv-table {
+ flex: 1 1 auto;
+ margin-bottom: 0;
+}
+
+.project-info-card .kv-table table.dataframe {
+ height: 100%;
+}
+
+.fit-content-table {
+ width: fit-content;
+ max-width: 100%;
+ overflow-x: auto;
+}
+
+.fit-content-table table.dataframe {
+ width: max-content;
+ table-layout: auto;
+}
+
+/* Navigation panel: a vertical flex column whose gap and padding are multiplied
+   by --nav-scale (falls back to 1 when the variable is not set). It takes the
+   viewport height minus 32px and scrolls vertically; the scrollbar itself is
+   hidden via -ms-overflow-style / scrollbar-width here and the
+   ::-webkit-scrollbar rule that follows. */
+.report-nav {
+ position: static;
+ z-index: 20;
+ display: flex;
+ flex-direction: column;
+ align-items: stretch;
+ flex-wrap: nowrap;
+ gap: calc(8px * var(--nav-scale, 1));
+ margin: 0;
+ padding: calc(10px * var(--nav-scale, 1)) calc(12px * var(--nav-scale, 1));
+ border: 1px solid #ececec;
+ border-radius: 12px;
+ background: rgba(255, 255, 255, 0.96);
+ backdrop-filter: blur(4px);
+ box-shadow: 0 6px 18px rgba(0, 0, 0, 0.06);
+ height: calc(100vh - 32px);
+ overflow-y: auto;
+ overflow-x: hidden;
+ -ms-overflow-style: none;
+ scrollbar-width: none;
+}
+
+/* Zero-size scrollbar for the navigation panel in WebKit-based browsers. */
+.report-nav::-webkit-scrollbar {
+ width: 0;
+ height: 0;
+}
+
+.nav-logo {
+ display: flex;
+ align-items: flex-start;
+ justify-content: flex-start;
+ padding: calc(4px * var(--nav-scale, 1));
+ margin-bottom: calc(6px * var(--nav-scale, 1));
+}
+
+.nav-logo img {
+ display: block;
+ width: min(70px, 100%);
+ height: auto;
+}
+
+.nav-current {
+ display: flex;
+ flex-direction: column;
+ gap: 2px;
+ padding: calc(8px * var(--nav-scale, 1));
+ border-radius: 10px;
+ border: 1px solid #f2d878;
+ background: #fff8dc;
+ margin-bottom: calc(4px * var(--nav-scale, 1));
+}
+
+.nav-current-label {
+ font-size: calc(0.72rem * var(--nav-scale, 1));
+ text-transform: uppercase;
+ letter-spacing: 0.04em;
+ color: #7a6a2f;
+}
+
+.nav-current-value {
+ font-size: calc(0.9rem * var(--nav-scale, 1));
+ font-weight: 700;
+}
+
+.nav-group {
+ display: flex;
+ flex-direction: column;
+ align-items: stretch;
+ gap: calc(8px * var(--nav-scale, 1));
+ flex-wrap: nowrap;
+ padding: calc(4px * var(--nav-scale, 1)) 0;
+}
+
+.nav-group-children {
+ display: flex;
+ flex-direction: column;
+ align-items: stretch;
+ gap: calc(6px * var(--nav-scale, 1));
+ flex-wrap: nowrap;
+ padding-left: calc(10px * var(--nav-scale, 1));
+}
+
+.nav-item {
+ display: block;
+ padding: calc(6px * var(--nav-scale, 1)) calc(10px * var(--nav-scale, 1));
+ border-radius: 8px;
+ border: 1px solid #dddddd;
+ color: var(--shapash-black);
+ text-decoration: none;
+ font-size: calc(0.9rem * var(--nav-scale, 1));
+ line-height: 1.2;
+ background: #fff;
+}
+
+.nav-group-title {
+ border-color: #d4d4d4;
+ font-weight: 700;
+}
+
+.nav-child {
+ border-style: dashed;
+ font-size: calc(0.84rem * var(--nav-scale, 1));
+}
+
+.nav-item:hover,
+.nav-item.active {
+ border-color: var(--shapash-yellow);
+ background: #fff9e6;
+}
+
+.nav-item.active {
+ box-shadow: inset 3px 0 0 var(--shapash-yellow);
+ font-weight: 700;
+}
+
+/* Invisible in-page anchor target, shifted 10px above its normal position. */
+.scroll-anchor {
+ display: block;
+ position: relative;
+ top: -10px;
+ visibility: hidden;
+}
+
+/* Responsive adjustments for viewports up to 1200px wide: tighter page padding,
+   horizontally scrollable tables, a single-column project-info grid, and a
+   sidebar/navigation that is no longer sticky or height-constrained. */
+@media (max-width: 1200px) {
+
+ .main-report {
+ padding: 16px;
+ gap: 12px;
+ }
+
+ /* Tables become block-level boxes so they can scroll sideways on their own. */
+ .kv-table,
+ table.dataframe {
+ display: block;
+ overflow-x: auto;
+ -webkit-overflow-scrolling: touch;
+ }
+
+ /* Allow long unbroken values to wrap instead of overflowing their container. */
+ .kv-val,
+ .content-block {
+ overflow-wrap: anywhere;
+ word-break: break-word;
+ }
+
+ .project-info-grid {
+ grid-template-columns: 1fr;
+ }
+
+ .project-info-card {
+ width: 100%;
+ height: auto;
+ }
+
+ .report-nav {
+ position: static;
+ padding: 8px;
+ height: auto;
+ }
+
+ /* Undo the sticky positioning and height cap set on the sidebar above. */
+ .report-sidebar {
+ position: static;
+ top: auto;
+ max-height: none;
+ overflow: visible;
+ }
+}
diff --git a/shapash/report/template/custom/conf.json b/shapash/report/template/custom/conf.json
deleted file mode 100644
index a5974f9f..00000000
--- a/shapash/report/template/custom/conf.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
- "base_template": "classic",
- "mimetypes": {
- "text/html": true
- },
- "preprocessors": {
- "100-pygments": {
- "enabled": true,
- "type": "nbconvert.preprocessors.CSSHTMLHeaderPreprocessor"
- }
- }
-}
diff --git a/shapash/report/template/custom/index.html.j2 b/shapash/report/template/custom/index.html.j2
deleted file mode 100644
index 58a6be6f..00000000
--- a/shapash/report/template/custom/index.html.j2
+++ /dev/null
@@ -1,65 +0,0 @@
-{%- extends 'classic/index.html.j2' -%}
-
-{%- block header -%}
-
-{{ super() }}
-
-
-
-
-
-
-{%- endblock header -%}
-
-{% block body_header %}
-
-
-
-
-
-
-
-{% endblock body_header %}
-
-{% block input_group -%}
-{% endblock input_group %}
-
-{% block body_footer %}
-
-
-
-
-
-
-{% endblock body_footer %}
diff --git a/shapash/report/validation.py b/shapash/report/validation.py
new file mode 100644
index 00000000..5ea058a0
--- /dev/null
+++ b/shapash/report/validation.py
@@ -0,0 +1,85 @@
+"""Validation of the yaml configuration and helper functions for report rendering."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+import panel as pn
+import yaml
+
+
+def load_report_config(cfg_path: str | Path) -> dict:
+    """Load a report YAML configuration file and validate its structure.
+
+    Parameters
+    ----------
+    cfg_path : str or Path
+        Location of the YAML file. Plain strings are accepted and converted
+        to ``Path`` so callers do not have to wrap the value themselves.
+
+    Returns
+    -------
+    dict
+        The parsed configuration, once ``validate_report_schema`` accepted it.
+
+    Raises
+    ------
+    FileNotFoundError
+        If ``cfg_path`` does not exist.
+    ValueError
+        If the file is not valid YAML or fails schema validation.
+    """
+    # Normalise first: a bare ``str`` has neither ``.exists()`` nor ``.open()``
+    # and would otherwise fail with an unrelated AttributeError.
+    cfg_path = Path(cfg_path)
+    if not cfg_path.exists():
+        raise FileNotFoundError(f"Config not found: {cfg_path}")
+
+    try:
+        with cfg_path.open(encoding="utf-8") as file:
+            cfg = yaml.safe_load(file)
+    except yaml.YAMLError as exc:
+        raise ValueError(f"Invalid YAML syntax in '{cfg_path}': {exc}") from exc
+
+    validate_report_schema(cfg, cfg_path)
+    return cfg
+
+
+def validate_report_schema(cfg: object, cfg_path: Path) -> None:
+    """Check the top-level shape of a parsed report configuration.
+
+    ``cfg`` must be a mapping whose ``"sections"`` entry is a non-empty list.
+    Every entry is then passed to ``_validate_block`` together with its
+    1-based position. A ``ValueError`` mentioning ``cfg_path`` is raised on
+    the first violation detected here.
+    """
+    if not isinstance(cfg, dict):
+        raise ValueError(f"Invalid YAML structure in '{cfg_path}': top-level content must be a mapping.")
+
+    section_list = cfg.get("sections")
+    if not (isinstance(section_list, list) and section_list):
+        raise ValueError(f"Invalid YAML structure in '{cfg_path}': 'sections' must be a non-empty list.")
+
+    for position, entry in enumerate(section_list, 1):
+        _validate_block(entry, position, cfg_path)
+
+
+def _validate_block(block: object, idx: int, cfg_path: Path, parent: str = "sections") -> None:
+    """Validate a single block entry, recursing into the children of group blocks.
+
+    ``idx`` and ``parent`` are only used to build the location shown in error
+    messages (for example ``sections[2].blocks[1]``). A ``ValueError`` is
+    raised when the entry is not a mapping, has no non-empty string ``type``,
+    has a non-mapping ``params``, is a ``custom`` block without a non-empty
+    ``function`` string, or is a ``group`` block whose ``blocks`` is not a list.
+    """
+    if not isinstance(block, dict):
+        raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}] must be a mapping.")
+
+    kind = block.get("type")
+    if not (isinstance(kind, str) and kind.strip()):
+        raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].type must be a non-empty string.")
+
+    # A missing "params" key is fine; a present one must be a mapping.
+    if not isinstance(block.get("params", {}), dict):
+        raise ValueError(f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].params must be a mapping.")
+
+    if kind == "custom":
+        target = block.get("function")
+        if not (isinstance(target, str) and target.strip()):
+            raise ValueError(
+                f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].function is required for custom blocks."
+            )
+    elif kind == "group":
+        children = block.get("blocks", [])
+        if not isinstance(children, list):
+            raise ValueError(
+                f"Invalid YAML structure in '{cfg_path}': {parent}[{idx}].blocks must be a list for group blocks."
+            )
+        for child_idx, child in enumerate(children, start=1):
+            _validate_block(child, child_idx, cfg_path, parent=f"{parent}[{idx}].blocks")
+
+
+def render_block_error(block_id: str, exc: Exception):
+    """Build the "danger" alert pane shown in place of a block that failed.
+
+    The alert text names ``block_id`` and, after a blank line, includes the
+    string form of ``exc``; the pane stretches to the available width.
+    """
+    message = f'Block "{block_id}" failed\n\n{exc}'
+    return pn.pane.Alert(message, alert_type="danger", sizing_mode="stretch_width")
+
+
+def stats_to_table(test_stats: dict, names: list[str], train_stats: dict | None = None) -> pd.DataFrame:
+    """Assemble per-dataset statistics into one table, one column per dataset.
+
+    ``names[0]`` labels the test column. When ``train_stats`` is given,
+    ``names[1]`` labels the train column, which is placed first. Columns
+    whose values are all missing are dropped from the result.
+    """
+    columns = {}
+    if train_stats is not None:
+        columns[names[1]] = pd.Series(train_stats)
+    columns[names[0]] = pd.Series(test_stats)
+
+    return pd.DataFrame(columns).dropna(axis=1, how="all")
diff --git a/shapash/report/visualisation.py b/shapash/report/visualisation.py
deleted file mode 100644
index d91601ec..00000000
--- a/shapash/report/visualisation.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-from IPython.display import HTML, Latex, Markdown, display
-
-
-def print_md(text: str):
- """
- Renders markdown text.
- """
- display(Markdown(text))
-
-
-def print_latex(text: str):
- """
- Renders Latex text.
- """
- display(Latex(text))
-
-
-def print_html(text: str):
- """
- Renders HTML text.
- """
- display(HTML(text))
-
-
-def print_css_style():
- """Print the CSS"""
- print_html(
- """
-
- """
- )
-
-
-def print_javascript_misc():
- """Print the JS"""
- print_html(
- """
-
- """
- )
-
-
-def convert_fig_to_html(fig):
- """Convert Matplotlib figure 'fig' into a tag for HTML use using base64 encoding."""
- import base64
- import io
-
- s = io.BytesIO()
- fig.savefig(s, format="png", bbox_inches="tight")
- plt.close()
- s = base64.b64encode(s.getvalue()).decode("utf-8").replace("\n", "")
- return ' ' % s
-
-
-def html_str_df_and_image(df: pd.DataFrame, fig: plt.Figure) -> str:
- """Convert dataframe to HTML display"""
- return f"""
-
-
{df.to_html(classes="greyGridTable")}
-
{convert_fig_to_html(fig)}
-
- """
-
-
-def print_figure(fig):
- """Print a figure as HTML"""
- print_html(convert_fig_to_html(fig))
diff --git a/shapash/style/shapash-fond-clair.png b/shapash/style/shapash-fond-clair.png
new file mode 100644
index 00000000..6300ec34
Binary files /dev/null and b/shapash/style/shapash-fond-clair.png differ
diff --git a/tests/data/report_test_config.yml b/tests/data/report_test_config.yml
new file mode 100644
index 00000000..55b0dcce
--- /dev/null
+++ b/tests/data/report_test_config.yml
@@ -0,0 +1,10 @@
+sections:
+ - type: header
+ params:
+ title: "Integration report"
+ subtitle: "default test config"
+
+ - type: project_information
+ params:
+ title: "Project information"
+ project_info_file: "tests/data/metadata.yaml"
diff --git a/tests/integration_tests/test_report_generation.py b/tests/integration_tests/test_report_generation.py
index 5c54aa2e..305f4987 100644
--- a/tests/integration_tests/test_report_generation.py
+++ b/tests/integration_tests/test_report_generation.py
@@ -2,17 +2,19 @@
import shutil
import tempfile
import unittest
+from pathlib import Path
import catboost as cb
import category_encoders as ce
import numpy as np
import pandas as pd
+import yaml
from category_encoders import OrdinalEncoder
from shapash import SmartExplainer
-from shapash.report.generation import execute_report, export_and_save_report
current_path = os.path.dirname(os.path.abspath(__file__))
+report_test_cfg_path = os.path.join(current_path, "../data/report_test_config.yml")
class TestGeneration(unittest.TestCase):
@@ -32,122 +34,68 @@ def setUp(self):
self.xpl.compile(x=df_encoded[["x1", "x2", "x3", "x4"]])
self.df = df_encoded
- def test_execute_report_1(self):
- tmp_dir_path = tempfile.mkdtemp()
-
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
- config=None,
- notebook_path=None,
- )
- assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle"))
- assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb"))
-
- shutil.rmtree(tmp_dir_path)
-
- def test_execute_report_2(self):
- tmp_dir_path = tempfile.mkdtemp()
-
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
- x_train=self.df[["x1", "x2", "x3", "x4"]],
- config=None,
- notebook_path=None,
- )
- assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle"))
- assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb"))
-
- shutil.rmtree(tmp_dir_path)
-
- def test_execute_report_3(self):
- tmp_dir_path = tempfile.mkdtemp()
-
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
- x_train=self.df[["x1", "x2", "x3", "x4"]],
- y_test=self.df["y"],
- config=None,
- notebook_path=None,
- )
- assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "y_test.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle"))
- assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb"))
-
- shutil.rmtree(tmp_dir_path)
-
- def test_execute_report_4(self):
- tmp_dir_path = tempfile.mkdtemp()
-
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
- x_train=self.df[["x1", "x2", "x3", "x4"]],
- y_train=self.df["y"],
- y_test=self.df["y"],
- config=None,
- notebook_path=None,
- )
- assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "y_test.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "y_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle"))
- assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb"))
-
- shutil.rmtree(tmp_dir_path)
-
- def test_execute_report_5(self):
+ def test_generate_report_default_config(self):
tmp_dir_path = tempfile.mkdtemp()
+ outfile = os.path.join(tmp_dir_path, "report.html")
self.xpl.palette_name = "eurybia"
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
+ self.xpl.generate_report(
+ output_file=outfile,
project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
x_train=self.df[["x1", "x2", "x3", "x4"]],
y_train=self.df["y"],
y_test=self.df["y"],
- notebook_path=None,
+ working_dir=tmp_dir_path,
+ yaml_path=report_test_cfg_path,
)
self.xpl.palette_name = "default"
- assert os.path.exists(os.path.join(tmp_dir_path, "x_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "y_test.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "y_train.csv"))
- assert os.path.exists(os.path.join(tmp_dir_path, "smart_explainer.pickle"))
- assert os.path.exists(os.path.join(tmp_dir_path, "base_report.ipynb"))
+ assert os.path.exists(outfile)
shutil.rmtree(tmp_dir_path)
- def test_generate_report_1(self):
+ def test_generate_report_with_custom_yaml_config(self):
tmp_dir_path = tempfile.mkdtemp()
- outfile = os.path.join(tmp_dir_path, "report.html")
+ cfg_path = Path(tmp_dir_path) / "custom_report_config.yml"
+ outfile = str(Path(tmp_dir_path) / "report_custom.html")
+
+ config = {
+ "sections": [
+ {
+ "type": "header",
+ "params": {"title": "Integration report", "subtitle": "custom yaml"},
+ },
+ {
+ "type": "project_information",
+ "params": {
+ "title": "Project information",
+ "project_info_file": os.path.join(current_path, "../data/metadata.yaml"),
+ },
+ },
+ ]
+ }
+ with cfg_path.open("w", encoding="utf-8") as stream:
+ yaml.safe_dump(config, stream, sort_keys=False, allow_unicode=True)
self.xpl.generate_report(
output_file=outfile,
project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
+ yaml_path=str(cfg_path),
)
assert os.path.exists(outfile)
shutil.rmtree(tmp_dir_path)
- def test_export_and_save_report_1(self):
+ def test_generate_report_interactions_enabled(self):
tmp_dir_path = tempfile.mkdtemp()
+ outfile = os.path.join(tmp_dir_path, "report_interactions.html")
- execute_report(
- working_dir=tmp_dir_path,
- explainer=self.xpl,
+ self.xpl.generate_report(
+ output_file=outfile,
project_info_file=os.path.join(current_path, "../data/metadata.yaml"),
+ x_train=self.df[["x1", "x2", "x3", "x4"]],
+ display_interaction_plot=True,
+ working_dir=tmp_dir_path,
)
-
- outfile = os.path.join(tmp_dir_path, "report.html")
- export_and_save_report(working_dir=tmp_dir_path, output_file=outfile)
assert os.path.exists(outfile)
+
shutil.rmtree(tmp_dir_path)
diff --git a/tests/unit_tests/explainer/test_smart_explainer.py b/tests/unit_tests/explainer/test_smart_explainer.py
index c5736e1f..a07ef8bb 100644
--- a/tests/unit_tests/explainer/test_smart_explainer.py
+++ b/tests/unit_tests/explainer/test_smart_explainer.py
@@ -1108,9 +1108,8 @@ def test_run_app_2(self, mock_get_host_name, mock_custom_thread, mock_smartapp):
xpl.run_app()
assert xpl.y_target is not None
- @patch("shapash.report.generation.export_and_save_report")
- @patch("shapash.report.generation.execute_report")
- def test_generate_report(self, mock_execute_report, mock_export_and_save_report):
+ @patch("shapash.report.core.generate_report")
+ def test_generate_report(self, mock_generate_report):
"""
Test generate report method
"""
@@ -1123,8 +1122,13 @@ def test_generate_report(self, mock_execute_report, mock_export_and_save_report)
xpl = SmartExplainer(clf)
xpl.compile(x=df[["x1", "x2"]])
xpl.generate_report(output_file="test", project_info_file="test")
- mock_execute_report.assert_called_once()
- mock_export_and_save_report.assert_called_once()
+ runtime_arg = mock_generate_report.call_args.kwargs["runtime"]
+ assert runtime_arg.explainer is xpl
+ mock_generate_report.assert_called_once_with(
+ runtime=runtime_arg,
+ config_file=unittest.mock.ANY,
+ output_file="test",
+ )
def test_compute_features_stability_1(self):
df = pd.DataFrame(np.random.randint(1, 100, size=(15, 4)), columns=list("ABCD"))
diff --git a/tests/unit_tests/report/test_project_report.py b/tests/unit_tests/report/test_project_report.py
deleted file mode 100644
index 4a66020f..00000000
--- a/tests/unit_tests/report/test_project_report.py
+++ /dev/null
@@ -1,250 +0,0 @@
-import os
-import unittest
-from unittest.mock import patch
-
-import catboost as cb
-import numpy as np
-import pandas as pd
-from category_encoders import OrdinalEncoder
-
-from shapash import SmartExplainer
-from shapash.report.project_report import ProjectReport
-
-expected_attrs = [
- "explainer",
- "metadata",
- "x_train_init",
- "y_test",
- "x_init",
- "config",
- "col_names",
- "df_train_test",
- "title_story",
- "title_description",
-]
-
-current_path = os.path.dirname(os.path.abspath(__file__))
-
-
-class TestProjectReport(unittest.TestCase):
- def setUp(self):
- self.df = pd.DataFrame(range(0, 21), columns=["id"])
- self.df["y"] = self.df["id"].apply(lambda x: 1 if x < 10 else 0)
- self.df["x1"] = np.random.randint(1, 123, self.df.shape[0])
- self.df["x2"] = np.random.randint(1, 3, self.df.shape[0])
- self.df = self.df.set_index("id")
- self.clf = cb.CatBoostClassifier(n_estimators=1).fit(self.df[["x1", "x2"]], self.df["y"])
- self.xpl = SmartExplainer(model=self.clf)
- self.xpl.compile(x=self.df[["x1", "x2"]])
- self.report1 = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- )
- self.report2 = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- )
-
- def test_init_1(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- )
- for attr in expected_attrs:
- assert hasattr(report, attr)
-
- def test_init_2(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- )
- for attr in expected_attrs:
- assert hasattr(report, attr)
-
- def test_init_3(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- y_test=self.df["y"],
- )
- for attr in expected_attrs:
- assert hasattr(report, attr)
-
- def test_init_4(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- y_test=self.df["y"],
- config={},
- )
- for attr in expected_attrs:
- assert hasattr(report, attr)
-
- def test_init_5(self):
- ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- y_test=self.df["y"],
- config={"metrics": [{"path": "sklearn.metrics.mean_squared_error"}]},
- )
-
- def test_init_6(self):
- self.assertRaises(
- ValueError,
- ProjectReport,
- self.xpl,
- os.path.join(current_path, "../../data/metadata.yaml"),
- self.df[["x1", "x2"]],
- self.df["y"],
- {"metrics": ["sklearn.metrics.mean_squared_error"]},
- )
-
- @patch("shapash.report.project_report.print_html")
- def test_display_title_description_1(self, mock_print_html):
- self.report1.display_title_description()
- mock_print_html.assert_called_once()
-
- @patch("shapash.report.project_report.print_html")
- def test_display_title_description_2(self, mock_print_html):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- y_test=self.df["y"],
- config={
- "title_story": "My project report",
- "title_description": """This document is a data science project report.""",
- },
- )
- report.display_title_description()
- self.assertEqual(mock_print_html.call_count, 2)
-
- @patch("shapash.report.project_report.print_md")
- def test_display_general_information_1(self, mock_print_html):
- report = ProjectReport(
- explainer=self.xpl, project_info_file=os.path.join(current_path, "../../data/metadata.yaml")
- )
- report.display_project_information()
- self.assertTrue(mock_print_html.called)
-
- @patch("shapash.report.project_report.print_md")
- def test_display_model_information_1(self, mock_print_md):
- report = ProjectReport(
- explainer=self.xpl, project_info_file=os.path.join(current_path, "../../data/metadata.yaml")
- )
- report.display_model_analysis()
- self.assertTrue(mock_print_md.called)
-
- def test_display_dataset_analysis_1(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=self.df[["x1", "x2"]],
- )
- report.display_dataset_analysis()
-
- def test_display_dataset_analysis_2(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- )
- report.display_dataset_analysis()
-
- def test_display_dataset_analysis_3(self):
- """
- Test we don't have a problem when only categorical features
- """
- df = self.df.copy()
- df["x1"] = "a"
- df["x2"] = df["x2"].astype(str)
- encoder = OrdinalEncoder(cols=["x1", "x2"], handle_unknown="ignore", return_df=True).fit(df)
-
- df = encoder.transform(df)
-
- clf = cb.CatBoostClassifier(n_estimators=1).fit(df[["x1", "x2"]], df["y"])
- xpl = SmartExplainer(model=clf)
- xpl.compile(x=df[["x1", "x2"]])
- report = ProjectReport(
- explainer=xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- x_train=df[["x1", "x2"]],
- )
-
- report.display_dataset_analysis()
-
- def test_display_model_explainability_1(self):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- )
- report.display_model_explainability()
-
- def test_display_model_explainability_2(self):
- """
- Tests multiclass case
- """
- df = pd.DataFrame(range(0, 21), columns=["id"])
- df["y"] = df["id"].apply(lambda x: 0 if x < 5 else 1 if (5 <= x < 10) else 2 if (10 <= x < 15) else 3)
- df["x1"] = np.random.randint(1, 123, df.shape[0])
- df["x2"] = np.random.randint(1, 3, df.shape[0])
- df = df.set_index("id")
- clf = cb.CatBoostClassifier(n_estimators=1).fit(df[["x1", "x2"]], df["y"])
- xpl = SmartExplainer(model=clf)
- xpl.compile(x=df[["x1", "x2"]])
- report = ProjectReport(explainer=xpl, project_info_file=os.path.join(current_path, "../../data/metadata.yaml"))
- report.display_model_explainability()
-
- @patch("shapash.report.project_report.logging")
- def test_display_model_performance_1(self, mock_logging):
- """
- No y_test given
- """
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- )
- report.display_model_performance()
- mock_logging.info.assert_called_once()
-
- @patch("shapash.report.project_report.logging")
- def test_display_model_performance_2(self, mock_logging):
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- y_test=self.df["y"],
- config=dict(metrics=[{"path": "sklearn.metrics.mean_squared_error"}]),
- )
- report.display_model_performance()
- self.assertEqual(mock_logging.call_count, 0)
-
- @patch("shapash.report.project_report.logging")
- def test_display_model_performance_3(self, mock_logging):
- """
- No metrics given in ProjectReport
- """
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- y_test=self.df["y"],
- )
- report.display_model_performance()
- mock_logging.info.assert_called_once()
-
- @patch("shapash.report.project_report.logging")
- def test_display_model_performance_4(self, mock_logging):
- """
- Test use of proba values.
- """
- report = ProjectReport(
- explainer=self.xpl,
- project_info_file=os.path.join(current_path, "../../data/metadata.yaml"),
- y_test=self.df["y"],
- config=dict(metrics=[{"path": "sklearn.metrics.log_loss", "use_proba_values": True}]),
- )
- report.display_model_performance()
- self.assertEqual(mock_logging.call_count, 0)
diff --git a/tests/unit_tests/report/test_report_generation.py b/tests/unit_tests/report/test_report_generation.py
new file mode 100644
index 00000000..8eb0d924
--- /dev/null
+++ b/tests/unit_tests/report/test_report_generation.py
@@ -0,0 +1,167 @@
+import unittest
+
+import panel as pn
+import pandas as pd
+import plotly.graph_objects as go
+
+from shapash.report.blocks import ReportBlockMixin, block
+from shapash.report.panel_support import apply_report_css, make_plotly_pane, report_css_text
+
+
+class TestSmartReportPanel(unittest.TestCase):
+ """Tests for make_plotly_pane, report_css_text and apply_report_css."""
+
+ def test_make_plotly_pane_returns_panel_plotly(self):
+ """Expect a pn.pane.Plotly holding the given figure with stretch_width sizing."""
+ fig = go.Figure(go.Scatter(x=[1, 2], y=[3, 4]))
+
+ pane = make_plotly_pane(fig)
+
+ self.assertIsInstance(pane, pn.pane.Plotly)
+ self.assertEqual(pane.object, fig)
+ self.assertEqual(pane.sizing_mode, "stretch_width")
+
+ def test_report_css_text_loads_stylesheet_content(self):
+ """Expect the returned CSS text to contain a table rule and the 1200px media query."""
+ css = report_css_text()
+
+ self.assertIn(".kv-table", css)
+ self.assertIn("@media (max-width: 1200px)", css)
+
+ def test_apply_report_css_registers_styles_once(self):
+ """Expect the CSS to appear exactly once in pn.config.raw_css after repeated calls."""
+ css = report_css_text()
+
+ apply_report_css()
+ first_count = pn.config.raw_css.count(css)
+
+ # A second call must not append the same stylesheet again.
+ apply_report_css()
+ second_count = pn.config.raw_css.count(css)
+
+ self.assertEqual(first_count, 1)
+ self.assertEqual(second_count, 1)
+
+
+class _DummyBlocks(ReportBlockMixin):
+ """Fixture with one @block method per return shape exercised by TestBlockDecorator."""
+
+ # Body given as a list containing a single Markdown pane.
+ @block
+ def block_demo(self, title: str = "Demo"):
+ return [pn.pane.Markdown("Body")]
+
+ # Returns a (title, body) tuple while the declared default title is empty.
+ @block
+ def block_dynamic_title(self, title: str = ""):
+ return "Resolved title", [pn.pane.Markdown("Dynamic body")]
+
+ # Body given as a bare string instead of a Panel object.
+ @block
+ def block_scalar_body(self, title: str = "Scalar"):
+ return "plain text"
+
+ @block
+ def block_table(self, title: str = "Table"):
+ return [pn.pane.DataFrame(pd.DataFrame({"a": [1], "b": [2]}))]
+
+ @block
+ def block_badge_row(self, title: str = "Badges"):
+ return [pn.Row(pn.pane.Markdown("One"), pn.pane.Markdown("Two"))]
+
+ @block
+ def block_select_allowed(self, title: str = "Selector"):
+ return [pn.widgets.Select(name="Feature", options=["a", "b"], value="a")]
+
+ @block
+ def block_plotly_allowed(self, title: str = "Plotly"):
+ fig = go.Figure(go.Scatter(x=[1, 2], y=[3, 4]))
+ return [pn.pane.Plotly(fig)]
+
+ # A Select widget plus a panel bound to its value through pn.bind.
+ @block
+ def block_bind_allowed(self, title: str = "Bind"):
+ selector = pn.widgets.Select(name="Feature", options=["a", "b"], value="a")
+ selected_panel = pn.panel(pn.bind(lambda selected: pn.pane.Markdown(selected), selector))
+ return [selector, selected_panel]
+
+ # The two methods below are the negative cases: TestBlockDecorator expects
+ # calling them to raise TypeError.
+ @block
+ def block_panel_type_not_allowed(self, title: str = "HTML"):
+ return [pn.pane.HTML("html ")]
+
+ @block
+ def block_non_panel_type_not_allowed(self, title: str = "Object"):
+ return [object()]
+
+
+class TestBlockDecorator(unittest.TestCase):
+ """Tests for the @block decorator, driven through the _DummyBlocks fixture."""
+
+ def test_block_decorator_wraps_with_title_from_signature(self):
+ """Expect a two-item pn.Column whose first item is a Markdown title containing "Demo"."""
+ runtime = _DummyBlocks()
+
+ result = runtime.block_demo()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[0], pn.pane.Markdown)
+ self.assertIn("Demo", result.objects[0].object)
+
+ def test_block_decorator_supports_dynamic_title_tuple(self):
+ """Expect the title returned in the tuple to be the one rendered."""
+ runtime = _DummyBlocks()
+
+ result = runtime.block_dynamic_title()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[0], pn.pane.Markdown)
+ self.assertIn("Resolved title", result.objects[0].object)
+
+ def test_block_decorator_coerces_scalar_body_to_markdown(self):
+ """Expect a plain string body to come back as a Markdown pane."""
+ runtime = _DummyBlocks()
+
+ result = runtime.block_scalar_body()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[1], pn.pane.Markdown)
+ self.assertIn("plain text", result.objects[1].object)
+
+ def test_block_decorator_auto_stylizes_body_by_type(self):
+ """Expect "content-block" on Markdown bodies and "kv-table" on DataFrame bodies."""
+ runtime = _DummyBlocks()
+
+ text_result = runtime.block_demo()
+ table_result = runtime.block_table()
+
+ self.assertIn("content-block", text_result.objects[1].css_classes)
+ self.assertIn("kv-table", table_result.objects[1].css_classes)
+
+ def test_block_decorator_auto_styles_badge_rows(self):
+ """Expect each Markdown child of a Row body to carry the "badge-pill" class."""
+ runtime = _DummyBlocks()
+
+ result = runtime.block_badge_row()
+
+ badge_row = result.objects[1]
+ self.assertIsInstance(badge_row, pn.Row)
+ self.assertIn("badge-pill", badge_row.objects[0].css_classes)
+ self.assertIn("badge-pill", badge_row.objects[1].css_classes)
+
+ def test_block_decorator_allows_select_and_plotly(self):
+ """Expect Select widgets and Plotly panes to be kept as the block body."""
+ runtime = _DummyBlocks()
+
+ select_result = runtime.block_select_allowed()
+ plotly_result = runtime.block_plotly_allowed()
+
+ self.assertIsInstance(select_result.objects[1], pn.widgets.Select)
+ self.assertIsInstance(plotly_result.objects[1], pn.pane.Plotly)
+
+ def test_block_decorator_allows_bind_param_function(self):
+ """Expect a pn.bind-backed panel to be accepted next to its Select widget."""
+ runtime = _DummyBlocks()
+
+ result = runtime.block_bind_allowed()
+
+ self.assertIsInstance(result.objects[1], pn.widgets.Select)
+ # Compared by class name rather than by importing the ParamFunction class.
+ self.assertEqual(type(result.objects[2]).__name__, "ParamFunction")
+
+ def test_block_decorator_rejects_panel_type_without_style_definition(self):
+ """Expect TypeError, with both message fragments, for a pn.pane.HTML body."""
+ runtime = _DummyBlocks()
+
+ with self.assertRaises(TypeError) as context:
+ runtime.block_panel_type_not_allowed()
+
+ self.assertIn("Unsupported Panel object type returned", str(context.exception))
+ self.assertIn("Allowed Panel return types", str(context.exception))
+
+ def test_block_decorator_rejects_non_panel_return_type(self):
+ """Expect TypeError for a body item that is a bare object()."""
+ runtime = _DummyBlocks()
+
+ with self.assertRaises(TypeError) as context:
+ runtime.block_non_panel_type_not_allowed()
+
+ self.assertIn("Unsupported block return type", str(context.exception))
diff --git a/tests/unit_tests/report/test_smart_report_panel.py b/tests/unit_tests/report/test_smart_report_panel.py
new file mode 100644
index 00000000..8eb0d924
--- /dev/null
+++ b/tests/unit_tests/report/test_smart_report_panel.py
@@ -0,0 +1,167 @@
+import unittest
+
+import panel as pn
+import pandas as pd
+import plotly.graph_objects as go
+
+from shapash.report.blocks import ReportBlockMixin, block
+from shapash.report.panel_support import apply_report_css, make_plotly_pane, report_css_text
+
+
+class TestSmartReportPanel(unittest.TestCase):
+ """Unit tests for the helpers imported from `shapash.report.panel_support`."""
+ def test_make_plotly_pane_returns_panel_plotly(self):
+ # `make_plotly_pane` must wrap the figure unchanged in a `pn.pane.Plotly`
+ # whose sizing mode is "stretch_width".
+ fig = go.Figure(go.Scatter(x=[1, 2], y=[3, 4]))
+
+ pane = make_plotly_pane(fig)
+
+ self.assertIsInstance(pane, pn.pane.Plotly)
+ self.assertEqual(pane.object, fig)
+ self.assertEqual(pane.sizing_mode, "stretch_width")
+
+ def test_report_css_text_loads_stylesheet_content(self):
+ # The returned CSS text must contain the `.kv-table` rule and the
+ # 1200px media query, i.e. the stylesheet content was actually loaded.
+ css = report_css_text()
+
+ self.assertIn(".kv-table", css)
+ self.assertIn("@media (max-width: 1200px)", css)
+
+ def test_apply_report_css_registers_styles_once(self):
+ # Calling `apply_report_css` twice must leave exactly one copy of the
+ # stylesheet text in `pn.config.raw_css` (no duplicate registration).
+ css = report_css_text()
+
+ apply_report_css()
+ first_count = pn.config.raw_css.count(css)
+
+ apply_report_css()
+ second_count = pn.config.raw_css.count(css)
+
+ self.assertEqual(first_count, 1)
+ self.assertEqual(second_count, 1)
+
+
+class _DummyBlocks(ReportBlockMixin):
+ # Test fixture: one `@block` method per return shape exercised by
+ # `TestBlockDecorator` below.
+
+ # List body; the title comes from the default of the `title` parameter.
+ @block
+ def block_demo(self, title: str = "Demo"):
+ return [pn.pane.Markdown("Body")]
+
+ # Returns a `(title, body)` tuple instead of a bare body.
+ @block
+ def block_dynamic_title(self, title: str = ""):
+ return "Resolved title", [pn.pane.Markdown("Dynamic body")]
+
+ # Returns a plain string rather than a list of Panel objects.
+ @block
+ def block_scalar_body(self, title: str = "Scalar"):
+ return "plain text"
+
+ # Body made of a single DataFrame pane.
+ @block
+ def block_table(self, title: str = "Table"):
+ return [pn.pane.DataFrame(pd.DataFrame({"a": [1], "b": [2]}))]
+
+ # Body made of a Row holding two Markdown panes.
+ @block
+ def block_badge_row(self, title: str = "Badges"):
+ return [pn.Row(pn.pane.Markdown("One"), pn.pane.Markdown("Two"))]
+
+ # Body made of a Select widget.
+ @block
+ def block_select_allowed(self, title: str = "Selector"):
+ return [pn.widgets.Select(name="Feature", options=["a", "b"], value="a")]
+
+ # Body made of a Plotly pane.
+ @block
+ def block_plotly_allowed(self, title: str = "Plotly"):
+ fig = go.Figure(go.Scatter(x=[1, 2], y=[3, 4]))
+ return [pn.pane.Plotly(fig)]
+
+ # Body made of a Select widget plus a panel bound to its value via `pn.bind`.
+ @block
+ def block_bind_allowed(self, title: str = "Bind"):
+ selector = pn.widgets.Select(name="Feature", options=["a", "b"], value="a")
+ selected_panel = pn.panel(pn.bind(lambda selected: pn.pane.Markdown(selected), selector))
+ return [selector, selected_panel]
+
+ # Body made of an HTML pane; the tests expect the decorator to reject it.
+ @block
+ def block_panel_type_not_allowed(self, title: str = "HTML"):
+ return [pn.pane.HTML("html ")]
+
+ # Body made of a bare `object()`; the tests expect the decorator to reject it.
+ @block
+ def block_non_panel_type_not_allowed(self, title: str = "Object"):
+ return [object()]
+
+
+class TestBlockDecorator(unittest.TestCase):
+ """Unit tests for the `block` decorator, driven through the `_DummyBlocks` fixture."""
+ def test_block_decorator_wraps_with_title_from_signature(self):
+ # A list body is wrapped in a two-item Column: a Markdown title built from
+ # the `title` default ("Demo") followed by the body.
+ runtime = _DummyBlocks()
+
+ result = runtime.block_demo()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[0], pn.pane.Markdown)
+ self.assertIn("Demo", result.objects[0].object)
+
+ def test_block_decorator_supports_dynamic_title_tuple(self):
+ # When the method returns `(title, body)`, the returned title is the one rendered.
+ runtime = _DummyBlocks()
+
+ result = runtime.block_dynamic_title()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[0], pn.pane.Markdown)
+ self.assertIn("Resolved title", result.objects[0].object)
+
+ def test_block_decorator_coerces_scalar_body_to_markdown(self):
+ # A plain string body ends up as a Markdown pane containing that string.
+ runtime = _DummyBlocks()
+
+ result = runtime.block_scalar_body()
+
+ self.assertIsInstance(result, pn.Column)
+ self.assertEqual(len(result.objects), 2)
+ self.assertIsInstance(result.objects[1], pn.pane.Markdown)
+ self.assertIn("plain text", result.objects[1].object)
+
+ def test_block_decorator_auto_stylizes_body_by_type(self):
+ # Markdown bodies receive the "content-block" CSS class and DataFrame
+ # bodies receive "kv-table".
+ runtime = _DummyBlocks()
+
+ text_result = runtime.block_demo()
+ table_result = runtime.block_table()
+
+ self.assertIn("content-block", text_result.objects[1].css_classes)
+ self.assertIn("kv-table", table_result.objects[1].css_classes)
+
+ def test_block_decorator_auto_styles_badge_rows(self):
+ # A Row body stays a Row and each of its two Markdown children receives
+ # the "badge-pill" CSS class.
+ runtime = _DummyBlocks()
+
+ result = runtime.block_badge_row()
+
+ badge_row = result.objects[1]
+ self.assertIsInstance(badge_row, pn.Row)
+ self.assertIn("badge-pill", badge_row.objects[0].css_classes)
+ self.assertIn("badge-pill", badge_row.objects[1].css_classes)
+
+ def test_block_decorator_allows_select_and_plotly(self):
+ # Select widgets and Plotly panes are accepted and keep their type.
+ runtime = _DummyBlocks()
+
+ select_result = runtime.block_select_allowed()
+ plotly_result = runtime.block_plotly_allowed()
+
+ self.assertIsInstance(select_result.objects[1], pn.widgets.Select)
+ self.assertIsInstance(plotly_result.objects[1], pn.pane.Plotly)
+
+ def test_block_decorator_allows_bind_param_function(self):
+ # A `pn.bind`-based panel is accepted; it is checked by class name
+ # ("ParamFunction") rather than with `isinstance`.
+ runtime = _DummyBlocks()
+
+ result = runtime.block_bind_allowed()
+
+ self.assertIsInstance(result.objects[1], pn.widgets.Select)
+ self.assertEqual(type(result.objects[2]).__name__, "ParamFunction")
+
+ def test_block_decorator_rejects_panel_type_without_style_definition(self):
+ # An HTML pane must raise TypeError, and the message must list the
+ # allowed Panel return types.
+ runtime = _DummyBlocks()
+
+ with self.assertRaises(TypeError) as context:
+ runtime.block_panel_type_not_allowed()
+
+ self.assertIn("Unsupported Panel object type returned", str(context.exception))
+ self.assertIn("Allowed Panel return types", str(context.exception))
+
+ def test_block_decorator_rejects_non_panel_return_type(self):
+ # A non-Panel object in the body must raise TypeError.
+ runtime = _DummyBlocks()
+
+ with self.assertRaises(TypeError) as context:
+ runtime.block_non_panel_type_not_allowed()
+
+ self.assertIn("Unsupported block return type", str(context.exception))
diff --git a/tutorial/generate_report/config/default_report.yml b/tutorial/generate_report/config/default_report.yml
new file mode 100644
index 00000000..fb3854e2
--- /dev/null
+++ b/tutorial/generate_report/config/default_report.yml
@@ -0,0 +1,77 @@
+# default_report.yml
+# Default smart report configuration based on block sections.
+
+sections:
+ - type: header
+ params:
+ title: "House prices report"
+ subtitle: >
+ This document is a data science report of the kaggle house prices tutorial project.
+ It was generated using the Shapash library.
+
+ - type: project_information
+ params:
+ title: "Project information"
+ project_info_file: "tutorial/generate_report/config/project_information.yml"
+
+ - type: model_analysis
+ params:
+ title: "Model analysis"
+
+ - type: group
+ params:
+ title: "Dataset analysis"
+ blocks:
+ - type: global_analysis
+ params:
+ title: "Global analysis"
+
+ - type: univariate_analysis
+ params:
+ title: "Univariate analysis"
+
+ - type: target_analysis
+ params:
+ title: "Target analysis"
+ show_train: true
+
+ - type: correlations_plot
+ params:
+ title: "Multivariate analysis"
+ max_features: 20
+
+ - type: group
+ params:
+ title: "Model explainability"
+ blocks:
+ - type: feature_importance
+ params:
+ title: "Global feature importance plot"
+
+ - type: contribution_plot
+ params:
+ title: "Features contribution plots"
+ include_all_features: true
+
+ - type: group
+ params:
+ title: "Model performance"
+ blocks:
+ - type: target_distribution
+ params:
+ title: "Univariate analysis of target variable"
+
+ - type: performance_metrics
+ params:
+ title: "Metrics"
+ metrics:
+ - path: "sklearn.metrics.mean_absolute_error"
+ name: "Mean absolute error"
+ - path: "sklearn.metrics.mean_squared_error"
+ name: "Mean squared error"
+
+ - type: callout
+ params:
+ body: >
+ You can add as many blocks, charts, and text sections as you want.
+ The generated HTML renders report content only (no source code).
diff --git a/tutorial/generate_report/config/default_report_custom.yml b/tutorial/generate_report/config/default_report_custom.yml
new file mode 100644
index 00000000..91ad3f83
--- /dev/null
+++ b/tutorial/generate_report/config/default_report_custom.yml
@@ -0,0 +1,87 @@
+# default_report_custom.yml
+# Custom report with a user-defined block example.
+
+sections:
+ - type: header
+ params:
+ title: "House prices report"
+ subtitle: >
+ This document is a data science report of the kaggle house prices tutorial project.
+ It was generated using the Shapash library.
+
+ - type: user_note
+ params:
+ title: "Custom user block"
+ body: "This section was added by a user-defined block class."
+
+ - type: prediction_diagnostics
+ params:
+ title: "Custom prediction diagnostics"
+ color_feature: "GrLivArea"
+
+ - type: project_information
+ params:
+ title: "Project information"
+ project_info_file: "tutorial/generate_report/config/project_information.yml"
+
+ - type: model_analysis
+ params:
+ title: "Model analysis"
+
+ - type: group
+ params:
+ title: "Dataset analysis"
+ blocks:
+ - type: global_analysis
+ params:
+ title: "Global analysis"
+
+ - type: univariate_analysis
+ params:
+ title: "Univariate analysis"
+
+ - type: target_analysis
+ params:
+ title: "Target analysis"
+ show_train: true
+
+ - type: correlations_plot
+ params:
+ title: "Multivariate analysis"
+ max_features: 20
+
+ - type: group
+ params:
+ title: "Model explainability"
+ blocks:
+ - type: feature_importance
+ params:
+ title: "Global feature importance plot"
+
+ - type: contribution_plot
+ params:
+ title: "Features contribution plots"
+ include_all_features: true
+
+ - type: group
+ params:
+ title: "Model performance"
+ blocks:
+ - type: target_distribution
+ params:
+ title: "Univariate analysis of target variable"
+
+ - type: performance_metrics
+ params:
+ title: "Metrics"
+ metrics:
+ - path: "sklearn.metrics.mean_absolute_error"
+ name: "Mean absolute error"
+ - path: "sklearn.metrics.mean_squared_error"
+ name: "Mean squared error"
+
+ - type: callout
+ params:
+ body: >
+ You can add as many blocks, charts, and text sections as you want.
+ The generated HTML renders report content only (no source code).
diff --git a/tutorial/generate_report/utils/project_info.yml b/tutorial/generate_report/config/project_information.yml
similarity index 100%
rename from tutorial/generate_report/utils/project_info.yml
rename to tutorial/generate_report/config/project_information.yml
diff --git a/tutorial/generate_report/shapash_report_example.py b/tutorial/generate_report/shapash_report_example.py
index 4734a587..18663469 100644
--- a/tutorial/generate_report/shapash_report_example.py
+++ b/tutorial/generate_report/shapash_report_example.py
@@ -1,13 +1,20 @@
"""
-This script can be used to generate the report example.
-For more information, please refer to the tutorial 'tuto-shapash-report01.ipynb'
-that generates the same report.
+Generate the report example with the new smart_report implementation.
+
+The report layout is driven by the YAML file `config/default_report_custom.yml`
+and rendered through `SmartExplainer.generate_report` with user-defined blocks.
+
+For more information, please refer to the tutorial
+`tuto-shapash-report01.ipynb` that generates the same report.
"""
import os
import sys
import pandas as pd
from category_encoders import OrdinalEncoder
+import panel as pn
+import plotly.express as px
+import plotly.graph_objects as go
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
@@ -15,11 +22,130 @@
from shapash import SmartExplainer
from shapash.data.data_loader import data_loading
+from shapash.report.blocks import ReportBlockMixin, block
+
+# A custom block class is defined by inheriting from ReportBlockMixin and implementing `block_<type>` methods.
+class UserReportBlocks(ReportBlockMixin):
+    """Example of user-defined custom blocks for report generation.
+
+    Each `block_<type>` method backs the YAML section of the same `type`
+    (`user_note` and `prediction_diagnostics` in the tutorial configuration).
+    """
+
+    @block
+    def block_user_note(
+        self,
+        title: str = "Analyst note",
+        body: str = "This report includes a custom user cell.",
+    ) -> tuple[str, list]:
+        """Render a free-text note.
+
+        Parameters
+        ----------
+        title : str
+            Title displayed for the block.
+        body : str
+            Markdown text displayed as the block content.
+
+        Returns
+        -------
+        tuple[str, list]
+            The block title and a one-element list holding the Markdown pane.
+        """
+        return title, [pn.pane.Markdown(body)]
+
+    @block
+    def block_prediction_diagnostics(
+        self,
+        title: str = "Prediction diagnostics",
+        color_feature: str | None = None,
+    ) -> tuple[str, list]:
+        """Display a richer custom block with complementary prediction graphs.
+
+        Builds an actual-vs-predicted scatter plot (with an ideal-fit diagonal)
+        and a residual histogram, preceded by a one-line numeric summary.
+
+        Parameters
+        ----------
+        title : str
+            Title displayed for the block.
+        color_feature : str | None
+            Column of `self.x_init` used to color the scatter points. When it is
+            missing or not a column of `self.x_init`, points are colored by
+            absolute error instead.
+
+        Returns
+        -------
+        tuple[str, list]
+            The block title and the list of Panel objects to render. When
+            `self.y_test` or `self.y_pred` is None, the list only contains an
+            explanatory Markdown message.
+        """
+        # NOTE(review): `y_test`, `y_pred` and `x_init` are read from `self`;
+        # presumably they are set by the report runtime -- confirm.
+
+        # --- Data preparation ---
+
+        if self.y_test is None or self.y_pred is None:
+            return title, [pn.pane.Markdown("Prediction diagnostics requires both y_test and y_pred.")]
+
+        # Indexes are reset so that actual and predicted values are paired by
+        # position rather than by index label.
+        diagnostics = pd.DataFrame(
+            {
+                "actual": pd.Series(self.y_test).reset_index(drop=True),
+                "predicted": pd.Series(self.y_pred).reset_index(drop=True),
+            }
+        )
+        diagnostics["residual"] = diagnostics["actual"] - diagnostics["predicted"]
+        diagnostics["abs_error"] = diagnostics["residual"].abs()
+
+        if color_feature and self.x_init is not None and color_feature in self.x_init.columns:
+            diagnostics[color_feature] = pd.Series(self.x_init[color_feature]).reset_index(drop=True)
+            scatter = px.scatter(
+                diagnostics,
+                x="actual",
+                y="predicted",
+                color=color_feature,
+                hover_data=["residual", "abs_error"],
+                title="Actual vs predicted",
+                labels={"actual": "Actual", "predicted": "Predicted"},
+            )
+        else:
+            scatter = px.scatter(
+                diagnostics,
+                x="actual",
+                y="predicted",
+                color="abs_error",
+                color_continuous_scale="Tealgrn",
+                hover_data=["residual", "abs_error"],
+                title="Actual vs predicted",
+                labels={"actual": "Actual", "predicted": "Predicted", "abs_error": "Absolute error"},
+            )
+
+        # Diagonal spanning the full value range: points on it are perfect predictions.
+        min_axis = min(diagnostics["actual"].min(), diagnostics["predicted"].min())
+        max_axis = max(diagnostics["actual"].max(), diagnostics["predicted"].max())
+        scatter.add_trace(
+            go.Scatter(
+                x=[min_axis, max_axis],
+                y=[min_axis, max_axis],
+                mode="lines",
+                line={"dash": "dash", "color": "#666666"},
+                name="Ideal fit",
+                showlegend=False,
+            )
+        )
+        scatter.update_layout(margin=dict(l=20, r=20, t=50, b=20))
+
+        residual_hist = px.histogram(
+            diagnostics,
+            x="residual",
+            nbins=30,
+            title="Residual distribution",
+            labels={"residual": "Actual - Predicted"},
+            color_discrete_sequence=["#2E8B57"],
+        )
+        residual_hist.add_vline(x=0, line_dash="dash", line_color="#666666")
+        residual_hist.update_layout(margin=dict(l=20, r=20, t=50, b=20))
+
+        # --- Block rendering ---
+
+        summary = pn.pane.Markdown(
+            "**Quick diagnostics:** "
+            f"MAE = {diagnostics['abs_error'].mean():.2f}, "
+            f"mean residual = {diagnostics['residual'].mean():.2f}, "
+            f"max absolute error = {diagnostics['abs_error'].max():.2f}"
+        )
+
+        # Avoid using stretch_both here: it can cause rendering issues.
+        scatter_pane = pn.pane.Plotly(
+            scatter,
+            config={"displayModeBar": False, "responsive": True},
+            sizing_mode="stretch_width",
+            height=360,
+        )
+        residual_pane = pn.pane.Plotly(
+            residual_hist,
+            config={"displayModeBar": False, "responsive": True},
+            sizing_mode="stretch_width",
+            height=360,
+        )
+        charts = pn.Row(scatter_pane, residual_pane, sizing_mode="stretch_width")
+
+        # Return a title, and a list of Panel objects to be rendered in the report.
+        return title, [summary, charts]
if __name__ == "__main__":
house_df, house_dict = data_loading("house_prices")
y_df = house_df["SalePrice"]
- X_df = house_df[house_df.columns.difference(["SalePrice"])]
+ X_df = house_df[house_df.columns.difference(["SalePrice"])].copy()
+
+ # Ensure non-numeric columns are treated as categorical before encoding.
+ for col in X_df.columns:
+ if not pd.api.types.is_numeric_dtype(X_df[col]):
+ X_df[col] = X_df[col].astype(object)
categorical_features = [col for col in X_df.columns if X_df[col].dtype == "object"]
@@ -31,6 +157,7 @@
regressor = RandomForestRegressor(n_estimators=50).fit(Xtrain, ytrain)
+ # Keep y_pred as dataframe to match SmartExplainer report expectations.
y_pred = pd.DataFrame(regressor.predict(Xtest), columns=["pred"], index=Xtest.index)
cur_dir = os.path.dirname(os.path.abspath(__file__))
@@ -40,25 +167,21 @@
preprocessing=encoder, # Optional: compile step can use inverse_transform method
features_dict=house_dict,
)
+ # Compile once before report generation.
xpl.compile(x=Xtest, y_pred=y_pred, y_target=ytest)
+ output_file = os.path.join(cur_dir, "output", "report.html")
+ project_info_file = os.path.join(cur_dir, "config", "project_information.yml")
+ custom_report_config_file = os.path.join(cur_dir, "config", "default_report_custom.yml")
+
xpl.generate_report(
- output_file=os.path.join(cur_dir, "output", "report.html"),
- project_info_file=os.path.join(cur_dir, "utils", "project_info.yml"),
+ output_file=output_file,
+ project_info_file=project_info_file,
x_train=Xtrain,
y_train=ytrain,
y_test=ytest,
- title_story="House prices report",
- title_description="""This document is a data science report of the kaggle house prices tutorial project.
- It was generated using the Shapash library.""",
- metrics=[
- {
- "path": "sklearn.metrics.mean_absolute_error",
- "name": "Mean absolute error",
- },
- {
- "path": "sklearn.metrics.mean_squared_error",
- "name": "Mean squared error",
- },
- ],
+ # Load tutorial-specific report layout where custom block types are declared.
+ yaml_path=custom_report_config_file,
+ # Use the custom block class to enable user-defined blocks in the report.
+ block_instance=UserReportBlocks(),
)
diff --git a/tutorial/generate_report/tuto-shapash-report01.ipynb b/tutorial/generate_report/tuto-shapash-report01.ipynb
index 53ae5906..34e64ab6 100644
--- a/tutorial/generate_report/tuto-shapash-report01.ipynb
+++ b/tutorial/generate_report/tuto-shapash-report01.ipynb
@@ -7,34 +7,34 @@
"source": [
"# Shapash Report\n",
"\n",
- "> The Shapash Report feature allows data scientists to deliver to anyone who is interested in their project **a document that freezes different aspects of their work as a basis of an audit report**. This document can be easily shared across teams and does not require anything else than a working internet connexion.\n",
+ "The Shapash Report feature allows data scientists to deliver to anyone interested in their project **a document that freezes different aspects of their work as a basis for an audit report**.\n",
"\n",
- "The shapash `generate_report` method allows to generate a report of your project. \n",
- "The result is a standalone HTML file that does not require any external dependency or server to work. \n",
- "The only requirement for the document to display properly is an active internet connexion. \n",
+ "The shapash `generate_report` method generates an HTML report for your project. \n",
+ "The report is generated as a single HTML file with embedded branding (including the logo), so the file can be moved and opened locally without breaking the logo. \n",
+ "Some interactive resources may still rely on CDN assets depending on your environment, so an internet connection can still be required for full interactivity. \n",
"\n",
- "The report contains the following information :\n",
+ "The report contains the following information:\n",
"1. General information about the project\n",
"2. Description of the dataset used\n",
"3. Documentation about data preparation and feature engineering\n",
- "4. Details about your model used (library, parameters...)\n",
+ "4. Details about your model (library, parameters...)\n",
"5. Exploration of the data with a focus on the difference between train and test sets\n",
"6. Global explainability of the model\n",
"7. Model performance\n",
"\n",
- "> The first three points are generated using a YML file that the user should fill. An example is available [here](https://github.com/MAIF/shapash/blob/master/tutorial/report/utils/project_info.yml).\n",
+ "The first three points are generated using a YAML file that the user should fill in. An example is available [here](https://github.com/MAIF/shapash/blob/master/tutorial/generate_report/config/project_information.yml).\n",
"\n",
- "This tutorial presents an example of how one can generate the Shapash Report. \n",
+ "This tutorial presents an example of how to generate the Shapash Report.\n",
"\n",
"Content:\n",
"- Set up an example project\n",
"- Create and fill your project information that will be displayed in the report\n",
"- Generate the base Shapash Report\n",
- "- *Go further*: Generate a custom report\n",
+ "- Go further: generate a custom report with a custom YAML configuration\n",
"\n",
"Data from Kaggle [House Prices](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)\n",
"\n",
- "> Note : you may need to download the HTML report locally and open it in your browser otherwise it may not show properly."
+ "> Note: Open the generated HTML file in a browser to view the final report."
]
},
{
@@ -45,6 +45,9 @@
"outputs": [],
"source": [
"import pandas as pd\n",
+ "import panel as pn\n",
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
"from category_encoders import OrdinalEncoder\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.model_selection import train_test_split"
@@ -144,7 +147,7 @@
"source": [
"import yaml\n",
"\n",
- "with open(r'utils/project_info.yml') as file:\n",
+ "with open(r'config/project_information.yml') as file:\n",
" project_info = yaml.full_load(file)\n",
"\n",
"print(yaml.dump(project_info, sort_keys=False))"
@@ -196,7 +199,8 @@
"metadata": {},
"outputs": [],
"source": [
- "from shapash import SmartExplainer"
+ "from shapash import SmartExplainer\n",
+ "from shapash.report.blocks import ReportBlockMixin, block"
]
},
{
@@ -256,7 +260,7 @@
"source": [
"xpl.generate_report(\n",
" output_file='output/report.html', \n",
- " project_info_file='utils/project_info.yml',\n",
+ " project_info_file='config/project_information.yml',\n",
" x_train=Xtrain,\n",
" y_train=ytrain,\n",
" y_test=ytest,\n",
@@ -281,8 +285,7 @@
"id": "central-karma",
"metadata": {},
"source": [
- "> Note: You might want to specify the jupyter kernel used when generating the report.\n",
- "You should consider using the `kernel_name` parameter to indicate what kernel to use."
+ "> Note: `generate_report` no longer accepts the deprecated `kernel_name` parameter."
]
},
{
@@ -298,21 +301,18 @@
"id": "grateful-terror",
"metadata": {},
"source": [
- "Now let's customize our report by adding some new sections.\n",
+ "Now let's customize our report by adding user-defined blocks and a custom YAML layout.\n",
"\n",
- "To do so :\n",
- "- First, **copy the base report notebook** you can find [here](https://github.com/MAIF/shapash/blob/master/shapash/report/base_report.ipynb). This is the notebook that is used to generate the shapash report. It is executed and then converted to an HTML file. Only the output of each cell is kept and the code is deleted.\n",
- "- Then, delete or add cells depending on what you want to change.\n",
- "- Finally, add the parameter `notebook_path=\"path/to/your/custom/report.ipynb\"` in the `generate_report` method.\n",
+ "To do so:\n",
+ "- Start from the default report YAML configuration.\n",
+ "- Add custom block types in YAML (for example, `user_note` or `prediction_diagnostics`).\n",
+ "- Implement matching methods named `block_<type>` (for example, `block_user_note`) in a class inheriting from `ReportBlockMixin`.\n",
+ "- Pass your custom file with `yaml_path=\"path/to/your/custom/report.yml\"`.\n",
+ "- Pass your block class instance with `block_instance=YourCustomBlocks()`.\n",
"\n",
- "> **Tip** : You can use the `working_dir` parameter to easily work inside your custom notebook before using the `generate_report` method. This way you can load the parameters used inside the notebook by papermill. Replace the `dir_path` inside your custom notebook with your own `working_dir` where are saved the different instances used.\n",
+ "For this tutorial, we use `config/default_report_custom.yml` and add a custom diagnostics section rendered by a user block.\n",
"\n",
- "For our simple example, we created [this notebook](https://github.com/MAIF/shapash/blob/master/tutorial/report/utils/custom_report.ipynb). \n",
- "- We removed the multivariate analysis using the `report.display_dataset_analysis(multivariate_analysis=False)` (see notebook utils/custom_report.ipynb for more information)\n",
- "- It includes new sections **Relashionship with target variable** and **Relashionship between training variables** in which we included new simple graphs for this example. \n",
- "- We also added new cells at the end of the **metrics** section.\n",
- "\n",
- "Next, we use this notebook to generate our new custom report :"
+ "Next, we define the custom block class and generate the custom report:"
]
},
{
@@ -322,37 +322,130 @@
"metadata": {},
"outputs": [],
"source": [
+ "class UserReportBlocks(ReportBlockMixin):\n",
+ " @block\n",
+ " def block_user_note(\n",
+ " self,\n",
+ " title: str = \"Analyst note\",\n",
+ " body: str = \"This report includes a custom user cell.\",\n",
+ " ):\n",
+ " return title, [pn.pane.Markdown(body)]\n",
+ "\n",
+ " @block\n",
+ " def block_prediction_diagnostics(\n",
+ " self,\n",
+ " title: str = \"Prediction diagnostics\",\n",
+ " color_feature: str | None = None,\n",
+ " ):\n",
+ " if self.y_test is None or self.y_pred is None:\n",
+ " return title, [pn.pane.Markdown(\"Prediction diagnostics requires both y_test and y_pred.\")]\n",
+ "\n",
+ " diagnostics = pd.DataFrame(\n",
+ " {\n",
+ " \"actual\": pd.Series(self.y_test).reset_index(drop=True),\n",
+ " \"predicted\": pd.Series(self.y_pred).reset_index(drop=True),\n",
+ " }\n",
+ " )\n",
+ " diagnostics[\"residual\"] = diagnostics[\"actual\"] - diagnostics[\"predicted\"]\n",
+ " diagnostics[\"abs_error\"] = diagnostics[\"residual\"].abs()\n",
+ "\n",
+ " if color_feature and self.x_init is not None and color_feature in self.x_init.columns:\n",
+ " diagnostics[color_feature] = pd.Series(self.x_init[color_feature]).reset_index(drop=True)\n",
+ " scatter = px.scatter(\n",
+ " diagnostics,\n",
+ " x=\"actual\",\n",
+ " y=\"predicted\",\n",
+ " color=color_feature,\n",
+ " hover_data=[\"residual\", \"abs_error\"],\n",
+ " title=\"Actual vs predicted\",\n",
+ " labels={\"actual\": \"Actual\", \"predicted\": \"Predicted\"},\n",
+ " )\n",
+ " else:\n",
+ " scatter = px.scatter(\n",
+ " diagnostics,\n",
+ " x=\"actual\",\n",
+ " y=\"predicted\",\n",
+ " color=\"abs_error\",\n",
+ " color_continuous_scale=\"Tealgrn\",\n",
+ " hover_data=[\"residual\", \"abs_error\"],\n",
+ " title=\"Actual vs predicted\",\n",
+ " labels={\n",
+ " \"actual\": \"Actual\",\n",
+ " \"predicted\": \"Predicted\",\n",
+ " \"abs_error\": \"Absolute error\",\n",
+ " },\n",
+ " )\n",
+ "\n",
+ " min_axis = min(diagnostics[\"actual\"].min(), diagnostics[\"predicted\"].min())\n",
+ " max_axis = max(diagnostics[\"actual\"].max(), diagnostics[\"predicted\"].max())\n",
+ " scatter.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[min_axis, max_axis],\n",
+ " y=[min_axis, max_axis],\n",
+ " mode=\"lines\",\n",
+ " line={\"dash\": \"dash\", \"color\": \"#666666\"},\n",
+ " name=\"Ideal fit\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ " scatter.update_layout(margin=dict(l=20, r=20, t=50, b=20))\n",
+ "\n",
+ " residual_hist = px.histogram(\n",
+ " diagnostics,\n",
+ " x=\"residual\",\n",
+ " nbins=30,\n",
+ " title=\"Residual distribution\",\n",
+ " labels={\"residual\": \"Actual - Predicted\"},\n",
+ " color_discrete_sequence=[\"#2E8B57\"],\n",
+ " )\n",
+ " residual_hist.add_vline(x=0, line_dash=\"dash\", line_color=\"#666666\")\n",
+ " residual_hist.update_layout(margin=dict(l=20, r=20, t=50, b=20))\n",
+ "\n",
+ " summary = pn.pane.Markdown(\n",
+ " (\n",
+ " \"**Quick diagnostics:** \"\n",
+ " f\"MAE = {diagnostics['abs_error'].mean():.2f}, \"\n",
+ " f\"mean residual = {diagnostics['residual'].mean():.2f}, \"\n",
+ " f\"max absolute error = {diagnostics['abs_error'].max():.2f}\"\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " scatter_pane = pn.pane.Plotly(\n",
+ " scatter,\n",
+ " config={\"displayModeBar\": False, \"responsive\": True},\n",
+ " sizing_mode=\"stretch_width\",\n",
+ " height=360,\n",
+ " )\n",
+ " residual_pane = pn.pane.Plotly(\n",
+ " residual_hist,\n",
+ " config={\"displayModeBar\": False, \"responsive\": True},\n",
+ " sizing_mode=\"stretch_width\",\n",
+ " height=360,\n",
+ " )\n",
+ " charts = pn.Row(scatter_pane, residual_pane, sizing_mode=\"stretch_width\")\n",
+ " return title, [summary, charts]\n",
+ "\n",
"xpl.generate_report(\n",
- " output_file='output/custom_report.html', \n",
- " project_info_file='utils/project_info.yml',\n",
+ " output_file='output/custom_report.html',\n",
+ " project_info_file='config/project_information.yml',\n",
" x_train=Xtrain,\n",
" y_train=ytrain,\n",
" y_test=ytest,\n",
- " title_story=\"House prices report\",\n",
- " title_description=\"\"\"This document is a data science report of the kaggle house prices tutorial project. \n",
- " It was generated using the Shapash library.\"\"\",\n",
- " metrics=[\n",
- " {\n",
- " 'path': 'sklearn.metrics.mean_absolute_error',\n",
- " 'name': 'Mean absolute error', \n",
- " },\n",
- " {\n",
- " 'path': 'sklearn.metrics.mean_squared_error',\n",
- " 'name': 'Mean squared error',\n",
- " }\n",
- " ],\n",
- " working_dir='working',\n",
- " notebook_path=\"utils/custom_report.ipynb\"\n",
+ " yaml_path='config/default_report_custom.yml',\n",
+ " block_instance=UserReportBlocks(),\n",
")"
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "direct-cheese",
+ "cell_type": "markdown",
+ "id": "1f61a585",
"metadata": {},
- "outputs": [],
- "source": []
+ "source": [
+ "The custom report uses block types declared in `config/default_report_custom.yml`.\n",
+ "For each custom `type`, Shapash resolves a method named `block_<type>` from the provided `block_instance`.\n",
+ "\n",
+ "After running the cell above, open `output/custom_report.html` in your browser."
+ ]
}
],
"metadata": {
diff --git a/tutorial/generate_report/utils/custom_report.ipynb b/tutorial/generate_report/utils/custom_report.ipynb
deleted file mode 100644
index 29838ec0..00000000
--- a/tutorial/generate_report/utils/custom_report.ipynb
+++ /dev/null
@@ -1,290 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "threatened-gamma",
- "metadata": {
- "tags": [
- "parameters"
- ]
- },
- "outputs": [],
- "source": [
- "# These parameter are replaced by papermill during execution but can be used to work interactively on your report\n",
- "# You need to use the generate_report once with the parameter working_dir='../working' \n",
- "# to use the following values. This way the objects used below are created in the directory.\n",
- "dir_path = '../working' \n",
- "project_info_file = '../utils/project_info.yml'\n",
- "config = dict(\n",
- " title_story=\"House prices report\",\n",
- " title_description=\"\"\"This document is a data science report of the kaggle house prices tutorial project. \n",
- " It was generated using the Shapash library.\"\"\",\n",
- " metrics=[\n",
- " {\n",
- " 'path': 'sklearn.metrics.mean_absolute_error',\n",
- " 'name': 'Mean absolute error', \n",
- " },\n",
- " {\n",
- " 'path': 'sklearn.metrics.mean_squared_error',\n",
- " 'name': 'Mean squared error',\n",
- " }\n",
- " ]\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "taken-tomorrow",
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import pandas as pd\n",
- "from shapash import SmartExplainer\n",
- "from shapash.report.project_report import ProjectReport\n",
- "from shapash.report.common import load_saved_df\n",
- "\n",
- "xpl = SmartExplainer.load(os.path.join(dir_path, 'smart_explainer.pickle'))\n",
- "\n",
- "x_train = load_saved_df(os.path.join(dir_path, 'x_train.csv'))\n",
- "y_train = load_saved_df(os.path.join(dir_path, 'y_train.csv'))\n",
- "y_test = load_saved_df(os.path.join(dir_path, 'y_test.csv'))\n",
- "\n",
- "report = ProjectReport(\n",
- " explainer=xpl, \n",
- " project_info_file=project_info_file, \n",
- " x_train=x_train, \n",
- " y_train=y_train,\n",
- " y_test=y_test, \n",
- " config=config\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "peaceful-frame",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_title_description()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "decreased-philadelphia",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_project_information()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fourth-confusion",
- "metadata": {},
- "source": [
- "## Model information"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "union-person",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_model_analysis()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "regional-centre",
- "metadata": {},
- "source": [
- "## Dataset analysis"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "rational-breakfast",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_dataset_analysis(multivariate_analysis=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fitted-uncle",
- "metadata": {},
- "source": [
- "### Relashionship with target variable"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "front-employment",
- "metadata": {},
- "outputs": [],
- "source": [
- "import seaborn as sns\n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "collectible-upgrade",
- "metadata": {},
- "outputs": [],
- "source": [
- "df_train = report.x_train_pre\n",
- "y_train = report.y_train\n",
- "df_train['SalePrice'] = y_train"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "monthly-reply",
- "metadata": {},
- "outputs": [],
- "source": [
- "f, ax = plt.subplots(figsize=(8, 6))\n",
- "fig = sns.boxplot(x='OverallQual', y=\"SalePrice\", data=df_train)\n",
- "fig.axis(ymin=0, ymax=800000)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "subtle-amazon",
- "metadata": {},
- "source": [
- "### Relashionship between training variables"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "packed-vermont",
- "metadata": {},
- "outputs": [],
- "source": [
- "corr_matrix = df_train.corr()\n",
- "f, ax = plt.subplots(figsize=(16, 12))\n",
- "sns.heatmap(corr_matrix, vmax=.8, square=True, cmap=\"YlGnBu\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "romance-division",
- "metadata": {},
- "source": [
- "## Model explainability"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "accessible-favorite",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Note : Plotly graphs may not show correctly in notebook but still work in html output file.\n",
- "report.display_model_explainability()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "unknown-transaction",
- "metadata": {},
- "source": [
- "## Model performance"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ignored-career",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.display_model_performance()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "noble-seafood",
- "metadata": {},
- "source": [
- "**The graph below represents y_pred vs y_test :**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "filled-challenge",
- "metadata": {},
- "outputs": [],
- "source": [
- "y_test = report.y_test\n",
- "y_pred = report.y_pred\n",
- "\n",
- "sns.scatterplot(x=y_test, y=y_pred)\n",
- "plt.xlabel('y_test')\n",
- "plt.ylabel('y_pred')\n",
- "plt.title('y_pred vs y_test')\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "efficient-badge",
- "metadata": {},
- "source": [
- "You can add as many graphs, text, or other cells as you want.\n",
- "\n",
- "The code will not be displayed. Only the markdown and output of the cells will be shown on the generated html file."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "passive-peoples",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "celltoolbar": "Tags",
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}