Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0933b53
update: add dependency
dalestee May 4, 2026
7fad5e8
update: add import
dalestee May 4, 2026
fe71b68
fix: pandas object
dalestee May 4, 2026
a46d82a
dev: first demo
dalestee May 5, 2026
f3a6576
delete: remove legacy
dalestee May 7, 2026
1d6e1ef
change name
dalestee May 7, 2026
0b5a7b3
re organized code
dalestee May 7, 2026
0338608
create test
dalestee May 7, 2026
f282183
move to correct place
dalestee May 7, 2026
e386e62
ruff mod
dalestee May 7, 2026
ce7feb8
adding docstring
dalestee May 7, 2026
3e5fb8b
using bokeh instead of ploty
dalestee May 7, 2026
607263b
Revert "using bokeh instead of ploty"
dalestee May 7, 2026
9829803
v2 report working
dalestee May 11, 2026
2d85c85
ruff mod
dalestee May 11, 2026
4ebacf2
removing unnecessary files and adding blocks
dalestee May 11, 2026
a64ffb8
default report good
dalestee May 11, 2026
1202bcc
ruff mod
dalestee May 11, 2026
41dda07
stable version
dalestee May 13, 2026
0e59c53
refactoring
dalestee May 13, 2026
ab53fab
ruff
dalestee May 13, 2026
7966155
fix test
dalestee May 13, 2026
c16fbd0
update: change logo and update color scheme
dalestee May 19, 2026
65deaa6
add: more detailed documentation in blocks
dalestee May 19, 2026
57bd07d
add: change the notebook for tutorial
dalestee May 19, 2026
3b12461
add: change tutorial
dalestee May 20, 2026
b67c956
delete: remove legacy code
dalestee May 21, 2026
9954265
add: responsiveness
dalestee May 27, 2026
3027e70
delete: removing old files and functions
dalestee May 29, 2026
122e52c
BIG MODS: refactoring and using pane now
dalestee Jun 2, 2026
82b68dc
nav bar
dalestee Jun 3, 2026
3e4f200
ruff mods and docstring
dalestee Jun 3, 2026
f740cb5
del: removing color
dalestee Jun 3, 2026
87b7723
indicator and other additions
dalestee Jun 4, 2026
1531138
ruff
dalestee Jun 4, 2026
bd499b2
update: améliorer lisibilité
dalestee Jun 8, 2026
2cbd9b4
fix: fixing test error missing yaml
dalestee Jun 8, 2026
e14abb0
add: adding logo on the top left
dalestee Jun 9, 2026
8a2074f
format: ruff
dalestee Jun 9, 2026
d1f8c11
verification: check if objects returned by blocks are correct
dalestee Jun 9, 2026
8db4bfc
remove: runtime
dalestee Jun 9, 2026
9598ea4
add: example with more detailed plots
dalestee Jun 10, 2026
7084a4c
add: complete tutorials
dalestee Jun 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
recursive-include shapash/webapp/assets *
recursive-include shapash/report/html *
recursive-include shapash/report/template *

include LICENSE
include README.md
include shapash/report/base_report.ipynb
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ app = xpl.run_app()
[Live Demo Shapash-Monitor](https://shapash-demo.ossbymaif.fr/)

- Step 4: Generate the Shapash Report
> This step lets you generate a standalone HTML report of your project using the different splits
of your dataset and the metrics you used:
> This step generates a standalone HTML report from a block-based layout.
You can optionally provide a YAML file to customize report sections and blocks.

```python
xpl.generate_report(
Expand All @@ -208,6 +208,7 @@ xpl.generate_report(
x_train=xtrain,
y_train=ytrain,
y_test=ytest,
yaml_path="path/to/report_config.yml", # Optional: custom block configuration
title_story="House prices report",
title_description="""This document is a data science report of the kaggle house prices tutorial project.
It was generated using the Shapash library.""",
Expand Down
Binary file added docs/assets/images/logos/shapash-fond-clair.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions docs/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ The 4 steps to display results:
app = xpl.run_app()

- Step 4: Generate the Shapash Report
> This step lets you generate a standalone HTML report of your project using the different splits
of your dataset and the metrics you used:
> This step generates a standalone HTML report from a configurable block-based layout.
> You can provide a YAML configuration file to customize sections and blocks.

.. code:: ipython

Expand All @@ -101,6 +101,7 @@ The 4 steps to display results:
x_train=Xtrain,
y_train=ytrain,
y_test=ytest,
yaml_path='path/to/report_config.yml', # Optional: custom block configuration
title_story="House prices report",
title_description="""This document is a data science report of the kaggle house prices tutorial project.
It was generated using the Shapash library.""",
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dependencies = [
"numba>=0.60.0",
"numpy>=2.0.0,<2.6.0",
"pandas>=2.2.2,<4.0.0",
"panel>=1.8.10",
"plotly>=5.0.0,<6.0.0",
"scikit-learn>=1.4.2,<1.9.0",
"scipy>=1.13.0",
Expand Down Expand Up @@ -147,8 +148,6 @@ exclude = ["tests/*", "*.ipynb"]
"shapash/plots/plot_scatter_prediction.py" = ["PLW0127", "PLW3301"]
"shapash/report/__init__.py" = ["B904"]
"shapash/report/plots.py" = ["A002"]
"shapash/report/visualisation.py" = ["UP031"]
"shapash/report/project_report.py" = ["S101", "S701"]
"shapash/utils/columntransformer_backend.py" = ["PLW0127"]
"shapash/utils/explanation_metrics.py" = ["S101"]
"shapash/utils/io.py" = ["S301"]
Expand Down
93 changes: 52 additions & 41 deletions shapash/explainer/smart_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import shutil
import tempfile
from pathlib import Path

import numpy as np
import pandas as pd
Expand All @@ -15,7 +16,6 @@
from shapash.backend.shap_backend import get_shap_interaction_values
from shapash.manipulation.select_lines import keep_right_contributions
from shapash.manipulation.summarize import create_grouped_features_values
from shapash.report import check_report_requirements
from shapash.style.style_utils import colors_loading, select_palette
from shapash.utils.check import (
check_additional_data,
Expand Down Expand Up @@ -1660,19 +1660,21 @@ def generate_report(
title_description=None,
metrics=None,
working_dir=None,
notebook_path=None,
kernel_name=None,
yaml_path=None,
max_points=200,
display_interaction_plot=False,
nb_top_interactions=5,
block_instance=None,
):
"""
Generate an interactive HTML report summarizing the model and its explainability.

This method produces a comprehensive HTML report containing visual and textual
insights about the project, dataset, and model performance.
It leverages a predefined or custom Jupyter notebook template to analyze
the model, generate plots, compute metrics, and export the final report.
insights about the project, dataset, and model performance using the
smart_report block-based HTML renderer.

A report configuration is provided through a YAML file. If no YAML file is
specified, a default configuration is generated automatically.

A project information YAML file is required to describe key project details
(e.g., model name, author, date, context).
Expand Down Expand Up @@ -1704,21 +1706,21 @@ def generate_report(
Example:
`metrics=[{'name': 'F1 score', 'path': 'sklearn.metrics.f1_score'}]`
working_dir : str, optional
Directory used to temporarily store generated files (e.g., notebook, outputs).
Directory used to temporarily store generated files (e.g., report config).
If `None`, a temporary directory is automatically created and deleted after report generation.
notebook_path : str, optional
Path to a custom notebook used as a template for generating the report.
If `None`, the default Shapash report notebook is used.
kernel_name : str, optional
Name of the Jupyter kernel to use for report execution.
Useful when multiple kernels are available and the default one is incorrect.
yaml_path : str, optional
Path to a custom YAML configuration file used to generate the report.
If `None`, a default YAML configuration is generated.
max_points : int, optional, default=200
Maximum number of points displayed in contribution plots.
display_interaction_plot : bool, optional, default=False
If True, includes interaction plots in the report.
(Note: this can increase computation time.)
nb_top_interactions : int, optional, default=5
Number of top feature interactions to include in the report.
block_instance : object, optional
Optional custom block object used to resolve block methods during report generation.
It should implement methods named `block_<type>` for YAML block entries.

Returns
-------
Expand All @@ -1734,15 +1736,15 @@ def generate_report(

Notes
-----
- The method internally executes a notebook that generates the report content.
- The method renders the report from block definitions in a YAML configuration.
- Temporary files are automatically cleaned up unless a custom `working_dir` is provided.
- Interaction plots can be disabled to optimize runtime performance.

Example
-------
>>> xpl.generate_report(
... output_file="report.html",
... project_info_file="utils/project_info.yml",
... project_info_file="config/project_information.yml",
... x_train=x_train,
... y_train=y_train,
... y_test=y_test,
Expand All @@ -1756,11 +1758,11 @@ def generate_report(
... nb_top_interactions=5,
... )
"""
check_report_requirements()
from shapash.report.blocks import ReportBlockMixin
from shapash.report.core import generate_report as generate_smart_report

if x_train is not None:
x_train = handle_categorical_missing(x_train)
# Avoid Import Errors with requirements specific to the Shapash Report
from shapash.report.generation import execute_report, export_and_save_report

rm_working_dir = False
if not working_dir:
Expand All @@ -1774,29 +1776,38 @@ def generate_report(
)

try:
execute_report(
working_dir=working_dir,
explainer=self,
project_info_file=project_info_file,
x_train=x_train,
y_train=y_train,
y_test=y_test,
config={
k: v
for k, v in dict(
title_story=title_story,
title_description=title_description,
metrics=metrics,
max_points=max_points,
display_interaction_plot=display_interaction_plot,
nb_top_interactions=nb_top_interactions,
).items()
if v is not None
},
notebook_path=notebook_path,
kernel_name=kernel_name,
)
export_and_save_report(working_dir=working_dir, output_file=output_file)
config = {
"max_points": max_points,
"display_interaction_plot": display_interaction_plot,
"nb_top_interactions": nb_top_interactions,
}

if block_instance is None:
report_runtime = ReportBlockMixin(
explainer=self,
x_train=x_train,
y_train=y_train,
y_test=y_test,
config=config,
)
else:
report_runtime = block_instance
ReportBlockMixin.__init__(
report_runtime,
explainer=self,
x_train=x_train,
y_train=y_train,
y_test=y_test,
config=config,
)

if yaml_path is not None:
config_file = Path(yaml_path)
else:
yaml_path = Path(__file__).resolve().parent.parent / "report" / "default_report.yml"
config_file = yaml_path

generate_smart_report(runtime=report_runtime, config_file=str(config_file), output_file=output_file)

if rm_working_dir:
shutil.rmtree(working_dir)
Expand Down
34 changes: 34 additions & 0 deletions shapash/explainer/smart_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,40 @@ def check_dataset_features(self, x):
x = x[features_order]

assert all(column in self.features_types.keys() for column in x.columns)
for feature in x.columns:
expected_dtype = self.features_types[feature]
if str(x[feature].dtypes) == expected_dtype:
continue

try:
if expected_dtype.startswith("int") or expected_dtype.startswith("uint"):
if not pd.api.types.is_integer_dtype(x[feature].dtypes):
raise ValueError
x[feature] = x[feature].astype(expected_dtype)
elif expected_dtype.startswith("float"):
if not pd.api.types.is_float_dtype(x[feature].dtypes):
raise ValueError
x[feature] = x[feature].astype(expected_dtype)
elif expected_dtype == "bool":
if not pd.api.types.is_bool_dtype(x[feature].dtypes):
raise ValueError
x[feature] = x[feature].astype(expected_dtype)
elif expected_dtype in ["object", "string", "str"]:
if not (
pd.api.types.is_object_dtype(x[feature].dtypes)
or pd.api.types.is_string_dtype(x[feature].dtypes)
):
raise ValueError
if expected_dtype != "str":
x[feature] = x[feature].astype(expected_dtype)
except Exception:
raise ValueError(
"""
Types of features in x doesn't match with the expected one in features_types.
x input must be initial dataset without preprocessing applied.
"""
)

if not all([str(x[feature].dtypes) == self.features_types[feature] for feature in x.columns]):
raise ValueError(
"""
Expand Down
7 changes: 6 additions & 1 deletion shapash/plots/plot_compacity.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
from plotly import graph_objs as go
from plotly.offline import plot
from plotly.subplots import make_subplots
Expand Down Expand Up @@ -70,9 +71,13 @@ def plot_compacity(
fig.update_annotations(font=style_dict["dict_title_compacity"]["font"])

# First plot: number of features required for a given approximation
features_needed_plot = np.asarray(features_needed)
if np.issubdtype(features_needed_plot.dtype, np.integer):
features_needed_plot = features_needed_plot.astype(np.int64)

fig.add_trace(
go.Histogram(
x=features_needed,
x=features_needed_plot,
histnorm="percent",
cumulative={"enabled": True},
name="",
Expand Down
8 changes: 7 additions & 1 deletion shapash/plots/plot_evaluation_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,13 @@ def _prediction_classification_plot(
subtitle = f"Response: <b>{label_value}</b>"

# Plot distribution
violin_x = df_pred["target"].values.flatten()
if np.issubdtype(np.asarray(violin_x).dtype, np.integer):
violin_x = np.asarray(violin_x, dtype=np.int64)

fig.add_trace(
go.Violin(
x=df_pred["target"].values.flatten(),
x=violin_x,
y=df_pred["proba_values"].values.flatten(),
points=False,
legendgroup="M",
Expand Down Expand Up @@ -405,6 +409,8 @@ def _prediction_regression_plot(y_target, y_pred, prediction_error, list_ind, st
y_target = y_target_tmp

y_target_values = y_target.values.flatten()
if np.issubdtype(np.asarray(y_target_values).dtype, np.integer):
y_target_values = np.asarray(y_target_values, dtype=np.int64)

y_pred = y_pred.loc[y_target.index]
prediction_error = np.array(prediction_error.loc[y_target.index])
Expand Down
Loading