From fb230feeb6acc8b5da972133750eeb5278d0f7c2 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:00:45 +0530
Subject: [PATCH 01/12] feat(CategoricalImputer): add errors param to handle
 multimodal variables (#904)

---
 docs/whats_new/v_190.rst                      |  1 +
 feature_engine/imputation/categorical.py      | 54 +++++++++++--
 .../test_categorical_imputer.py               | 77 ++++++++++++++++++-
 3 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/docs/whats_new/v_190.rst b/docs/whats_new/v_190.rst
index 3ee3222fb..f1b6e22da 100644
--- a/docs/whats_new/v_190.rst
+++ b/docs/whats_new/v_190.rst
@@ -53,6 +53,7 @@ New transformers
 Enhancements
 ~~~~~~~~~~~~
 
+- Added `errors` parameter to `CategoricalImputer` to handle categorical variables with multiple frequent categories instead of automatically raising a `ValueError`. (`DirekKakkar <https://github.com/DirekKakkar>`_)
 - Our variable handling functions now return empty lists when no variables of the desired type are found. (`Soledad Galli <https://github.com/solegalli>`_)
 
 BUG
diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 8c4000a0c..40c0a1276 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -2,6 +2,7 @@
 # License: BSD 3 clause
 
 from typing import List, Optional, Union
+import warnings
 
 import pandas as pd
 
@@ -88,6 +89,18 @@ class CategoricalImputer(BaseImputer):
         type object or categorical. If True, the imputer will select all variables or
         accept all variables entered by the user, including those cast as numeric.
 
+    errors : str, default='raise'
+        Indicates what to do when the selected imputation_method='frequent'
+        and a variable has more than 1 mode.
+
+        If 'raise', raises a ValueError and stops the fit.
+
+        If 'warn', raises a UserWarning and continues, imputing using the
+        first most frequent category found.
+
+        If 'ignore', continues without warnings, imputing using the first
+        most frequent category found.
+
     Attributes
     ----------
     {imputer_dict_}
@@ -135,6 +148,7 @@ def __init__(
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         return_object: bool = False,
         ignore_format: bool = False,
+        errors: str = "raise",
     ) -> None:
         if imputation_method not in ["missing", "frequent"]:
             raise ValueError(
@@ -144,11 +158,18 @@ def __init__(
         if not isinstance(ignore_format, bool):
             raise ValueError("ignore_format takes only booleans True and False")
 
+        if errors not in ("raise", "warn", "ignore"):
+            raise ValueError(
+                "errors takes only values 'raise', 'warn', or 'ignore'. "
+                f"Got {errors} instead."
+            )
+
         self.imputation_method = imputation_method
         self.fill_value = fill_value
         self.variables = _check_variables_input_value(variables)
         self.return_object = return_object
         self.ignore_format = ignore_format
+        self.errors = errors
 
     def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         """
@@ -189,9 +210,19 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                 # Some variables may contain more than 1 mode:
                 if len(mode_vals) > 1:
-                    raise ValueError(
-                        f"The variable {var} contains multiple frequent categories."
-                    )
+                    if self.errors == "raise":
+                        raise ValueError(
+                            f"The variable {var} contains multiple frequent categories. "
+                            f"Set errors='warn' or errors='ignore' to allow imputation "
+                            f"using the first most frequent category found."
+                        )
+                    elif self.errors == "warn":
+                        warnings.warn(
+                            f"Variable {var} has multiple frequent categories. "
+                            f"The first category found, {mode_vals[0]}, will be used "
+                            f"for imputation.",
+                            UserWarning,
+                        )
 
                 self.imputer_dict_ = {var: mode_vals[0]}
 
@@ -208,10 +239,19 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                         varnames_str = ", ".join(varnames)
                     else:
                         varnames_str = varnames[0]
-                    raise ValueError(
-                        f"The variable(s) {varnames_str} contain(s) multiple frequent "
-                        f"categories."
-                    )
+
+                    if self.errors == "raise":
+                        raise ValueError(
+                            f"The variable(s) {varnames_str} contain(s) multiple frequent "
+                            f"categories. Set errors='warn' or errors='ignore' to allow "
+                            f"imputation using the first most frequent category found."
+                        )
+                    elif self.errors == "warn":
+                        warnings.warn(
+                            f"Variable(s) {varnames_str} have multiple frequent categories. "
+                            f"The first category found will be used for imputation.",
+                            UserWarning,
+                        )
 
                 self.imputer_dict_ = mode_vals.iloc[0].to_dict()
 
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 182e8826b..1e55212d5 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,8 +1,19 @@
+import numpy as np
+import pandas as pd
 import pandas as pd
 import pytest
+import warnings
 
 from feature_engine.imputation import CategoricalImputer
 
+# --- Shared fixture: perfectly multimodal variable ---
+@pytest.fixture
+def multimodal_df():
+    return pd.DataFrame({
+        "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+        "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+    })
+
 
 def test_impute_with_string_missing_and_automatically_find_variables(df_na):
     # set up transformer
@@ -150,14 +161,22 @@ def test_error_when_imputation_method_not_frequent_or_missing():
 
 
 def test_error_when_variable_contains_multiple_modes(df_na):
-    msg = "The variable Name contains multiple frequent categories."
+    msg = (
+        "The variable Name contains multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_na)
     # check that error message matches
     assert str(record.value) == msg
 
-    msg = "The variable(s) Name contain(s) multiple frequent categories."
+    msg = (
+        "The variable(s) Name contain(s) multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_na)
@@ -166,7 +185,11 @@ def test_error_when_variable_contains_multiple_modes(df_na):
 
     df_ = df_na.copy()
     df_["Name_dup"] = df_["Name"]
-    msg = "The variable(s) Name, Name_dup contain(s) multiple frequent categories."
+    msg = (
+        "The variable(s) Name, Name_dup contain(s) multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_)
@@ -305,3 +328,51 @@ def test_error_when_ignore_format_is_not_boolean(ignore_format):
 
     # check that error message matches
     assert str(record.value) == msg
+
+
+def test_errors_raise_on_multimodal_is_default(multimodal_df):
+    """Default behaviour: raise ValueError on multimodal variable."""
+    imputer = CategoricalImputer(imputation_method="frequent")
+    with pytest.raises(ValueError, match="multiple frequent categories"):
+        imputer.fit(multimodal_df)
+
+
+def test_errors_warn_emits_userwarning(multimodal_df):
+    """errors='warn': UserWarning must be emitted."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
+    with pytest.warns(UserWarning, match="multiple frequent categories"):
+        imputer.fit(multimodal_df)
+
+
+def test_errors_warn_uses_first_mode(multimodal_df):
+    """errors='warn': imputer_dict_ should contain the first mode."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
+    with pytest.warns(UserWarning):
+        imputer.fit(multimodal_df)
+    expected = multimodal_df["city"].mode()[0]
+    assert imputer.imputer_dict_["city"] == expected
+
+
+def test_errors_ignore_no_warning_raised(multimodal_df):
+    """errors='ignore': no warnings should be emitted."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")  # Promote all warnings to errors
+        imputer.fit(multimodal_df)  # Should NOT raise
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+
+
+def test_errors_invalid_value_raises():
+    """Passing an unsupported value for errors should raise ValueError at init."""
+    with pytest.raises(ValueError, match="errors takes only values"):
+        CategoricalImputer(imputation_method="frequent", errors="bad_value")
+
+
+def test_errors_param_ignored_when_imputation_method_is_missing():
+    """errors param has no effect for imputation_method='missing'."""
+    df = pd.DataFrame({"city": ["London", np.nan, "Paris"]})
+    imputer = CategoricalImputer(imputation_method="missing", errors="warn")
+    # Should fit without warnings since there's no mode computation
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        imputer.fit(df)

From 81be3489fb56fc80ab1f8906bc5d12111bb19858 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:41:13 +0530
Subject: [PATCH 02/12] style: fix flake8 line length in CategoricalImputer

---
 feature_engine/imputation/categorical.py      | 28 +++++++++++--------
 .../test_categorical_imputer.py               | 11 +++++---
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 40c0a1276..cc1c2e2d2 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -212,15 +212,16 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                 if len(mode_vals) > 1:
                     if self.errors == "raise":
                         raise ValueError(
-                            f"The variable {var} contains multiple frequent categories. "
-                            f"Set errors='warn' or errors='ignore' to allow imputation "
-                            f"using the first most frequent category found."
+                            f"The variable {var} contains multiple "
+                            f"frequent categories. Set errors='warn' or "
+                            f"errors='ignore' to allow imputation using "
+                            f"the first most frequent category found."
                         )
                     elif self.errors == "warn":
                         warnings.warn(
-                            f"Variable {var} has multiple frequent categories. "
-                            f"The first category found, {mode_vals[0]}, will be used "
-                            f"for imputation.",
+                            f"Variable {var} has multiple frequent "
+                            f"categories. The first category found, "
+                            f"{mode_vals[0]}, will be used for imputation.",
                             UserWarning,
                         )
 
@@ -242,14 +243,17 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                     if self.errors == "raise":
                         raise ValueError(
-                            f"The variable(s) {varnames_str} contain(s) multiple frequent "
-                            f"categories. Set errors='warn' or errors='ignore' to allow "
-                            f"imputation using the first most frequent category found."
+                            f"The variable(s) {varnames_str} contain(s) "
+                            f"multiple frequent categories. Set "
+                            f"errors='warn' or errors='ignore' to allow "
+                            f"imputation using the first most frequent "
+                            f"category found."
                         )
                     elif self.errors == "warn":
                         warnings.warn(
-                            f"Variable(s) {varnames_str} have multiple frequent categories. "
-                            f"The first category found will be used for imputation.",
+                            f"Variable(s) {varnames_str} have multiple "
+                            f"frequent categories. The first category "
+                            f"found will be used for imputation.",
                             UserWarning,
                         )
 
@@ -301,4 +305,4 @@ def _more_tags(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
-        return tags
+        return tags
\ No newline at end of file
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 1e55212d5..c6ea41d89 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -6,13 +6,16 @@
 
 from feature_engine.imputation import CategoricalImputer
 
+
 # --- Shared fixture: perfectly multimodal variable ---
 @pytest.fixture
 def multimodal_df():
-    return pd.DataFrame({
-        "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
-        "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
-    })
+    return pd.DataFrame(
+        {
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+        }
+    )
 
 
 def test_impute_with_string_missing_and_automatically_find_variables(df_na):

From 4fb5b7aa6cd37077cd91a046df8bf921e02e52b6 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:48:01 +0530
Subject: [PATCH 03/12] style: fix import order and duplicate pandas import

---
 feature_engine/imputation/categorical.py      | 32 +++++++------------
 .../test_categorical_imputer.py               |  1 -
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index cc1c2e2d2..2d1f48e97 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -1,34 +1,26 @@
 # Authors: Soledad Galli <solegalli@protonmail.com>
 # License: BSD 3 clause
 
-from typing import List, Optional, Union
 import warnings
+from typing import List, Optional, Union
 
 import pandas as pd
 
-from feature_engine._check_init_parameters.check_variables import (
-    _check_variables_input_value,
-)
+from feature_engine._check_init_parameters.check_variables import \
+    _check_variables_input_value
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _imputer_dict_docstring,
-    _n_features_in_docstring,
-    _variables_attribute_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_transform_docstring,
-    _transform_imputers_docstring,
-)
+    _feature_names_in_docstring, _imputer_dict_docstring,
+    _n_features_in_docstring, _variables_attribute_docstring)
+from feature_engine._docstrings.methods import (_fit_transform_docstring,
+                                                _transform_imputers_docstring)
 from feature_engine._docstrings.substitute import Substitution
 from feature_engine.dataframe_checks import check_X
 from feature_engine.imputation.base_imputer import BaseImputer
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (
-    check_all_variables,
-    check_categorical_variables,
-    find_all_variables,
-    find_categorical_variables,
-)
+from feature_engine.variable_handling import (check_all_variables,
+                                              check_categorical_variables,
+                                              find_all_variables,
+                                              find_categorical_variables)
 
 
 @Substitution(
@@ -305,4 +297,4 @@ def _more_tags(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
-        return tags
\ No newline at end of file
+        return tags
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index c6ea41d89..788a7b924 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pandas as pd
-import pandas as pd
 import pytest
 import warnings
 

From 835133f4c12b072f09310d6a17c4f81aaadbc11f Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 22:49:48 +0530
Subject: [PATCH 04/12] test: add coverage for errors='ignore' branches

---
 .../test_categorical_imputer.py               | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 788a7b924..995db0c69 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,7 +1,8 @@
+import warnings
+
 import numpy as np
 import pandas as pd
 import pytest
-import warnings
 
 from feature_engine.imputation import CategoricalImputer
 
@@ -378,3 +379,27 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
     with warnings.catch_warnings():
         warnings.simplefilter("error")
         imputer.fit(df)
+
+
+def test_errors_ignore_single_variable():
+    """errors='ignore' on single multimodal variable — silent, uses first mode."""
+    X = pd.DataFrame(
+        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
+    )
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    imputer.fit(X)
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+
+
+def test_errors_ignore_multiple_variables():
+    """errors='ignore' on multiple multimodal variables — silent, uses first mode."""
+    X = pd.DataFrame(
+        {
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+        }
+    )
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    imputer.fit(X)
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
\ No newline at end of file

From 81f31d8af4613b2fbfd2b7ebbdbc6f3fa087c4b7 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 22:53:33 +0530
Subject: [PATCH 05/12] style: add missing newline at end of test file

---
 tests/test_imputation/test_categorical_imputer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 995db0c69..de4ce0bc4 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -402,4 +402,4 @@ def test_errors_ignore_multiple_variables():
     imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
     imputer.fit(X)
     assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
-    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
\ No newline at end of file
+    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]

From 9e3bb5cc8edccd7f3648170a571d04b6ed67f54d Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 23:16:13 +0530
Subject: [PATCH 06/12] feat(CountFrequencyEncoder): add unseen='warn' option
 and reformat imports in count_frequency and base_encoder

---
 docs/whats_new/v_190.rst                      |   1 +
 fail_detail.txt                               |  77 ++++++++++++++++++
 feature_engine/encoding/base_encoder.py       |  44 ++++++----
 feature_engine/encoding/count_frequency.py    |  32 +++-----
 test_results.txt                              | Bin 0 -> 13284 bytes
 test_results_utf8.txt                         |  21 +++++
 .../test_count_frequency_encoder.py           |  63 +++++++++++++-
 7 files changed, 198 insertions(+), 40 deletions(-)
 create mode 100644 fail_detail.txt
 create mode 100644 test_results.txt
 create mode 100644 test_results_utf8.txt

diff --git a/docs/whats_new/v_190.rst b/docs/whats_new/v_190.rst
index f1b6e22da..7f9ed486a 100644
--- a/docs/whats_new/v_190.rst
+++ b/docs/whats_new/v_190.rst
@@ -54,6 +54,7 @@ Enhancements
 ~~~~~~~~~~~~
 
 - Added `errors` parameter to `CategoricalImputer` to handle categorical variables with multiple frequent categories instead of automatically raising a `ValueError`. (`DirekKakkar <https://github.com/DirekKakkar>`_)
+- Added ``unseen='warn'`` option to `CountFrequencyEncoder`: unseen categories are encoded as ``NaN`` and a ``UserWarning`` is raised listing the unseen categories per variable. (`DirekKakkar <https://github.com/DirekKakkar>`_)
 - Our variable handling functions now return empty lists when no variables of the desired type are found. (`Soledad Galli <https://github.com/solegalli>`_)
 
 BUG
diff --git a/fail_detail.txt b/fail_detail.txt
new file mode 100644
index 000000000..514d0fb79
--- /dev/null
+++ b/fail_detail.txt
@@ -0,0 +1,77 @@
+﻿============================= test session starts =============================
+platform win32 -- Python 3.14.0, pytest-9.0.2, pluggy-1.6.0
+rootdir: F:\feature_engine
+configfile: pyproject.toml
+plugins: anyio-4.12.1, dash-4.0.0, cov-7.0.0, timeout-2.4.0
+collected 1 item
+
+tests\test_encoding\test_count_frequency_encoder.py F
+
+================================== FAILURES ===================================
+______________________ test_unseen_invalid_value_raises _______________________
+
+    def test_unseen_invalid_value_raises():
+        """Invalid unseen value should raise ValueError at init."""
+        with pytest.raises(ValueError, match="unseen takes only values"):
+>           CountFrequencyEncoder(unseen="bad_value")
+
+tests\test_encoding\test_count_frequency_encoder.py:537: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self = <[AttributeError("'CountFrequencyEncoder' object has no attribute 'encoding_method'") raised in repr()] CountFrequencyEncoder object at 0x11445ccaf90>
+encoding_method = 'count', variables = None, missing_values = 'raise'
+ignore_format = False, unseen = 'bad_value'
+
+    def __init__(
+        self,
+        encoding_method: str = "count",
+        variables: Union[None, int, str, List[Union[str, int]]] = None,
+        missing_values: str = "raise",
+        ignore_format: bool = False,
+        unseen: str = "ignore",
+    ) -> None:
+    
+        if encoding_method not in ["count", "frequency"]:
+            raise ValueError(
+                "encoding_method takes only values 'count' and 'frequency'. "
+                f"Got {encoding_method} instead."
+            )
+    
+>       check_parameter_unseen(unseen, ["ignore", "raise", "encode", "warn"])
+
+feature_engine\encoding\count_frequency.py:171: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+unseen = 'bad_value', accepted_values = ['ignore', 'raise', 'encode', 'warn']
+
+    def check_parameter_unseen(unseen, accepted_values):
+        if not isinstance(accepted_values, list) or not all(
+            isinstance(item, str) for item in accepted_values
+        ):
+            raise ValueError(
+                "accepted_values should be a list of strings. "
+                f" Got {accepted_values} instead."
+            )
+        if unseen not in accepted_values:
+>           raise ValueError(
+                f"Parameter `unseen` takes only values {', '.join(accepted_values)}."
+                f" Got {unseen} instead."
+            )
+E           ValueError: Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.
+
+feature_engine\encoding\_helper_functions.py:10: ValueError
+
+During handling of the above exception, another exception occurred:
+
+    def test_unseen_invalid_value_raises():
+        """Invalid unseen value should raise ValueError at init."""
+>       with pytest.raises(ValueError, match="unseen takes only values"):
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+E       AssertionError: Regex pattern did not match.
+E         Expected regex: 'unseen takes only values'
+E         Actual message: 'Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.'
+
+tests\test_encoding\test_count_frequency_encoder.py:536: AssertionError
+=========================== short test summary info ===========================
+FAILED tests/test_encoding/test_count_frequency_encoder.py::test_unseen_invalid_value_raises
+============================== 1 failed in 0.28s ==============================
diff --git a/feature_engine/encoding/base_encoder.py b/feature_engine/encoding/base_encoder.py
index b4ae3478f..276bc1e26 100644
--- a/feature_engine/encoding/base_encoder.py
+++ b/feature_engine/encoding/base_encoder.py
@@ -6,27 +6,21 @@
 from sklearn.utils.validation import check_is_fitted
 
 from feature_engine._base_transformers.mixins import GetFeatureNamesOutMixin
-from feature_engine._check_init_parameters.check_variables import (
-    _check_variables_input_value,
-)
+from feature_engine._check_init_parameters.check_variables import \
+    _check_variables_input_value
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _missing_values_docstring,
-    _variables_categorical_docstring,
-)
-from feature_engine._docstrings.init_parameters.encoders import _ignore_format_docstring
+    _missing_values_docstring, _variables_categorical_docstring)
+from feature_engine._docstrings.init_parameters.encoders import \
+    _ignore_format_docstring
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.dataframe_checks import (
-    _check_optional_contains_na,
-    _check_X_matches_training_df,
-    check_X,
-)
+from feature_engine.dataframe_checks import (_check_optional_contains_na,
+                                             _check_X_matches_training_df,
+                                             check_X)
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (
-    check_all_variables,
-    check_categorical_variables,
-    find_all_variables,
-    find_categorical_variables,
-)
+from feature_engine.variable_handling import (check_all_variables,
+                                              check_categorical_variables,
+                                              find_all_variables,
+                                              find_categorical_variables)
 
 
 @Substitution(
@@ -221,6 +215,18 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
     def _encode(self, X: pd.DataFrame) -> pd.DataFrame:
         # replace categories by the learned parameters
         for feature in self.encoder_dict_.keys():
+            # Detect unseen categories BEFORE mapping so we can name them
+            if self.unseen == "warn":
+                unseen_cats = set(X[feature].dropna().unique()) - set(
+                    self.encoder_dict_[feature].keys()
+                )
+                if unseen_cats:
+                    warnings.warn(
+                        f"Variable {feature!r} contains unseen categories: "
+                        f"{unseen_cats}. These will be encoded as NaN.",
+                        UserWarning,
+                    )
+
             X[feature] = X[feature].map(self.encoder_dict_[feature])
 
             # if original variables are cast as categorical, they will remain
@@ -266,6 +272,8 @@ def _check_nan_values_after_transformation(self, X):
                     "During the encoding, NaN values were introduced in the feature(s) "
                     f"{nan_columns_str}."
                 )
+            # 'warn': per-variable warnings were already issued in _encode before
+            # the mapping, so nothing more to do here.
 
     def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """Convert the encoded variable back to the original values.
diff --git a/feature_engine/encoding/count_frequency.py b/feature_engine/encoding/count_frequency.py
index ae6507627..854e3ea26 100644
--- a/feature_engine/encoding/count_frequency.py
+++ b/feature_engine/encoding/count_frequency.py
@@ -6,34 +6,26 @@
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-    _variables_attribute_docstring,
-)
+    _feature_names_in_docstring, _n_features_in_docstring,
+    _variables_attribute_docstring)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _missing_values_docstring,
-    _variables_categorical_docstring,
-)
+    _missing_values_docstring, _variables_categorical_docstring)
 from feature_engine._docstrings.init_parameters.encoders import (
-    _ignore_format_docstring,
-    _unseen_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_transform_docstring,
-    _inverse_transform_docstring,
-    _transform_encoders_docstring,
-)
+    _ignore_format_docstring, _unseen_docstring)
+from feature_engine._docstrings.methods import (_fit_transform_docstring,
+                                                _inverse_transform_docstring,
+                                                _transform_encoders_docstring)
 from feature_engine._docstrings.substitute import Substitution
 from feature_engine.dataframe_checks import check_X
 from feature_engine.encoding._helper_functions import check_parameter_unseen
-from feature_engine.encoding.base_encoder import (
-    CategoricalInitMixinNA,
-    CategoricalMethodsMixin,
-)
+from feature_engine.encoding.base_encoder import (CategoricalInitMixinNA,
+                                                  CategoricalMethodsMixin)
 
 _unseen_docstring = (
     _unseen_docstring
     + """ If `'encode'`, unseen categories will be encoded as 0 (zero)."""
+    + """ If `'warn'`, unseen categories will be encoded as NaN and a"""
+    + """ UserWarning is raised listing the unseen categories per variable."""
 )
 
 
@@ -166,7 +158,7 @@ def __init__(
                 f"Got {encoding_method} instead."
             )
 
-        check_parameter_unseen(unseen, ["ignore", "raise", "encode"])
+        check_parameter_unseen(unseen, ["ignore", "raise", "encode", "warn"])
         super().__init__(variables, missing_values, ignore_format)
         self.encoding_method = encoding_method
         self.unseen = unseen
diff --git a/test_results.txt b/test_results.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c032dbfd42a24aa8e2dfe7c18d787ab9dcb783ad
GIT binary patch
literal 13284
zcmds;QBNC35XbkqQoq9qsY)v~G{%5Kq&!3ksVem$O4PnU>k!)zH^!!S20}l4+y8He
z<@Vw`u#M?bbh3T7ySF?4*_qkd+3`PrKX=PsnVY(SEA*Y|o4djd-NcRk^VA)<Ty=$>
zmL1`_``UfwPTWX+P2HjT&fT&4JJ&d4*LEAaf2=EWZ`_4ie7qdJ?bcn(ZR-1p`(Cvt
zzSd}c>~?(ly56?Bkvnxqx-QgHOLg16KJ@(3J#+h&{@cFq{mWi$%yr@atPk||L~T#B
z3|wDy=6XL<k1-A1s10xAqI)9x%l5DRsE6tU<fm#ea9w@Iq7&?4B)EO3k|X~;QaPhB
z)`{=+50&)P!@52b@osd*Q83d};omsW-G!bf;sJbXi93A5p}uG+gvV;0sNA^PQ|mL8
zc6>=FovN2(jXDw4Q`ORs&4lr0A{w9*su=&sb7x(xGL^-(@GsN5%<@pJ2kN)*^wU$2
zy~6!)b#EM^7dT1ZaUJESDxHdxhn}N<>KUH)ggt(Dpjw-;XW~v6i59u%YOdBp^%1{g
z<8?iDlf=I2#~PFIPW-#a9tZtVcP73fdjq$pyWD;7wYk5)aK8!l8~4J!)3xhf>1j`&
zOx*ZJbq|jNuRS-gBTdLRA1LxxBjV0ILyYz?L+DIb+BJ}D7Yj%^8hoO89_xcuX`1Ps
z2ZsA5*>1_w-Y>1b?MyexG^_H%LRzi~g*=jaiEhcKc(|xH8&Rj#V^(DR23B9z*8^Ff
zuD;DrJyI=x3jV-e)S{4gQ$LFviM4cDJWaZ|A$d(Jm3UtGYWTJ-eej5K&Dnu`A$-KH
z_w@0m@R)=5#*SjvGulY3V+Y>f#mB~DABtYQyOH-SSu=yrJByMP)V$EZ3-&jLde%>^
z$Nu!5%w+I_&;9-fFM+k5Uuy8XO(Xldt3%S3_NA4{Wj%a$XvipbW$6CW6Fz>7dVVjz
zom?~Tk_$I^Lv6S9fMuY`8n<0q9X^+y=3aK$H)DB;V4$<;s@Y%G$^$Rc*sQ9Lh`nQH
zdn6seh7)b6)9}rEq~UjmG3_p$s6#bcG4*polnZ~=Y||>x<4=vfrK?KA(C-W5RjpnY
z(rS8GjqnYj7@4i|&7wR_8o=7NCw_*v>;1+}MaWXFX{$j=j1jp)dg^g*CD!cK;EZLq
z31?dEUJcG*@0&2D-KS%(uq=hv9*g>SBR#tdE!|42rK?9gkH)6mp|2kE&Dy)2co)BV
z%-3u3cH)Q|J0Z@u;yrHIC7N!tM;yrCv{SKpe8}y@d;NJZRsEql?>L+xRa?&IIc@6{
zGo?=vcJ-ycEi$LqA&;$AozK`wtiLgRv#}f^9JXGu8;avs^F?wxiFf`)FgTMumQGsi
zh8QX9ExSV^a${fn!E1H>4zH6~JHLz~LOuFNrh>(PzqB+LwL8^L;vG2}b#}>TIA6DN
zlzTYa^ToELPJ5mv_R$x^RMoYcv89b?FNUqOoXqXb*wXHl=fPI)gV8wTv5feJoQLwX
ztX?|P#=zoPz;;_i49~?fH^6l&I|7PO3unk-nLUx%NvvzlXv%XJv+Oos?6>N1q}}f}
z;rQfb?A%LPy?+(!7}0oEHzM1XeZ<*JJ>Ims)`ZtAd{yq^IPH8Zz)s?QZWW!`6;8^A
zK8jf8L{<0unR~5VsGr==SGiCvX;*nxL_XB=S%&4C<v72%w;Jo6B&4jp%^ehW7r4so
z%IfN1XG+GWn|QX!c-=2@%JzLU-+ZQ0kIdS|b>+G2B3X4?@^d>r^N=2Ij}`8c9mn-j
zT8@-WB{S%HoaMKxawz7^Id|Vy*4^iBP3O*JCdIjVYwkPGnd@>f_tjhIH*n~BzJ%<;
zmblQ0_`r<OgPoGvoCI8!Q`&Q%)TYd6AO{O9RVk?=Ba3rS=wg3r&F6Nm>CF(m4kaa)
zf!^WWwX4-`O|jqQ_|f&}Nh48i)E;`?TNWsuLUEOGiDr|<YLc-C>R1X&T$%e-7%kIs
zS9pm=F!!|H)i_*rEui6z_W{Tp34G4n`-;@OS1V4%KB)!!A+XrWQra@gW8TBll0Ked
zp7R{6JXx_ns!wvPXJylPgpQIY+!qJI#OIZ0<P^?)laH$9lm;tnB@=)CPL9DqJ(F)i
zE-&}wG57R+qFybV8+v#jDrleAwgP!i7vrLJea#`1KpPYm%AyYUv$3GK?t0CM--VW-
zNm!KaNN&opmRoV4J!oFY$EaqKuWR(6JB8$_PKMx}WrQHdSeTfac_at>W1I}sl)t~b
z6c?Ncdh<f_YW{GLmQusepSPNQ&Ky7M9-B4wGQc@AdF8BM*`;K~d@?x>V}N&8JzzPo
z7swX+6MANxM+@`yQAPh5<Av}2R8)@iWO0u|Ptb2;{NJpBVO-8@;XfVxw}Jm~l<PC4
zM2C>+-22a16k=Y>`bkJr)wc_p`(I}E$67hRhXw76ayQd@pNV5w4syc!eudrirD%xh
W6B2IgAOE-D1erBt(Xs6RGyeh<<Bb&n

literal 0
HcmV?d00001

diff --git a/test_results_utf8.txt b/test_results_utf8.txt
new file mode 100644
index 000000000..2dd401b35
--- /dev/null
+++ b/test_results_utf8.txt
@@ -0,0 +1,21 @@
+﻿.........................................F                               [100%]
+================================== FAILURES ===================================
+______________________ test_unseen_invalid_value_raises _______________________
+tests\test_encoding\test_count_frequency_encoder.py:537: in test_unseen_invalid_value_raises
+    CountFrequencyEncoder(unseen="bad_value")
+feature_engine\encoding\count_frequency.py:171: in __init__
+    check_parameter_unseen(unseen, ["ignore", "raise", "encode", "warn"])
+feature_engine\encoding\_helper_functions.py:10: in check_parameter_unseen
+    raise ValueError(
+E   ValueError: Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.
+
+During handling of the above exception, another exception occurred:
+tests\test_encoding\test_count_frequency_encoder.py:536: in test_unseen_invalid_value_raises
+    with pytest.raises(ValueError, match="unseen takes only values"):
+         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+E   AssertionError: Regex pattern did not match.
+E     Expected regex: 'unseen takes only values'
+E     Actual message: 'Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.'
+=========================== short test summary info ===========================
+FAILED tests/test_encoding/test_count_frequency_encoder.py::test_unseen_invalid_value_raises
+1 failed, 41 passed in 0.51s
diff --git a/tests/test_encoding/test_count_frequency_encoder.py b/tests/test_encoding/test_count_frequency_encoder.py
index 55e13b1cc..feb4a5a5d 100644
--- a/tests/test_encoding/test_count_frequency_encoder.py
+++ b/tests/test_encoding/test_count_frequency_encoder.py
@@ -237,7 +237,7 @@ def test_no_error_triggered_when_df_contains_unseen_categories_and_unseen_is_enc
         encoder.transform(df_enc_rare)
 
 
-@pytest.mark.parametrize("errors", ["raise", "ignore", "encode"])
+@pytest.mark.parametrize("errors", ["raise", "ignore", "encode", "warn"])
 def test_fit_raises_error_if_df_contains_na(errors, df_enc_na):
     # test case 4: when dataset contains na, fit method
     encoder = CountFrequencyEncoder(unseen=errors)
@@ -251,7 +251,7 @@ def test_fit_raises_error_if_df_contains_na(errors, df_enc_na):
     assert str(record.value) == msg
 
 
-@pytest.mark.parametrize("errors", ["raise", "ignore", "encode"])
+@pytest.mark.parametrize("errors", ["raise", "ignore", "encode", "warn"])
 def test_transform_raises_error_if_df_contains_na(errors, df_enc, df_enc_na):
     # test case 4: when dataset contains na, transform method
     encoder = CountFrequencyEncoder(unseen=errors)
@@ -476,3 +476,62 @@ def test_inverse_transform_raises_non_fitted_error():
     # Test when fit is not called prior to transform.
     with pytest.raises(NotFittedError):
         enc.inverse_transform(df1)
+
+
+# ---------------------------------------------------------------------------
+# Tests for unseen='warn'
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def train_test_dfs_warn():
+    X_train = pd.DataFrame({"color": ["red", "red", "blue", "green", "blue"]})
+    X_test = pd.DataFrame({"color": ["red", "blue", "yellow"]})  # 'yellow' unseen
+    return X_train, X_test
+
+
+def test_unseen_warn_emits_userwarning(train_test_dfs_warn):
+    """unseen='warn': UserWarning emitted for unseen categories."""
+    X_train, X_test = train_test_dfs_warn
+    encoder = CountFrequencyEncoder(encoding_method="count", unseen="warn")
+    encoder.fit(X_train)
+    with pytest.warns(UserWarning, match="unseen categories"):
+        encoder.transform(X_test)
+
+
+def test_unseen_warn_encodes_as_nan(train_test_dfs_warn):
+    """unseen='warn': unseen categories should become NaN."""
+    X_train, X_test = train_test_dfs_warn
+    encoder = CountFrequencyEncoder(encoding_method="count", unseen="warn")
+    encoder.fit(X_train)
+    with pytest.warns(UserWarning):
+        X_tr = encoder.transform(X_test)
+    # 'yellow' is unseen — should be NaN
+    assert pd.isna(X_tr.loc[X_tr.index[2], "color"])
+
+
+def test_unseen_warn_known_categories_encoded_correctly(train_test_dfs_warn):
+    """unseen='warn': known categories still encoded correctly."""
+    X_train, X_test = train_test_dfs_warn
+    encoder = CountFrequencyEncoder(encoding_method="count", unseen="warn")
+    encoder.fit(X_train)
+    with pytest.warns(UserWarning):
+        X_tr = encoder.transform(X_test)
+    # 'red' appears 2 times in training
+    assert X_tr.loc[X_tr.index[0], "color"] == 2
+
+
+def test_unseen_warn_no_warning_when_no_unseen(train_test_dfs_warn):
+    """unseen='warn': no warning if all categories were seen during fit."""
+    X_train, _ = train_test_dfs_warn
+    X_test_seen = pd.DataFrame({"color": ["red", "blue"]})
+    encoder = CountFrequencyEncoder(encoding_method="count", unseen="warn")
+    encoder.fit(X_train)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")  # Fail if any warning is raised
+        encoder.transform(X_test_seen)
+
+
+def test_unseen_invalid_value_raises():
+    """Invalid unseen value should raise ValueError at init."""
+    with pytest.raises(ValueError, match="takes only values"):
+        CountFrequencyEncoder(unseen="bad_value")

From 9e008faa554c119282ccc6217c47da50e5ebecf3 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 23:32:34 +0530
Subject: [PATCH 07/12] chore: remove accidental test_results.txt file

---
 test_results.txt | Bin 13284 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test_results.txt

diff --git a/test_results.txt b/test_results.txt
deleted file mode 100644
index c032dbfd42a24aa8e2dfe7c18d787ab9dcb783ad..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 13284
zcmds;QBNC35XbkqQoq9qsY)v~G{%5Kq&!3ksVem$O4PnU>k!)zH^!!S20}l4+y8He
z<@Vw`u#M?bbh3T7ySF?4*_qkd+3`PrKX=PsnVY(SEA*Y|o4djd-NcRk^VA)<Ty=$>
zmL1`_``UfwPTWX+P2HjT&fT&4JJ&d4*LEAaf2=EWZ`_4ie7qdJ?bcn(ZR-1p`(Cvt
zzSd}c>~?(ly56?Bkvnxqx-QgHOLg16KJ@(3J#+h&{@cFq{mWi$%yr@atPk||L~T#B
z3|wDy=6XL<k1-A1s10xAqI)9x%l5DRsE6tU<fm#ea9w@Iq7&?4B)EO3k|X~;QaPhB
z)`{=+50&)P!@52b@osd*Q83d};omsW-G!bf;sJbXi93A5p}uG+gvV;0sNA^PQ|mL8
zc6>=FovN2(jXDw4Q`ORs&4lr0A{w9*su=&sb7x(xGL^-(@GsN5%<@pJ2kN)*^wU$2
zy~6!)b#EM^7dT1ZaUJESDxHdxhn}N<>KUH)ggt(Dpjw-;XW~v6i59u%YOdBp^%1{g
z<8?iDlf=I2#~PFIPW-#a9tZtVcP73fdjq$pyWD;7wYk5)aK8!l8~4J!)3xhf>1j`&
zOx*ZJbq|jNuRS-gBTdLRA1LxxBjV0ILyYz?L+DIb+BJ}D7Yj%^8hoO89_xcuX`1Ps
z2ZsA5*>1_w-Y>1b?MyexG^_H%LRzi~g*=jaiEhcKc(|xH8&Rj#V^(DR23B9z*8^Ff
zuD;DrJyI=x3jV-e)S{4gQ$LFviM4cDJWaZ|A$d(Jm3UtGYWTJ-eej5K&Dnu`A$-KH
z_w@0m@R)=5#*SjvGulY3V+Y>f#mB~DABtYQyOH-SSu=yrJByMP)V$EZ3-&jLde%>^
z$Nu!5%w+I_&;9-fFM+k5Uuy8XO(Xldt3%S3_NA4{Wj%a$XvipbW$6CW6Fz>7dVVjz
zom?~Tk_$I^Lv6S9fMuY`8n<0q9X^+y=3aK$H)DB;V4$<;s@Y%G$^$Rc*sQ9Lh`nQH
zdn6seh7)b6)9}rEq~UjmG3_p$s6#bcG4*polnZ~=Y||>x<4=vfrK?KA(C-W5RjpnY
z(rS8GjqnYj7@4i|&7wR_8o=7NCw_*v>;1+}MaWXFX{$j=j1jp)dg^g*CD!cK;EZLq
z31?dEUJcG*@0&2D-KS%(uq=hv9*g>SBR#tdE!|42rK?9gkH)6mp|2kE&Dy)2co)BV
z%-3u3cH)Q|J0Z@u;yrHIC7N!tM;yrCv{SKpe8}y@d;NJZRsEql?>L+xRa?&IIc@6{
zGo?=vcJ-ycEi$LqA&;$AozK`wtiLgRv#}f^9JXGu8;avs^F?wxiFf`)FgTMumQGsi
zh8QX9ExSV^a${fn!E1H>4zH6~JHLz~LOuFNrh>(PzqB+LwL8^L;vG2}b#}>TIA6DN
zlzTYa^ToELPJ5mv_R$x^RMoYcv89b?FNUqOoXqXb*wXHl=fPI)gV8wTv5feJoQLwX
ztX?|P#=zoPz;;_i49~?fH^6l&I|7PO3unk-nLUx%NvvzlXv%XJv+Oos?6>N1q}}f}
z;rQfb?A%LPy?+(!7}0oEHzM1XeZ<*JJ>Ims)`ZtAd{yq^IPH8Zz)s?QZWW!`6;8^A
zK8jf8L{<0unR~5VsGr==SGiCvX;*nxL_XB=S%&4C<v72%w;Jo6B&4jp%^ehW7r4so
z%IfN1XG+GWn|QX!c-=2@%JzLU-+ZQ0kIdS|b>+G2B3X4?@^d>r^N=2Ij}`8c9mn-j
zT8@-WB{S%HoaMKxawz7^Id|Vy*4^iBP3O*JCdIjVYwkPGnd@>f_tjhIH*n~BzJ%<;
zmblQ0_`r<OgPoGvoCI8!Q`&Q%)TYd6AO{O9RVk?=Ba3rS=wg3r&F6Nm>CF(m4kaa)
zf!^WWwX4-`O|jqQ_|f&}Nh48i)E;`?TNWsuLUEOGiDr|<YLc-C>R1X&T$%e-7%kIs
zS9pm=F!!|H)i_*rEui6z_W{Tp34G4n`-;@OS1V4%KB)!!A+XrWQra@gW8TBll0Ked
zp7R{6JXx_ns!wvPXJylPgpQIY+!qJI#OIZ0<P^?)laH$9lm;tnB@=)CPL9DqJ(F)i
zE-&}wG57R+qFybV8+v#jDrleAwgP!i7vrLJea#`1KpPYm%AyYUv$3GK?t0CM--VW-
zNm!KaNN&opmRoV4J!oFY$EaqKuWR(6JB8$_PKMx}WrQHdSeTfac_at>W1I}sl)t~b
z6c?Ncdh<f_YW{GLmQusepSPNQ&Ky7M9-B4wGQc@AdF8BM*`;K~d@?x>V}N&8JzzPo
z7swX+6MANxM+@`yQAPh5<Av}2R8)@iWO0u|Ptb2;{NJpBVO-8@;XfVxw}Jm~l<PC4
zM2C>+-22a16k=Y>`bkJr)wc_p`(I}E$67hRhXw76ayQd@pNV5w4syc!eudrirD%xh
W6B2IgAOE-D1erBt(Xs6RGyeh<<Bb&n


From 7a30e5a6126a68a386c35384dff7fcbdec090d2f Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Wed, 11 Mar 2026 17:00:32 +0530
Subject: [PATCH 08/12] test: cover accepted_values validation in check_parameter_unseen

---
 .../test_count_frequency_encoder.py           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/test_encoding/test_count_frequency_encoder.py b/tests/test_encoding/test_count_frequency_encoder.py
index feb4a5a5d..62253108a 100644
--- a/tests/test_encoding/test_count_frequency_encoder.py
+++ b/tests/test_encoding/test_count_frequency_encoder.py
@@ -535,3 +535,31 @@ def test_unseen_invalid_value_raises():
     """Invalid unseen value should raise ValueError at init."""
     with pytest.raises(ValueError, match="takes only values"):
         CountFrequencyEncoder(unseen="bad_value")
+
+
+# =============================================================================
+# Tests for check_parameter_unseen() argument validation (patch coverage)
+# =============================================================================
+
+def test_check_parameter_unseen_raises_when_accepted_values_is_not_a_list():
+    """
+    Covers the first raise ValueError in check_parameter_unseen():
+
+        if not isinstance(accepted_values, list) or not all(
+            isinstance(item, str) for item in accepted_values
+        ):
+            raise ValueError("accepted_values should be a list of strings ...")
+
+    check_parameter_unseen() is an internal helper. CountFrequencyEncoder always
+    calls it with a hardcoded valid list, so the guard is never triggered through
+    normal usage — it must be tested by importing and calling the function directly.
+    """
+    from feature_engine.encoding._helper_functions import check_parameter_unseen
+
+    # accepted_values is not a list at all
+    with pytest.raises(ValueError, match="accepted_values should be a list of strings"):
+        check_parameter_unseen("raise", "raise")
+
+    # accepted_values is a list but contains a non-string element
+    with pytest.raises(ValueError, match="accepted_values should be a list of strings"):
+        check_parameter_unseen("raise", ["raise", "ignore", 42])
\ No newline at end of file

From 74160bc4050e91c8a0ce154261a325affc04d1dd Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Wed, 11 Mar 2026 17:10:07 +0530
Subject: [PATCH 09/12] test: import check_parameter_unseen at module level

---
 tests/test_encoding/test_count_frequency_encoder.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_encoding/test_count_frequency_encoder.py b/tests/test_encoding/test_count_frequency_encoder.py
index 62253108a..7d7989fc6 100644
--- a/tests/test_encoding/test_count_frequency_encoder.py
+++ b/tests/test_encoding/test_count_frequency_encoder.py
@@ -6,6 +6,7 @@
 from sklearn.exceptions import NotFittedError
 
 from feature_engine.encoding import CountFrequencyEncoder
+from feature_engine.encoding._helper_functions import check_parameter_unseen
 
 
 # init parameters
@@ -554,8 +555,6 @@ def test_check_parameter_unseen_raises_when_accepted_values_is_not_a_list():
     calls it with a hardcoded valid list, so the guard is never triggered through
     normal usage — it must be tested by importing and calling the function directly.
     """
-    from feature_engine.encoding._helper_functions import check_parameter_unseen
-
     # accepted_values is not a list at all
     with pytest.raises(ValueError, match="accepted_values should be a list of strings"):
         check_parameter_unseen("raise", "raise")

From a9ed2c029b5b4dc8f7f1427ba0e4f6d9cf811845 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 13 Mar 2026 15:34:14 +0530
Subject: [PATCH 10/12] fixing the ci/circleci:test_style

---
 .gitignore                                          | 1 +
 tests/test_encoding/test_count_frequency_encoder.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 3ba72acd9..0096d1595 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,6 +86,7 @@ celerybeat-schedule
 # Environments
 .env
 .venv
+.venv_wsl
 env/
 venv/
 ENV/
diff --git a/tests/test_encoding/test_count_frequency_encoder.py b/tests/test_encoding/test_count_frequency_encoder.py
index 7d7989fc6..dbf237bfd 100644
--- a/tests/test_encoding/test_count_frequency_encoder.py
+++ b/tests/test_encoding/test_count_frequency_encoder.py
@@ -561,4 +561,5 @@ def test_check_parameter_unseen_raises_when_accepted_values_is_not_a_list():
 
     # accepted_values is a list but contains a non-string element
     with pytest.raises(ValueError, match="accepted_values should be a list of strings"):
-        check_parameter_unseen("raise", ["raise", "ignore", 42])
\ No newline at end of file
+        check_parameter_unseen("raise", ["raise", "ignore", 42])
+        
\ No newline at end of file

From 27150285b7222f9b52bf6a1fd82b0bdfe1cbb41e Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 13 Mar 2026 15:39:01 +0530
Subject: [PATCH 11/12] fixing the ci/circleci:test_style

---
 tests/test_encoding/test_count_frequency_encoder.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_encoding/test_count_frequency_encoder.py b/tests/test_encoding/test_count_frequency_encoder.py
index dbf237bfd..c447a1e37 100644
--- a/tests/test_encoding/test_count_frequency_encoder.py
+++ b/tests/test_encoding/test_count_frequency_encoder.py
@@ -562,4 +562,3 @@ def test_check_parameter_unseen_raises_when_accepted_values_is_not_a_list():
     # accepted_values is a list but contains a non-string element
     with pytest.raises(ValueError, match="accepted_values should be a list of strings"):
         check_parameter_unseen("raise", ["raise", "ignore", 42])
-        
\ No newline at end of file

From 0b468f3bd201f5b38d4a9fc5fe2d56f9248546a7 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Mon, 16 Mar 2026 18:59:18 +0530
Subject: [PATCH 12/12] test: cover single-variable warn branch in
 CategoricalImputer and remove accidental test output files

---
 fail_detail.txt                               | 77 -------------------
 test_results_utf8.txt                         | 21 -----
 .../test_categorical_imputer.py               | 13 ++++
 3 files changed, 13 insertions(+), 98 deletions(-)
 delete mode 100644 fail_detail.txt
 delete mode 100644 test_results_utf8.txt

diff --git a/fail_detail.txt b/fail_detail.txt
deleted file mode 100644
index 514d0fb79..000000000
--- a/fail_detail.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-﻿============================= test session starts =============================
-platform win32 -- Python 3.14.0, pytest-9.0.2, pluggy-1.6.0
-rootdir: F:\feature_engine
-configfile: pyproject.toml
-plugins: anyio-4.12.1, dash-4.0.0, cov-7.0.0, timeout-2.4.0
-collected 1 item
-
-tests\test_encoding\test_count_frequency_encoder.py F
-
-================================== FAILURES ===================================
-______________________ test_unseen_invalid_value_raises _______________________
-
-    def test_unseen_invalid_value_raises():
-        """Invalid unseen value should raise ValueError at init."""
-        with pytest.raises(ValueError, match="unseen takes only values"):
->           CountFrequencyEncoder(unseen="bad_value")
-
-tests\test_encoding\test_count_frequency_encoder.py:537: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-
-self = <[AttributeError("'CountFrequencyEncoder' object has no attribute 'encoding_method'") raised in repr()] CountFrequencyEncoder object at 0x11445ccaf90>
-encoding_method = 'count', variables = None, missing_values = 'raise'
-ignore_format = False, unseen = 'bad_value'
-
-    def __init__(
-        self,
-        encoding_method: str = "count",
-        variables: Union[None, int, str, List[Union[str, int]]] = None,
-        missing_values: str = "raise",
-        ignore_format: bool = False,
-        unseen: str = "ignore",
-    ) -> None:
-    
-        if encoding_method not in ["count", "frequency"]:
-            raise ValueError(
-                "encoding_method takes only values 'count' and 'frequency'. "
-                f"Got {encoding_method} instead."
-            )
-    
->       check_parameter_unseen(unseen, ["ignore", "raise", "encode", "warn"])
-
-feature_engine\encoding\count_frequency.py:171: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-
-unseen = 'bad_value', accepted_values = ['ignore', 'raise', 'encode', 'warn']
-
-    def check_parameter_unseen(unseen, accepted_values):
-        if not isinstance(accepted_values, list) or not all(
-            isinstance(item, str) for item in accepted_values
-        ):
-            raise ValueError(
-                "accepted_values should be a list of strings. "
-                f" Got {accepted_values} instead."
-            )
-        if unseen not in accepted_values:
->           raise ValueError(
-                f"Parameter `unseen` takes only values {', '.join(accepted_values)}."
-                f" Got {unseen} instead."
-            )
-E           ValueError: Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.
-
-feature_engine\encoding\_helper_functions.py:10: ValueError
-
-During handling of the above exception, another exception occurred:
-
-    def test_unseen_invalid_value_raises():
-        """Invalid unseen value should raise ValueError at init."""
->       with pytest.raises(ValueError, match="unseen takes only values"):
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-E       AssertionError: Regex pattern did not match.
-E         Expected regex: 'unseen takes only values'
-E         Actual message: 'Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.'
-
-tests\test_encoding\test_count_frequency_encoder.py:536: AssertionError
-=========================== short test summary info ===========================
-FAILED tests/test_encoding/test_count_frequency_encoder.py::test_unseen_invalid_value_raises
-============================== 1 failed in 0.28s ==============================
diff --git a/test_results_utf8.txt b/test_results_utf8.txt
deleted file mode 100644
index 2dd401b35..000000000
--- a/test_results_utf8.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-﻿.........................................F                               [100%]
-================================== FAILURES ===================================
-______________________ test_unseen_invalid_value_raises _______________________
-tests\test_encoding\test_count_frequency_encoder.py:537: in test_unseen_invalid_value_raises
-    CountFrequencyEncoder(unseen="bad_value")
-feature_engine\encoding\count_frequency.py:171: in __init__
-    check_parameter_unseen(unseen, ["ignore", "raise", "encode", "warn"])
-feature_engine\encoding\_helper_functions.py:10: in check_parameter_unseen
-    raise ValueError(
-E   ValueError: Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.
-
-During handling of the above exception, another exception occurred:
-tests\test_encoding\test_count_frequency_encoder.py:536: in test_unseen_invalid_value_raises
-    with pytest.raises(ValueError, match="unseen takes only values"):
-         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-E   AssertionError: Regex pattern did not match.
-E     Expected regex: 'unseen takes only values'
-E     Actual message: 'Parameter `unseen` takes only values ignore, raise, encode, warn. Got bad_value instead.'
-=========================== short test summary info ===========================
-FAILED tests/test_encoding/test_count_frequency_encoder.py::test_unseen_invalid_value_raises
-1 failed, 41 passed in 0.51s
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index de4ce0bc4..1c0640a58 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -403,3 +403,16 @@ def test_errors_ignore_multiple_variables():
     imputer.fit(X)
     assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
     assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
+
+
+def test_errors_warn_single_variable():
+    """errors='warn' on single multimodal variable — warns, uses first mode."""
+    X = pd.DataFrame(
+        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
+    )
+    imputer = CategoricalImputer(
+        imputation_method="frequent", variables=["city"], errors="warn"
+    )
+    with pytest.warns(UserWarning, match="Variable city has multiple frequent"):
+        imputer.fit(X)
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]