glamod · ludwiglierhammer · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -23,7 +23,6 @@ repos:
       - id: fix-byte-order-marker
       - id: name-tests-test
         args: [ '--pytest-test-first' ]
-        exclude: ^tests/_duplicates.py$
       - id: no-commit-to-branch
         args: [ '--branch', 'main' ]
       - id: trailing-whitespace

diff --git a/docs/tool-overview-databundle.rst b/docs/tool-overview-databundle.rst
@@ -84,22 +84,4 @@ Now the meteorological data can be mapped to the Common Data Model (CDM_) using
 
 For more information how the mapping is working, please see :ref:`tool-overview-mapper` and/or :ref:`how-to-register-a-new-data-model-mapping`.
 
-:ref:`dupdetect`
-^^^^^^^^^^^^^^^^
-
-After mapping to the CDM format it is useful to check if the CDM tables contain any duplicates. The duplicate checker included in the ``cdm_reader_mapper`` toolbox is based on python record linkage toolkit RecordLinkage_.
-
-The first step is to call the method function :func:`.DataBundle.duplicate_check`. This function scans the CDM tables for any duplicates.
-
-.. code-block:: console
-
-    db_dup = db.duplicate_check()
-
-Afterwards there are two options how to deal with the detected duplicates:
-
-1. :func:`.DataBundle.flag_duplicates`
-2. :func:`.DataBundle.remove_duplicates`
-
-The first function flags the detected duplicates. For more information about the flags see `CDM code tables for duplicate_status`_ and `CDM code tables for report_quality`_. The second function removes the detected duplicates.
-
 .. include:: hyperlinks.rst
diff --git a/src/cdm_reader_mapper/__init__.py b/src/cdm_reader_mapper/__init__.py
@@ -19,10 +19,6 @@
 from .core.reader import read
 from .core.writer import write
 from .data import test_data
-from .duplicates.duplicates import (
-    DupDetect,
-    duplicate_check,
-)
 from .mdf_reader.reader import read_data, read_mdf
 from .mdf_reader.writer import write_data
 from .metmetpy import (
@@ -35,11 +31,9 @@
 
 __all__ = [
     "DataBundle",
-    "DupDetect",
     "cdm_tables",
     "correct_datetime",
     "correct_pt",
-    "duplicate_check",
     "map_model",
     "read",
     "read_data",

diff --git a/src/cdm_reader_mapper/core/databundle.py b/src/cdm_reader_mapper/core/databundle.py
@@ -17,7 +17,6 @@
     split_by_index,
 )
 from cdm_reader_mapper.common.iterators import ParquetStreamReader, is_valid_iterator
-from cdm_reader_mapper.duplicates.duplicates import DupDetect, duplicate_check
 from cdm_reader_mapper.metmetpy import (
     correct_datetime,
     correct_pt,
@@ -154,7 +153,6 @@ def __init__(
         self._mask: pd.DataFrame | ParquetStreamReader = mask
         self._imodel = imodel
         self._mode = mode
-        self.DupDetect: DupDetect | None = None
 
     def __len__(self) -> int:
         """
@@ -1414,208 +1412,3 @@ def write(
             mode=mode,
             **kwargs,
         )
-
-    def duplicate_check(self, inplace: bool = False, **kwargs: Any) -> DataBundle | None:
-        r"""
-        Duplicate check in :py:attr:`data`.
-
-        Parameters
-        ----------
-        inplace : bool, default: False
-            If True overwrite :py:attr:`data` in :py:class:`~DataBundle`
-            else return a copy of :py:class:`~DataBundle` with :py:attr:`data` as CDM tables.
-        \**kwargs : Any
-            Additional keyword-arguments for duplicate check.
-
-        Returns
-        -------
-        :py:class:`~DataBundle` or None
-            DataBundle containing new :py:class:`~DupDetect` class for further duplicate check methods or None if "inplace=True".
-
-        See Also
-        --------
-        DataBundle.get_duplicates : Get duplicate matches in `data`.
-        DataBundle.flag_duplicates : Flag detected duplicates in `data`.
-        DataBundle.remove_duplicates : Remove detected duplicates in `data`.
-
-        Notes
-        -----
-        Following columns have to be provided:
-
-          * `longitude`
-          * `latitude`
-          * `primary_station_id`
-          * `report_timestamp`
-          * `station_course`
-          * `station_speed`
-
-        This adds a new class :py:class:`~DupDetect` to :py:class:`~DataBundle`.
-        This class is necessary for further duplicate check methods.
-
-        For more information see :py:func:`duplicate_check`
-
-        Examples
-        --------
-        >>> db.duplicate_check()
-        """
-        db_ = self._get_db(inplace)
-        if db_ is None:
-            return None
-        if db_._mode == "tables" and "header" in db_._data:
-            data = db_._data["header"]
-        else:
-            data = db_._data
-        db_.DupDetect = duplicate_check(data, **kwargs)
-        return self._return_db(db_, inplace)
-
-    def flag_duplicates(self, inplace: bool = False, **kwargs: Any) -> DataBundle | None:
-        r"""
-        Flag detected duplicates in :py:attr:`data`.
-
-        Parameters
-        ----------
-        inplace : bool, default: False
-            If True overwrite :py:attr:`data` in :py:class:`~DataBundle`
-            else return a copy of :py:class:`~DataBundle` with :py:attr:`data` containing flagged duplicates.
-        \**kwargs : Any
-            Additional keyword-arguments for flagging duplicates.
-
-        Returns
-        -------
-        :py:class:`~DataBundle` or None
-            DataBundle containing duplicate flags in :py:attr:`data` or None if "inplace=True".
-
-        Raises
-        ------
-        RuntimeError
-            Before flagging duplicates, a duplictate check has to be done, :py:func:`DataBundle.duplicate_check`.
-
-        See Also
-        --------
-        DataBundle.remove_duplicates : Remove detected duplicates in `data`.
-        DataBundle.get_duplicates : Get duplicate matches in `data`.
-        DataBundle.duplicate_check : Duplicate check in `data`.
-
-        Notes
-        -----
-        For more information see :py:func:`DupDetect.flag_duplicates`
-
-        Examples
-        --------
-        Flag duplicates without overwriting :py:attr:`data`.
-
-        >>> flagged_tables = db.flag_duplicates()
-
-        Flag duplicates with overwriting :py:attr:`data`.
-
-        >>> db.flag_duplicates(inplace=True)
-        >>> flagged_tables = db.data
-        """
-        db_ = self._get_db(inplace)
-        if db_ is None:
-            return None
-
-        if db_.DupDetect is None:
-            raise RuntimeError("Before flagging duplicates, a duplictate check has to be done: 'db.duplicate_check()'")
-
-        db_.DupDetect.flag_duplicates(**kwargs)
-
-        if db_._mode == "tables" and "header" in db_._data:
-            db_._data["header"] = db_.DupDetect.result
-        else:
-            db_._data = db_.DupDetect.result
-        return self._return_db(db_, inplace)
-
-    def get_duplicates(self, **kwargs: Any) -> pd.DataFrame:
-        r"""
-        Get duplicate matches in :py:attr:`data`.
-
-        Parameters
-        ----------
-        \**kwargs : Any
-            Additional keyword-arguments used for getting duplicates.
-
-        Returns
-        -------
-        pd.DataFrame
-            DataFrame containing duplicate matches.
-
-        Raises
-        ------
-        RuntimeError
-            Before getting duplicates, a duplictate check has to be done, :py:func:`DataBundle.duplicate_check`.
-
-        See Also
-        --------
-        DataBundle.remove_duplicates : Remove detected duplicates in `data`.
-        DataBundle.flag_duplicates : Flag detected duplicates in `data`.
-        DataBundle.duplicate_check : Duplicate check in `data`.
-
-        Notes
-        -----
-        For more information see :py:func:`DupDetect.get_duplicates`
-
-        Examples
-        --------
-        >>> matches = db.get_duplicates()
-        """
-        if self.DupDetect is None:
-            raise RuntimeError("Before getting duplicates, a duplictate check has to be done: 'db.duplicate_check()'")
-        return self.DupDetect.get_duplicates(**kwargs)
-
-    def remove_duplicates(self, inplace: bool = False, **kwargs: Any) -> DataBundle | None:
-        r"""
-        Remove detected duplicates in :py:attr:`data`.
-
-        Parameters
-        ----------
-        inplace : bool, default: False
-            If True overwrite :py:attr:`data` in :py:class:`~DataBundle`
-            else return a copy of :py:class:`~DataBundle` with :py:attr:`data` containing no duplicates.
-        \**kwargs : Any
-            Additional keyword-arguments used to remove duplicates.
-
-        Returns
-        -------
-        :py:class:`~DataBundle` or None
-            DataBundle without duplicated rows or None if "inplace=True".
-
-        Raises
-        ------
-        RuntimeError
-            Before removing duplicates, a duplictate check has to be done, :py:func:`DataBundle.duplicate_check`.
-
-        See Also
-        --------
-        DataBundle.flag_duplicates : Flag detected duplicates in `data`.
-        DataBundle.get_duplicates : Get duplicate matches in `data`.
-        DataBundle.duplicate_check : Duplicate check in `data`.
-
-        Notes
-        -----
-        For more information see :py:func:`DupDetect.remove_duplicates`
-
-        Examples
-        --------
-        Remove duplicates without overwriting :py:attr:`data`.
-
-        >>> removed_tables = db.remove_duplicates()
-
-        Remove duplicates with overwriting :py:attr:`data`.
-
-        >>> db.remove_duplicates(inplace=True)
-        >>> removed_tables = db.data
-        """
-        db_ = self._get_db(inplace)
-        if db_ is None:
-            return None
-
-        if db_.DupDetect is None:
-            raise RuntimeError("Before removing duplicates, a duplictate check has to be done: 'db.duplicate_check()'")
-
-        db_.DupDetect.remove_duplicates(**kwargs)
-        header_ = db_.DupDetect.result
-        if not isinstance(db_._data, pd.DataFrame):
-            raise TypeError("data has unsupported type: {type(db_._data)}.")
-        db_._data = db_._data[db_._data.index.isin(header_.index)]
-        return self._return_db(db_, inplace)
diff --git a/src/cdm_reader_mapper/duplicates/__init__.py b/src/cdm_reader_mapper/duplicates/__init__.py
diff --git a/src/cdm_reader_mapper/duplicates/_duplicate_settings.py b/src/cdm_reader_mapper/duplicates/_duplicate_settings.py