From 8461f22e15da3fc2d4bf3b7979163c30cbd33567 Mon Sep 17 00:00:00 2001
From: ajpotts <ajpotts@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:40:48 -0500
Subject: [PATCH 1/2] Closes #5474:  Fix docstring formatting errors in
 arkouda.pandas.dataframe

---
 arkouda/pandas/dataframe.py | 440 +++++++++++++++++++-----------------
 1 file changed, 237 insertions(+), 203 deletions(-)

diff --git a/arkouda/pandas/dataframe.py b/arkouda/pandas/dataframe.py
index 70f23bf2c5c..ba6d5eaefc8 100644
--- a/arkouda/pandas/dataframe.py
+++ b/arkouda/pandas/dataframe.py
@@ -466,43 +466,39 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
         """
         Return a random sample from each group.
 
-        You can either specify the number of elements
-        or the fraction of elements to be sampled. random_state can be used for reproducibility
+        You can specify either the number of elements to sample or the fraction
+        of elements to sample. ``random_state`` can be used for reproducibility.
 
         Parameters
         ----------
-        n: int, optional
-            Number of items to return for each group.
-            Cannot be used with frac and must be no larger than
-            the smallest group unless replace is True.
-            Default is one if frac is None.
-
-        frac: float, optional
-            Fraction of items to return. Cannot be used with n.
-
-        replace: bool, default False
-            Allow or disallow sampling of the same row more than once.
-
-        weights: pdarray, optional
-            Default None results in equal probability weighting.
-            If passed a pdarray, then values must have the same length as the underlying DataFrame
-            and will be used as sampling probabilities after normalization within each group.
-            Weights must be non-negative with at least one positive element within each group.
-
-        random_state: int or ak.random.Generator, optional
-            If int, seed for random number generator.
-            If ak.random.Generator, use as given.
+        n : int, optional
+            Number of items to return for each group. Cannot be used with
+            ``frac`` and must be no larger than the smallest group unless
+            ``replace`` is ``True``. Defaults to ``1`` if ``frac`` is ``None``.
+        frac : float, optional
+            Fraction of items to return. Cannot be used with ``n``.
+        replace : bool, default=False
+            Whether to allow sampling of the same row more than once.
+        weights : pdarray, optional
+            If ``None``, all rows are given equal probability. If a ``pdarray`` is
+            provided, it must have the same length as the underlying ``DataFrame``
+            and will be used as sampling probabilities after normalization within
+            each group. Weights must be non-negative, with at least one positive
+            element in each group.
+        random_state : int or ak.random.Generator, optional
+            If an ``int``, it is used as the seed for the random number generator.
+            If an ``ak.random.Generator``, it is used directly.
 
         Returns
         -------
         DataFrame
-            A new DataFrame containing items randomly sampled from each group
+            A new ``DataFrame`` containing items randomly sampled from each group,
             sorted according to the grouped columns.
 
         Examples
         --------
         >>> import arkouda as ak
-        >>> df = ak.DataFrame({"A":[3,1,2,1,2,3],"B":[3,4,5,6,7,8]})
+        >>> df = ak.DataFrame({"A": [3, 1, 2, 1, 2, 3], "B": [3, 4, 5, 6, 7, 8]})
         >>> df
            A  B
         0  3  3
@@ -512,19 +508,33 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
         4  2  7
         5  3  8 (6 rows x 2 columns)
 
+        Sample one row per group:
+
         >>> df.groupby("A").sample(random_state=6)
            A  B
         3  1  6
         4  2  7
         5  3  8 (3 rows x 2 columns)
 
-        >>> df.groupby("A").sample(frac=0.5, random_state=3, weights=ak.array([1,1,1,0,0,0]))
+        Sample a fraction of each group with weights:
+
+        >>> df.groupby("A").sample(
+        ...     frac=0.5,
+        ...     random_state=3,
+        ...     weights=ak.array([1, 1, 1, 0, 0, 0]),
+        ... )
            A  B
         1  1  4
         2  2  5
         0  3  3 (3 rows x 2 columns)
 
-        >>> df.groupby("A").sample(n=3, replace=True, random_state=ak.random.default_rng(7))
+        Sample with replacement:
+
+        >>> df.groupby("A").sample(
+        ...     n=3,
+        ...     replace=True,
+        ...     random_state=ak.random.default_rng(7),
+        ... )
            A  B
         1  1  4
         3  1  6
@@ -535,7 +545,6 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
         0  3  3
         5  3  8
         5  3  8 (9 rows x 2 columns)
-
         """
         return self.df[
             self.gb.sample(
@@ -825,50 +834,49 @@ def and_(self):
 
 class DataFrame(UserDict):
     """
-    A DataFrame structure based on arkouda arrays.
+    A DataFrame structure based on Arkouda arrays.
 
     Parameters
     ----------
-    initialdata : List or dictionary of lists, tuples, or pdarrays
-        Each list/dictionary entry corresponds to one column of the data and
-        should be a homogenous type. Different columns may have different
+    initialdata : list or dict of lists, tuples, or pdarrays
+        Each list or dictionary entry corresponds to one column of data and
+        should be a homogeneous type. Different columns may have different
         types. If using a dictionary, keys should be strings.
-
     index : Index, pdarray, or Strings
         Index for the resulting frame. Defaults to an integer range.
-
-    columns : List, tuple, pdarray, or Strings
+    columns : list, tuple, pdarray, or Strings
         Column labels to use if the data does not include them. Elements must
-        be strings. Defaults to an stringified integer range.
+        be strings. Defaults to a stringified integer range.
 
     Examples
     --------
-    >>> import arkouda as ak
+    Create an empty ``DataFrame`` and add a column of data:
 
-    Create an empty DataFrame and add a column of data:
     >>> import arkouda as ak
     >>> df = ak.DataFrame()
-    >>> df['a'] = ak.array([1,2,3])
+    >>> df["a"] = ak.array([1, 2, 3])
     >>> df
        a
     0  1
     1  2
     2  3 (3 rows x 1 columns)
 
-    Create a new DataFrame using a dictionary of data:
+    Create a new ``DataFrame`` using a dictionary of data:
 
-    >>> userName = ak.array(['Alice', 'Bob', 'Alice', 'Carol', 'Bob', 'Alice'])
+    >>> userName = ak.array(["Alice", "Bob", "Alice", "Carol", "Bob", "Alice"])
     >>> userID = ak.array([111, 222, 111, 333, 222, 111])
     >>> item = ak.array([0, 0, 1, 1, 2, 0])
     >>> day = ak.array([5, 5, 6, 5, 6, 6])
     >>> amount = ak.array([0.5, 0.6, 1.1, 1.2, 4.3, 0.6])
-    >>> df = ak.DataFrame({
-    ...     'userName': userName,
-    ...     'userID': userID,
-    ...     'item': item,
-    ...     'day': day,
-    ...     'amount': amount
-    ... })
+    >>> df = ak.DataFrame(
+    ...     {
+    ...         "userName": userName,
+    ...         "userID": userID,
+    ...         "item": item,
+    ...         "day": day,
+    ...         "amount": amount,
+    ...     }
+    ... )
     >>> df
       userName  userID  item  day  amount
     0    Alice     111     0    5     0.5
@@ -879,22 +887,25 @@ class DataFrame(UserDict):
     5    Alice     111     0    6     0.6 (6 rows x 5 columns)
 
     Indexing works slightly differently than with pandas:
+
     >>> df[0]
     {'userName': np.str_('Alice'), 'userID': np.int64(111), 'item': np.int64(0),
     'day': np.int64(5), 'amount': np.float64(0.5)}
-    >>> df['userID']
+    >>> df["userID"]
     array([111 222 111 333 222 111])
-
-    >>> df['userName']
+    >>> df["userName"]
     array(['Alice', 'Bob', 'Alice', 'Carol', 'Bob', 'Alice'])
 
-    >>> df[ak.array([1,3,5])]
+    Select rows by integer array:
+
+    >>> df[ak.array([1, 3, 5])]
       userName  userID  item  day  amount
     1      Bob     222     0    5     0.6
     3    Carol     333     1    5     1.2
     5    Alice     111     0    6     0.6 (3 rows x 5 columns)
 
-    Compute the stride:
+    Slice rows:
+
     >>> df[1:5:1]
       userName  userID  item  day  amount
     1      Bob     222     0    5     0.6
@@ -902,13 +913,15 @@ class DataFrame(UserDict):
     3    Carol     333     1    5     1.2
     4      Bob     222     2    6     4.3 (4 rows x 5 columns)
 
-    >>> df[ak.array([1,2,3])]
+    >>> df[ak.array([1, 2, 3])]
       userName  userID  item  day  amount
     1      Bob     222     0    5     0.6
     2    Alice     111     1    6     1.1
     3    Carol     333     1    5     1.2 (3 rows x 5 columns)
 
-    >>> df[['userID', 'day']]
+    Select columns by name:
+
+    >>> df[["userID", "day"]]
        userID  day
     0     111    5
     1     222    5
@@ -916,7 +929,6 @@ class DataFrame(UserDict):
     3     333    5
     4     222    6
     5     111    6 (6 rows x 2 columns)
-
     """
 
     objType = "DataFrame"
@@ -1609,44 +1621,47 @@ def drop(
         inplace: bool = False,
     ) -> Union[None, DataFrame]:
         """
-        Drop column/s or row/s from the dataframe.
+        Drop rows or columns from the DataFrame.
 
         Parameters
         ----------
-        keys : str, int or list
-            The labels to be dropped on the given axis.
-        axis : int or str
-            The axis on which to drop from. 0/'index' - drop rows, 1/'columns' - drop columns.
-        inplace: bool, default=False
-            When True, perform the operation on the calling object.
-            When False, return a new object.
+        keys : Union[str, int, List[Union[str, int]]]
+            Label or list of labels to drop along the specified axis.
+        axis : Union[str, int], default=0
+            Axis along which to drop.
+
+            - ``0`` or ``"index"`` — drop rows
+            - ``1`` or ``"columns"`` — drop columns
+        inplace : bool, default=False
+            If ``True``, perform the operation on the calling object.
+            If ``False``, return a new object.
 
         Returns
         -------
-        DataFrame or None
-            DateFrame when `inplace=False`;
-            None when `inplace=True`
+        Union[None, DataFrame]
+            ``DataFrame`` when ``inplace=False``; otherwise ``None``.
 
         Examples
         --------
         >>> import arkouda as ak
-        >>> df = ak.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+        >>> df = ak.DataFrame({"col1": [1, 2], "col2": [3, 4]})
         >>> df
            col1  col2
         0     1     3
         1     2     4 (2 rows x 2 columns)
 
-        Drop column
-        >>> df.drop('col1', axis = 1)
+        Drop a column:
+
+        >>> df.drop("col1", axis=1)
            col2
         0     3
         1     4 (2 rows x 1 columns)
 
-        Drop row
-        >>> df.drop(0, axis = 0)
+        Drop a row:
+
+        >>> df.drop(0, axis=0)
            col1  col2
         1     2     4 (1 rows x 2 columns)
-
         """
         if isinstance(keys, str) or isinstance(keys, int):
             keys = [keys]
@@ -2181,38 +2196,35 @@ def rename(
 
         Parameters
         ----------
-        mapper : callable or dict-like, Optional
+        mapper : Optional[Union[Callable, Dict]]
             Function or dictionary mapping existing values to new values.
-            Nonexistent names will not raise an error.
-            Uses the value of axis to determine if renaming column or index
-        index : callable or dict-like, Optional
-            Function or dictionary mapping existing index names to
-            new index names. Nonexistent names will not raise an
-            error.
-            When this is set, axis is ignored.
-        column : callable or dict-like, Optional
-            Function or dictionary mapping existing column names to
-            new column names. Nonexistent names will not raise an
-            error.
-            When this is set, axis is ignored.
-        axis: int or str, default=0
-            Indicates which axis to perform the rename.
-            0/"index" - Indexes
-            1/"column" - Columns
-        inplace: bool, default=False
-            When True, perform the operation on the calling object.
-            When False, return a new object.
+            Nonexistent names will not raise an error. The value of ``axis``
+            determines whether the mapping is applied to the index or columns.
+        index : Optional[Union[Callable, Dict]]
+            Function or dictionary mapping existing index names to new index names.
+            Nonexistent names will not raise an error. When this is set, ``axis``
+            is ignored.
+        column : Optional[Union[Callable, Dict]]
+            Function or dictionary mapping existing column names to new column
+            names. Nonexistent names will not raise an error. When this is set,
+            ``axis`` is ignored.
+        axis : Union[str, int], default=0
+            Axis to perform the rename operation on.
+
+            - ``0`` or ``"index"`` — rename index values
+            - ``1`` or ``"column"`` — rename column names
+        inplace : bool, default=False
+            If ``True``, perform the operation on the calling object.
+            If ``False``, return a new object.
 
         Returns
         -------
-        DataFrame or None
-            DateFrame when `inplace=False`;
-            None when `inplace=True`.
+        Optional[DataFrame]
+            ``DataFrame`` when ``inplace=False``; otherwise ``None``.
 
         Examples
         --------
         >>> import arkouda as ak
-
         >>> df = ak.DataFrame({"A": ak.array([1, 2, 3]), "B": ak.array([4, 5, 6])})
         >>> df
            A  B
@@ -2221,26 +2233,28 @@ def rename(
         2  3  6 (3 rows x 2 columns)
 
         Rename columns using a mapping:
-        >>> df.rename(column={'A':'a', 'B':'c'})
+
+        >>> df.rename(column={"A": "a", "B": "c"})
            a  c
         0  1  4
         1  2  5
         2  3  6 (3 rows x 2 columns)
 
         Rename indexes using a mapping:
-        >>> df.rename(index={0:99, 2:11})
+
+        >>> df.rename(index={0: 99, 2: 11})
             A  B
         99  1  4
         1   2  5
         11  3  6 (3 rows x 2 columns)
 
-        Rename using an axis style parameter:
-        >>> df.rename(str.lower, axis='column')
+        Rename using the axis-style parameter:
+
+        >>> df.rename(str.lower, axis="column")
            a  b
         0  1  4
         1  2  5
         2  3  6 (3 rows x 2 columns)
-
         """
         if column is not None and index is not None:
             raise RuntimeError("Only column or index can be renamed, cannot rename both at once")
@@ -2499,7 +2513,7 @@ def tail(self, n=5):
 
     def sample(self, n=5) -> DataFrame:
         """
-        Return a random sample of `n` rows.
+        Return a random sample of ``n`` rows.
 
         Parameters
         ----------
@@ -2509,10 +2523,10 @@ def sample(self, n=5) -> DataFrame:
         Returns
         -------
         DataFrame
-            The sampled `n` rows of the DataFrame.
+            A ``DataFrame`` containing ``n`` randomly sampled rows.
 
-        Example
-        -------
+        Examples
+        --------
         >>> import arkouda as ak
         >>> df = ak.DataFrame({"A": ak.arange(5), "B": -1 * ak.arange(5)})
         >>> df
@@ -2524,12 +2538,12 @@ def sample(self, n=5) -> DataFrame:
         4  4 -4 (5 rows x 2 columns)
 
         Random output of size 3:
+
         >>> df.sample(n=3)  # doctest: +SKIP
            A  B
         4  4 -4
         3  3 -3
         1  1 -1 (3 rows x 2 columns)
-
         """
         self.update_nrows()
         if self._nrows <= n:
@@ -2634,25 +2648,24 @@ def _build_groupby(
 
     def memory_usage(self, index=True, unit="B") -> Series:
         """
-        Return the memory usage of each column in bytes.
+        Return the memory usage of each column.
 
-        The memory usage can optionally include the contribution of
-        the index.
+        The memory usage can optionally include the contribution of the index.
 
         Parameters
         ----------
-        index : bool, default True
-            Specifies whether to include the memory usage of the DataFrame's
-            index in returned Series. If ``index=True``, the memory usage of
-            the index is the first item in the output.
-        unit : str, default = "B"
-            Unit to return. One of {'B', 'KB', 'MB', 'GB'}.
+        index : bool, default=True
+            Whether to include the memory usage of the DataFrame's index in the
+            returned ``Series``. If ``True``, the memory usage of the index appears
+            as the first item in the output.
+        unit : str, default="B"
+            Unit to return. One of ``{"B", "KB", "MB", "GB"}``.
 
         Returns
         -------
         Series
-            A Series whose index is the original column names and whose values
-            is the memory usage of each column in bytes.
+            A ``Series`` whose index contains the original column names and whose
+            values represent the memory usage of each column in the specified unit.
 
         See Also
         --------
@@ -2664,8 +2677,8 @@ def memory_usage(self, index=True, unit="B") -> Series:
         Examples
         --------
         >>> import arkouda as ak
-        >>> dtypes = {"int64":ak.int64, "float64":ak.float64,  "bool":ak.bool_}
-        >>> data = dict([(t, ak.ones(5000, dtype=dtypes[t])) for t in dtypes.keys()])
+        >>> dtypes = {"int64": ak.int64, "float64": ak.float64, "bool": ak.bool_}
+        >>> data = {t: ak.ones(5000, dtype=dtypes[t]) for t in dtypes}
         >>> df = ak.DataFrame(data)
         >>> df.head()
            int64  float64  bool
@@ -2696,9 +2709,9 @@ def memory_usage(self, index=True, unit="B") -> Series:
         dtype: float64
 
         To get the approximate total memory usage:
+
         >>> df.memory_usage(index=True).sum()
         np.int64(125000)
-
         """
         from arkouda.numpy.pdarraycreation import array
         from arkouda.numpy.util import convert_bytes
@@ -2852,30 +2865,31 @@ def to_pandas(self, datalimit=maxTransferBytes, retain_index=False):
 
     def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=None, **kwargs):
         r"""
-        Print DataFrame in Markdown-friendly format.
+        Print the DataFrame in a Markdown-friendly format.
 
         Parameters
         ----------
         mode : str, optional
-            Mode in which file is opened, "wt" by default.
-        index : bool, optional, default True
-            Add index (row) labels.
-        tablefmt: str = "grid"
-            Table format to call from tablulate:
-            https://pypi.org/project/tabulate/
-        storage_options: dict, optional
-            Extra options that make sense for a particular storage connection,
-            e.g. host, port, username, password, etc., if using a URL that will be parsed by fsspec,
-            e.g., starting “s3://”, “gcs://”.
-            An error will be raised if providing this argument with a non-fsspec URL.
-            See the fsspec and backend storage implementation docs for the set
-            of allowed keys and values.
+            Mode in which the file is opened, by default ``"wt"``.
+        index : bool, default=True
+            Whether to include index (row) labels.
+        tablefmt : str, default="grid"
+            Table format passed to ``tabulate``.
+            See https://pypi.org/project/tabulate/ for available formats.
+        storage_options : dict, optional
+            Extra options for a particular storage connection (for example
+            host, port, username, password) when using a URL handled by
+            ``fsspec``, such as one starting with ``"s3://"`` or ``"gcs://"``.
+            An error will be raised if this argument is provided with a
+            non-fsspec URL. See the fsspec and backend storage implementation
+            documentation for the set of allowed keys and values.
         **kwargs
-            These parameters will be passed to tabulate.
+            Additional keyword arguments passed to ``tabulate``.
 
-        Note
-        ----
-        This function should only be called on small DataFrames as it calls pandas.DataFrame.to_markdown:
+        Notes
+        -----
+        This function should only be used with small DataFrames because it
+        calls ``pandas.DataFrame.to_markdown`` internally:
         https://pandas.pydata.org/pandas-docs/version/1.2.4/reference/api/pandas.DataFrame.to_markdown.html
 
         Examples
@@ -2892,7 +2906,8 @@ def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=No
         +----+------------+------------+
 
         Suppress the index:
-        >>> print(df.to_markdown(index = False))
+
+        >>> print(df.to_markdown(index=False))
         +------------+------------+
         | animal_1   | animal_2   |
         +============+============+
@@ -2900,7 +2915,6 @@ def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=No
         +------------+------------+
         | pig        | quetzal    |
         +------------+------------+
-
         """
         return self.to_pandas().to_markdown(
             mode=mode,
@@ -3352,32 +3366,36 @@ def read_csv(cls, filename: str, col_delim: str = ","):
     @classmethod
     def load(cls, prefix_path, file_format="INFER"):
         """
-        Load dataframe from file.
+        Load a DataFrame from a file.
 
-        file_format needed for consistency with other load functions.
+        The ``file_format`` parameter is included for consistency with other
+        Arkouda ``load`` functions.
 
         Parameters
         ----------
         prefix_path : str
-            The prefix path for the data.
-
-        file_format : string, default = "INFER"
+            The prefix path for the stored data.
+        file_format : str, default="INFER"
+            File format of the stored data. If ``"INFER"``, the format will be
+            inferred automatically.
 
         Returns
         -------
         DataFrame
-            A dataframe loaded from the prefix_path.
+            A ``DataFrame`` loaded from ``prefix_path``.
 
         Examples
         --------
         >>> import arkouda as ak
-
-        To store data in <my_dir>/my_data_LOCALE0000,
-        use "<my_dir>/my_data" as the prefix.
         >>> import os.path
         >>> from pathlib import Path
-        >>> my_path = os.path.join(os.getcwd(), 'hdf5_output','my_data')
+
+        To store data in ``<my_dir>/my_data_LOCALE0000``, use
+        ``"<my_dir>/my_data"`` as the prefix.
+
+        >>> my_path = os.path.join(os.getcwd(), "hdf5_output", "my_data")
         >>> Path(my_path).mkdir(parents=True, exist_ok=True)
+
         >>> df = ak.DataFrame({"A": ak.arange(5), "B": -1 * ak.arange(5)})
         >>> df.to_parquet(my_path + "/my_data")
 
@@ -3388,7 +3406,6 @@ def load(cls, prefix_path, file_format="INFER"):
         2 -2  2
         3 -3  3
         4 -4  4 (5 rows x 2 columns)
-
         """
         from arkouda.pandas.io import (
             _dict_recombine_segarrays_categoricals,
@@ -3865,18 +3882,19 @@ def groupby(self, keys, use_series=True, as_index=True, dropna=True):
     @typechecked
     def isin(self, values: Union[pdarray, Dict, Series, DataFrame]) -> DataFrame:
         """
-        Determine whether each element in the DataFrame is contained in values.
+        Determine whether each element in the DataFrame is contained in ``values``.
 
         Parameters
         ----------
-        values : pdarray, dict, Series, or DataFrame
-            The values to check for in DataFrame. Series can only have a single index.
+        values : Union[pdarray, Dict, Series, DataFrame]
+            The values to check for in the DataFrame. A ``Series`` must have a
+            single index.
 
         Returns
         -------
         DataFrame
-            Arkouda DataFrame of booleans showing whether each element in the DataFrame is
-            contained in values.
+            Arkouda ``DataFrame`` of booleans indicating whether each element in
+            the DataFrame is contained in ``values``.
 
         See Also
         --------
@@ -3884,34 +3902,38 @@ def isin(self, values: Union[pdarray, Dict, Series, DataFrame]) -> DataFrame:
 
         Notes
         -----
-        - Pandas supports values being an iterable type. In arkouda, we replace this with pdarray.
-        - Pandas supports ~ operations. Currently, ak.DataFrame does not support this.
+        - Pandas supports ``values`` being any iterable type. In Arkouda, this is
+          replaced with ``pdarray``.
+        - Pandas supports ``~`` operations. Currently, ``ak.DataFrame`` does not.
 
         Examples
         --------
         >>> import arkouda as ak
-        >>> df = ak.DataFrame({'col_A': ak.array([7, 3]), 'col_B':ak.array([1, 9])})
+        >>> df = ak.DataFrame({"col_A": ak.array([7, 3]), "col_B": ak.array([1, 9])})
         >>> df
            col_A  col_B
         0      7      1
         1      3      9 (2 rows x 2 columns)
 
-        When `values` is a pdarray, check every value in the DataFrame to determine if
-        it exists in values.
+        When ``values`` is a ``pdarray``, every value in the DataFrame is checked
+        to determine whether it exists in ``values``.
+
         >>> df.isin(ak.array([0, 1]))
            col_A  col_B
         0  False   True
         1  False  False (2 rows x 2 columns)
 
-        When `values` is a dict, the values in the dict are passed to check the column
-        indicated by the key.
-        >>> df.isin({'col_A': ak.array([0, 3])})
+        When ``values`` is a ``dict``, the dictionary values are used to check
+        the column indicated by each key.
+
+        >>> df.isin({"col_A": ak.array([0, 3])})
            col_A  col_B
         0  False  False
         1   True  False (2 rows x 2 columns)
 
-        When `values` is a Series, each column is checked if values is present positionally.
-        This means that for `True` to be returned, the indexes must be the same.
+        When ``values`` is a ``Series``, each column is checked positionally.
+        For ``True`` to be returned, the indexes must match.
+
         >>> i = ak.Index(ak.arange(2))
         >>> s = ak.Series(data=[3, 9], index=i)
         >>> df.isin(s)
@@ -3919,14 +3941,14 @@ def isin(self, values: Union[pdarray, Dict, Series, DataFrame]) -> DataFrame:
         0  False  False
         1  False   True (2 rows x 2 columns)
 
-        When `values` is a DataFrame, the index and column must match.
-        Note that 9 is not found because the column name does not match.
-        >>> other_df = ak.DataFrame({'col_A':ak.array([7, 3]), 'col_C':ak.array([0, 9])})
+        When ``values`` is a ``DataFrame``, the index and columns must match.
+        Note that ``9`` is not found because the column name differs.
+
+        >>> other_df = ak.DataFrame({"col_A": ak.array([7, 3]), "col_C": ak.array([0, 9])})
         >>> df.isin(other_df)
            col_A  col_B
         0   True  False
         1   True  False (2 rows x 2 columns)
-
         """
         from arkouda.numpy import cumsum
         from arkouda.numpy.pdarraycreation import array, zeros
@@ -4276,22 +4298,28 @@ def isna(self) -> DataFrame:
         """
         Detect missing values.
 
-        Return a boolean same-sized object indicating if the values are NA.
-        numpy.NaN values get mapped to True values.
-        Everything else gets mapped to False values.
+        Return a boolean object of the same size indicating whether each value
+        is missing. ``numpy.nan`` values are mapped to ``True``. All other values
+        are mapped to ``False``.
 
         Returns
         -------
         DataFrame
-            Mask of bool values for each element in DataFrame
-            that indicates whether an element is an NA value.
+            Boolean mask for each element in the ``DataFrame`` indicating
+            whether the value is NA.
 
         Examples
         --------
         >>> import arkouda as ak
         >>> import numpy as np
-        >>> df = ak.DataFrame({"A": [np.nan, 2, 2, 3], "B": [3, np.nan, 5, 6],
-        ...          "C": [1, np.nan, 2, np.nan], "D":["a","b","c","d"]})
+        >>> df = ak.DataFrame(
+        ...     {
+        ...         "A": [np.nan, 2, 2, 3],
+        ...         "B": [3, np.nan, 5, 6],
+        ...         "C": [1, np.nan, 2, np.nan],
+        ...         "D": ["a", "b", "c", "d"],
+        ...     }
+        ... )
         >>> df
              A    B    C  D
         0  NaN  3.0  1.0  a
@@ -4305,7 +4333,6 @@ def isna(self) -> DataFrame:
         1  False   True   True  False
         2  False  False  False  False
         3  False  False   True  False (4 rows x 4 columns)
-
         """
         from arkouda import full, isnan
         from arkouda.numpy.util import is_numeric
@@ -4986,64 +5013,71 @@ def assign(self, **kwargs) -> DataFrame:
         r"""
         Assign new columns to a DataFrame.
 
-        Return a new object with all original columns in addition to new ones.
-        Existing columns that are re-assigned will be overwritten.
+        Return a new object with all original columns in addition to the
+        newly assigned ones. Existing columns that are reassigned will
+        be overwritten.
 
         Parameters
         ----------
         **kwargs : dict of {str: callable or Series}
-            The column names are keywords. If the values are
-            callable, they are computed on the DataFrame and
-            assigned to the new columns. The callable must not
-            change input DataFrame (though pandas doesn't check it).
-            If the values are not callable, (e.g. a Series, scalar, or array),
-            they are simply assigned.
+            The column names are the keyword arguments. If the values are
+            callable, they are computed on the DataFrame and assigned to
+            the new columns. The callable must not modify the input
+            DataFrame.
+
+            If the values are not callable (for example a ``Series``,
+            scalar, or array), they are directly assigned.
 
         Returns
         -------
         DataFrame
-            A new DataFrame with the new columns in addition to
-            all the existing columns.
+            A new ``DataFrame`` with the new columns added alongside the
+            existing columns.
 
         Notes
         -----
-        Assigning multiple columns within the same ``assign`` is possible.
-        Later items in '\*\*kwargs' may refer to newly created or modified
-        columns in 'df'; items are computed and assigned into 'df' in order.
+        Assigning multiple columns within the same ``assign`` call is
+        supported. Later items in ``**kwargs`` may refer to newly created
+        or modified columns in ``df``. Items are computed and assigned
+        to ``df`` in order.
 
         Examples
         --------
         >>> import arkouda as ak
-        >>> df = ak.DataFrame({'temp_c': [17.0, 25.0]},
-        ...                   index=['Portland', 'Berkeley'])
+        >>> df = ak.DataFrame(
+        ...     {"temp_c": [17.0, 25.0]},
+        ...     index=["Portland", "Berkeley"],
+        ... )
         >>> df
                   temp_c
         Portland    17.0
         Berkeley    25.0 (2 rows x 1 columns)
 
-        Where the value is a callable, evaluated on `df`:
+        When the value is a callable, it is evaluated on ``df``:
+
         >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32)
                   temp_c  temp_f
         Portland    17.0    62.6
         Berkeley    25.0    77.0 (2 rows x 2 columns)
 
-        Alternatively, the same behavior can be achieved by directly
-        referencing an existing Series or sequence:
+        The same behavior can be achieved by referencing an existing
+        column directly:
 
-        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32)
+        >>> df.assign(temp_f=df["temp_c"] * 9 / 5 + 32)
                   temp_c  temp_f
         Portland    17.0    62.6
         Berkeley    25.0    77.0 (2 rows x 2 columns)
 
-        You can create multiple columns within the same assign where one
-        of the columns depends on another one defined within the same assign:
+        Multiple columns can be created in a single call, with one column
+        depending on another defined within the same ``assign``:
 
-        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
-        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9)
+        >>> df.assign(
+        ...     temp_f=lambda x: x["temp_c"] * 9 / 5 + 32,
+        ...     temp_k=lambda x: (x["temp_f"] + 459.67) * 5 / 9,
+        ... )
                   temp_c  temp_f  temp_k
         Portland    17.0    62.6  290.15
         Berkeley    25.0    77.0  298.15 (2 rows x 3 columns)
-
         """
         data = self.copy(deep=None)
 

From be4381aede947e011575646f85a20809857b021f Mon Sep 17 00:00:00 2001
From: ajpotts <ajpotts@users.noreply.github.com>
Date: Fri, 6 Mar 2026 15:01:49 -0500
Subject: [PATCH 2/2] Closes #5476:  Fix docstring formatting errors in
 groupbyclass

---
 arkouda/pandas/groupbyclass.py | 129 ++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 59 deletions(-)

diff --git a/arkouda/pandas/groupbyclass.py b/arkouda/pandas/groupbyclass.py
index 2e3892bf836..0379e98dc58 100644
--- a/arkouda/pandas/groupbyclass.py
+++ b/arkouda/pandas/groupbyclass.py
@@ -1436,31 +1436,31 @@ def nunique(self, values: groupable) -> Tuple[groupable, pdarray]:
         """
         Group another array of values and return the number of unique values in each group.
 
-        Group using the permutation stored in the GroupBy instance.
+        Grouping uses the permutation stored in the ``GroupBy`` instance.
 
         Parameters
         ----------
-        values : pdarray, int64
-            The values to group and find unique values
+        values : groupable
+            Values to group; the number of unique values in each group is returned.
 
         Returns
         -------
         Tuple[groupable, pdarray]
             unique_keys : groupable
-                The unique keys, in grouped order
-            group_nunique : groupable
-                Number of unique values per unique key in the GroupBy instance
+                The unique keys in grouped order.
+            group_nunique : pdarray
+                Number of unique values for each key in the ``GroupBy`` instance.
 
         Raises
         ------
         TypeError
-            Raised if the dtype(s) of values array(s) does/do not support
-            the nunique method
+            Raised if the dtype(s) of the ``values`` array(s) do not support
+            the ``nunique`` operation.
         ValueError
-            Raised if the key array size does not match the values size or
-            if the operator is not in the GroupBy.Reductions array
+            Raised if the key array size does not match the ``values`` size or
+            if the operator is not present in ``GroupBy.Reductions``.
         RuntimeError
-            Raised if nunique is not supported for the values dtype
+            Raised if ``nunique`` is not supported for the ``values`` dtype.
 
         Examples
         --------
@@ -1468,20 +1468,24 @@ def nunique(self, values: groupable) -> Tuple[groupable, pdarray]:
         >>> data = ak.array([3, 4, 3, 1, 1, 4, 3, 4, 1, 4])
         >>> data
         array([3 4 3 1 1 4 3 4 1 4])
+
         >>> labels = ak.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4])
         >>> labels
         array([1 1 1 2 2 2 3 3 3 4])
+
         >>> g = ak.GroupBy(labels)
         >>> g.keys
         array([1 1 1 2 2 2 3 3 3 4])
+
         >>> g.nunique(data)
         (array([1 2 3 4]), array([2 2 3 1]))
 
-        Group (1,1,1) has values [3,4,3] -> there are 2 unique values 3&4
-        Group (2,2,2) has values [1,1,4] -> 2 unique values 1&4
-        Group (3,3,3) has values [3,4,1] -> 3 unique values
-        Group (4) has values [4] -> 1 unique value
+        Group explanations:
 
+        - Group ``(1, 1, 1)`` has values ``[3, 4, 3]`` → 2 unique values (3, 4)
+        - Group ``(2, 2, 2)`` has values ``[1, 1, 4]`` → 2 unique values (1, 4)
+        - Group ``(3, 3, 3)`` has values ``[3, 4, 1]`` → 3 unique values
+        - Group ``(4)`` has values ``[4]`` → 1 unique value
         """
         # TO DO: defer to self.aggregate once logic is ported over to Chapel
         # return self.aggregate(values, "nunique")
@@ -2160,32 +2164,32 @@ def broadcast(
 
         Parameters
         ----------
-        values : pdarray, Strings
-            The values to put in each group's segment
-        permute : bool
-            If True (default), permute broadcast values back to the ordering
-            of the original array on which GroupBy was called. If False, the
-            broadcast values are grouped by value.
+        values : Union[pdarray, Strings]
+            Values to place in each group's segment.
+        permute : bool, default=True
+            If ``True``, permute broadcast values back to the ordering of the
+            original array on which ``GroupBy`` was called. If ``False``, the
+            broadcast values are returned in grouped order.
 
         Returns
         -------
-        pdarray, Strings
-            The broadcasted values
+        Union[pdarray, Strings]
+            The broadcast values.
 
         Raises
         ------
         TypeError
-            Raised if value is not a pdarray object
+            Raised if ``values`` is not a ``pdarray`` or ``Strings`` object.
         ValueError
-            Raised if the values array does not have one
-            value per segment
+            Raised if the ``values`` array does not contain exactly one value
+            per segment.
 
         Notes
         -----
-        This function is a sparse analog of ``np.broadcast``. If a
-        GroupBy object represents a sparse matrix (tensor), then
-        this function takes a (dense) column vector and replicates
-        each value to the non-zero elements in the corresponding row.
+        This function is a sparse analog of ``np.broadcast``. If a ``GroupBy``
+        object represents a sparse matrix (tensor), this function takes a
+        dense column vector and replicates each value to the non-zero elements
+        in the corresponding row.
 
         Examples
         --------
@@ -2194,25 +2198,27 @@ def broadcast(
         >>> values = ak.array([3, 5])
         >>> g = ak.GroupBy(a)
 
-        By default, result is in original order
+        By default, the result is in the original order:
+
         >>> g.broadcast(values)
         array([3 5 3 5 3])
 
-        With permute=False, result is in grouped order
+        With ``permute=False``, the result is returned in grouped order:
+
         >>> g.broadcast(values, permute=False)
         array([3 3 3 5 5])
+
         >>> a = ak.randint(1, 5, 10, seed=1)
         >>> a
         array([2 4 4 2 1 4 1 2 4 3])
         >>> g = ak.GroupBy(a)
-        >>> keys,counts = g.size()
+        >>> keys, counts = g.size()
         >>> g.broadcast(counts > 2)
         array([True True True True False True False True True False])
         >>> g.broadcast(counts == 3)
         array([True False False True False False False True False False])
         >>> g.broadcast(counts < 4)
         array([True False False True True False True True False True])
-
         """
         from arkouda.core.client import generic_msg
         from arkouda.numpy.pdarraycreation import arange
@@ -2480,52 +2486,57 @@ def broadcast(
 
     Parameters
     ----------
-    segments : pdarray, int64
-        Offsets of the start of each row in the sparse matrix or grouped array.
-        Must be sorted in ascending order.
-    values : pdarray, Strings
-        The values to broadcast, one per row (or group)
-    size : int
-        The total number of nonzeros in the matrix. If permutation is given, this
-        argument is ignored and the size is inferred from the permutation array.
-    permutation : pdarray, int64
-        The permutation to go from the original ordering of nonzeros to the ordering
-        grouped by row. To broadcast values back to the original ordering, this
-        permutation will be inverted. If no permutation is supplied, it is assumed
-        that the original nonzeros were already grouped by row. In this case, the
-        size argument must be given.
+    segments : pdarray
+        Offsets marking the start of each row in the sparse matrix or grouped
+        array. Must be sorted in ascending order.
+    values : Union[pdarray, Strings]
+        Values to broadcast, one per row (or group).
+    size : Union[int, np.int64, np.uint64], default=-1
+        Total number of nonzeros in the matrix. If ``permutation`` is given,
+        this argument is ignored and the size is inferred from the
+        permutation array.
+    permutation : Union[pdarray, None], optional
+        Permutation that maps the original ordering of nonzeros to the
+        ordering grouped by row. To broadcast values back to the original
+        ordering, this permutation will be inverted.
+
+        If no permutation is supplied, it is assumed that the original
+        nonzeros were already grouped by row. In that case, the ``size``
+        argument must be provided.
 
     Returns
     -------
-    pdarray, Strings
-        The broadcast values, one per nonzero
+    Union[pdarray, Strings]
+        Broadcast values, one per nonzero element.
 
     Raises
     ------
     ValueError
-        - If segments and values are different sizes
-        - If segments are empty
-        - If number of nonzeros (either user-specified or inferred from permutation)
-          is less than one
+        - If ``segments`` and ``values`` have different sizes.
+        - If ``segments`` is empty.
+        - If the number of nonzeros (either user-specified or inferred from
+          ``permutation``) is less than one.
 
     Examples
     --------
     >>> import arkouda as ak
-    >>>
-    # Define a sparse matrix with 3 rows and 7 nonzeros
+
+    Define a sparse matrix with 3 rows and 7 nonzeros:
+
     >>> row_starts = ak.array([0, 2, 5])
     >>> nnz = 7
 
-    Broadcast the row number to each nonzero element
+    Broadcast the row number to each nonzero element:
+
     >>> row_number = ak.arange(3)
     >>> ak.broadcast(row_starts, row_number, nnz)
     array([0 0 1 1 1 2 2])
 
-    If the original nonzeros were in reverse order...
+    If the original nonzeros were in reverse order:
+
     >>> permutation = ak.arange(6, -1, -1)
     >>> ak.broadcast(row_starts, row_number, permutation=permutation)
     array([2 2 1 1 1 0 0])
-
     """
     from arkouda.core.client import generic_msg
     from arkouda.numpy.pdarraycreation import arange