larray-project · gdementen · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc
@@ -5,6 +5,24 @@ New features
 
 * added explicit support for Python 3.14.
 
+* the editor is now associated with some file extensions on Windows, which
+  means that double-clicking on files with these extensions in the Windows File
+  Explorer will open them in the editor. The following extensions are
+  associated: .h5, .hdf, .feather, .parquet, .ddb and .duckdb
+  (closes :editor_issue:`302`).
+
+  **Known issues**
+
+  * This does not work when multiple users share a single Python environment
+    because the file association is done only for the user who installed the
+    editor.
+  * The reported file type is very long and weird. For example, for .h5 files,
+    it is ".h5 larray-editor.AssocFile.h5 file" instead of just "H5 File". This
+    is harmless though.
+  * We chose not to associate the editor with file formats we support but
+    which likely have a better viewer installed on the computer (.xlsx, .csv,
+    .dta, .sas7bdat, text files, IODE files and zip files).
+
 
 Miscellaneous improvements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -43,6 +61,11 @@ Fixes
 * avoid warnings when displaying data with any column entirely non-numeric
   (including NaN). Closes :editor_issue:`311`.
 
+* fixed `compare()` not working for object or string arrays (closes
+  :editor_issue:`163`). The background color and "maximum absolute
+  relative difference" label were always wrong and the comparison failed
+  completely when the first array was an object array containing any 0 value.
+
 * fixed the mechanism writing warning/error messages happening during the
   editor initialization. The errors are now correctly written in the user
   TEMP directory / larray-editor-stderr.log
diff --git a/larray_editor/arrayadapter.py b/larray_editor/arrayadapter.py
@@ -1337,11 +1337,7 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray:
         #        format (or plotting?)
         finite_value = np.abs(finite_value)
     elif dtype.type is np.object_:
-        # change non numeric to nan
-        finite_value = np.where(is_number_value_vectorized(finite_value),
-                                finite_value,
-                                np.nan)
-        finite_value = finite_value.astype(np.float64)
+        finite_value = non_numeric_to_nan(finite_value)
     elif np.issubdtype(dtype, np.bool_):
         finite_value = finite_value.astype(np.int8)
     elif not np.issubdtype(dtype, np.number):
@@ -1356,6 +1352,31 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray:
     return np.where(np.isfinite(finite_value), finite_value, np.nan)
 
 
+def non_numeric_to_nan(array: np.ndarray) -> np.ndarray:  # non-numeric -> nan
+    dtype = array.dtype
+    if np.issubdtype(dtype, np.number):
+        return array  # numeric dtype: returned as-is (same object, no copy)
+    elif dtype.type is np.object_:
+        array = np.where(is_number_value_vectorized(array),  # per-element mask
+                         array,  # kept where the mask is True
+                         np.nan)  # replaced by nan everywhere else
+        return array.astype(np.float64)  # object dtype -> float64
+    # neither a numeric nor an object dtype => every value is non-numeric
+    else:  # NOTE(review): bool arrays also end up here -- confirm intended
+        return np.full(array.shape, np.nan, dtype=np.float64)
+
+
+def ensure_numeric_array(array: la.Array) -> la.Array:  # numeric, same axes
+    dtype = array.dtype
+    if np.issubdtype(dtype, np.number):
+        return array  # already numeric: returned as-is (no copy)
+    else:
+        return la.Array(
+            non_numeric_to_nan(array.data),  # float64 data, nan if non-numeric
+            axes=array.axes  # axes are carried over unchanged
+        )
+
+
 # only used in LArray adapter. it should use the same code path as the rest
 # though
 def get_color_value(array, global_vmin, global_vmax, axis=None):

diff --git a/larray_editor/comparator.py b/larray_editor/comparator.py
@@ -6,6 +6,7 @@
 from qtpy.QtWidgets import (QWidget, QVBoxLayout, QListWidget, QSplitter, QHBoxLayout,
                             QLabel, QCheckBox, QLineEdit, QComboBox, QMessageBox)
 
+from larray_editor.arrayadapter import ensure_numeric_array
 from larray_editor.utils import _, print_exception, align_arrays
 from larray_editor.arraywidget import ArrayEditorWidget
 from larray_editor.editor import AbstractEditorWindow
@@ -149,6 +150,9 @@ def _update_from_arrays(self):
         stack_axis = self.stack_axis
         align_method = self.get_align_method()
         try:
+            # this also converts string arrays to object arrays because
+            # align_arrays computes the common dtype of the arrays *and* the
+            # fill_value, which is nan by default
             aligned_arrays = align_arrays(self.arrays,
                                           join=align_method,
                                           fill_value=self.fill_value)
@@ -179,66 +183,66 @@ def _update_from_combined_array(self):
             isclose = eq
         self._diff_below_tolerance = isclose
 
-        try:
-            with np.errstate(divide='ignore', invalid='ignore'):
-                diff = self._combined_array - self._array0
-                reldiff = diff / self._array0
-            # make reldiff 0 where the values are the same than array0 even for
-            # special values (0, nan, inf, -inf)
-            # at this point reldiff can still contain nan and infs
-            reldiff = la.where(eq, 0, reldiff)
-
-            # 1) compute maxabsreldiff for the label
-            #    this should NOT exclude nans or infs
-            relmin = reldiff.min(skipna=False)
-            relmax = reldiff.max(skipna=False)
-            maxabsreldiff = max(abs(relmin), abs(relmax))
-
-            # 2) compute bg_value
-            # replace -inf by min(reldiff), +inf by max(reldiff)
-            reldiff_for_bg = reldiff.copy()
-            isneginf = reldiff == -np.inf
-            isposinf = reldiff == np.inf
-            isinf = isneginf | isposinf
-
-            # given the way reldiff is constructed, it cannot contain only infs
-            # (because inf/inf is nan) it can contain only infs and nans though,
-            # in which case finite_relXXX will be nan, so unless the array
-            # is empty, finite_relXXX should never be inf
-            finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf)
-            finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf)
-            # special case when reldiff contains only 0 and infs (to avoid
-            # coloring the inf cells white in that case)
-            if finite_relmin == 0 and finite_relmax == 0 and isinf.any():
-                finite_relmin = -1
-                finite_relmax = 1
-            reldiff_for_bg[isneginf] = finite_relmin
-            reldiff_for_bg[isposinf] = finite_relmax
-
-            # make sure that "acceptable" differences show as white
-            reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg)
-
-            # We need a separate version for bg and the label, so that when we
-            # modify atol/rtol, the background color is updated but not the
-            # maxreldiff label
-            maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)),
-                                       abs(np.nanmax(reldiff_for_bg)))
-            if maxabsreldiff_for_bg:
-                # scale reldiff to range 0-1 with 0.5 for reldiff = 0
-                self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5
-            # if the only differences are nans on either side
-            elif not isclose.all():
-                # use white (0.5) everywhere except where reldiff is nan, so
-                # that nans are grey
-                self._bg_value = reldiff_for_bg + 0.5
-            else:
-                # do NOT use full_like as we don't want to inherit array dtype
-                self._bg_value = la.full(self._combined_array.axes, 0.5)
-        except TypeError:
-            # str/object array
-            maxabsreldiff = la.nan
+        # we cannot use raw numpy arrays yet because we need the arrays to
+        # broadcast properly to compute diff and reldiff
+        combined_array = ensure_numeric_array(self._combined_array)
+        array0 = ensure_numeric_array(self._array0)
+        with np.errstate(divide='ignore', invalid='ignore'):
+            diff = combined_array - array0
+            reldiff = diff / array0
+
+        # make reldiff 0 where the values are the same than array0 even for
+        # special values (0, nan, inf, -inf)
+        # at this point reldiff can still contain nan and infs
+        reldiff = la.where(eq, 0, reldiff)
+
+        # 1) compute maxabsreldiff for the label
+        #    this should NOT exclude nans or infs
+        relmin = reldiff.min(skipna=False)
+        relmax = reldiff.max(skipna=False)
+        maxabsreldiff = max(abs(relmin), abs(relmax))
+
+        # 2) compute bg_value
+        # replace -inf by min(reldiff), +inf by max(reldiff)
+        reldiff_for_bg = reldiff.copy()
+        isneginf = reldiff == -np.inf
+        isposinf = reldiff == np.inf
+        isinf = isneginf | isposinf
+
+        # given the way reldiff is constructed, it cannot contain only infs
+        # (because inf/inf is nan) it can contain only infs and nans though,
+        # in which case finite_relXXX will be nan, so unless the array
+        # is empty, finite_relXXX should never be inf
+        finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf)
+        finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf)
+
+        # special case when reldiff contains only 0 and infs (to avoid
+        # coloring the inf cells white in that case)
+        if finite_relmin == 0 and finite_relmax == 0 and isinf.any():
+            finite_relmin = -1
+            finite_relmax = 1
+        reldiff_for_bg[isneginf] = finite_relmin
+        reldiff_for_bg[isposinf] = finite_relmax
+
+        # make sure that "acceptable" differences show as white
+        reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg)
+
+        # We need a separate version for bg and the label, so that when we
+        # modify atol/rtol, the background color is updated but not the
+        # maxreldiff label
+        maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)),
+                                   abs(np.nanmax(reldiff_for_bg)))
+        if maxabsreldiff_for_bg:
+            # scale reldiff to range 0-1 with 0.5 for reldiff = 0
+            self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5
+        # if the only differences are nans on either side
+        elif not isclose.all():
+            # use white (0.5) everywhere except where reldiff is nan, so
+            # that nans are grey
+            self._bg_value = reldiff_for_bg + 0.5
+        else:
             # do NOT use full_like as we don't want to inherit array dtype
-            self._bg_value = la.full(self._combined_array.axes, 0.5)
+            self._bg_value = la.full(combined_array.axes, 0.5)
 
         # using percents does not look good when the numbers are very small
         self.maxdiff_label.setText(str(maxabsreldiff))