Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions doc/source/changes/version_0_35_1.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,24 @@ New features

* added explicit support for Python 3.14.

* the editor is now associated with some file extensions on Windows, which
means that double-clicking on files with these extensions in the Windows File
Explorer will open them in the editor. The following extensions are
associated: .h5, .hdf, .feather, .parquet, .ddb and .duckdb
(closes :editor_issue:`302`).

Known issues
------------
* The file association does not work when multiple users share a single
Python environment because the file association is done only for the user
who installed the editor.
* The reported file type is very long and weird. For example, for .h5 files,
it is ".h5 larray-editor.AssocFile.h5 file" instead of just "H5 File". This
is harmless though.
* We chose not to associate the editor with file formats we support but
which likely have a better viewer installed on the computer (.xlsx, .csv,
.dta, .sas7bdat, text files, IODE files and zip files).


Miscellaneous improvements
^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -43,6 +61,11 @@ Fixes
* avoid warnings when displaying data with any column entirely non-numeric
(including NaN). Closes :editor_issue:`311`.

* fixed `compare()` not working for object or string arrays (closes
:editor_issue:`163`). The background color and "maximum absolute
relative difference" label were always wrong and the comparison failed
completely when the first array was an object array containing any 0 value.

* fixed the mechanism writing warning/error messages happening during the
editor initialization. The errors are now correctly written in the user
TEMP directory / larray-editor-stderr.log
31 changes: 26 additions & 5 deletions larray_editor/arrayadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1337,11 +1337,7 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray:
# format (or plotting?)
finite_value = np.abs(finite_value)
elif dtype.type is np.object_:
# change non numeric to nan
finite_value = np.where(is_number_value_vectorized(finite_value),
finite_value,
np.nan)
finite_value = finite_value.astype(np.float64)
finite_value = non_numeric_to_nan(finite_value)
elif np.issubdtype(dtype, np.bool_):
finite_value = finite_value.astype(np.int8)
elif not np.issubdtype(dtype, np.number):
Expand All @@ -1356,6 +1352,31 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray:
return np.where(np.isfinite(finite_value), finite_value, np.nan)


def non_numeric_to_nan(array: np.ndarray) -> np.ndarray:
    """Return *array* with its non-numeric values replaced by NaN.

    Parameters
    ----------
    array : np.ndarray
        Array of any dtype.

    Returns
    -------
    np.ndarray
        * the input array itself (no copy) when its dtype is a sub-dtype of
          ``np.number``;
        * for object arrays, a new float64 array where the values rejected by
          ``is_number_value_vectorized`` are NaN;
        * for any other dtype, a new float64 array of the same shape filled
          with NaN.
    """
    kind = array.dtype

    # already numeric: nothing to convert, hand back the very same object
    if np.issubdtype(kind, np.number):
        return array

    # neither numeric nor object => no value can be numeric
    if kind.type is not np.object_:
        return np.full(array.shape, np.nan, dtype=np.float64)

    # object arrays can mix numbers with anything else, so each value must
    # be checked individually
    numeric_mask = is_number_value_vectorized(array)
    cleaned = np.where(numeric_mask, array, np.nan)
    return cleaned.astype(np.float64)


def ensure_numeric_array(array: la.Array) -> la.Array:
    """Return a numeric version of *array*.

    Parameters
    ----------
    array : la.Array
        Array of any dtype.

    Returns
    -------
    la.Array
        The input array itself when its dtype is a sub-dtype of
        ``np.number``. Otherwise a new array with the same axes, whose data
        is the result of ``non_numeric_to_nan`` applied to ``array.data``.
    """
    if not np.issubdtype(array.dtype, np.number):
        # convert the raw data, then wrap it again using the original axes
        numeric_data = non_numeric_to_nan(array.data)
        return la.Array(numeric_data, axes=array.axes)
    return array


# only used in LArray adapter. it should use the same code path as the rest
# though
def get_color_value(array, global_vmin, global_vmax, axis=None):
Expand Down
122 changes: 63 additions & 59 deletions larray_editor/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from qtpy.QtWidgets import (QWidget, QVBoxLayout, QListWidget, QSplitter, QHBoxLayout,
QLabel, QCheckBox, QLineEdit, QComboBox, QMessageBox)

from larray_editor.arrayadapter import ensure_numeric_array
from larray_editor.utils import _, print_exception, align_arrays
from larray_editor.arraywidget import ArrayEditorWidget
from larray_editor.editor import AbstractEditorWindow
Expand Down Expand Up @@ -149,6 +150,9 @@ def _update_from_arrays(self):
stack_axis = self.stack_axis
align_method = self.get_align_method()
try:
# this also converts string arrays to object arrays because
# align_arrays computes the common dtype of the arrays *and* the
# fill_value, which is nan by default
aligned_arrays = align_arrays(self.arrays,
join=align_method,
fill_value=self.fill_value)
Expand Down Expand Up @@ -179,66 +183,66 @@ def _update_from_combined_array(self):
isclose = eq
self._diff_below_tolerance = isclose

try:
with np.errstate(divide='ignore', invalid='ignore'):
diff = self._combined_array - self._array0
reldiff = diff / self._array0
# make reldiff 0 where the values are the same than array0 even for
# special values (0, nan, inf, -inf)
# at this point reldiff can still contain nan and infs
reldiff = la.where(eq, 0, reldiff)

# 1) compute maxabsreldiff for the label
# this should NOT exclude nans or infs
relmin = reldiff.min(skipna=False)
relmax = reldiff.max(skipna=False)
maxabsreldiff = max(abs(relmin), abs(relmax))

# 2) compute bg_value
# replace -inf by min(reldiff), +inf by max(reldiff)
reldiff_for_bg = reldiff.copy()
isneginf = reldiff == -np.inf
isposinf = reldiff == np.inf
isinf = isneginf | isposinf

# given the way reldiff is constructed, it cannot contain only infs
# (because inf/inf is nan) it can contain only infs and nans though,
# in which case finite_relXXX will be nan, so unless the array
# is empty, finite_relXXX should never be inf
finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf)
finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf)
# special case when reldiff contains only 0 and infs (to avoid
# coloring the inf cells white in that case)
if finite_relmin == 0 and finite_relmax == 0 and isinf.any():
finite_relmin = -1
finite_relmax = 1
reldiff_for_bg[isneginf] = finite_relmin
reldiff_for_bg[isposinf] = finite_relmax

# make sure that "acceptable" differences show as white
reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg)

# We need a separate version for bg and the label, so that when we
# modify atol/rtol, the background color is updated but not the
# maxreldiff label
maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)),
abs(np.nanmax(reldiff_for_bg)))
if maxabsreldiff_for_bg:
# scale reldiff to range 0-1 with 0.5 for reldiff = 0
self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5
# if the only differences are nans on either side
elif not isclose.all():
# use white (0.5) everywhere except where reldiff is nan, so
# that nans are grey
self._bg_value = reldiff_for_bg + 0.5
else:
# do NOT use full_like as we don't want to inherit array dtype
self._bg_value = la.full(self._combined_array.axes, 0.5)
except TypeError:
# str/object array
maxabsreldiff = la.nan
# we cannot use raw numpy arrays yet because we need the arrays to
# broadcast properly to compute diff and reldiff
combined_array = ensure_numeric_array(self._combined_array)
array0 = ensure_numeric_array(self._array0)
with np.errstate(divide='ignore', invalid='ignore'):
diff = combined_array - array0
reldiff = diff / array0

# make reldiff 0 where the values are the same than array0 even for
# special values (0, nan, inf, -inf)
# at this point reldiff can still contain nan and infs
reldiff = la.where(eq, 0, reldiff)

# 1) compute maxabsreldiff for the label
# this should NOT exclude nans or infs
relmin = reldiff.min(skipna=False)
relmax = reldiff.max(skipna=False)
maxabsreldiff = max(abs(relmin), abs(relmax))

# 2) compute bg_value
# replace -inf by min(reldiff), +inf by max(reldiff)
reldiff_for_bg = reldiff.copy()
isneginf = reldiff == -np.inf
isposinf = reldiff == np.inf
isinf = isneginf | isposinf

# given the way reldiff is constructed, it cannot contain only infs
# (because inf/inf is nan) it can contain only infs and nans though,
# in which case finite_relXXX will be nan, so unless the array
# is empty, finite_relXXX should never be inf
finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf)
finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf)

# special case when reldiff contains only 0 and infs (to avoid
# coloring the inf cells white in that case)
if finite_relmin == 0 and finite_relmax == 0 and isinf.any():
finite_relmin = -1
finite_relmax = 1
reldiff_for_bg[isneginf] = finite_relmin
reldiff_for_bg[isposinf] = finite_relmax

# make sure that "acceptable" differences show as white
reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg)

# We need a separate version for bg and the label, so that when we
# modify atol/rtol, the background color is updated but not the
# maxreldiff label
maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)),
abs(np.nanmax(reldiff_for_bg)))
if maxabsreldiff_for_bg:
# scale reldiff to range 0-1 with 0.5 for reldiff = 0
self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5
# if the only differences are nans on either side
elif not isclose.all():
# use white (0.5) everywhere except where reldiff is nan, so
# that nans are grey
self._bg_value = reldiff_for_bg + 0.5
else:
# do NOT use full_like as we don't want to inherit array dtype
self._bg_value = la.full(self._combined_array.axes, 0.5)
self._bg_value = la.full(combined_array.axes, 0.5)

# using percents does not look good when the numbers are very small
self.maxdiff_label.setText(str(maxabsreldiff))
Expand Down
Loading