Source code for umami.calculations.residual.discretized_misfit

from collections import OrderedDict

import numpy as np


[docs]def discretized_misfit( model_grid, data_grid, name, misfit_field, field_1, field_2, field_1_percentile_edges, field_2_percentile_edges, ): """Calculate a discretized misfit on a Landlab grid field. The following Binder notebook shows example usage of this umami calculation. .. image:: https://static.mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/TerrainBento/umami/master?filepath=notebooks%2FDiscretizedMisfit.ipynb The ``discretized_misfit`` calculation first classifies each grid cell in the landscape into categories based on ``field_1``, ``field_2`` and the percentile edges for each (using the data grid). This results in a set of categories, which may or may not be congiguous in space. This category field is then stored as a property of the ``Residual`` called ``Residual.category``. For each category, the sum of squared residuals is calculated based on the ``misfit_field``. Since this calculation returns one value for each category, rather than one value in total, a ``name`` must be provided. This is a string that will be formatted with the values for ``{field_1_level}`` and ``{field_2_level}``. The output is an ordered dictionary with ``name`` as the keys, and the sum of squares misfit as the values. For example, if ``field_1`` was the ``drainage_area`` and ``field_2`` was ``topographic__elevation``, and the parameter ``name`` was ``da_{field_1_level}_z_{field_2_level}``, then the following four categories would be identified: +--------------+-------------------------------------------------------+ | Name | Contents | +==============+=======================================================+ | ``da_0_z_0`` | Cells with the lower half of drainage area, and then | | | within those cells, the lowest half of elevation | +--------------+-------------------------------------------------------+ | ``da_0_z_1`` | Cells with the lower half of drainage area, and then | | | within those cells, the higher half of elevation | +--------------+-------------------------------------------------------+ | ``da_1_z_0`` | Cells with the higher half of drainage area, and then | | | within those cells, the lowest half of elevation | +--------------+-------------------------------------------------------+ | ``da_1_z_1`` | Cells with the higher half of drainage area, and then | | | within those cells, the higher half of elevation | +--------------+-------------------------------------------------------+ Within each of these four categories, the sum of squared residuals on the ``misfit_field`` is calculated and returned. Parameters ---------- model_grid : Landlab model grid data_grid : Landlab model grid name : str misfit_field : str An at-node Landlab grid field that is present on the model grid. field_1 : str An at-node Landlab grid field that is present on the model grid. field_2 : str An at-node Landlab grid field that is present on the model grid. field_1_percentile_edges : list A list of percentile edges applied to ``field_1``. For example, ``[0, 60, 100]`` specifies splitting ``field_1`` into two parts, separated at the 60th percentile. field_2_percentile_edges : list A list of percentile edges applied to ``field_2``. For example, ``[0, 60, 100]`` specifies splitting ``field_2`` into two parts, separated at the 60th percentile. Returns ------- OrderedDict Examples -------- First an example that only uses the ``discretized_misfit`` function. >>> import numpy as np >>> from landlab import RasterModelGrid >>> from landlab.components import FlowAccumulator >>> from umami.calculations import discretized_misfit >>> np.random.seed(42) >>> model = RasterModelGrid((50, 50)) >>> z_model = model.add_zeros("node", "topographic__elevation") >>> z_model += model.x_of_node + model.y_of_node >>> data = RasterModelGrid((50, 50)) >>> z_data = data.add_zeros("node", "topographic__elevation") >>> z_data += data.x_of_node + data.y_of_node >>> z_data[data.core_nodes] += np.random.random(data.core_nodes.shape) >>> data_fa = FlowAccumulator(data) >>> data_fa.run_one_step() >>> model_fa = FlowAccumulator(model) >>> model_fa.run_one_step() >>> cat, out = discretized_misfit( ... model, ... data, ... "da_{field_1_level}_z_{field_2_level}", ... "topographic__elevation", ... "drainage_area", ... "topographic__elevation", ... [0, 50, 100], ... [0, 30, 60, 100]) >>> for key, value in out.items(): ... print(key, np.round(value, decimals=3)) da_0_z_0 0.713 da_0_z_1 0.711 da_0_z_2 0.712 da_1_z_0 0.414 da_1_z_1 0.441 da_1_z_2 0.432 >>> cat[:5] array([0, 0, 0, 0, 0]) Next, the same calculations are shown as part of an umami ``Residual``. >>> from io import StringIO >>> from umami import Residual >>> file_like=StringIO(''' ... dm: ... _func: discretized_misfit ... name: da_{field_1_level}_z_{field_2_level} ... misfit_field: topographic__elevation ... field_1: drainage_area ... field_2: topographic__elevation ... field_1_percentile_edges: ... - 0 ... - 50 ... - 100 ... field_2_percentile_edges: ... - 0 ... - 30 ... - 60 ... - 100 ... ''') >>> residual = Residual(model, data) >>> residual.add_from_file(file_like) >>> residual.names ['da_0_z_0', 'da_0_z_1', 'da_0_z_2', 'da_1_z_0', 'da_1_z_1', 'da_1_z_2'] >>> residual.calculate() >>> for key, value in zip(residual.names, residual.values): ... print(key, np.round(value, decimals=3)) da_0_z_0 0.713 da_0_z_1 0.711 da_0_z_2 0.712 da_1_z_0 0.414 da_1_z_1 0.441 da_1_z_2 0.432 >>> residual.category[:5] array([0, 0, 0, 0, 0]) """ category = _get_category_labels( data_grid, field_1, field_2, field_1_percentile_edges, field_2_percentile_edges, ) difference = ( model_grid.at_node[misfit_field] - data_grid.at_node[misfit_field] ) n_f1_levels = np.size(field_1_percentile_edges) - 1 n_f2_levels = np.size(field_2_percentile_edges) - 1 out = OrderedDict() for c in range(0, np.max(category)): f1l, f2l = np.unravel_index(c, (n_f1_levels, n_f2_levels)) n = name.format(field_1_level=f1l, field_2_level=f2l) loc = category == (c + 1) sq_resids = np.power(difference[loc], 2.0) misfit = np.sqrt(np.mean(sq_resids)) out[n] = misfit return category, out
def _get_category_labels( grid, field_1, field_2, field_1_percentile_edges, field_2_percentile_edges ): f1 = grid.at_node[field_1] f2 = grid.at_node[field_2] # first bin by field 1, then within field_1, bin by field_2 is_core = np.zeros_like(f1, dtype=bool) is_core[grid.core_nodes] = True # calc the percentiles of the field 1 distribution f1_edges = np.percentile(f1[is_core], field_1_percentile_edges) # work through each bin and label them. category = np.zeros_like(f1, dtype=np.int) val = 1 for i in range(len(f1_edges) - 1): # selected nodes f1_min = f1_edges[i] f1_max = f1_edges[i + 1] if i != len(f1_edges) - 2: f1_sel = (f1 >= f1_min) & (f1 < f1_max) & (is_core) else: f1_sel = (f1 >= f1_min) & (is_core) # get the f2 edges for this particular part of f1-space. vals_sel = f2[f1_sel] f2_edges = np.percentile(vals_sel, field_2_percentile_edges) for j in range(len(f2_edges) - 1): f2_min = f2_edges[j] f2_max = f2_edges[j + 1] if j != len(f2_edges) - 2: f2_sel = (f2 >= f2_min) & (f2 < f2_max) & (f1_sel) else: f2_sel = (f2 >= f2_min) & (f1_sel) sel_nodes = np.nonzero(f2_sel)[0] if len(sel_nodes) > 0: category[sel_nodes] = val val += 1 # increment no matter what for consistency of naming. return category