Source code for priority_flow.data_loader

"""
Data loading functions for PriorityFlow package.

This module provides functions to load the test data files that were converted
from the R package's TestDomain_Inputs directory.
"""

import numpy as np
import os
from pathlib import Path

# Get the directory where this module is located
_MODULE_DIR = Path(__file__).parent
_DATA_DIR = _MODULE_DIR / "data"

def load_dem():
    """
    Return the bundled Digital Elevation Model (DEM) test array.

    The file ``DEM_t.npy`` is read from the package data directory. It holds
    a small elevation dataset (215km by 172km at 1km spatial resolution)
    converted from the R package's TestDomain_Inputs.

    Returns
    -------
    numpy.ndarray
        A 2D array of elevation values with shape (215, 172), i.e. the
        domain dimensions (nrow=215, ncol=172).

    Raises
    ------
    FileNotFoundError
        If ``DEM_t.npy`` is not present in the package data directory.

    Examples
    --------
    >>> import priority_flow.data_loader as dl
    >>> dem = dl.load_dem()
    >>> print(f"DEM shape: {dem.shape}")
    >>> print(f"Elevation range: {dem.min():.2f} to {dem.max():.2f}")
    """
    dem_file = _DATA_DIR / "DEM_t.npy"

    # Happy path first: hand the file to NumPy only when it is really there.
    if dem_file.exists():
        return np.load(dem_file)

    raise FileNotFoundError(f"DEM data file not found at {dem_file}")
def load_watershed_mask():
    """
    Return the bundled watershed mask test array.

    The file ``watershed_mask_t.npy`` is read from the package data
    directory. The mask marks the watershed drainage area of the test domain.

    Returns
    -------
    numpy.ndarray
        A 2D array of 0's and 1's with shape (215, 172), where 1 means the
        cell is inside the watershed and 0 means it is outside.

    Raises
    ------
    FileNotFoundError
        If ``watershed_mask_t.npy`` is not present in the package data
        directory.

    Examples
    --------
    >>> import numpy as np
    >>> import priority_flow.data_loader as dl
    >>> mask = dl.load_watershed_mask()
    >>> print(f"Watershed mask shape: {mask.shape}")
    >>> print(f"Watershed cells: {np.sum(mask)}")
    """
    mask_file = _DATA_DIR / "watershed_mask_t.npy"

    # Happy path first: hand the file to NumPy only when it is really there.
    if mask_file.exists():
        return np.load(mask_file)

    raise FileNotFoundError(
        f"Watershed mask data file not found at {mask_file}"
    )
def load_river_mask():
    """
    Return the bundled river mask test array.

    The file ``river_mask_t.npy`` is read from the package data directory.
    The mask marks an example river network for the test domain.

    Returns
    -------
    numpy.ndarray
        A 2D array of 0's and 1's with shape (215, 172), where 1 means the
        cell is a river cell and 0 means it is not.

    Raises
    ------
    FileNotFoundError
        If ``river_mask_t.npy`` is not present in the package data directory.

    Examples
    --------
    >>> import numpy as np
    >>> import priority_flow.data_loader as dl
    >>> river_mask = dl.load_river_mask()
    >>> print(f"River mask shape: {river_mask.shape}")
    >>> print(f"River cells: {np.sum(river_mask)}")
    """
    river_file = _DATA_DIR / "river_mask_t.npy"

    # Happy path first: hand the file to NumPy only when it is really there.
    if river_file.exists():
        return np.load(river_file)

    raise FileNotFoundError(f"River mask data file not found at {river_file}")
def load_all_test_data():
    """
    Load every bundled test dataset in a single call.

    The loaders are invoked in the order DEM, watershed mask, river mask;
    any exception raised by one of them propagates to the caller.

    Returns
    -------
    dict
        A dictionary of the loaded arrays, keyed by:

        - 'dem': Digital Elevation Model
        - 'watershed_mask': Watershed drainage area mask
        - 'river_mask': River network mask

    Examples
    --------
    >>> import priority_flow.data_loader as dl
    >>> data = dl.load_all_test_data()
    >>> print(f"Available data: {list(data.keys())}")
    >>> print(f"DEM shape: {data['dem'].shape}")
    """
    bundle = {}
    bundle['dem'] = load_dem()
    bundle['watershed_mask'] = load_watershed_mask()
    bundle['river_mask'] = load_river_mask()
    return bundle
def get_data_info():
    """
    Summarise the bundled test data files.

    Each dataset is loaded in turn. A missing file does not abort the
    summary: its entry records the error message instead.

    Returns
    -------
    dict
        Maps 'DEM', 'Watershed Mask' and 'River Mask' to a metadata dict.
        On success the metadata has the keys 'shape', 'dtype', 'min', 'max'
        and 'size'; if the file is missing it has the single key 'error'
        holding the ``FileNotFoundError`` message.
    """
    loaders = (
        ('DEM', load_dem),
        ('Watershed Mask', load_watershed_mask),
        ('River Mask', load_river_mask),
    )

    summary = {}
    for label, loader in loaders:
        try:
            arr = loader()
            entry = {
                'shape': arr.shape,
                'dtype': str(arr.dtype),
                # Cast to built-in floats so the values are plain Python numbers.
                'min': float(arr.min()),
                'max': float(arr.max()),
                'size': arr.size,
            }
        except FileNotFoundError as exc:
            entry = {'error': str(exc)}
        summary[label] = entry
    return summary