"""
Data loading functions for PriorityFlow package.
This module provides functions to load the test data files that were converted
from the R package's TestDomain_Inputs directory.
"""
import numpy as np
import os
from pathlib import Path
# Directory containing this module file; the bundled data lives beside it.
_MODULE_DIR = Path(__file__).parent
# "data" subdirectory of the module directory, from which the loader
# functions in this module read their .npy test arrays.
_DATA_DIR = _MODULE_DIR / "data"
[docs]
def load_dem():
    """
    Return the bundled Digital Elevation Model (DEM) test array.

    The DEM is a small elevation dataset (215km by 172km at 1km spatial
    resolution) converted from the R package's TestDomain_Inputs. It is
    read from ``DEM_t.npy`` inside the package data directory.

    Returns
    -------
    numpy.ndarray
        A 2D array of elevation values with shape (215, 172), matching
        the domain dimensions (nrow=215, ncol=172).

    Raises
    ------
    FileNotFoundError
        If ``DEM_t.npy`` does not exist in the data directory.

    Examples
    --------
    >>> import priority_flow.data_loader as dl
    >>> dem = dl.load_dem()
    >>> print(f"DEM shape: {dem.shape}")
    >>> print(f"Elevation range: {dem.min():.2f} to {dem.max():.2f}")
    """
    dem_file = _DATA_DIR.joinpath("DEM_t.npy")
    # Happy path first: hand the file straight to NumPy when it is present.
    if dem_file.exists():
        return np.load(dem_file)
    raise FileNotFoundError(f"DEM data file not found at {dem_file}")
[docs]
def load_watershed_mask():
    """
    Return the bundled watershed mask test array.

    The mask marks the watershed drainage area of the test domain and is
    read from ``watershed_mask_t.npy`` inside the package data directory.

    Returns
    -------
    numpy.ndarray
        A 2D array of 0's and 1's with shape (215, 172), where 1 means the
        cell is inside the watershed and 0 means it is outside.

    Raises
    ------
    FileNotFoundError
        If ``watershed_mask_t.npy`` does not exist in the data directory.

    Examples
    --------
    >>> import numpy as np
    >>> import priority_flow.data_loader as dl
    >>> mask = dl.load_watershed_mask()
    >>> print(f"Watershed mask shape: {mask.shape}")
    >>> print(f"Watershed cells: {np.sum(mask)}")
    """
    mask_file = _DATA_DIR.joinpath("watershed_mask_t.npy")
    # Happy path first: hand the file straight to NumPy when it is present.
    if mask_file.exists():
        return np.load(mask_file)
    raise FileNotFoundError(
        f"Watershed mask data file not found at {mask_file}"
    )
[docs]
def load_river_mask():
    """
    Return the bundled river mask test array.

    The mask marks an example river network for the test domain and is
    read from ``river_mask_t.npy`` inside the package data directory.

    Returns
    -------
    numpy.ndarray
        A 2D array of 0's and 1's with shape (215, 172), where 1 marks a
        river cell and 0 marks a non-river cell.

    Raises
    ------
    FileNotFoundError
        If ``river_mask_t.npy`` does not exist in the data directory.

    Examples
    --------
    >>> import numpy as np
    >>> import priority_flow.data_loader as dl
    >>> river_mask = dl.load_river_mask()
    >>> print(f"River mask shape: {river_mask.shape}")
    >>> print(f"River cells: {np.sum(river_mask)}")
    """
    river_file = _DATA_DIR.joinpath("river_mask_t.npy")
    # Happy path first: hand the file straight to NumPy when it is present.
    if river_file.exists():
        return np.load(river_file)
    raise FileNotFoundError(f"River mask data file not found at {river_file}")
[docs]
def load_all_test_data():
    """
    Load every bundled test array in a single call.

    The arrays are loaded in the order DEM, watershed mask, river mask by
    delegating to ``load_dem``, ``load_watershed_mask`` and
    ``load_river_mask``; any exception raised by those loaders propagates
    to the caller.

    Returns
    -------
    dict
        A dictionary of the test data arrays with keys:

        - 'dem': Digital Elevation Model
        - 'watershed_mask': Watershed drainage area mask
        - 'river_mask': River network mask

    Examples
    --------
    >>> import priority_flow.data_loader as dl
    >>> data = dl.load_all_test_data()
    >>> print(f"Available data: {list(data.keys())}")
    >>> print(f"DEM shape: {data['dem'].shape}")
    """
    bundle = {}
    bundle['dem'] = load_dem()
    bundle['watershed_mask'] = load_watershed_mask()
    bundle['river_mask'] = load_river_mask()
    return bundle
[docs]
def get_data_info():
    """
    Summarise the bundled test data files.

    Each dataset is loaded through its loader function and described by a
    few basic statistics. A dataset whose file is missing does not abort
    the call; its entry records the error message instead.

    Returns
    -------
    dict
        A dictionary keyed by 'DEM', 'Watershed Mask' and 'River Mask'.
        Each value is either a dictionary with the keys 'shape', 'dtype',
        'min', 'max' and 'size' describing the loaded array, or a
        dictionary with the single key 'error' holding the message of the
        ``FileNotFoundError`` raised by the loader.
    """
    loaders = (
        ('DEM', load_dem),
        ('Watershed Mask', load_watershed_mask),
        ('River Mask', load_river_mask),
    )
    summary = {}
    for label, loader in loaders:
        try:
            array = loader()
        except FileNotFoundError as missing:
            # Record the failure and move on so one absent file does not
            # hide the metadata of the others.
            summary[label] = {'error': str(missing)}
            continue
        summary[label] = {
            'shape': array.shape,
            'dtype': str(array.dtype),
            'min': float(array.min()),
            'max': float(array.max()),
            'size': array.size,
        }
    return summary