import itertools
import sys
from math import ceil
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from osgeo import gdal
import geopandas as gpd
import rasterstats as rs
import warnings
# from src.core import utils as u
[docs]
def convert_coords_to_index(df: gpd.GeoDataFrame,
dataset: gdal.Dataset) -> gpd.GeoDataFrame:
"""_summary_
Args:
df (gpd.GeoDataFrame): _description_
dataset (gdal.Dataset): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
# band = dataset.GetRasterBand(1)
# cols = dataset.RasterXSize
# rows = dataset.RasterYSize
df2 = pd.concat([df, pd.DataFrame(columns=["col_min", "row_min", "col_max", "row_max"],
index=df.index.values)], axis=1)
transform = dataset.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = -transform[5]
for i in range(len(df)):
df2.loc[i, "col_min"] = ceil((df["minx"].iloc[i] - xOrigin)/pixelWidth)
df2.loc[i, "row_max"] = ceil(
(yOrigin - df["miny"].iloc[i])/pixelHeight)
df2.loc[i, "col_max"] = ceil((df["maxx"].iloc[i]-xOrigin)/pixelWidth)
df2.loc[i, "row_min"] = ceil(
(yOrigin - df["maxy"].iloc[i])/pixelHeight)
# px_1 = int((df["minx"].iloc[i] - xOrigin)/pixelWidth)
# py_1 = int((yOrigin - df["miny"].iloc[i])/pixelHeight)
# px_2 = int((df["maxx"].iloc[i]-xOrigin)/pixelWidth)
# py_2 = int((yOrigin - df["maxy"].iloc[i])/pixelHeight)
# with rio.open(file) as dataset:
# py1, px1 = dataset.index(df["minx"].iloc[i], df["miny"].iloc[i])
# py2, px2 = dataset.index(df["maxx"].iloc[i], df["maxy"].iloc[i])
# a = 1
# print(px1-px_1)
# print(px2-px_2)
# print(py1-py_1)
# print(py2-py_2)
return df2
[docs]
def find_neighbors(gdf: gpd.GeoDataFrame,
id: str) -> gpd.GeoDataFrame:
"""_summary_
Args:
gdf (gpd.GeoDataFrame): _description_
id (str): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
# https://gis.stackexchange.com/questions/281652/finding-all-neighbors-using-geopandas
# Drop the column if it exist
if 'NEIGHBORS' in gdf.columns:
gdf = gdf.drop(columns=["NEIGHBORS", "KEEP"])
# add NEIGHBORS column
gdf = gdf.reindex(columns=gdf.columns.tolist() + ['NEIGHBORS', 'KEEP'])
gdf["NEIGHBORS"] = ''
gdf["KEEP"] = ''
columns = gdf.columns.tolist()
count = 0
for index, CP in gdf.iterrows():
# get 'not disjoint' countries
neighbors = gdf[~gdf.geometry.disjoint(CP.geometry)][id].tolist()
keep = gdf[~gdf.geometry.disjoint(CP.geometry)].Dissolve.tolist()
# remove own name of the country from the list
keep = [bool(numb) for numb, name in zip(
keep, neighbors) if CP[id] != name]
neighbors = [name for name in neighbors if CP[id] != name]
# add names of neighbors as NEIGHBORS value
# Catch an exception here
# try:
# gdf.loc[index, 'NEIGHBORS'] = neighbors
# gdf.loc[index, 'KEEP'] = keep
# except ValueError:
if isinstance(neighbors, list):
gdf.at[index, "NEIGHBORS"] = neighbors
gdf.at[index, "KEEP"] = keep
# j = columns.index("KEEP");
# gdf["NEIGHBORS"].iloc[count] = neighbors
# gdf['KEEP'].iloc[count] = keep
else:
gdf.at[index, "NEIGHBORS"] = list([int(neighbors)])
gdf.at[index, "KEEP"] = list([int(keep)])
# gdf["NEIGHBORS"].iloc[count] = list([int(neighbors)])
# gdf['KEEP'].iloc[count] = list([int(keep)])
count += 1
return gdf
[docs]
def identify_small_CPs(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
thereshold: float):
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
thereshold (float): _description_
Returns:
_type_: _description_
"""
# Get the area of the CE grid
CE_area = CE_fishnet.area[0]
# Get area for each CP feature
CP_fishnet = CP_fishnet.dropna(subset=['CEid'])
CP_fishnet = CP_fishnet.explode()
# CP_fishnet["CPid"] = range(1,len(CP_fishnet)+1)
CP_fishnet["Area"] = CP_fishnet.area
CP_fishnet = CP_fishnet.dropna(subset=['Area'])
# Mask to drop values with extremly tiny areas
mask_area = CP_fishnet["Area"] > 1.0
CP_fishnet = CP_fishnet[mask_area]
CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
mask_CP = CP_fishnet["Area"] < thereshold*CE_area
CP_fishnet["Dissolve"] = 0
CP_fishnet.loc[mask_CP, "Dissolve"] = 1
# Get the CP ousite the subbasin
# This need to be changed for anohter
mask_SUB = np.isnan(CP_fishnet["CATid"])
index_drop = CP_fishnet.index[mask_SUB]
# Drop this value from the main dataframe
CP_fishnet = CP_fishnet.drop(index=index_drop)
# CP_fishnet.at[mask_SUB, "Dissolve"] = 0
CP_fishnet.index = CP_fishnet["CPid"].values
# CP_fishnet["CPid"] = CP_fishnet.index
CP_fishnet.index = CP_fishnet.index.rename("index")
return CP_fishnet
[docs]
def remove_border_CPs(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
FAC: str) -> list:
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
FAC (str): _description_
Returns:
list: _description_
"""
# Get the bounds
# bounds = CE_fishnet["geometry"].bounds
# CE_fishnet = pd.concat([CE_fishnet, bounds], axis=1)
# FAC_dataset = gdal.Open(FAC)
# CE_fishnet = convert_coords_to_index(CE_fishnet, FAC_dataset)
CP_fishnet = pd.concat([CP_fishnet, pd.DataFrame(columns=["maxFAC"],
index=CP_fishnet.index)], axis=1)
columnsCP = CP_fishnet.columns.tolist()
for _, CE in CE_fishnet.iterrows():
# Get the index for all the subbasin features
idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
# Get all features inside the CE
CE_features = CP_fishnet.iloc[idx]
# Check if the CE is empty or not
if CE_features.empty:
continue
# If there is not features to dissolve, get rid off
stats = rs.zonal_stats(CE_features, FAC, stats=['max'])
CP_fishnet.iloc[idx, columnsCP.index("maxFAC")] = [
s['max'] for s in stats]
# Update values
CE_features = CP_fishnet.iloc[idx]
# Find neighbors
CE_features = find_neighbors(CE_features, "CPid")
for i, CP in CE_features.iterrows():
# Take only the CP labeled with dissolve
if CP["Dissolve"]:
# Here I delete the Isolated CP in the border
if CP["maxFAC"] is None:
CP_fishnet.loc[i, "CPid"] = 0.0
CP_fishnet.loc[i, "Dissolve"] = 0
CP_fishnet.loc[i, "CEid"] = 0
# Delete the CP less than 400 m2. DEM 20x20
if CP["maxFAC"] is None and CP["Area"] <= 400:
CP_fishnet.loc[i, "CPid"] = 0.0
CP_fishnet.loc[i, "Dissolve"] = 0
CP_fishnet.loc[i, "CEid"] = 0
# Check if there are no neighbors
if not CP["NEIGHBORS"]:
CP_fishnet.loc[i, "CPid"] = 0.0
CP_fishnet.loc[i, "maxFAC"] = None
CP_fishnet.loc[i, "Dissolve"] = 0
# Drop the dissolved values in this iterations
CP_fishnet = CP_fishnet[CP_fishnet["CPid"] != 0]
CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
# Save file
# CP_fishnet.index = range(len(CP_fishnet))
CP_fishnet.index = CP_fishnet.index.rename("ind")
CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
CP_fishnet.loc[:, "Area"] = CP_fishnet.area
return CP_fishnet, CE_fishnet
[docs]
def remove_smallCP(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
thereshold: float) -> gpd.GeoDataFrame:
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
thereshold (float): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
CP_fishnet.index = CP_fishnet["CPid"].values
CE_area = CE_fishnet.area[0]
mask_CP = CP_fishnet["Area"] < thereshold*CE_area
CP_fishnet["Dissolve"] = 0
CP_fishnet.loc[mask_CP, "Dissolve"] = 1
count = 0
while CP_fishnet["Dissolve"].max() == 1:
# dissolve_vals = CP_fishnet["Dissolve"].values
for _, CE in CE_fishnet.iterrows():
# Track the already processed CPs
tracked_cps = []
# Get the index for all the subbasin features
idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
# Get all features inside the CE
CE_features = CP_fishnet.iloc[idx]
# Check if there are not CPs to dissolve to avoid computing the
# neighbors and save time
if CE_features["Dissolve"].max() == 0:
continue
CE_features = find_neighbors(CE_features, "CPid")
for i, CP in CE_features.iterrows():
# Check if the CP was already processed
if CP["CPid"] in tracked_cps:
continue
# Check if need to dissolve
if CP["Dissolve"]:
# Get the neighbors and the condition
neighbors = CE_features.loc[i, "NEIGHBORS"]
dissolve = CE_features.loc[i, "KEEP"]
# Check if there are neighbors to dissolve also
if True in dissolve:
# Filter by dissolve or not dissolve
to_dissolve = list(
itertools.compress(neighbors, dissolve))
not_dissolve = list(itertools.compress(
neighbors, np.logical_not(dissolve)))
# 1- There are only CPs to dissolve.
if not not_dissolve:
to_dissolve.append(i)
idx_max = CE_features.loc[to_dissolve,
"maxFAC"].idxmax()
CP_fishnet.loc[to_dissolve, "CPid"] = idx_max
# Track this CPs
tracked_cps.extend(to_dissolve)
# 2- There are both, neighbors to dissolve and not to
elif to_dissolve and not_dissolve:
to_dissolve.append(i)
# idx_max = CE_features.loc[not_dissolve, "maxFAC"].idxmax()
idx_max = CE_features.loc[to_dissolve, "maxFAC"].idxmax(
)
CP_fishnet.loc[to_dissolve, "CPid"] = idx_max
# Track this CP
tracked_cps.extend(to_dissolve)
else:
if not CP["NEIGHBORS"]:
# This means the CPs does not have neighbors
CP_fishnet.loc[i, "CPid"] = 0
continue
# Here the CPs with no neighbors to dissolve are processed
idx_max = CE_features.loc[CP["NEIGHBORS"], "maxFAC"].idxmax(
)
CP_fishnet.loc[i, "CPid"] = idx_max
CP_fishnet = CP_fishnet[CP_fishnet["CPid"] != 0]
CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
# Re-do the dissolve labeling
CP_fishnet["Area"] = CP_fishnet.area
mask_CP = CP_fishnet["Area"] < thereshold*CE_area
CP_fishnet["Dissolve"] = 0
CP_fishnet.loc[mask_CP, "Dissolve"] = 1
# Save file
CP_fishnet.index = CP_fishnet.index.rename("index")
CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
count += 1
# Break the loop if a maximun number of iterations was exeeced
if count > 15:
break
return CP_fishnet
[docs]
def dissolve_pixels(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
pixel_size_area) -> gpd.GeoDataFrame:
"""
The ressult from the previous process might lead to have multipolygon features
We need to make sure this is going to be well dissolved.
Also, all the pixel-size features need to be merged with the bigger neighbors
in order to avoid errors while computing zonal area in further steps.
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
area_th (_type_): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
#
CP_fishnet = CP_fishnet.explode()
# mask_pixel_feats = CP_fishnet["Area"] > 2*abs(pixel_size_area)
CP_fishnet["Area"] = CP_fishnet.area
# Drop values less than three pixels size
mask_pixels = CP_fishnet["Area"] <= 3*abs(pixel_size_area)
CP_fishnet["Dissolve"] = 0
CP_fishnet.loc[mask_pixels, "Dissolve"] = 1
CP_fishnet.loc[:, "CPid"] = range(1, len(CP_fishnet)+1)
CP_fishnet.index = range(1, len(CP_fishnet)+1)
for _, CE in CE_fishnet.iterrows():
# Get the index for all the subbasin features
idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
# Get all features inside the CE
CE_features = CP_fishnet.iloc[idx]
# Check if the CE is empty
if CE_features.empty or CE_features["Dissolve"].max() == 0:
continue
tracked_cps = []
CE_features = find_neighbors(CE_features, "CPid")
for i, CP in CE_features.iterrows():
# Track the already processed CPs
# Check if the CP was already processed
if CP["CPid"] in tracked_cps:
continue
# Check if need to dissolve
if CP["Dissolve"]:
# Get the neighbors and the condition
neighbors = CE_features.loc[i, "NEIGHBORS"]
assert neighbors, (
f"CP dissolve failed: CPid={CP['CPid']} in CEid={CE['CEid']} has no neighbors. "
"Suggested fix: rerun with a coarser GRID_DIMENSION. "
"If the issue persists, report this dataset/dimension to the library maintainers."
)
candidates = CE_features.loc[neighbors, "maxFAC"]
assert (not candidates.empty) and candidates.notna().any(), (
f"CP dissolve failed: CPid={CP['CPid']} in CEid={CE['CEid']} has no valid maxFAC "
"among neighbors. Suggested fix: rerun with a coarser GRID_DIMENSION. "
"If the issue persists, report this dataset/dimension to the library maintainers."
)
idx_max_area = candidates.idxmax()
CP_fishnet.loc[CP["CPid"], "CPid"] = idx_max_area
tracked_cps.append(CP["CPid"])
continue
CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
# Re-do the dissolve labeling
# Save file
CP_fishnet.index = CP_fishnet.index.rename("index")
CP_fishnet.index = range(1, len(CP_fishnet)+1)
CP_fishnet["CPid"] = CP_fishnet.index.values
CP_fishnet["Area"] = CP_fishnet.area
# Still find some pixels after dissolving them
mask_pixels = CP_fishnet["Area"] <= 3*abs(pixel_size_area)
CP_fishnet["Dissolve"] = 0
CP_fishnet.loc[mask_pixels, "Dissolve"] = 1
df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
CP_fishnet = CP_fishnet[~mask_pixels]
df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
CP_fishnet.loc[:, "CPid"] = range(1, len(CP_fishnet)+1)
CP_fishnet.index = range(1, len(CP_fishnet)+1)
# df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
return CP_fishnet
[docs]
def force_4CP(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
area_th: float) -> gpd.GeoDataFrame:
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
area_th (float): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
# Explode the fishnet
CP_fishnet = CP_fishnet.explode()
CP_fishnet.index = CP_fishnet["CPid"].values
CE_area = CE_fishnet.area.max()
mask_CP = CP_fishnet["Area"] < area_th*CE_area
CP_fishnet.loc[mask_CP, "Dissolve"] = 1
CP_fishnet.index = range(1, len(CP_fishnet)+1)
CP_fishnet["CPid"] = CP_fishnet.index.values
for _, CE in CE_fishnet.iterrows():
# Get the index for all the subbasin features
idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
# Get all features inside the CE
CE_features = CP_fishnet.iloc[idx]
# Check if the CE is empty
if CE_features.empty:
continue
# Get the CPid
while len(CE_features) > 4:
# Find neighbors
CE_features = find_neighbors(CE_features, "CPid")
# CE_features.loc[:, "Area"] = CE_features.area
columns = CE_features.columns.tolist()
# Find the smallest CP
idx_small, = np.where(
CE_features.loc[:, "Area"].values == np.amin(CE_features["Area"]))
# Find the neighbor with the highest flow accumulation
neighbors = CE_features.iloc[idx_small,
columns.index("NEIGHBORS")].values[0]
# Check that there are no more neighbours
# if not neighbors:
# continue
maxFAC = np.amax(CE_features.loc[neighbors, "maxFAC"])
idx_maxFAC, = np.where(
CE_features.loc[neighbors, "maxFAC"] == maxFAC)
# Get the cpid of the replaced value
idx_replaced = CE_features.iloc[idx_small, columns.index("CPid")]
# Replace the value in the main dataframe
CP_fishnet.loc[idx_replaced, "CPid"] = neighbors[idx_maxFAC[0]]
# Now dissolve all CE_features and rearange the things
CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
CP_fishnet.index = CP_fishnet.index.rename("index")
CP_fishnet.index = range(1, len(CP_fishnet)+1)
CP_fishnet["CPid"] = CP_fishnet.index.values
CP_fishnet["Area"] = CP_fishnet.area
# Replace the CPid value into the small CP
idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
CE_features = CP_fishnet.iloc[idx]
# df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
# CE_features.iloc[idx_small, columns.index(
# "CPid")] = neighbors[idx_maxFAC[0]]
# Save file
# CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
# CP_fishnet.index = CP_fishnet.index.rename("index")
# CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
# CP_fishnet.at[:, "Area"] = CP_fishnet.area
# Given that this is the final dissolving step, here I will check for
# duplicate geometries and drop them all if that's the case
CP_fishnet['geometry_str'] = CP_fishnet['geometry'].apply(str)
CP_fishnet = CP_fishnet.explode()
CP_fishnet['normalized_geometry'] = CP_fishnet['geometry'].apply(
lambda geom: str(Polygon(geom.exterior.coords)))
CP_fishnet = CP_fishnet.dissolve(by='normalized_geometry')
CP_fishnet.index = CP_fishnet.index.rename("index")
CP_fishnet.index = range(1, len(CP_fishnet)+1)
CP_fishnet["CPid"] = CP_fishnet.index.values
CP_fishnet = CP_fishnet.drop(columns=["geometry_str"])
# df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
# indexes_to_skip,processed_indexes = u.drop_duplicated_geometries(CP_fishnet["geometry"])
return CP_fishnet
# def compute_flow_path(flow_dir: np.ndarray,
# flow_acc: np.ndarray,
# flow_th: float) -> np.ndarray:
# # Mask the flow accumulation based on the threshold
# flow_accu_mask = (flow_acc > flow_th)
# rows, cols = np.indices(flow_dir.shape)
# stream_network = np.zeros_like(flow_dir)
# counter = 1
# for row in range(flow_dir.shape[0]):
# for col in range(flow_dir.shape[1]):
# if flow_accu_mask[row, col]:
# next_row = row
# next_col = col
# # counter = 1
# while True:
# direction = flow_accu_mask[next_row, next_col]
# if direction == 0:
# break
# next_row += [-1, -1, 0, 1, 1, 1, 0, -1][direction - 1]
# next_col += [0, 1, 1, 1, 0, -1, -1, -1][direction - 1]
# if stream_network[next_row, next_col] > 0:
# break
# stream_network[row, col] = counter
# # counter +=1
# return stream_network
# Compute the mean altitude within each CE and CP
[docs]
def mean_altitudes(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
DEM: str):
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
DEM (str): _description_
Returns:
_type_: _description_
"""
# Add altitude column to each dataset
CE_fishnet = CE_fishnet.reindex(
columns=CE_fishnet.columns.tolist() + ['altitude'])
CE_fishnet["altitude"] = None
CP_fishnet = CP_fishnet.reindex(
columns=CP_fishnet.columns.tolist() + ['altitude'])
CP_fishnet["altitude"] = None
# Compute the zonal statistics
stats_CE = rs.zonal_stats(CE_fishnet, DEM, stats=['mean'])
CE_fishnet.loc[:, "altitude"] = [s['mean'] for s in stats_CE]
stats_CP = rs.zonal_stats(CP_fishnet, DEM, stats=['mean'])
CP_fishnet.loc[:, "altitude"] = [s['mean'] for s in stats_CP]
return CP_fishnet, CE_fishnet
[docs]
def mean_canopy(CE_fishnet: gpd.GeoDataFrame,
CP_fishnet: gpd.GeoDataFrame,
Canopy_name: str):
"""_summary_
Args:
CE_fishnet (gpd.GeoDataFrame): _description_
CP_fishnet (gpd.GeoDataFrame): _description_
Canopy (str): _description_
# Returns:
# _type_: _description_
# """
# Add aspect column to each dataset
CE_fishnet = CE_fishnet.reindex(
columns=CE_fishnet.columns.tolist() + ['Canopy'])
CE_fishnet["Canopy"] = None
CP_fishnet = CP_fishnet.reindex(
columns=CP_fishnet.columns.tolist() + ['Canopy'])
CP_fishnet["Canopy"] = None
# Compute the zonal statistics
stats_CE = rs.zonal_stats(CE_fishnet, Canopy_name, stats=['mean'])
CE_fishnet.loc[:, "Canopy"] = [s['mean'] for s in stats_CE]
stats_CP = rs.zonal_stats(CP_fishnet, Canopy_name, stats=['mean'])
CP_fishnet.loc[:, "Canopy"] = [s['mean'] for s in stats_CP]
return CP_fishnet, CE_fishnet
# def main_path(flow_dir: np.ndarray,
# flow_acc: np.ndarray,
# flow_th: float) -> np.ndarray:
# # Create a mask to extract only the cells with flow accumulation greater than a certain threshold
# flow_accumulation_mask = (flow_acc > flow_th)
# # Find the outlet cell of the catchment (i.e., the cell with the minimum flow accumulation)
# outlet_row, outlet_col = np.unravel_index(
# np.argmin(flow_acc), flow_acc.shape)
# # Create an empty list to store the cells in the main stream
# main_stream_cells = []
# # Trace the main stream from the outlet cell to the start of the main stream
# next_row, next_col = outlet_row, outlet_col
# while True:
# main_stream_cells.append((next_row, next_col))
# direction = flow_dir[next_row, next_col]
# if direction == 0: # Check if the current cell is a sink cell and exit the loop if it is
# break
# # Calculate the row and column indices of the next cell in the main stream
# next_row += [-1, -1, 0, 1, 1, 1, 0, -1][direction - 1]
# next_col += [0, 1, 1, 1, 0, -1, -1, -1][direction - 1]
# # Check if the next row or column index is out of bounds and exit the loop if it is
# if next_row < 0 or next_row >= flow_dir.shape[0] or next_col < 0 or next_col >= flow_dir.shape[1]:
# break
# # Check if the current cell has more than one upstream cell and exit the loop if it does
# upstream_count = 0
# for i, j in [(0, 1), (1, 1), (1, 0), (1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1)]:
# row = next_row + i
# col = next_col + j
# if row >= 0 and row < flow_dir.shape[0] and col >= 0 and col < flow_dir.shape[1]:
# if flow_dir[row, col] == (8 - direction):
# upstream_count += 1
# if upstream_count > 1:
# break
# if upstream_count > 1:
# break
# # Create a new raster to represent the main stream cells
# main_stream_raster = np.zeros_like(flow_dir)
# for cell in main_stream_cells:
# main_stream_raster[cell[0], cell[1]] = 1
[docs]
def routing_table(CP_fishnet: gpd.GeoDataFrame,
CE_fishnet: gpd.GeoDataFrame,
FAC: str,
CP_array: np.ndarray,
CE_array: np.ndarray,
ncols: float,
nrows: float) -> pd.DataFrame:
"""_summary_
Args:
CP_fishnet (gpd.GeoDataFrame): _description_
CE_fishnet (gpd.GeoDataFrame): _description_
FAC (str): _description_
CP_array (np.ndarray): _description_
CE_array (np.ndarray): _description_
ncols (float): _description_
nrows (float): _description_
Returns:
pd.DataFrame: _description_
"""
# Renaming index to make sure it starts from 1
CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
CP_fishnet.index = CP_fishnet["CPid"].values
# Create dataframe to store the routing data
routing = pd.DataFrame(columns=["CPid", "inCPid", "oldCP", "FAC"],
index=CP_fishnet.index)
# Assign values to the routing dataframe
# Compute the boundaries for each square within the CE and CP fishnets
CE_fishnet = pd.concat([CE_fishnet,
CE_fishnet["geometry"].bounds], axis=1)
routing["CPid"] = CP_fishnet["CPid"].values
# Get the FAC array
FAC_dataset = gdal.Open(FAC, gdal.GA_ReadOnly)
band = FAC_dataset.GetRasterBand(1)
FAC_array = band.ReadAsArray()
FAC_array[FAC_array < 0] = 0
# Get the CE square indexes
CE_fishnet = convert_coords_to_index(CE_fishnet, FAC_dataset)
CE_columns = CE_fishnet.columns.tolist()
# df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
# df2 = pd.DataFrame(CE_fishnet.drop(columns='geometry'))
# Loop into each CE
for index, feat in CP_fishnet.iterrows():
# Find the rows and cols where the CP value is stored
# Find the index which correspond to the CEid in the main dataframe
idx_CE, = np.where(CE_fishnet["CEid"] == feat["CEid"])
# Maybe the CE does not exist?
if len(idx_CE) == 0:
continue
# Slice the CE array using the corners into the main CE dataset
row_min = CE_fishnet.iloc[idx_CE[0], CE_columns.index("row_min")]
row_max = CE_fishnet.iloc[idx_CE[0], CE_columns.index("row_max")]
col_min = CE_fishnet.iloc[idx_CE[0], CE_columns.index("col_min")]
col_max = CE_fishnet.iloc[idx_CE[0], CE_columns.index("col_max")]
CE = CE_array[row_min:row_max,
col_min:col_max]
# Check the size of the sliced subset.
# Sometimes the sliced subset is smaller than the actual subset size
# because the indexes are derived from float values and some rounding
# artifacts can affect the precision of the index
if CE.shape[0] != nrows:
# row_min = row_min
row_max = row_min + nrows
CE = CE_array[row_min:row_max,
col_min:col_max]
elif CE.shape[1] != ncols:
# col_min = col_min
col_max = col_min + ncols
CE = CE_array[row_min:row_max,
col_min:col_max]
# Get the unique rows and cols that exist in the sliced array
unique_rows, _ = np.unique(CE, return_counts=True, axis=0)
if unique_rows.shape[0] > 1:
diff = np.diff(CE, axis=0)
idx_diff_zero, _ = np.where(diff != 0)
# Compare the value with respect to the number of cols
if idx_diff_zero[0] > 0:
# if counts_rows[0] < counts_rows[1]:
val = -1
else:
val = 1
row_min = row_min + val
row_max = row_min + nrows
# row_max = row_min-1+nrows
CE = CE_array[row_min:row_max,
col_min:col_max]
unique_cols, _ = np.unique(CE, return_counts=True, axis=1)
if unique_cols.shape[1] > 1:
# Find the differences along the column
diff = np.diff(CE, axis=1)
_, idy_diff_zero = np.where(diff != 0)
# Compare the value with respect to the number of cols
if idy_diff_zero[0] > 0:
# if counts_cols[0] > counts_cols[1]:
val = -1
else:
val = 1
col_min = col_min + val
col_max = col_min + ncols
# col_max = col_min-1+ncols
CE = CE_array[row_min:row_max,
col_min:col_max]
# Find the CP values whitin the CE subset
CE = CE_array[row_min-1:row_max+1,
col_min-1:col_max+1]
CP = CP_array[row_min-1:row_max+1,
col_min-1:col_max+1]
subFAC = FAC_array[row_min-1:row_max+1,
col_min-1:col_max+1]
# Mask the FAC based on this CP
mask_CP = (CP == feat['CPid']).astype('uint8')
# Apply mask
# Get the outlet indexes for the CP
outlet_row, outlet_col = np.unravel_index(
np.argmax(subFAC*mask_CP), subFAC.shape)
if isinstance(outlet_row, np.ndarray):
sys.exit("There is more than one outlet point")
# Create mask for subFAC
mask_FAC = np.zeros(subFAC.shape).astype("uint8")
mask_FAC[outlet_row - 1:outlet_row + 2,
outlet_col - 1:outlet_col + 2] = 1
# Get the FAC values on the mask
# Find location of next pixel into which outlet flows
inlet_row, inlet_col = np.unravel_index(
np.argmax(subFAC*mask_FAC), subFAC.shape)
# Add the CP where it discharges
routing.loc[index, "inCPid"] = CP[inlet_row, inlet_col]
routing.loc[index, "FAC"] = subFAC[outlet_row, outlet_col]
if routing.loc[index, "CPid"] == routing.loc[index, "inCPid"]:
outlet_point = routing.loc[index, "CPid"]
if routing.loc[index, "inCPid"] == 0:
continue
return routing
[docs]
def get_rtable(CP_fishnet: gpd.GeoDataFrame,
routing: pd.DataFrame):
"""_summary_
Args:
CP_fishnet (gpd.GeoDataFrame): _description_
routing (pd.DataFrame): _description_
Returns:
_type_: _description_
"""
# *There are probably cases where a given CP drains into a non existence CP.
# *So, here we make sure that we drop all the CP where this happens into the main data frame.
# *This is because the CPs in the border can be so tiny that they do not account for the
# *area threshold that we defined.
# Create the rouring table here
rtable = pd.DataFrame(columns=["oldCPid", "newCPid",
"upstreamCPs", "oldupstreams"],
index=routing.index)
routing["diff"] = routing["CPid"] - routing["inCPid"]
routing["FAC"] = pd.to_numeric(routing["FAC"])
# Find where the difference is zero
# This will help us to find the outlet CP
idx_outlet = routing.index[routing["diff"] == 0].tolist()
if len(idx_outlet) > 1:
assert False, "There are more than 1 outlet point."
elif len(idx_outlet) == 0:
idx_outlet = routing["FAC"].idxmax()
# Check if it drains into a non-existent CP
if routing.loc[idx_outlet, "inCPid"] == 0:
# All good and can continue
routing.loc[idx_outlet, "inCPid"] = [idx_outlet]
# Set the values as listed value
idx_outlet = [idx_outlet]
else:
assert False, "The outlet point was not found"
# assert len(idx_outlet) == 1, "There are more than 1 outlet point."
# idx_outlet = routing.mask(routing["diff"] != 0)
# Add this value in the rtable as the first value
rtable.loc[1, "oldCPid"] = routing.loc[idx_outlet, "CPid"].values[0]
rtable.loc[1, "newCPid"] = 1
# Get column list
columns = rtable.columns.tolist()
# Set to zero the already tracked CP
routing.loc[idx_outlet, "inCPid"] = -99999
new_id_counter = 1
for i, _ in routing.iterrows():
# Find the upstream CPs
index_routing = rtable.loc[i, "oldCPid"] == routing["inCPid"]
idx_outlet = routing.index[index_routing]
if not idx_outlet.empty:
# Find the upstream CPs
upstreams_cps = routing.loc[idx_outlet, "CPid"].values
# Rename the upstream CPs
upstreams_cps_newid = list(
range(new_id_counter+1, new_id_counter+len(idx_outlet)+1))
# Append the CPs vertically. +1 to step out the current index
rtable.iloc[new_id_counter:new_id_counter +
len(idx_outlet), columns.index("newCPid")] = upstreams_cps_newid
rtable.iloc[new_id_counter:new_id_counter +
len(idx_outlet), columns.index("oldCPid")] = upstreams_cps
rtable.iloc[i-1,
columns.index("upstreamCPs")] = upstreams_cps_newid
rtable.iloc[i-1, columns.index("oldupstreams")] = upstreams_cps
new_id_counter += len(idx_outlet)
routing.loc[idx_outlet, "inCPid"] = -99999
# Get the CPs where the its downstream is zero
idx_zero_inCP = routing["inCPid"] != -99999
index_zero_flow_in = routing.index[idx_zero_inCP]
# Check if there is NAN values in the rtable
idx_nan_inCP = rtable["oldCPid"].isnull()
index_nan = rtable.index[idx_nan_inCP]
if len(index_nan) > 0:
# warnings.warn(
# f"The CPs {index_nan} drop water into non existent CP \n")
# warnings.warn(
# "This might no be a problem. But it may be a source of error in the next steps \n")
# assert False, "Error found in the definition of routing function"
# Fill this nan values in the newCPid with a flag to keep tracking it in the next steps
# CP_fishnet.loc[index_zero_flow_in, "CPid"] = -9999
# rtable.loc[index_nan, "newCPid"] = -9999
rtable.loc[index_nan, "newCPid"] = index_nan
rtable.loc[index_nan, "oldCPid"] = index_zero_flow_in
rtable['newCPid'] = rtable['newCPid'].astype('int')
rtable['oldCPid'] = rtable['oldCPid'].astype('int')
# idx = np.where()
# rtable.index = CP_fishnet.index
# CP_fishnet = CP_fishnet.join(rtable[["oldCPid"]])
# CP_fishnet = CP_fishnet.dropna(subset=['oldCPid'])
# CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
# rtable = rtable.dropna(subset=['oldCPid'])
# idx_nan_inCP = rtable["oldCPid"].isnull()
# index_drop = rtable.index[idx_nan_inCP]
# idx_fishnet_drop, = np.where(CP_fishnet["CPid"] == index_drop)
# index_fishnet_drop = CP_fishnet.index[idx_fishnet_drop]
# CP_fishnet = CP_fishnet.drop(index=rtable[""])
# rtable.index = range(1, len(rtable)+1)
# CP_fishnet.index = rtable.index
# CP_fishnet["CPid"] = CP_fishnet.index
# CP_fishnet = CP_fishnet.drop(columns=["minx", "miny",
# "maxx", "maxy",
# "col_min", "row_min",
# "col_max", "row_max"])
# rtable = rtable.sort_values(by=["newCPid"])
return rtable, CP_fishnet
[docs]
def renumber_fishnets(CP_fishnet: gpd.GeoDataFrame,
CE_fishnet: gpd.GeoDataFrame,
rtable: pd.DataFrame,
routing: pd.DataFrame) -> gpd.GeoDataFrame:
"""_summary_
Args:
CP_fishnet (gpd.GeoDataFrame): _description_
CE_fishnet (gpd.GeoDataFrame): _description_
rtable (pd.DataFrame): _description_
routing (pd.DataFrame): _description_
Returns:
gpd.GeoDataFrame: _description_
"""
# Renumbering the CPs
CP_fishnet["newCPid"] = 0
CP_fishnet["newCEid"] = 0
CE_fishnet["newCEid"] = 0
CE_track_list = []
CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
CP_fishnet.index = CP_fishnet["CPid"].values
rtable = rtable.sort_values(by=["oldCPid"])
df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
rtable.index = rtable["oldCPid"].values
# CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
# CP_fishnet["newCPid"] = pd.to_numeric(,dtype_backend="Int64")
# CP_fishnet["newCPid"] = rtable["newCPid"].values
# get the columns to use them as indexers
# columns_rtable = rtable.columns.tolist()
columns_CPfishnet = CP_fishnet.columns.tolist()
columns_CEfishnet = CE_fishnet.columns.tolist()
idx_zero_inCP = routing["inCPid"] == 0
index_zero_flow_in = routing.index[idx_zero_inCP]
# Iterate over the dataframe to rename CP fishnet
for i, _ in CP_fishnet.iterrows():
# Check if the current CP drops into a non existent
# if i in index_zero_flow_in:
# CP_fishnet.loc[idx_old, "newCPid"] = 0
# continue
# Find the index of the old CPid in the main dataframe
index_routing = CP_fishnet["CPid"] == rtable.loc[i, "oldCPid"]
idx_old = CP_fishnet.index[index_routing]
# idx_old, = np.where(
# CP_fishnet["CPid"].values == rtable.loc[i, "oldCPid"])
# Replace the value with the new CPid value
CP_fishnet.loc[idx_old, "newCPid"] = rtable.loc[i, "newCPid"]
# Change the index in the main dataframe
CP_fishnet.index = CP_fishnet["newCPid"].values
# df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
# Sort values
CP_fishnet = CP_fishnet.sort_values(by=["newCPid"])
CP_fishnet["newCEid"] = np.array(CP_fishnet["newCEid"]).astype(int)
# Renumbering the CEs. This is possible since the values are
# already sorted in the main dataframe based on the new CPids
CE_id = 0
for i, CP in CP_fishnet.iterrows():
# Check if the CE has already been tracked
if CP["CEid"] in CE_track_list:
continue
# Track the CE in the CP fishnet
CE_track_list.append(CP["CEid"])
# Find the value in the dataset
idx_new, = np.where(CE_fishnet["CEid"] == CE_track_list[CE_id])
# Add the value in the column
CE_fishnet.iloc[idx_new, columns_CEfishnet.index("newCEid")] = CE_id+1
# Find the value in the CPfishnet dataset
idx_new2, = np.where(CP_fishnet["CEid"] == CE_track_list[CE_id])
CP_fishnet.iloc[idx_new2, columns_CPfishnet.index("newCEid")] = CE_id+1
CE_id += 1
CE_fishnet = CE_fishnet.sort_values(by=["newCEid"])
CE_fishnet.index = CE_fishnet["newCEid"].values
# Find where the CE is zero
idx_drop_CE, = np.where(CE_fishnet["newCEid"] == 0)
# Find index
indexes_drop = CE_fishnet.index[idx_drop_CE]
# Drop by index
CE_fishnet = CE_fishnet.drop(index=indexes_drop)
# # Re-do the indexing to fix it
# CP_fishnet = CP_fishnet.sort_values(by=["newCPid"])
# CP_fishnet["newCPid"] = range(1, len(CP_fishnet)+1)
# CP_fishnet.index = range(1, len(CP_fishnet)+1)
# df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
return CP_fishnet, CE_fishnet
[docs]
def get_downstream_CP(rtable: pd.DataFrame) -> pd.DataFrame:
"""_summary_
Args:
rtable (pd.DataFrame): _description_
Returns:
pd.DataFrame: _description_
"""
# Create an empty table to store the values
rtable.index = rtable["newCPid"].values
downstreamCPs = pd.DataFrame(columns=["downstreamCPs"],
index=rtable.index)
# Convert lists into an array.
# Get the lenght of each list to get the maximum value
lists_len = [len(i)
for i in rtable["upstreamCPs"].values if isinstance(i, list)]
assert lists_len, (
"Downstream routing failed: no upstreamCPs were identified in rtable. "
"Suggested fix: rerun with a coarser GRID_DIMENSION. "
"If the issue persists, report this dataset/dimension to the library maintainers."
)
# Create a zero array to store the values
list_upstream_array = np.zeros(
[len(rtable), max(lists_len)]).astype("uint16")
# Fill up the array using the lists in the main dataframe
list_to_upstream_append = []
for i, _ in rtable.iterrows():
if isinstance(rtable.loc[i, "upstreamCPs"], list):
list_up = rtable.loc[i, "upstreamCPs"]
list_upstream_array[i-1, 0:len(list_up)] = list_up
list_to_upstream_append.append(
list_upstream_array[i-1, :].tolist())
else:
list_to_upstream_append.append(
list_upstream_array[i-1, :].tolist())
# rtable.loc[i, "upstreamCPs"] = list_upstream_array[i-1, :].tolist()
rtable["upstreamCPs"] = list_to_upstream_append
# Now identify the Downstream CPs
for i, table in rtable.iterrows():
# Find the index of the CP in wwhich the current CP drains
idx_down, _ = np.where(list_upstream_array == table["newCPid"])
# Check if the list is empty
if len(idx_down) == 0:
downstreamCPs.loc[i, "downstreamCPs"] = 0
else:
downstreamCPs.loc[i,
"downstreamCPs"] = rtable.loc[idx_down[0]+1, "newCPid"]
# Concat to the rtable
rtable = pd.concat([rtable, downstreamCPs], axis=1)
return rtable
[docs]
def outlet_routes(rtable: pd.DataFrame) -> pd.DataFrame:
"""_summary_
Args:
rtable (pd.DataFrame): _description_
Returns:
pd.DataFrame: _description_
"""
# Create the array to store the lists
allroute_lists = pd.DataFrame(columns=["outletRoutes"],
index=rtable.index)
# Start looping the array
columns_rtable = rtable.columns.tolist()
for i, _ in rtable.iterrows():
route_list = []
count_ups = 0
# Set the upstream CP to
up = rtable.loc[i, "newCPid"]
# Set the number of the upstream CPs to 1
# nu = up
while up > 1:
# append Nth CP to list
route_list.append(up)
up = rtable.loc[up, "downstreamCPs"]
count_ups += 1
route_list = np.unique(route_list)
route_list = route_list.tolist()
if count_ups > 10*len(rtable):
# TODO: Check what would rise this issue. Although this does not affect the correct functioning of the process
# raise ValueError
break
# go down from Nth position until end
# up = rtable.iloc[up-1, "downstreamCPs"]
# up = rtable.iloc[up-1, 4]
# nu -= 1
route_list.append(1)
allroute_lists.loc[i, "outletRoutes"] = route_list
# Convert it into a numpy array
lists_len = [
len(i) for i in allroute_lists["outletRoutes"].values if isinstance(i, list)]
# Create a zero array to store the values
allroute_lists_array = np.zeros([len(allroute_lists), max(lists_len)])
# Fill up the array using the lists in the main dataframe
for i in range(len(allroute_lists)):
if isinstance(allroute_lists.loc[i+1, "outletRoutes"], list):
list_up = allroute_lists.loc[i+1, "outletRoutes"]
allroute_lists_array[i, 0:len(list_up)] = list_up
return allroute_lists_array
[docs]
def cumulative_areas(CP_fishnet: gpd.GeoDataFrame,
CE_fishnet: gpd.GeoDataFrame,
out_routes: np.ndarray) -> pd.DataFrame:
"""_summary_
Args:
CP_fishnet (gpd.GeoDataFrame): _description_
CE_fishnet (gpd.GeoDataFrame): _description_
out_routes (np.ndarray): _description_
Returns:
pd.DataFrame: _description_
"""
# Update areas of the CP and get the CE area
CE_area = CE_fishnet.area[1]
CP_fishnet.indeẋ = range(1, len(CP_fishnet)+1)
df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
CP_fishnet["Area"] = CP_fishnet.area
# Get the percentage of that area
CP_fishnet["pctSurface"] = (CP_fishnet["Area"]/CE_area)*100
# Cumulative areas
CP_fishnet["cumulPctSurf"] = 0.0
upstreamCPs = []
for i in range(len(out_routes)):
if i == 0:
upstreamCPs.append(0)
continue
# Create a copy of the main dataframe
temp_df = out_routes.copy()
# find the row of the current CP
idx_row, _ = np.where(temp_df == i)
# Mask the downstreams values
temp_df = temp_df[idx_row, :]
mask_df = temp_df < i
# Drop the downstream values
temp_df = temp_df[~mask_df]
# Get the unique values
temp_df = np.unique(temp_df).astype("uint16")
# party = CP_fishnet.loc[temp_df, "pctSurface"]
# sum = CP_fishnet.loc[temp_df, "pctSurface"].sum()
upstreamCPs.append(temp_df.tolist())
CP_fishnet.loc[i, "cumulPctSurf"] = CP_fishnet.loc[temp_df,
"pctSurface"].sum()
# Compute cumul areas in km2
CP_fishnet.loc[i, "cumulArea"] = CP_fishnet.loc[temp_df,
"Area"].sum()*1e-6
# CP_fishnet.loc[i, "cumulPctSurf"] = CP_fishnet.iloc[temp_df, 10].sum()
# Now, fix the last upstream CP with the area
CP_fishnet.loc[i+1, "cumulPctSurf"] = CP_fishnet.loc[i+1,
"pctSurface"]
# Compute cumul areas in km2
CP_fishnet.loc[i+1, "cumulArea"] = CP_fishnet.loc[i+1,
"Area"]*1e-6
# Convert it into a numpy array
lists_len = [
len(i) for i in upstreamCPs if isinstance(i, list)]
# Create a zero array to store the values
upstreamcps_array = np.zeros([len(upstreamCPs), max(lists_len)])
# Fill up the array using the lists in the main dataframe
for i in range(len(upstreamCPs)):
if isinstance(upstreamCPs[i], list):
list_up = upstreamCPs[i]
upstreamcps_array[i, 0:len(list_up)] = list_up
upstreamcps_array = np.delete(upstreamcps_array, 0, 0)
upstreamcps_array = np.pad(upstreamcps_array, ((0, 1), (0, 0)))
# Assign the value to the last entry
upstreamcps_array[-1, 0] = len(upstreamCPs)
# return allroute_lists_array
return CP_fishnet, upstreamcps_array