Source code for pycequeau.physiographic.CPfishnet

import itertools
import sys
from math import ceil
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from osgeo import gdal
import geopandas as gpd
import rasterstats as rs
import warnings
# from src.core import utils as u



[docs]
def convert_coords_to_index(df: gpd.GeoDataFrame,
                            dataset: gdal.Dataset) -> gpd.GeoDataFrame:
    """_summary_

    Args:
        df (gpd.GeoDataFrame): _description_
        dataset (gdal.Dataset): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    # band = dataset.GetRasterBand(1)
    # cols = dataset.RasterXSize
    # rows = dataset.RasterYSize
    df2 = pd.concat([df, pd.DataFrame(columns=["col_min", "row_min", "col_max", "row_max"],
                                      index=df.index.values)], axis=1)
    transform = dataset.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = -transform[5]
    for i in range(len(df)):
        df2.loc[i, "col_min"] = ceil((df["minx"].iloc[i] - xOrigin)/pixelWidth)
        df2.loc[i, "row_max"] = ceil(
            (yOrigin - df["miny"].iloc[i])/pixelHeight)
        df2.loc[i, "col_max"] = ceil((df["maxx"].iloc[i]-xOrigin)/pixelWidth)
        df2.loc[i, "row_min"] = ceil(
            (yOrigin - df["maxy"].iloc[i])/pixelHeight)
        # px_1 = int((df["minx"].iloc[i] - xOrigin)/pixelWidth)
        # py_1 = int((yOrigin - df["miny"].iloc[i])/pixelHeight)
        # px_2 = int((df["maxx"].iloc[i]-xOrigin)/pixelWidth)
        # py_2 = int((yOrigin - df["maxy"].iloc[i])/pixelHeight)

        # with rio.open(file) as dataset:
        #     py1, px1 = dataset.index(df["minx"].iloc[i], df["miny"].iloc[i])
        #     py2, px2 = dataset.index(df["maxx"].iloc[i], df["maxy"].iloc[i])
        #     a = 1
        # print(px1-px_1)
        # print(px2-px_2)
        # print(py1-py_1)
        # print(py2-py_2)
    return df2




[docs]
def find_neighbors(gdf: gpd.GeoDataFrame,
                   id: str) -> gpd.GeoDataFrame:
    """_summary_

    Args:
        gdf (gpd.GeoDataFrame): _description_
        id (str): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    # https://gis.stackexchange.com/questions/281652/finding-all-neighbors-using-geopandas
    # Drop the column if it exist
    if 'NEIGHBORS' in gdf.columns:
        gdf = gdf.drop(columns=["NEIGHBORS", "KEEP"])
    # add NEIGHBORS column
    gdf = gdf.reindex(columns=gdf.columns.tolist() + ['NEIGHBORS', 'KEEP'])
    gdf["NEIGHBORS"] = ''
    gdf["KEEP"] = ''
    columns = gdf.columns.tolist()
    count = 0
    for index, CP in gdf.iterrows():
        # get 'not disjoint' countries
        neighbors = gdf[~gdf.geometry.disjoint(CP.geometry)][id].tolist()
        keep = gdf[~gdf.geometry.disjoint(CP.geometry)].Dissolve.tolist()
        # remove own name of the country from the list
        keep = [bool(numb) for numb, name in zip(
            keep, neighbors) if CP[id] != name]
        neighbors = [name for name in neighbors if CP[id] != name]
        # add names of neighbors as NEIGHBORS value
        # Catch an exception here
        # try:
        #     gdf.loc[index, 'NEIGHBORS'] = neighbors
        #     gdf.loc[index, 'KEEP'] = keep
        # except ValueError:
        if isinstance(neighbors, list):
            gdf.at[index, "NEIGHBORS"] = neighbors
            gdf.at[index, "KEEP"] = keep
            # j = columns.index("KEEP");
            
            # gdf["NEIGHBORS"].iloc[count] = neighbors
            # gdf['KEEP'].iloc[count] = keep
        else:
            gdf.at[index, "NEIGHBORS"] = list([int(neighbors)])
            gdf.at[index, "KEEP"] = list([int(keep)])
            # gdf["NEIGHBORS"].iloc[count] = list([int(neighbors)])
            # gdf['KEEP'].iloc[count] = list([int(keep)])
        count += 1
    return gdf




[docs]
def identify_small_CPs(CE_fishnet: gpd.GeoDataFrame,
                       CP_fishnet: gpd.GeoDataFrame,
                       thereshold: float):
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        thereshold (float): _description_

    Returns:
        _type_: _description_
    """
    # Get the area of the CE grid
    CE_area = CE_fishnet.area[0]
    # Get area for each CP feature
    CP_fishnet = CP_fishnet.dropna(subset=['CEid'])
    CP_fishnet = CP_fishnet.explode()
    # CP_fishnet["CPid"] = range(1,len(CP_fishnet)+1)
    CP_fishnet["Area"] = CP_fishnet.area
    CP_fishnet = CP_fishnet.dropna(subset=['Area'])
    # Mask to drop values with extremly tiny areas
    mask_area = CP_fishnet["Area"] > 1.0
    CP_fishnet = CP_fishnet[mask_area]
    CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
    mask_CP = CP_fishnet["Area"] < thereshold*CE_area
    CP_fishnet["Dissolve"] = 0
    CP_fishnet.loc[mask_CP, "Dissolve"] = 1
    # Get the CP ousite the subbasin
    # This need to be changed for anohter
    mask_SUB = np.isnan(CP_fishnet["CATid"])
    index_drop = CP_fishnet.index[mask_SUB]
    # Drop this value from the main dataframe
    CP_fishnet = CP_fishnet.drop(index=index_drop)
    # CP_fishnet.at[mask_SUB, "Dissolve"] = 0
    CP_fishnet.index = CP_fishnet["CPid"].values
    # CP_fishnet["CPid"] = CP_fishnet.index
    CP_fishnet.index = CP_fishnet.index.rename("index")
    return CP_fishnet




[docs]
def remove_border_CPs(CE_fishnet: gpd.GeoDataFrame,
                      CP_fishnet: gpd.GeoDataFrame,
                      FAC: str) -> list:
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        FAC (str): _description_

    Returns:
        list: _description_
    """
    # Get the bounds
    # bounds = CE_fishnet["geometry"].bounds
    # CE_fishnet = pd.concat([CE_fishnet, bounds], axis=1)
    # FAC_dataset = gdal.Open(FAC)
    # CE_fishnet = convert_coords_to_index(CE_fishnet, FAC_dataset)

    CP_fishnet = pd.concat([CP_fishnet, pd.DataFrame(columns=["maxFAC"],
                                                     index=CP_fishnet.index)], axis=1)
    columnsCP = CP_fishnet.columns.tolist()
    for _, CE in CE_fishnet.iterrows():
        # Get the index for all the subbasin features
        idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
        # Get all features inside the CE
        CE_features = CP_fishnet.iloc[idx]
        # Check if the CE is empty or not
        if CE_features.empty:
            continue
        # If there is not features to dissolve, get rid off
        stats = rs.zonal_stats(CE_features, FAC, stats=['max'])
        CP_fishnet.iloc[idx, columnsCP.index("maxFAC")] = [
            s['max'] for s in stats]
        # Update values
        CE_features = CP_fishnet.iloc[idx]
        # Find neighbors
        CE_features = find_neighbors(CE_features, "CPid")
        for i, CP in CE_features.iterrows():
            # Take only the CP labeled with dissolve
            if CP["Dissolve"]:
                # Here I delete the Isolated CP in the border
                if CP["maxFAC"] is None:
                    CP_fishnet.loc[i, "CPid"] = 0.0
                    CP_fishnet.loc[i, "Dissolve"] = 0
                    CP_fishnet.loc[i, "CEid"] = 0
                # Delete the CP less than 400 m2. DEM 20x20
                if CP["maxFAC"] is None and CP["Area"] <= 400:
                    CP_fishnet.loc[i, "CPid"] = 0.0
                    CP_fishnet.loc[i, "Dissolve"] = 0
                    CP_fishnet.loc[i, "CEid"] = 0
                # Check if there are no neighbors
                if not CP["NEIGHBORS"]:
                    CP_fishnet.loc[i, "CPid"] = 0.0
                    CP_fishnet.loc[i, "maxFAC"] = None
                    CP_fishnet.loc[i, "Dissolve"] = 0
    # Drop the dissolved values in this iterations
    CP_fishnet = CP_fishnet[CP_fishnet["CPid"] != 0]
    CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
    # Save file
    # CP_fishnet.index = range(len(CP_fishnet))
    CP_fishnet.index = CP_fishnet.index.rename("ind")
    CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
    CP_fishnet.loc[:, "Area"] = CP_fishnet.area

    return CP_fishnet, CE_fishnet




[docs]
def remove_smallCP(CE_fishnet: gpd.GeoDataFrame,
                   CP_fishnet: gpd.GeoDataFrame,
                   thereshold: float) -> gpd.GeoDataFrame:
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        thereshold (float): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    CP_fishnet.index = CP_fishnet["CPid"].values
    CE_area = CE_fishnet.area[0]
    mask_CP = CP_fishnet["Area"] < thereshold*CE_area
    CP_fishnet["Dissolve"] = 0
    CP_fishnet.loc[mask_CP, "Dissolve"] = 1
    count = 0
    while CP_fishnet["Dissolve"].max() == 1:
        # dissolve_vals = CP_fishnet["Dissolve"].values
        for _, CE in CE_fishnet.iterrows():
            # Track the already processed CPs
            tracked_cps = []
            # Get the index for all the subbasin features
            idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
            # Get all features inside the CE
            CE_features = CP_fishnet.iloc[idx]
            # Check if there are not CPs to dissolve to avoid computing the
            # neighbors and save time
            if CE_features["Dissolve"].max() == 0:
                continue
            CE_features = find_neighbors(CE_features, "CPid")
            for i, CP in CE_features.iterrows():
                # Check if the CP was already processed
                if CP["CPid"] in tracked_cps:
                    continue
                # Check if need to dissolve
                if CP["Dissolve"]:
                    # Get the neighbors and the condition
                    neighbors = CE_features.loc[i, "NEIGHBORS"]
                    dissolve = CE_features.loc[i, "KEEP"]
                    # Check if there are neighbors to dissolve also
                    if True in dissolve:
                        # Filter by dissolve or not dissolve
                        to_dissolve = list(
                            itertools.compress(neighbors, dissolve))
                        not_dissolve = list(itertools.compress(
                            neighbors, np.logical_not(dissolve)))
                        # 1- There are only CPs to dissolve.
                        if not not_dissolve:
                            to_dissolve.append(i)
                            idx_max = CE_features.loc[to_dissolve,
                                                      "maxFAC"].idxmax()
                            CP_fishnet.loc[to_dissolve, "CPid"] = idx_max
                            # Track this CPs
                            tracked_cps.extend(to_dissolve)
                        # 2- There are both, neighbors to dissolve and not to
                        elif to_dissolve and not_dissolve:
                            to_dissolve.append(i)
                            # idx_max = CE_features.loc[not_dissolve, "maxFAC"].idxmax()
                            idx_max = CE_features.loc[to_dissolve, "maxFAC"].idxmax(
                            )
                            CP_fishnet.loc[to_dissolve, "CPid"] = idx_max
                            # Track this CP
                            tracked_cps.extend(to_dissolve)
                    else:
                        if not CP["NEIGHBORS"]:
                            # This means the CPs does not have neighbors
                            CP_fishnet.loc[i, "CPid"] = 0
                            continue
                        # Here the CPs with no neighbors to dissolve are processed
                        idx_max = CE_features.loc[CP["NEIGHBORS"], "maxFAC"].idxmax(
                        )
                        CP_fishnet.loc[i, "CPid"] = idx_max

        CP_fishnet = CP_fishnet[CP_fishnet["CPid"] != 0]
        CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
        # Re-do the dissolve labeling
        CP_fishnet["Area"] = CP_fishnet.area
        mask_CP = CP_fishnet["Area"] < thereshold*CE_area
        CP_fishnet["Dissolve"] = 0
        CP_fishnet.loc[mask_CP, "Dissolve"] = 1
        # Save file
        CP_fishnet.index = CP_fishnet.index.rename("index")
        CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
        count += 1
        # Break the loop if a maximun number of iterations was exeeced
        if count > 15:
            break
    return CP_fishnet




[docs]
def dissolve_pixels(CE_fishnet: gpd.GeoDataFrame,
                    CP_fishnet: gpd.GeoDataFrame,
                    pixel_size_area) -> gpd.GeoDataFrame:
    """
    The ressult from the previous process might lead to have multipolygon features
    We need to  make sure this is going to be well dissolved.
    Also, all the pixel-size features need to be merged with the bigger neighbors
    in order to avoid errors while computing zonal area in further steps.

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        area_th (_type_): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    #
    CP_fishnet = CP_fishnet.explode()
    # mask_pixel_feats = CP_fishnet["Area"] > 2*abs(pixel_size_area)
    CP_fishnet["Area"] = CP_fishnet.area
    # Drop values less than three pixels size
    mask_pixels = CP_fishnet["Area"] <= 3*abs(pixel_size_area)
    CP_fishnet["Dissolve"] = 0
    CP_fishnet.loc[mask_pixels, "Dissolve"] = 1
    CP_fishnet.loc[:, "CPid"] = range(1, len(CP_fishnet)+1)
    CP_fishnet.index = range(1, len(CP_fishnet)+1)
    for _, CE in CE_fishnet.iterrows():
        # Get the index for all the subbasin features
        idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
        # Get all features inside the CE
        CE_features = CP_fishnet.iloc[idx]
        # Check if the CE is empty
        if CE_features.empty or CE_features["Dissolve"].max() == 0:
            continue
        tracked_cps = []
        CE_features = find_neighbors(CE_features, "CPid")
        for i, CP in CE_features.iterrows():
            # Track the already processed CPs
            # Check if the CP was already processed
            if CP["CPid"] in tracked_cps:
                continue
            # Check if need to dissolve
            if CP["Dissolve"]:
                # Get the neighbors and the condition
                neighbors = CE_features.loc[i, "NEIGHBORS"]
                assert neighbors, (
                    f"CP dissolve failed: CPid={CP['CPid']} in CEid={CE['CEid']} has no neighbors. "
                    "Suggested fix: rerun with a coarser GRID_DIMENSION. "
                    "If the issue persists, report this dataset/dimension to the library maintainers."
                )
                candidates = CE_features.loc[neighbors, "maxFAC"]
                assert (not candidates.empty) and candidates.notna().any(), (
                    f"CP dissolve failed: CPid={CP['CPid']} in CEid={CE['CEid']} has no valid maxFAC "
                    "among neighbors. Suggested fix: rerun with a coarser GRID_DIMENSION. "
                    "If the issue persists, report this dataset/dimension to the library maintainers."
                )
                idx_max_area = candidates.idxmax()
                CP_fishnet.loc[CP["CPid"], "CPid"] = idx_max_area
                tracked_cps.append(CP["CPid"])
            continue

    CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
    # Re-do the dissolve labeling
    # Save file
    CP_fishnet.index = CP_fishnet.index.rename("index")
    CP_fishnet.index = range(1, len(CP_fishnet)+1)
    CP_fishnet["CPid"] = CP_fishnet.index.values
    CP_fishnet["Area"] = CP_fishnet.area

    # Still find some pixels after dissolving them
    mask_pixels = CP_fishnet["Area"] <= 3*abs(pixel_size_area)
    CP_fishnet["Dissolve"] = 0
    CP_fishnet.loc[mask_pixels, "Dissolve"] = 1
    df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    CP_fishnet = CP_fishnet[~mask_pixels]
    df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    CP_fishnet.loc[:, "CPid"] = range(1, len(CP_fishnet)+1)
    CP_fishnet.index = range(1, len(CP_fishnet)+1)
    # df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    return CP_fishnet




[docs]
def force_4CP(CE_fishnet: gpd.GeoDataFrame,
              CP_fishnet: gpd.GeoDataFrame,
              area_th: float) -> gpd.GeoDataFrame:
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        area_th (float): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    # Explode the fishnet
    CP_fishnet = CP_fishnet.explode()
    CP_fishnet.index = CP_fishnet["CPid"].values
    CE_area = CE_fishnet.area.max()
    mask_CP = CP_fishnet["Area"] < area_th*CE_area
    CP_fishnet.loc[mask_CP, "Dissolve"] = 1
    CP_fishnet.index = range(1, len(CP_fishnet)+1)
    CP_fishnet["CPid"] = CP_fishnet.index.values
    for _, CE in CE_fishnet.iterrows():

        # Get the index for all the subbasin features
        idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
        # Get all features inside the CE
        CE_features = CP_fishnet.iloc[idx]
        # Check if the CE is empty
        if CE_features.empty:
            continue
        # Get the CPid
        while len(CE_features) > 4:
            # Find neighbors
            CE_features = find_neighbors(CE_features, "CPid")
            # CE_features.loc[:, "Area"] = CE_features.area
            columns = CE_features.columns.tolist()
            # Find the smallest CP
            idx_small, = np.where(
                CE_features.loc[:, "Area"].values == np.amin(CE_features["Area"]))
            # Find the neighbor with the highest flow accumulation
            neighbors = CE_features.iloc[idx_small,
                                         columns.index("NEIGHBORS")].values[0]
            # Check that there are no more neighbours
            # if not neighbors:
            #     continue
            maxFAC = np.amax(CE_features.loc[neighbors, "maxFAC"])
            idx_maxFAC, = np.where(
                CE_features.loc[neighbors, "maxFAC"] == maxFAC)
            # Get the cpid of the replaced value
            idx_replaced = CE_features.iloc[idx_small, columns.index("CPid")]
            # Replace the value in the main dataframe
            CP_fishnet.loc[idx_replaced, "CPid"] = neighbors[idx_maxFAC[0]]
            # Now dissolve all CE_features and rearange the things
            CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
            CP_fishnet.index = CP_fishnet.index.rename("index")
            CP_fishnet.index = range(1, len(CP_fishnet)+1)
            CP_fishnet["CPid"] = CP_fishnet.index.values
            CP_fishnet["Area"] = CP_fishnet.area
            # Replace the CPid value into the small CP
            idx, = np.where(CP_fishnet["CEid"] == CE["CEid"])
            CE_features = CP_fishnet.iloc[idx]
            # df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
            # CE_features.iloc[idx_small, columns.index(
            #     "CPid")] = neighbors[idx_maxFAC[0]]

    # Save file
    # CP_fishnet = CP_fishnet.dissolve(by="CPid", aggfunc="max")
    # CP_fishnet.index = CP_fishnet.index.rename("index")
    # CP_fishnet.loc[:, "CPid"] = CP_fishnet.index.values
    # CP_fishnet.at[:, "Area"] = CP_fishnet.area

    # Given that this is the final dissolving step, here I will check for
    # duplicate geometries and drop them all if that's the case
    CP_fishnet['geometry_str'] = CP_fishnet['geometry'].apply(str)
    CP_fishnet = CP_fishnet.explode()
    CP_fishnet['normalized_geometry'] = CP_fishnet['geometry'].apply(
        lambda geom: str(Polygon(geom.exterior.coords)))
    CP_fishnet = CP_fishnet.dissolve(by='normalized_geometry')
    CP_fishnet.index = CP_fishnet.index.rename("index")
    CP_fishnet.index = range(1, len(CP_fishnet)+1)
    CP_fishnet["CPid"] = CP_fishnet.index.values
    CP_fishnet = CP_fishnet.drop(columns=["geometry_str"])
    # df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    # indexes_to_skip,processed_indexes = u.drop_duplicated_geometries(CP_fishnet["geometry"])
    return CP_fishnet



# def compute_flow_path(flow_dir: np.ndarray,
#                       flow_acc: np.ndarray,
#                       flow_th: float) -> np.ndarray:
#     # Mask the flow accumulation based on the threshold
#     flow_accu_mask = (flow_acc > flow_th)
#     rows, cols = np.indices(flow_dir.shape)
#     stream_network = np.zeros_like(flow_dir)
#     counter = 1
#     for row in range(flow_dir.shape[0]):
#         for col in range(flow_dir.shape[1]):
#             if flow_accu_mask[row, col]:
#                 next_row = row
#                 next_col = col
#                 # counter = 1
#                 while True:
#                     direction = flow_accu_mask[next_row, next_col]
#                     if direction == 0:
#                         break
#                     next_row += [-1, -1, 0, 1, 1, 1, 0, -1][direction - 1]
#                     next_col += [0, 1, 1, 1, 0, -1, -1, -1][direction - 1]
#                     if stream_network[next_row, next_col] > 0:
#                         break
#                 stream_network[row, col] = counter
#         # counter +=1
#     return stream_network

# Compute the mean altitude within each CE and CP

[docs]
def mean_altitudes(CE_fishnet: gpd.GeoDataFrame,
                   CP_fishnet: gpd.GeoDataFrame,
                   DEM: str):
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        DEM (str): _description_

    Returns:
        _type_: _description_
    """
    # Add altitude column to each dataset
    CE_fishnet = CE_fishnet.reindex(
        columns=CE_fishnet.columns.tolist() + ['altitude'])
    CE_fishnet["altitude"] = None
    CP_fishnet = CP_fishnet.reindex(
        columns=CP_fishnet.columns.tolist() + ['altitude'])
    CP_fishnet["altitude"] = None

    # Compute the zonal statistics
    stats_CE = rs.zonal_stats(CE_fishnet, DEM, stats=['mean'])
    CE_fishnet.loc[:, "altitude"] = [s['mean'] for s in stats_CE]
    stats_CP = rs.zonal_stats(CP_fishnet, DEM, stats=['mean'])
    CP_fishnet.loc[:, "altitude"] = [s['mean'] for s in stats_CP]
    return CP_fishnet, CE_fishnet



[docs]
def mean_canopy(CE_fishnet: gpd.GeoDataFrame,
                   CP_fishnet: gpd.GeoDataFrame,
                   Canopy_name: str):
    """_summary_

    Args:
        CE_fishnet (gpd.GeoDataFrame): _description_
        CP_fishnet (gpd.GeoDataFrame): _description_
        Canopy (str): _description_

#     Returns:
#         _type_: _description_
#     """
    # Add aspect column to each dataset
    CE_fishnet = CE_fishnet.reindex(
        columns=CE_fishnet.columns.tolist() + ['Canopy'])
    CE_fishnet["Canopy"] = None
    CP_fishnet = CP_fishnet.reindex(
        columns=CP_fishnet.columns.tolist() + ['Canopy'])
    CP_fishnet["Canopy"] = None
    
    # Compute the zonal statistics
    stats_CE = rs.zonal_stats(CE_fishnet, Canopy_name, stats=['mean'])
    CE_fishnet.loc[:, "Canopy"] = [s['mean'] for s in stats_CE]
    stats_CP = rs.zonal_stats(CP_fishnet, Canopy_name, stats=['mean'])
    CP_fishnet.loc[:, "Canopy"] = [s['mean'] for s in stats_CP]
    return CP_fishnet, CE_fishnet

                     

# def main_path(flow_dir: np.ndarray,
#               flow_acc: np.ndarray,
#               flow_th: float) -> np.ndarray:
#     # Create a mask to extract only the cells with flow accumulation greater than a certain threshold
#     flow_accumulation_mask = (flow_acc > flow_th)

#     # Find the outlet cell of the catchment (i.e., the cell with the minimum flow accumulation)
#     outlet_row, outlet_col = np.unravel_index(
#         np.argmin(flow_acc), flow_acc.shape)

#     # Create an empty list to store the cells in the main stream
#     main_stream_cells = []

#     # Trace the main stream from the outlet cell to the start of the main stream
#     next_row, next_col = outlet_row, outlet_col
#     while True:
#         main_stream_cells.append((next_row, next_col))
#         direction = flow_dir[next_row, next_col]
#         if direction == 0:  # Check if the current cell is a sink cell and exit the loop if it is
#             break
#         # Calculate the row and column indices of the next cell in the main stream
#         next_row += [-1, -1, 0, 1, 1, 1, 0, -1][direction - 1]
#         next_col += [0, 1, 1, 1, 0, -1, -1, -1][direction - 1]
#         # Check if the next row or column index is out of bounds and exit the loop if it is
#         if next_row < 0 or next_row >= flow_dir.shape[0] or next_col < 0 or next_col >= flow_dir.shape[1]:
#             break
#         # Check if the current cell has more than one upstream cell and exit the loop if it does
#         upstream_count = 0
#         for i, j in [(0, 1), (1, 1), (1, 0), (1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1)]:
#             row = next_row + i
#             col = next_col + j
#             if row >= 0 and row < flow_dir.shape[0] and col >= 0 and col < flow_dir.shape[1]:
#                 if flow_dir[row, col] == (8 - direction):
#                     upstream_count += 1
#                     if upstream_count > 1:
#                         break
#         if upstream_count > 1:
#             break

#     # Create a new raster to represent the main stream cells
#     main_stream_raster = np.zeros_like(flow_dir)
#     for cell in main_stream_cells:
#         main_stream_raster[cell[0], cell[1]] = 1



[docs]
def routing_table(CP_fishnet: gpd.GeoDataFrame,
                  CE_fishnet: gpd.GeoDataFrame,
                  FAC: str,
                  CP_array: np.ndarray,
                  CE_array: np.ndarray,
                  ncols: float,
                  nrows: float) -> pd.DataFrame:
    """_summary_

    Args:
        CP_fishnet (gpd.GeoDataFrame): _description_
        CE_fishnet (gpd.GeoDataFrame): _description_
        FAC (str): _description_
        CP_array (np.ndarray): _description_
        CE_array (np.ndarray): _description_
        ncols (float): _description_
        nrows (float): _description_

    Returns:
        pd.DataFrame: _description_
    """
    # Renaming index to make sure it starts from 1
    CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
    CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
    CP_fishnet.index = CP_fishnet["CPid"].values
    # Create dataframe to store the routing data
    routing = pd.DataFrame(columns=["CPid", "inCPid", "oldCP", "FAC"],
                           index=CP_fishnet.index)
    # Assign values to the routing dataframe
    # Compute the boundaries for each square within the CE and CP fishnets
    CE_fishnet = pd.concat([CE_fishnet,
                            CE_fishnet["geometry"].bounds], axis=1)
    routing["CPid"] = CP_fishnet["CPid"].values
    # Get the FAC array
    FAC_dataset = gdal.Open(FAC, gdal.GA_ReadOnly)
    band = FAC_dataset.GetRasterBand(1)
    FAC_array = band.ReadAsArray()
    FAC_array[FAC_array < 0] = 0
    # Get the CE square indexes
    CE_fishnet = convert_coords_to_index(CE_fishnet, FAC_dataset)
    CE_columns = CE_fishnet.columns.tolist()
    # df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    # df2 = pd.DataFrame(CE_fishnet.drop(columns='geometry'))
    # Loop into each CE

    for index, feat in CP_fishnet.iterrows():
        # Find the rows and cols where the CP value is stored
        # Find the index which correspond to the CEid in the main dataframe
        idx_CE, = np.where(CE_fishnet["CEid"] == feat["CEid"])
        # Maybe the CE does not exist?
        if len(idx_CE) == 0:
            continue
        # Slice the CE array using the corners into the main CE dataset
        row_min = CE_fishnet.iloc[idx_CE[0], CE_columns.index("row_min")]
        row_max = CE_fishnet.iloc[idx_CE[0], CE_columns.index("row_max")]
        col_min = CE_fishnet.iloc[idx_CE[0], CE_columns.index("col_min")]
        col_max = CE_fishnet.iloc[idx_CE[0], CE_columns.index("col_max")]
        CE = CE_array[row_min:row_max,
                      col_min:col_max]
        # Check the size of the sliced subset.
        # Sometimes the sliced subset is smaller than the actual subset size
        # because the indexes are derived from float values and some rounding
        # artifacts can affect the precision of the index
        if CE.shape[0] != nrows:
            # row_min = row_min
            row_max = row_min + nrows
            CE = CE_array[row_min:row_max,
                          col_min:col_max]
        elif CE.shape[1] != ncols:
            # col_min = col_min
            col_max = col_min + ncols
            CE = CE_array[row_min:row_max,
                          col_min:col_max]

        # Get the unique rows and cols that exist in the sliced array
        unique_rows, _ = np.unique(CE, return_counts=True, axis=0)
        if unique_rows.shape[0] > 1:
            diff = np.diff(CE, axis=0)
            idx_diff_zero, _ = np.where(diff != 0)
            # Compare the value with respect to the number of cols
            if idx_diff_zero[0] > 0:
                # if counts_rows[0] < counts_rows[1]:
                val = -1
            else:
                val = 1
            row_min = row_min + val
            row_max = row_min + nrows
            # row_max = row_min-1+nrows
            CE = CE_array[row_min:row_max,
                          col_min:col_max]
        unique_cols, _ = np.unique(CE, return_counts=True, axis=1)
        if unique_cols.shape[1] > 1:
            # Find the differences along the column
            diff = np.diff(CE, axis=1)
            _, idy_diff_zero = np.where(diff != 0)
            # Compare the value with respect to the number of cols
            if idy_diff_zero[0] > 0:
                # if counts_cols[0] > counts_cols[1]:
                val = -1
            else:
                val = 1
            col_min = col_min + val
            col_max = col_min + ncols
            # col_max = col_min-1+ncols
            CE = CE_array[row_min:row_max,
                          col_min:col_max]
        # Find the CP values whitin the CE subset
        CE = CE_array[row_min-1:row_max+1,
                      col_min-1:col_max+1]
        CP = CP_array[row_min-1:row_max+1,
                      col_min-1:col_max+1]
        subFAC = FAC_array[row_min-1:row_max+1,
                           col_min-1:col_max+1]
        # Mask the FAC based on this CP
        mask_CP = (CP == feat['CPid']).astype('uint8')
        # Apply mask
        # Get the outlet indexes for the CP
        outlet_row, outlet_col = np.unravel_index(
            np.argmax(subFAC*mask_CP), subFAC.shape)
        if isinstance(outlet_row, np.ndarray):
            sys.exit("There is more than one outlet point")

        # Create mask for subFAC
        mask_FAC = np.zeros(subFAC.shape).astype("uint8")
        mask_FAC[outlet_row - 1:outlet_row + 2,
                 outlet_col - 1:outlet_col + 2] = 1
        # Get the FAC values on the mask
        # Find location of next pixel into which outlet flows
        inlet_row, inlet_col = np.unravel_index(
            np.argmax(subFAC*mask_FAC), subFAC.shape)
        # Add the CP where it discharges
        routing.loc[index, "inCPid"] = CP[inlet_row, inlet_col]
        routing.loc[index, "FAC"] = subFAC[outlet_row, outlet_col]
        if routing.loc[index, "CPid"] == routing.loc[index, "inCPid"]:
            outlet_point = routing.loc[index, "CPid"]
        if routing.loc[index, "inCPid"] == 0:
            continue
    return routing




[docs]
def get_rtable(CP_fishnet: gpd.GeoDataFrame,
               routing: pd.DataFrame):
    """_summary_

    Args:
        CP_fishnet (gpd.GeoDataFrame): _description_
        routing (pd.DataFrame): _description_

    Returns:
        _type_: _description_
    """
    # *There are probably cases where a given CP drains into a non existence CP.
    # *So, here we make sure that we drop all the CP where this happens into the main data frame.
    # *This is because the CPs in the border can be so tiny that they do not account for the
    # *area threshold that we defined.

    # Create the rouring table here
    rtable = pd.DataFrame(columns=["oldCPid", "newCPid",
                                   "upstreamCPs", "oldupstreams"],
                          index=routing.index)
    routing["diff"] = routing["CPid"] - routing["inCPid"]
    routing["FAC"] = pd.to_numeric(routing["FAC"])
    # Find where the difference is zero
    # This will help us to find the outlet CP
    idx_outlet = routing.index[routing["diff"] == 0].tolist()
    if len(idx_outlet) > 1:
        assert False, "There are more than 1 outlet point."
    elif len(idx_outlet) == 0:
        idx_outlet = routing["FAC"].idxmax()
        # Check if it drains into a non-existent CP
        if routing.loc[idx_outlet, "inCPid"] == 0:
            # All good and can continue
            routing.loc[idx_outlet, "inCPid"] = [idx_outlet]
            # Set the values as listed value
            idx_outlet = [idx_outlet]
        else:
            assert False, "The outlet point was not found"

    # assert len(idx_outlet) == 1, "There are more than 1 outlet point."
    # idx_outlet = routing.mask(routing["diff"] != 0)
    # Add this value in the rtable as the first value
    rtable.loc[1, "oldCPid"] = routing.loc[idx_outlet, "CPid"].values[0]
    rtable.loc[1, "newCPid"] = 1
    # Get column list
    columns = rtable.columns.tolist()
    # Set to zero the already tracked CP
    routing.loc[idx_outlet, "inCPid"] = -99999
    new_id_counter = 1
    for i, _ in routing.iterrows():
        # Find the upstream CPs
        index_routing = rtable.loc[i, "oldCPid"] == routing["inCPid"]
        idx_outlet = routing.index[index_routing]
        if not idx_outlet.empty:
            # Find the upstream CPs
            upstreams_cps = routing.loc[idx_outlet, "CPid"].values
            # Rename the upstream CPs
            upstreams_cps_newid = list(
                range(new_id_counter+1, new_id_counter+len(idx_outlet)+1))
            # Append the CPs vertically. +1 to step out the current index
            rtable.iloc[new_id_counter:new_id_counter +
                        len(idx_outlet), columns.index("newCPid")] = upstreams_cps_newid
            rtable.iloc[new_id_counter:new_id_counter +
                        len(idx_outlet), columns.index("oldCPid")] = upstreams_cps
            rtable.iloc[i-1,
                        columns.index("upstreamCPs")] = upstreams_cps_newid
            rtable.iloc[i-1, columns.index("oldupstreams")] = upstreams_cps
            new_id_counter += len(idx_outlet)
            routing.loc[idx_outlet, "inCPid"] = -99999
    # Get the CPs where the its downstream is zero
    idx_zero_inCP = routing["inCPid"] != -99999
    index_zero_flow_in = routing.index[idx_zero_inCP]
    # Check if there is NAN values in the rtable
    idx_nan_inCP = rtable["oldCPid"].isnull()
    index_nan = rtable.index[idx_nan_inCP]
    if len(index_nan) > 0:
        # warnings.warn(
        #     f"The CPs {index_nan} drop water into non existent CP \n")
        # warnings.warn(
        #     "This might no be a problem. But it may be a source of error in the next steps \n")
        # assert False, "Error found in the definition of routing function"
        # Fill this nan values in the newCPid with a flag to keep tracking it in the next steps
        # CP_fishnet.loc[index_zero_flow_in, "CPid"] = -9999
        # rtable.loc[index_nan, "newCPid"] = -9999
        rtable.loc[index_nan, "newCPid"] = index_nan
        rtable.loc[index_nan, "oldCPid"] = index_zero_flow_in
    rtable['newCPid'] = rtable['newCPid'].astype('int')
    rtable['oldCPid'] = rtable['oldCPid'].astype('int')
    # idx = np.where()
    # rtable.index = CP_fishnet.index
    # CP_fishnet = CP_fishnet.join(rtable[["oldCPid"]])
    # CP_fishnet = CP_fishnet.dropna(subset=['oldCPid'])

    # CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
    # rtable = rtable.dropna(subset=['oldCPid'])
    # idx_nan_inCP = rtable["oldCPid"].isnull()
    # index_drop = rtable.index[idx_nan_inCP]
    # idx_fishnet_drop, = np.where(CP_fishnet["CPid"] == index_drop)
    # index_fishnet_drop = CP_fishnet.index[idx_fishnet_drop]
    # CP_fishnet = CP_fishnet.drop(index=rtable[""])
    # rtable.index = range(1, len(rtable)+1)
    # CP_fishnet.index = rtable.index
    # CP_fishnet["CPid"] = CP_fishnet.index
    # CP_fishnet = CP_fishnet.drop(columns=["minx", "miny",
    #                                       "maxx", "maxy",
    #                                       "col_min", "row_min",
    #                                       "col_max", "row_max"])
    # rtable = rtable.sort_values(by=["newCPid"])
    return rtable, CP_fishnet




[docs]
def renumber_fishnets(CP_fishnet: gpd.GeoDataFrame,
                      CE_fishnet: gpd.GeoDataFrame,
                      rtable: pd.DataFrame,
                      routing: pd.DataFrame) -> gpd.GeoDataFrame:
    """_summary_

    Args:
        CP_fishnet (gpd.GeoDataFrame): _description_
        CE_fishnet (gpd.GeoDataFrame): _description_
        rtable (pd.DataFrame): _description_
        routing (pd.DataFrame): _description_

    Returns:
        gpd.GeoDataFrame: _description_
    """
    # Renumbering the CPs
    CP_fishnet["newCPid"] = 0
    CP_fishnet["newCEid"] = 0
    CE_fishnet["newCEid"] = 0
    CE_track_list = []
    CP_fishnet = CP_fishnet.sort_values(by=["CPid"])
    CP_fishnet.index = CP_fishnet["CPid"].values
    rtable = rtable.sort_values(by=["oldCPid"])
    df = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    rtable.index = rtable["oldCPid"].values
    # CP_fishnet["CPid"] = range(1, len(CP_fishnet)+1)
    # CP_fishnet["newCPid"] = pd.to_numeric(,dtype_backend="Int64")
    # CP_fishnet["newCPid"] = rtable["newCPid"].values
    # get the columns to use them as indexers
    # columns_rtable = rtable.columns.tolist()
    columns_CPfishnet = CP_fishnet.columns.tolist()
    columns_CEfishnet = CE_fishnet.columns.tolist()
    idx_zero_inCP = routing["inCPid"] == 0
    index_zero_flow_in = routing.index[idx_zero_inCP]
    # Iterate over the dataframe to rename CP fishnet
    for i, _ in CP_fishnet.iterrows():
        # Check if the current CP drops into a non existent
        # if i in index_zero_flow_in:
        #     CP_fishnet.loc[idx_old, "newCPid"] = 0
        #     continue
        # Find the index of the old CPid in the main dataframe
        index_routing = CP_fishnet["CPid"] == rtable.loc[i, "oldCPid"]
        idx_old = CP_fishnet.index[index_routing]
        # idx_old, = np.where(
        #     CP_fishnet["CPid"].values == rtable.loc[i, "oldCPid"])
        # Replace the value with the new CPid value
        CP_fishnet.loc[idx_old, "newCPid"] = rtable.loc[i, "newCPid"]
    # Change the index in the main dataframe
    CP_fishnet.index = CP_fishnet["newCPid"].values
    # df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    # Sort values
    CP_fishnet = CP_fishnet.sort_values(by=["newCPid"])
    CP_fishnet["newCEid"] = np.array(CP_fishnet["newCEid"]).astype(int)
    # Renumbering the CEs. This is possible since the values are
    # already sorted in the main dataframe based on the new CPids
    CE_id = 0
    for i, CP in CP_fishnet.iterrows():
        # Check if the CE has already been tracked
        if CP["CEid"] in CE_track_list:
            continue
        # Track the CE in the CP fishnet
        CE_track_list.append(CP["CEid"])
        # Find the value in the dataset
        idx_new, = np.where(CE_fishnet["CEid"] == CE_track_list[CE_id])
        # Add the value in the column
        CE_fishnet.iloc[idx_new, columns_CEfishnet.index("newCEid")] = CE_id+1
        # Find the value in the CPfishnet dataset
        idx_new2, = np.where(CP_fishnet["CEid"] == CE_track_list[CE_id])
        CP_fishnet.iloc[idx_new2, columns_CPfishnet.index("newCEid")] = CE_id+1
        CE_id += 1
    CE_fishnet = CE_fishnet.sort_values(by=["newCEid"])
    CE_fishnet.index = CE_fishnet["newCEid"].values
    # Find where the CE is zero
    idx_drop_CE, = np.where(CE_fishnet["newCEid"] == 0)
    # Find index
    indexes_drop = CE_fishnet.index[idx_drop_CE]
    # Drop by  index
    CE_fishnet = CE_fishnet.drop(index=indexes_drop)
    # # Re-do the indexing to fix it
    # CP_fishnet = CP_fishnet.sort_values(by=["newCPid"])
    # CP_fishnet["newCPid"] = range(1, len(CP_fishnet)+1)
    # CP_fishnet.index = range(1, len(CP_fishnet)+1)
    # df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    return CP_fishnet, CE_fishnet




[docs]
def get_downstream_CP(rtable: pd.DataFrame) -> pd.DataFrame:
    """_summary_

    Args:
        rtable (pd.DataFrame): _description_

    Returns:
        pd.DataFrame: _description_
    """
    # Create an empty table to store the values
    rtable.index = rtable["newCPid"].values
    downstreamCPs = pd.DataFrame(columns=["downstreamCPs"],
                                 index=rtable.index)
    # Convert lists into an array.
    # Get the lenght of each list to get the maximum value
    lists_len = [len(i)
                 for i in rtable["upstreamCPs"].values if isinstance(i, list)]
    assert lists_len, (
        "Downstream routing failed: no upstreamCPs were identified in rtable. "
        "Suggested fix: rerun with a coarser GRID_DIMENSION. "
        "If the issue persists, report this dataset/dimension to the library maintainers."
    )
    # Create a zero array to store the values
    list_upstream_array = np.zeros(
        [len(rtable), max(lists_len)]).astype("uint16")
    # Fill up the array using the lists in the main dataframe
    list_to_upstream_append = []
    for i, _ in rtable.iterrows():
        if isinstance(rtable.loc[i, "upstreamCPs"], list):
            list_up = rtable.loc[i, "upstreamCPs"]
            list_upstream_array[i-1, 0:len(list_up)] = list_up
            list_to_upstream_append.append(
                list_upstream_array[i-1, :].tolist())
        else:
            list_to_upstream_append.append(
                list_upstream_array[i-1, :].tolist())
            # rtable.loc[i, "upstreamCPs"] = list_upstream_array[i-1, :].tolist()
    rtable["upstreamCPs"] = list_to_upstream_append
    # Now identify the Downstream CPs
    for i, table in rtable.iterrows():
        # Find the index of the CP in wwhich the current CP drains
        idx_down, _ = np.where(list_upstream_array == table["newCPid"])
        # Check if the list is empty
        if len(idx_down) == 0:
            downstreamCPs.loc[i, "downstreamCPs"] = 0
        else:
            downstreamCPs.loc[i,
                              "downstreamCPs"] = rtable.loc[idx_down[0]+1, "newCPid"]
    # Concat to the rtable
    rtable = pd.concat([rtable, downstreamCPs], axis=1)
    return rtable




[docs]
def outlet_routes(rtable: pd.DataFrame) -> pd.DataFrame:
    """_summary_

    Args:
        rtable (pd.DataFrame): _description_

    Returns:
        pd.DataFrame: _description_
    """

    # Create the array to store the lists
    allroute_lists = pd.DataFrame(columns=["outletRoutes"],
                                  index=rtable.index)
    # Start looping the array
    columns_rtable = rtable.columns.tolist()
    for i, _ in rtable.iterrows():
        route_list = []
        count_ups = 0
        # Set the upstream CP to
        up = rtable.loc[i, "newCPid"]
        # Set the number of the upstream CPs to 1
        # nu = up
        while up > 1:
            # append Nth CP to list
            route_list.append(up)
            up = rtable.loc[up, "downstreamCPs"]
            count_ups += 1
            route_list = np.unique(route_list)
            route_list = route_list.tolist()
            if count_ups > 10*len(rtable):
                # TODO: Check what would rise this issue. Although this does not affect the correct functioning of the process
                # raise ValueError
                break
            # go down from Nth position until end
            # up = rtable.iloc[up-1, "downstreamCPs"]
            # up = rtable.iloc[up-1, 4]
            # nu -= 1
        route_list.append(1)
        allroute_lists.loc[i, "outletRoutes"] = route_list
    # Convert it into a numpy array
    lists_len = [
        len(i) for i in allroute_lists["outletRoutes"].values if isinstance(i, list)]
    # Create a zero array to store the values
    allroute_lists_array = np.zeros([len(allroute_lists), max(lists_len)])
    # Fill up the array using the lists in the main dataframe
    for i in range(len(allroute_lists)):
        if isinstance(allroute_lists.loc[i+1, "outletRoutes"], list):
            list_up = allroute_lists.loc[i+1, "outletRoutes"]
            allroute_lists_array[i, 0:len(list_up)] = list_up
    return allroute_lists_array




[docs]
def cumulative_areas(CP_fishnet: gpd.GeoDataFrame,
                     CE_fishnet: gpd.GeoDataFrame,
                     out_routes: np.ndarray) -> pd.DataFrame:
    """_summary_

    Args:
        CP_fishnet (gpd.GeoDataFrame): _description_
        CE_fishnet (gpd.GeoDataFrame): _description_
        out_routes (np.ndarray): _description_

    Returns:
        pd.DataFrame: _description_
    """
    # Update areas of the CP and get the CE area
    CE_area = CE_fishnet.area[1]
    CP_fishnet.indeẋ = range(1, len(CP_fishnet)+1)
    df1 = pd.DataFrame(CP_fishnet.drop(columns='geometry'))
    CP_fishnet["Area"] = CP_fishnet.area
    # Get the percentage of that area
    CP_fishnet["pctSurface"] = (CP_fishnet["Area"]/CE_area)*100
    # Cumulative areas
    CP_fishnet["cumulPctSurf"] = 0.0
    upstreamCPs = []
    for i in range(len(out_routes)):
        if i == 0:
            upstreamCPs.append(0)
            continue
        # Create a copy of the main dataframe
        temp_df = out_routes.copy()
        # find the row of the current CP
        idx_row, _ = np.where(temp_df == i)
        # Mask the downstreams values
        temp_df = temp_df[idx_row, :]
        mask_df = temp_df < i
        # Drop the downstream values
        temp_df = temp_df[~mask_df]
        # Get the unique values
        temp_df = np.unique(temp_df).astype("uint16")
        # party = CP_fishnet.loc[temp_df, "pctSurface"]
        # sum = CP_fishnet.loc[temp_df, "pctSurface"].sum()
        upstreamCPs.append(temp_df.tolist())
        CP_fishnet.loc[i, "cumulPctSurf"] = CP_fishnet.loc[temp_df,
                                                           "pctSurface"].sum()
        # Compute cumul areas in km2
        CP_fishnet.loc[i, "cumulArea"] = CP_fishnet.loc[temp_df,
                                                        "Area"].sum()*1e-6
        # CP_fishnet.loc[i, "cumulPctSurf"] = CP_fishnet.iloc[temp_df, 10].sum()

    # Now, fix the last upstream CP with the area
    CP_fishnet.loc[i+1, "cumulPctSurf"] = CP_fishnet.loc[i+1,
                                                         "pctSurface"]
    # Compute cumul areas in km2
    CP_fishnet.loc[i+1, "cumulArea"] = CP_fishnet.loc[i+1,
                                                      "Area"]*1e-6
    # Convert it into a numpy array
    lists_len = [
        len(i) for i in upstreamCPs if isinstance(i, list)]
    # Create a zero array to store the values
    upstreamcps_array = np.zeros([len(upstreamCPs), max(lists_len)])
    # Fill up the array using the lists in the main dataframe
    for i in range(len(upstreamCPs)):
        if isinstance(upstreamCPs[i], list):
            list_up = upstreamCPs[i]
            upstreamcps_array[i, 0:len(list_up)] = list_up
    upstreamcps_array = np.delete(upstreamcps_array, 0, 0)
    upstreamcps_array = np.pad(upstreamcps_array, ((0, 1), (0, 0)))
    # Assign the value to the last entry
    upstreamcps_array[-1, 0] = len(upstreamCPs)
    # return allroute_lists_array
    return CP_fishnet, upstreamcps_array