diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5df5950ac45bd4ead3963d01d44ca30249f59bbc..41bc8c8baeae998a8d10ea57e7e852e25fb69ccc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,9 +16,9 @@ tests: - micromamba env list - pip install -e . - pytest -s - only: - - merge-request - - schedules + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + - if: $CI_COMMIT_BRANCH =~ /.*/ pages: stage: pages diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f2699d6d3f539d880097e528b1ab4ab2a748bf9..6387fd98751302a3760bbcf7e37a29a0241c84c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - function `harmonize_sen2cor_offset`: adds an `offset` property to the assets so it is taken into account by `to_xarray`. - method `ItemCollection.drop_duplicates`: drop duplicated ID returned by pgstac. - method `ItemCollection.drop_non_raster`: drop non raster assets. +- function `extract_points` and method `ItemCollection.extract_points` to extract points time series. - `writer_args` to `ItemCollection.apply_...` methods and function in order to specify the outputs format, e.g. the encoding. - in local.py, `start_datetime` and `end_datetime` can now be used instead of `datetime` in the template used to build a local ItemCollection. - module `extents.py` to manipulate STAC extents. diff --git a/simplestac/utils.py b/simplestac/utils.py index 20459d876e5ea32cd62ec7a14a403e0b00ae5fe3..b0385829a16e458ddbea0fc29ac4e711a84e346e 100644 --- a/simplestac/utils.py +++ b/simplestac/utils.py @@ -19,6 +19,7 @@ from tqdm import tqdm from typing import Union import warnings import datetime +import geopandas as gpd from simplestac.local import stac_asset_info_from_raster, properties_from_assets @@ -90,12 +91,18 @@ class ExtendPystacClasses: # times = pd.to_datetime( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - arr = stackstac.stack(self, xy_coords=xy_coords, **kwargs) + try: + arr = stackstac.stack(self, xy_coords=xy_coords, **kwargs) + except ValueError as e: + if "Cannot automatically compute the resolution" in str(e): + raise ValueError(str(e)+"\nOr drop non-raster assets from collection with ItemCollection.drop_non_raster()") + else: + raise e if bbox is not None: arr = arr.rio.clip_box(*bbox) if geometry is not None: - if hasattr(geometry, 'crs') and geometry.crs != arr.rio.crs: + if hasattr(geometry, 'crs') and not geometry.crs.equals(arr.rio.crs): logger.debug(f"Reprojecting geometry from {geometry.crs} to {arr.rio.crs}") geometry = geometry.to_crs(arr.rio.crs) arr = arr.rio.clip(geometry) @@ -486,6 +493,66 @@ class ExtendPystacClasses: if not inplace: return x + def extract_points(self, points, method="nearest", tolerance="pixel", drop=False, **kwargs): + """Extract points from xarray + + Parameters + ---------- + x : xarray.DataArray or xarray.Dataset + points : geopandas.GeoDataFrame or pandas.DataFrame + Points or coordinates of the points + method, tolerance, drop : see xarray.DataArray.sel + Additional keyword arguments passed to xarray.DataArray.sel + If tolerance is "pixel", it is set to half the resolution + of the xarray, supposing it is a rioxarray. + **kwargs: + Additional keyword arguments passed to `ItemCollection.to_xarray()` + + Returns + ------- + xarray.DataArray or xarray.Dataset + The points values with points index as coordinate. + The returned xarray can then be converted to + dataframe with `to_dataframe` or `to_dask_dataframe`. + + Examples + -------- + >>> import xarray as xr + >>> import pandas as pd + >>> import dask.array + >>> import numpy as np + >>> da = xr.DataArray( + ... # np.random.random((100,200)), + ... dask.array.random.random((100,200,10), chunks=10), + ... coords = [('x', np.arange(100)+.5), + ... ('y', np.arange(200)+.5), + ... ('z', np.arange(10)+.5)] + ... ).rename("pixel_value") + >>> df = pd.DataFrame( + ... dict( + ... x=np.random.permutation(range(100))[:100]+np.random.random(100), + ... y=np.random.permutation(range(100))[:100]+np.random.random(100), + ... other=range(100), + ... ) + ... ) + >>> df.index.rename("id_point", inplace=True) + >>> extraction = extract_points(da, df, method="nearest", tolerance=.5) + >>> ext_df = extraction.to_dataframe() + >>> ext_df.reset_index(drop=False, inplace=True) + >>> ext_df.rename({k: k+"_pixel" for k in da.dims}, axis=1, inplace=True) + >>> # join extraction to original dataframe + >>> df.merge(ext_df, on=["id_point"]) + """ + + # avoid starting anything if not all points + if isinstance(points, (gpd.GeoDataFrame, gpd.GeoSeries)): + if not points.geom_type.isin(['Point', 'MultiPoint']).all(): + raise ValueError("All geometries must be of type Point or MultiPoint") + + arr = self.to_xarray(**kwargs)#geometry=points) + if tolerance == "pixel": + tolerance = arr.rio.resolution()[0] / 2 + return extract_points(arr, points, method=method, tolerance=tolerance, drop=drop) class ItemCollection(pystac.ItemCollection, ExtendPystacClasses): pass @@ -609,51 +676,6 @@ def update_item_properties(x: pystac.Item, remove_item_props=DEFAULT_REMOVE_PROP for k in pop_props: x.properties.pop(k) - -def harmonize_sen2cor_offet(x, bands=S2_SEN2COR_BANDS, inplace=False): - """ - Harmonize new Sentinel-2 item collection (Sen2Cor v4+, 2022-01-25) - to the old baseline (v3-): - adds an offset of -1000 to the asset extra field "raster:bands" of the items - with datetime >= 2022-01-25 - - Parameters - ---------- - x: ItemCollection - An item collection of S2 scenes - bands: list - A list of bands to harmonize - - inplace: bool - Whether to modify the collection in place. Defaults to False. - In that case, a cloned collection is returned. - - Returns - ------- - ItemCollection - A collection of S2 scenes with extra_fields["raster:bands"] - added/updated to each band asset with datetime >= 2022-01-25. - - Notes - ----- - References: - - https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a#Baseline-Change - - https://github.com/microsoft/PlanetaryComputer/issues/134 - """ - - if not inplace: - collection = collection.copy() - for item in collection: - for asset in bands: - if asset in item.assets: - if item.properties["datetime"] >= "2022-01-25": - item.assets[asset].extra_fields["raster:bands"] = [dict(offset=-1000)] - else: - item.assets[asset].extra_fields["raster:bands"] = [dict(offset=0)] - if inplace: - return collection - - def apply_item(x, fun, name, output_dir, overwrite=False, copy=True, bbox=None, geometry=None, writer_args=None, **kwargs): """ @@ -880,4 +902,111 @@ def apply_formula(x, formula): return eval(formula) +def harmonize_sen2cor_offet(x, bands=S2_SEN2COR_BANDS, inplace=False): + """ + Harmonize new Sentinel-2 item collection (Sen2Cor v4+, 2022-01-25) + to the old baseline (v3-): + adds an offset of -1000 to the asset extra field "raster:bands" of the items + with datetime >= 2022-01-25 + + Parameters + ---------- + x: ItemCollection + An item collection of S2 scenes + bands: list + A list of bands to harmonize + + inplace: bool + Whether to modify the collection in place. Defaults to False. + In that case, a cloned collection is returned. + + Returns + ------- + ItemCollection + A collection of S2 scenes with extra_fields["raster:bands"] + added/updated to each band asset with datetime >= 2022-01-25. + + Notes + ----- + References: + - https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a#Baseline-Change + - https://github.com/microsoft/PlanetaryComputer/issues/134 + """ + + if not inplace: + x = x.copy() + for item in x: + for asset in bands: + if asset in item.assets: + if item.properties["datetime"] >= "2022-01-25": + item.assets[asset].extra_fields["raster:bands"] = [dict(offset=-1000)] + else: + item.assets[asset].extra_fields["raster:bands"] = [dict(offset=0)] + if inplace: + return x + +def extract_points(x, points, method=None, tolerance=None, drop=False): + """Extract points from xarray + + Parameters + ---------- + x : xarray.DataArray or xarray.Dataset + points : geopandas.GeoDataFrame or pandas.DataFrame + Points or coordinates of the points + method, tolerance, drop : see xarray.DataArray.sel + Additional keyword arguments passed to xarray.DataArray.sel + + Returns + ------- + xarray.DataArray or xarray.Dataset + The points values with points index as coordinate. + The returned xarray can then be converted to + dataframe with `to_dataframe` or `to_dask_dataframe`. + + Examples + -------- + >>> import xarray as xr + >>> import pandas as pd + >>> import dask.array + >>> import numpy as np + >>> da = xr.DataArray( + ... # np.random.random((100,200)), + ... dask.array.random.random((100,200,10), chunks=10), + ... coords = [('x', np.arange(100)+.5), + ... ('y', np.arange(200)+.5), + ... ('z', np.arange(10)+.5)] + ... ).rename("pixel_value") + >>> df = pd.DataFrame( + ... dict( + ... x=np.random.permutation(range(100))[:100]+np.random.random(100), + ... y=np.random.permutation(range(100))[:100]+np.random.random(100), + ... other=range(100), + ... ) + ... ) + >>> df.index.rename("id_point", inplace=True) + >>> extraction = extract_points(da, df, method="nearest", tolerance=.5) + >>> ext_df = extraction.to_dataframe() + >>> ext_df.reset_index(drop=False, inplace=True) + >>> ext_df.rename({k: k+"_pixel" for k in da.dims}, axis=1, inplace=True) + >>> # join extraction to original dataframe + >>> df.merge(ext_df, on=["id_point"]) + + """ + # x = da + valid_types = (gpd.GeoDataFrame, gpd.GeoSeries) + if isinstance(points, valid_types): + if not points.geom_type.isin(['Point', 'MultiPoint']).all(): + raise ValueError("All geometries must be of type Point") + + if isinstance(points, valid_types): + if hasattr(points, 'crs') and not points.crs.equals(x.rio.crs): + logger.debug(f"Reprojecting points from {points.crs} to {x.rio.crs}") + points = points.to_crs(x.rio.crs) + points = points.get_coordinates() + + xk = x.dims + coords_cols = [c for c in points.keys() if c in xk] + coords = points[coords_cols] + points = x.sel(coords.to_xarray(), method=method, tolerance=tolerance, drop=drop) + return points ####################################### \ No newline at end of file diff --git a/tests/test_local.py b/tests/test_local.py index 676448ccdbf39841938d0af8d2c20d0c082285ac..3471050eacc44322c622fb492298d52b7f048c85 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -2,6 +2,9 @@ from simplestac.local import collection_format, build_item_collection from simplestac.utils import write_raster, apply_formula import xarray as xr import pystac +from shapely.geometry import MultiPoint +import geopandas as gpd + def test_formatting(): fmt = collection_format() @@ -86,7 +89,7 @@ def test_apply_items_raster_args(s2scene_dir, roi): dtype="int16", scale_factor=0.001, add_offset=0.0, - _FillValue= 2**15 - 1, + _FillValue= -2**15, ), ) ) @@ -95,9 +98,20 @@ def test_apply_items_raster_args(s2scene_dir, roi): assert rb["datatype"] == "int16" assert rb["scale"] == 0.001 assert rb["offset"] == 0.0 - assert rb["nodata"] == 2**15 - 1 + assert rb["nodata"] == -2**15 + +def test_extract_points(s2scene_dir, roi): + col = build_item_collection(s2scene_dir, collection_format()) + points = roi.geometry.apply(lambda x: MultiPoint(list(x.exterior.coords))) + points.index.rename("id_point", inplace=True) + ext = col.extract_points(points) + assert ext.id_point.isin(points.index.values).all() + coords = points.get_coordinates().reset_index(drop=True) + points = gpd.GeoSeries(gpd.points_from_xy(**coords, crs=roi.crs)) + points.index.rename("id_point", inplace=True) + ext = col.extract_points(points) + assert ext.id_point.isin(points.index.values).all() - ############################################################ diff --git a/tests/test_remote.py b/tests/test_remote.py index 3fced923aab8aa668c6cf2d777d0193baa17417f..beb2b8230dbae0616781415dd006cecfd3921e3a 100644 --- a/tests/test_remote.py +++ b/tests/test_remote.py @@ -3,6 +3,21 @@ import planetary_computer as pc import pystac_client URL = "https://planetarycomputer.microsoft.com/api/stac/v1" + +def test_to_xarray(roi, s2scene_pc_dir): + time_range = "2022-01-01/2022-01-31" + + catalog = pystac_client.Client.open(URL, modifier=pc.sign_inplace) + search = catalog.search( + collections=["sentinel-2-l2a"], + bbox=roi.to_crs(4326).total_bounds, + datetime=time_range, + sortby="datetime", + ) + col = ItemCollection(search.item_collection()) + x = col.drop_non_raster().to_xarray() + assert len(x.time) == len(col) + def test_offset_harmonization(roi, s2scene_pc_dir): time_range = "2022-01-20/2022-01-31" @@ -21,7 +36,23 @@ def test_offset_harmonization(roi, s2scene_pc_dir): assert of0 == 0 assert ofN == -1000 +def test_drop_duplicates(roi, s2scene_pc_dir): + time_range = "2022-01-20/2022-01-31" + catalog = pystac_client.Client.open(URL, modifier=pc.sign_inplace) + search = catalog.search( + collections=["sentinel-2-l2a"], + bbox=roi.to_crs(4326).total_bounds, + datetime=time_range, + sortby="datetime", + ) + col = search.item_collection() + col1 = ItemCollection(col.clone()+col.clone()) + col1.drop_duplicates(inplace=True) + assert len(col1) == len(col) + def test_write_assets(roi, s2scene_pc_dir): + + s2scene_pc_dir.rmtree_p().mkdir_p() time_range = "2016-01-01/2016-01-31" catalog = pystac_client.Client.open(URL) @@ -35,7 +66,6 @@ def test_write_assets(roi, s2scene_pc_dir): col = ItemCollection(search.item_collection()).drop_non_raster() bbox = roi.to_crs(col.to_xarray().rio.crs).total_bounds col = pc.sign(col) - s2scene_pc_dir.rmtree_p().mkdir_p() encoding=dict( dtype="int16", scale_factor=0.001, @@ -48,6 +78,7 @@ def test_write_assets(roi, s2scene_pc_dir): item = new_col[0] assert item.id == col[0].id assert len(item.assets) == len(s2scene_pc_dir.dirs()[0].files("*.tif")) + assert item.assets["B08"].href.startswith(s2scene_pc_dir) assert new_col[0].assets["B08"].extra_fields["raster:bands"][0]["scale"] == 0.001