import geopandas as gpd
import hvplot.pandas
import hvplot.xarray # noqa
import numpy as np
import pandas as pd
import xarray as xr
from envrs.download_path import make_url
16.1 Imports
16.2 Read the data
# Read the cube
= make_url("HLS_clip4plots_both_b30_v20.zarr.zip", is_zip=True)
cube_uri
= xr.open_dataset(
full_cube
cube_uri,="zarr",
engine=False,
consolidated
).compute()
# Read the points, reproject tro match the datacube, set the index
= make_url("timeline_points.geojson")
points_uri = (
points
gpd.read_file(points_uri)"spatial_ref"].attrs["crs_wkt"])
.to_crs(full_cube["intact")
.set_index( )
https://git.geo.tuwien.ac.at/api/v4/projects/1266/repository/files/HLS_clip4plots_both_b30_v20.zarr.zip/raw?ref=main&lfs=true
https://git.geo.tuwien.ac.at/api/v4/projects/1266/repository/files/timeline_points.geojson/raw?ref=main&lfs=true
16.3 Specify the variables indicating cloud/shadow presence
= "cirrus cloud"
cirrus_col = "cloud"
cloud_col = "adjacent to cloud"
adjacent_col = "cloud shadow"
shadow_col = [adjacent_col, cloud_col, cirrus_col, shadow_col] tainted_cols
16.4 Select the clearest observations
= (
tainted_frame
full_cube[tainted_cols]="mask")
.to_dataarray(dimany(dim="mask")
.sum(dim=["x", "y"])
.="count_tainted")
.to_dataframe(name="count_tainted")
.sort_values(by
)
= tainted_frame.groupby(pd.Grouper(freq="ME")).head(1)
monthly_clearest = full_cube.sel(time=monthly_clearest.index.values)
monthly_cube
= tainted_frame[tainted_frame["count_tainted"] == 0]
fully_clear = full_cube.sel(time=fully_clear.index.values) clear_cube
16.5 How an image time series looks like
16.5.1 Images need to be converted to 8 bits to set the color stretch
def to_rgb8(cube, r, g, b, vmax, vmin=0):
= cube[[r, g, b]].to_dataarray("band")
selected = (selected - vmin) / (vmax - vmin)
stretched
= (stretched >= 0) & np.isfinite(stretched)
is_positive = stretched.where(is_positive.all(dim="band"), 0)
positive = (
clipped 255 * positive, 0, 255)
np.clip(
.astype(np.uint8)"composite": [f"{r=}, {g=}, {b=}"]})
.expand_dims({
)"band"] = np.array(["r", "g", "b"], dtype="unicode")
clipped[
return clipped
def plot_rgb(*args, dimname="composite"):
return xr.concat(args, dim=dimname).hvplot.rgb(
="x",
x="y",
y="band",
bands=dimname,
by="time",
groupby=True,
subplots=True,
rasterize=1,
data_aspect=False,
xaxis=None,
yaxis="bottom",
widget_location )
16.6 Plot a time series of the images
plot_rgb(="Red", g="Green", b="Blue", vmax=0.15),
to_rgb8(monthly_cube, r="NIRnarrow", g="SWIR1", b="Red", vmax=0.40),
to_rgb8(monthly_cube, r )
16.7 Plot just the “fully” clear images
plot_rgb(="Red", g="Green", b="Blue", vmax=0.15),
to_rgb8(clear_cube, r="NIRnarrow", g="SWIR1", b="Red", vmax=0.40),
to_rgb8(clear_cube, r )
16.8 Satellite pixel time series as tables
16.8.1 Pick two points one that has been deforested, and one that is intact
# https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html
# #orthogonal-indexing-in-xarray
= full_cube.sel(
sel_cube =xr.DataArray(points.geometry.x, dims="intact"),
x=xr.DataArray(points.geometry.y, dims="intact"),
y="nearest",
method )
16.8.2 Flatten (xarray dataset/“cube” to pandas dataframe/table)
= (
sel_frame "spatial_ref")
sel_cube.drop_vars(
.to_dataframe()=["x", "y"])
.drop(columns
.reset_index() )
16.8.3 Prepare auxiliary variables for plotting
def normalized_difference(frame, positive, negative):
= frame[positive] - frame[negative]
numerator = frame[positive] + frame[negative]
denominator return numerator / denominator
"NDVI"] = normalized_difference(sel_frame, "NIRnarrow", "Red")
sel_frame["NDMI"] = normalized_difference(sel_frame, "NIRnarrow", "SWIR1")
sel_frame["DOY"] = sel_frame["time"].dt.dayofyear sel_frame[
16.8.4 Split the table into intact and deforested
= sel_frame[~sel_frame["intact"]]
deforested_frame = sel_frame[sel_frame["intact"]] intact_frame
16.9 Clouds and shadows taint our time series
"flag"] = "clear"
intact_frame.loc[:, 1:]].any(axis=1), "flag"] = "cloud/shadow"
intact_frame.loc[intact_frame[tainted_cols[0]], "flag"] = "adjacent"
intact_frame.loc[intact_frame[tainted_cols[
intact_frame.hvplot.scatter(="time", y="Green", by="flag", color=["green", "black", "orange"]
x )
/tmp/ipykernel_2982/3467101171.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
intact_frame.loc[:, "flag"] = "clear"
16.10 Filter out flagged observations
= intact_frame[~intact_frame[tainted_cols].any(axis=1)].copy()
intact_masked = deforested_frame[~deforested_frame[tainted_cols].any(axis=1)].copy() deforested_masked
16.11 Removed the cloud, the timeline becomes clearer
= {
tetracolor_kwargs "x": "time",
"y": ["Blue", "Green", "Red", "NIRnarrow"],
"color": ["blue", "green", "red", "darkgray"],
}
**tetracolor_kwargs) # .legend(loc="upper left", ncols=4); intact_masked.hvplot(
16.12 Spikes could be unmasked clouds/shadows
A massive value difference respective to its neighbors indicates the presence of possible outliers
def despike(frame, columns, min_spike, max_spike):
= frame[columns].sum(axis=1)
summed
# Perform the selections
= summed.iloc[1:-1]
central = summed.shift(-1).iloc[1:-1]
prior = summed.shift(1).iloc[1:-1]
posterior
# remove observations based on their saliency respective to their neighbors
= central - (prior + posterior) / 2
spikyness = spikyness.quantile((min_spike, max_spike))
floor, ceiling = central[spikyness.between(floor, ceiling)]
selected
return frame.loc[selected.index]
= 0.05
cutoff = ["Blue", "Green", "Red", "NIRnarrow", "SWIR1", "SWIR2"]
band_names
= despike(intact_masked, band_names, cutoff, 1 - cutoff)
intact_despiked = despike(deforested_masked, band_names, cutoff, 1 - cutoff) deforested_despiked
(="time", y="NIRnarrow", color="k")
intact_masked.hvplot(x* intact_despiked.hvplot(x="time", y="NIRnarrow", color="darkgray")
)
16.13 The spikes as anomalies
These spikes are anolalies on the context where they appear, but they may or may not be global outliers when compared with the full population.
= intact_masked.iloc[1:-1].drop(index=intact_despiked.index) spike_frame
# DOY = day of the year
(="Red", y="NIRnarrow", c="DOY", colormap="twilight")
intact_masked.hvplot.scatter(x* spike_frame.hvplot.scatter(x="Red", y="NIRnarrow", marker="x", s=90, color="red")
)
The isolated points far above and below the general population would be global outliers, whereas the rest were outliers on their specific context.
16.14 The signature of deforestation
**tetracolor_kwargs) intact_despiked.hvplot(
**tetracolor_kwargs) deforested_despiked.hvplot(
16.15 With Indices
= (
compare_frame "deforested": deforested_despiked, "intact": intact_despiked}, axis=0)
pd.concat({=["history", "index"])
.reset_index(names="index")
.drop(columns )
compare_frame.hvplot.line(="time", y="NDVI", by="history", color=["black", "limegreen"]
x )