import numpy as np
import pandas as pd
import xarray as xr
import rasterio
import rioxarray # noqa
from shapely.geometry import box
from shapely import affinity
import json
from functools import partial
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap, BoundaryNorm
import holoviews as hv
from holoviews.streams import RangeXY
In this notebook, we will investigate the varying backscatter values associated with different land surfaces like water bodies, forests, grasslands and urban areas. We will use backscatter data from the Sentinel-1 satellite and we will utilize the CORINE Land Cover dataset to classify and extrapolate these surfaces, enabling us to analyze how different land cover types influence backscatter responses.
6.1 Load Sentinel-1 Data
For our analysis we are using sigma naught backscatering data from Sentinel-1. The images we are analyzing cover the region south of Vienna and west of Neusiedlersee Lake. We load the data and and apply again a preprocessing function. Here we extract the scaling factor and the date the image was taken from the metadata. We will focus our attention to a smaller area containing a part of the Neusiedlersee Lake and its surrounding land. The obtainedxarray
dataset and is then converted to an array, because we only have one variable, the VV backscatter values.
= Path("~/shared/datasets/rs/sentinel-1/neusiedler").expanduser()
data_path
def _preprocess(x, bbox):
"""
Preprocess file.
Parameters
----------
x : xarray.Dataset
xlims: tuple
ylims: tuple
Returns
-------
xarray.Dataset
"""
file = x.encoding["source"]
with rasterio.open(file) as src:
= pd.to_numeric(src.tags().get("scale_factor"))
scale_factor = pd.to_datetime(src.tags().get("time_begin"))
time_value
= x / scale_factor
x = x.assign_coords(time=time_value).expand_dims("time")
x
= x.rio.clip_box(*bbox, crs="EPSG:4326")
x
return x.rename({"band_data": "sig0"}).squeeze("band").drop_vars("band")
= box(16.6, 47.7, 16.75, 47.8)
bbox = partial(_preprocess, bbox=bbox.bounds)
partial_func
= xr.open_mfdataset(
sig0_ds "*.tif"),
data_path.glob(="rasterio",
engine="nested",
combine="time",
concat_dim=partial_func,
preprocess"time")
).sortby(
= sig0_ds.sig0
sig0_da sig0_da
<xarray.DataArray 'sig0' (time: 8, y: 1221, x: 1230)> Size: 96MB dask.array<getitem, shape=(8, 1221, 1230), dtype=float64, chunksize=(1, 1, 1230), chunktype=numpy.ndarray> Coordinates: * x (x) float64 10kB 5.282e+06 5.282e+06 ... 5.294e+06 5.294e+06 * y (y) float64 10kB 1.571e+06 1.571e+06 ... 1.559e+06 1.559e+06 * time (time) datetime64[ns] 64B 2023-08-05T16:51:22 ... 2023-10-28... spatial_ref int64 8B 0
Let’s have a look at the data by plotting the first timeslice.
=0).plot(robust=True, cmap="Greys_r").axes.set_aspect("equal") sig0_da.isel(time
6.2 Load CORINE Landcover Data
We will load the CORINE Land Cover, which is a pan-European land cover and land use inventory with 44 thematic classes. The resolution of this classification is 100 by 100m and the file was created in 2018 (CORINE Land Cover).
Remember previous notebooks when comparing variables. Lets have a look at some of the metadata of our dataset.
= Path("~/shared/datasets/rs/corine").expanduser()
corine_path
def print_raster(raster, name):
"""
Print Raster Metadata
Parameters
----------
raster: xarray.DataArray|xarray.DataSet
raster to process
y: string
name of product
"""
print(
f"{name} Raster: \n----------------\n"
f"resolution: {raster.rio.resolution()} {raster.rio.crs.units_factor}\n" # noqa
f"bounds: {raster.rio.bounds()}\n"
f"CRS: {raster.rio.crs}\n"
)
= affinity.scale(bbox, xfact=1.2, yfact=1.2)
bbox
= (
cor_da "*.tif"), engine="rasterio")
xr.open_mfdataset(corine_path.glob("band_data": "land_cover"})["land_cover"]
.rename({
.squeeze()*bbox.bounds, crs="EPSG:4326")
.rio.clip_box(
)
cor_da
<xarray.DataArray 'land_cover' (y: 146, x: 147)> Size: 86kB dask.array<getitem, shape=(146, 147), dtype=float32, chunksize=(81, 116), chunktype=numpy.ndarray> Coordinates: band int64 8B 1 * x (x) float64 1kB 4.814e+06 4.814e+06 ... 4.828e+06 4.828e+06 * y (y) float64 1kB 2.767e+06 2.767e+06 ... 2.753e+06 2.753e+06 spatial_ref int64 8B 0 Attributes: DataType: Thematic AREA_OR_POINT: Area RepresentationType: THEMATIC STATISTICS_COVARIANCES: 136.429646247598 STATISTICS_MAXIMUM: 48 STATISTICS_MEAN: 25.753373398066 STATISTICS_MINIMUM: 1 STATISTICS_SKIPFACTORX: 1 STATISTICS_SKIPFACTORY: 1 STATISTICS_STDDEV: 11.680310194836
"Sentinel-1") print_raster(sig0_da,
Sentinel-1 Raster:
----------------
resolution: (10.0, -10.0) ('metre', 1.0)
bounds: (5281990.0, 1558750.0, 5294290.0, 1570960.0)
CRS: EPSG:27704
"Corine Land Cover") print_raster(cor_da,
Corine Land Cover Raster:
----------------
resolution: (100.0, -100.0) ('metre', 1.0)
bounds: (4813700.0, 2752700.0, 4828400.0, 2767300.0)
CRS: PROJCS["ETRS89-extended / LAEA Europe",GEOGCS["ETRS89",DATUM["European_Terrestrial_Reference_System_1989",SPHEROID["GRS 1980",6378137,298.257222101004,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6258"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4258"]],PROJECTION["Lambert_Azimuthal_Equal_Area"],PARAMETER["latitude_of_center",52],PARAMETER["longitude_of_center",10],PARAMETER["false_easting",4321000],PARAMETER["false_northing",3210000],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","3035"]]
The long name for CRS corresponds to the grid (EPSG:3035). We will again use rioxarray to reproject_match
to convert the CORINE Land Cover map in the desired projection and resolution. The CORINE Land Cover is originally given as integer values. Hence we use the default nearest neighbour method for resampling.
= cor_da.rio.reproject_match(sig0_da)
cor_da = cor_da.assign_coords({"x": sig0_da.x, "y": sig0_da.y})
cor_da cor_da
<xarray.DataArray 'land_cover' (y: 1221, x: 1230)> Size: 6MB array([[12., 12., 12., ..., 41., 41., 41.], [12., 12., 12., ..., 41., 41., 41.], [12., 12., 12., ..., 41., 41., 41.], ..., [ 2., 2., 2., ..., 41., 41., 41.], [ 2., 2., 2., ..., 41., 41., 41.], [ 2., 2., 2., ..., 41., 41., 41.]], dtype=float32) Coordinates: band int64 8B 1 spatial_ref int64 8B 0 * x (x) float64 10kB 5.282e+06 5.282e+06 ... 5.294e+06 5.294e+06 * y (y) float64 10kB 1.571e+06 1.571e+06 ... 1.559e+06 1.559e+06 Attributes: DataType: Thematic AREA_OR_POINT: Area RepresentationType: THEMATIC STATISTICS_COVARIANCES: 136.429646247598 STATISTICS_MAXIMUM: 48 STATISTICS_MEAN: 25.753373398066 STATISTICS_MINIMUM: 1 STATISTICS_SKIPFACTORX: 1 STATISTICS_SKIPFACTORY: 1 STATISTICS_STDDEV: 11.680310194836
6.2.1 Colormapping and Encoding
For the different land cover types we use the official color encoding which can be found in CORINE Land Cover. The color mapping is stored in a json file under assets.
# Load encoding
with open("../assets/06_color_mapping.json", "r") as f:
= json.load(f)
color_mapping_data
# Get mapping
= {item["value"]: item for item in
color_mapping "land_cover"]}
color_mapping_data[
# Create cmap and norm for plotting
= [info["color"] for info in color_mapping.values()]
colors = [info["value"] for info in color_mapping.values()]
categories = ListedColormap(colors)
cmap = BoundaryNorm(categories + [max(categories) + 1], len(categories)) norm
Now we can plot the CORINE Land Cover dataset.
# Get landcover codes present in the image
= np.unique(cor_da.values[~np.isnan(cor_da.values)].
present_landcover_codes int))
astype(
# Get colors + text for legend
= [
handles
mpatches.Patch(=info["color"], label=(f'{info["value"]} - ' + (info["label"]))
color
)for info in color_mapping.values()
if info["value"] in present_landcover_codes
]
# Create the plot
=(10, 10), cmap=cmap, norm=norm, add_colorbar=False).\
cor_da.plot(figsize"equal")
axes.set_aspect(
plt.legend(=handles,
handles=(1.01, 1),
bbox_to_anchor="upper left",
loc=0,
borderaxespad=7,
fontsize
)"CORINE Land Cover (EPSG:27704)") plt.title(
Text(0.5, 1.0, 'CORINE Land Cover (EPSG:27704)')
Now we are ready to merge the backscatter data (sig0_da
) with the land cover dataset (cor_da
) to have one dataset combining all data.
= xr.merge([sig0_da, cor_da]).drop_vars("band").compute()
var_ds var_ds
<xarray.Dataset> Size: 102MB Dimensions: (x: 1230, y: 1221, time: 8) Coordinates: * x (x) float64 10kB 5.282e+06 5.282e+06 ... 5.294e+06 5.294e+06 * y (y) float64 10kB 1.571e+06 1.571e+06 ... 1.559e+06 1.559e+06 * time (time) datetime64[ns] 64B 2023-08-05T16:51:22 ... 2023-10-28... spatial_ref int64 8B 0 Data variables: sig0 (time, y, x) float64 96MB -6.99 -7.32 -8.78 ... -14.32 -14.22 land_cover (y, x) float32 6MB 12.0 12.0 12.0 12.0 ... 41.0 41.0 41.0 41.0
6.3 Backscatter Variability
With this combined dataset we can study backscatter variability in relation to natural media. For example we can look at the backscatter variability for water by clipping the dataset to only contain the land cover class water, like so:
# 41 = encoded value for water bodies
= var_ds.land_cover == 41
waterbodies_mask "equal") waterbodies_mask.plot().axes.set_aspect(
This gives use backscatter values over water only.
= var_ds.sig0.isel(time=0).where(waterbodies_mask)
waterbodies_sig0 =True, cmap="Greys_r").axes.set_aspect("equal") waterbodies_sig0.plot(robust
To get an idea of the variability we can create a histogram. Radar backscatter from water bodies fluctuates with surface roughness, which changes with wind conditions, creating spatial and temporal variations in signal intensity.
=50, edgecolor="black") waterbodies_sig0.plot.hist(bins
(array([5.0000e+00, 5.0000e+00, 8.0000e+00, 1.4000e+01, 3.5000e+01,
7.8000e+01, 1.5600e+02, 3.0500e+02, 5.4000e+02, 1.0380e+03,
1.9550e+03, 3.6850e+03, 6.9910e+03, 1.2269e+04, 1.9088e+04,
2.6859e+04, 3.6217e+04, 4.5276e+04, 5.0939e+04, 4.9020e+04,
3.9729e+04, 2.5400e+04, 1.2426e+04, 5.4390e+03, 2.6990e+03,
2.0500e+03, 2.1330e+03, 2.6890e+03, 3.1490e+03, 3.9510e+03,
4.2970e+03, 4.1110e+03, 3.6350e+03, 2.5900e+03, 1.6020e+03,
7.5000e+02, 3.5200e+02, 1.2300e+02, 5.6000e+01, 3.7000e+01,
2.8000e+01, 2.1000e+01, 1.3000e+01, 9.0000e+00, 1.0000e+01,
8.0000e+00, 1.0000e+01, 5.0000e+00, 6.0000e+00, 4.0000e+00]),
array([-37.91 , -36.9202, -35.9304, -34.9406, -33.9508, -32.961 ,
-31.9712, -30.9814, -29.9916, -29.0018, -28.012 , -27.0222,
-26.0324, -25.0426, -24.0528, -23.063 , -22.0732, -21.0834,
-20.0936, -19.1038, -18.114 , -17.1242, -16.1344, -15.1446,
-14.1548, -13.165 , -12.1752, -11.1854, -10.1956, -9.2058,
-8.216 , -7.2262, -6.2364, -5.2466, -4.2568, -3.267 ,
-2.2772, -1.2874, -0.2976, 0.6922, 1.682 , 2.6718,
3.6616, 4.6514, 5.6412, 6.631 , 7.6208, 8.6106,
9.6004, 10.5902, 11.58 ]),
<BarContainer object of 50 artists>)
6.4 Variability over Time
Next we will look at the changes in variability in backscatter values over time for each of the CORINE Land Cover types. We do this by creating the following interactive plot. We can spot that backscatter in agricultural fields varies due to seasonal cycles like planting, growing, and harvesting, each of which changes vegetation structure. Changes in backscatter are strongly related to soil moisture content from irrigation or rainfall. Ultimately, phenological stages of crops and canopy moisture dynamics can affect the backscatter signal.
"bokeh")
hv.extension(
= var_ds.sig0.quantile(0.02).item()
robust_min = var_ds.sig0.quantile(0.98).item()
robust_max
= [
bin_edges + j * 0.5
i for i in range(int(robust_min) - 2, int(robust_max) + 2)
for j in range(2)
]
= {"\xa0\xa0\xa0 Complete Land Cover": 1}
land_cover
land_cover.update(
{f"{int(value): 02} {color_mapping[value]['label']}": int(value)
for value in present_landcover_codes
}
)= var_ds.sig0["time"].values
time
= RangeXY()
rangexy
def load_image(time, land_cover, x_range, y_range):
"""
Callback Function Landcover.
Parameters
----------
time: panda.datatime
time slice
landcover: int
land cover type
x_range: array_like
longitude range
y_range: array_like
latitude range
Returns
-------
holoviews.Image
"""
if land_cover == "\xa0\xa0\xa0 Complete Land Cover":
= var_ds.sig0.sel(time=time)
sig0_selected_ds
else:
= int(land_cover.split()[0])
land_cover_value = var_ds.land_cover == land_cover_value
mask_ds = var_ds.sig0.sel(time=time).where(mask_ds)
sig0_selected_ds
= hv.Dataset(sig0_selected_ds)
hv_ds = hv_ds.to(hv.Image, ["x", "y"])
img
if x_range and y_range:
= img.select(x=x_range, y=y_range)
img
return hv.Image(img)
= (
dmap =["Time", "Landcover"], streams=[rangexy])
hv.DynamicMap(load_image, kdims=time, Landcover=land_cover)
.redim.values(Time=True, bins=bin_edges)
.hist(normed
)
= hv.opts.Image(
image_opts ="Greys_r",
cmap=True,
colorbar=["hover"],
tools=(robust_min, robust_max),
clim="equal",
aspect=False,
framewise=500,
frame_height=500,
frame_width
)
= hv.opts.Histogram(width=350, height=555)
hist_opts
dmap.opts(image_opts, hist_opts)