Compare ATL10 freeboard profile with Sentinel-2 optical images
Contents
Compare ATL10 freeboard profile with Sentinel-2 optical images#
⭐ Objectives#
Query optical images in Google Earth Engine and download images overlapped with ICESat-2 track
Compare sea ice features from optical images and ICESat-2 ATL10 product
✅ Setting computing environment#
We’ll be using the following Python libraries in this notebook:
import glob, os
import requests
import numpy as np
from datetime import datetime
from datetime import timedelta
import h5py
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
import geopandas
import rasterio
import ee
import geemap
%matplotlib widget
import matplotlib.pyplot as plt
✅ Google Earth Engine Authentication and Initialization#
Before you start Google Earth Engine (ee
library), you need to authenticate your access with your own Google account. This authentication can be done by running ee.Authenticate()
. Please go to the link and allow the access, and you will be able to get an verification code. Please copy and paste this verification code into the input field and hit enter.
try:
ee.Initialize()
except:
ee.Authenticate()
ee.Initialize()
✅ Query Sentinel-2 images via Google Earth Engine using ATL10 track#
# Read ATL10 track
tutorial_data = "/home/jovyan/shared-public/ICESat-2-Hackweek/sea_ice"
filename = f"{tutorial_data}/processed_ATL10-02_20191113181045_07310501_006_01.h5"
# Create dataframe from the ATL10 file
with h5py.File(filename, 'r') as f:
beam = 'gt2r'
track = pd.DataFrame(data={'lat': f[beam]['freeboard_segment/latitude'][:],
'lon': f[beam]['freeboard_segment/longitude'][:],
'seg_x': f[beam]['freeboard_segment/seg_dist_x'][:],
'freeboard': f[beam]['freeboard_segment/beam_fb_height'][:],
'type': f[beam]['freeboard_segment/heights/height_segment_type'][:]
})
# Create Earth Engine feature collection with the ATL10 track
track_coord = list(zip([x for x in track.lon[::100]], [x for x in track.lat[::100]]))
feature_track = ee.FeatureCollection(ee.Geometry.LineString(coords=track_coord, proj='EPSG:4326', geodesic=True))
# Define region of interest (ROI) using the ICESat-2 track feature collection
roi = feature_track.geometry()
# Read acquisition time of the ATL10 track from filename
time_atl = datetime.strptime(filename[-33:-19], "%Y%m%d%H%M%S")
# Start date (should be the ICESat-2 acquisition date)
t1 = time_atl.strftime("%Y-%m-%d")
# End date (1 day after the ICESat-2 acquistion date)
t2 = (datetime.strptime(t1, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
print(t1, t2)
2019-11-13 2019-11-14
# Query Sentinel-2 images using ATL10 file information
S2 = ee.ImageCollection("COPERNICUS/S2_SR").filterBounds(roi)\
.filterDate(t1, t2)\
.filter(ee.Filter.lt('CLOUD_COVERAGE_ASSESSMENT', 20))\
.filter(ee.Filter.lt('NODATA_PIXEL_PERCENTAGE', 10))
num = S2.size().getInfo() # Number of available images
print(num)
3
# Image ID for all images
ids = [S2.toList(num).getInfo()[i]['id'] for i in range(0, num)]
ids
['COPERNICUS/S2_SR/20191113T183459_20191113T183501_T02CNU',
'COPERNICUS/S2_SR/20191113T183459_20191113T183501_T03CVP',
'COPERNICUS/S2_SR/20191113T183459_20191113T183501_T03CVQ']
time_diff = 3600*3 # Target time difference between ATL10 and Sentinel-2 < 3 hours
Map = geemap.Map()
for i in range(0, num):
time_start = S2.toList(num).getInfo()[i]['properties']['system:time_start']
time_s2 = datetime(1970, 1, 1) + timedelta(seconds = time_start/1000)
img_name = os.path.basename(ids[i])
# Time difference between IS2 and S2 < defined time_diff
if abs(time_atl-time_s2).seconds <= time_diff:
img = ee.Image(ids[i]).select('B4')
dim = img.getInfo()['bands'][0]['dimensions']
if dim[0] > 10000 and dim[1] > 0:
# # Download image into the local folder
# geemap.download_ee_image(img, f"S2_{img_name}.tif", scale=50)
Map.addLayer(img, {'min': 0, 'max': 10000}, img_name)
else:
print(f"SKIP {img_name}: Not a full image")
else:
print(f"SKIP {img_name}: Time difference > {time_diff/3600} hours with ICESat-2 track")
Map.centerObject(img, zoom = 7)
Map.addLayer(feature_track, {}, "IS2 track")
Map
# !pip install geedim
✅ Comparison between ICESat-2 and Sentinel-2#
Now we will compare the Sentinel-2 optical image we downloaded with the ICESat-2 ATL10 track.
# File name of the downloaded Sentinel-2 image
tutorial_data = "/home/jovyan/shared-public/ICESat-2-Hackweek/sea_ice"
img_name = f"{tutorial_data}/S2_20191113T183459_20191113T183501_T03CVQ.tif"
# Read Sentinel-2 image using raterio
img = rasterio.open(img_name)
array = img.read(1)
array[array < 0] = 0
height = array.shape[0]
width = array.shape[1]
cols, rows = np.meshgrid(np.arange(width), np.arange(height))
# XY projected coordinate of the image
xs, ys = rasterio.transform.xy(img.transform, rows, cols)
xs = np.array(xs)
ys = np.array(ys)
img.close()
plt.figure(figsize = (8, 7))
plt.imshow(array, cmap = "Blues_r", vmin = 0, vmax = 10000)
plt.colorbar(shrink = 0.5)
<matplotlib.colorbar.Colorbar at 0x7fe5c5972da0>
Let’s take a look at the ATL10 track data. We read the ATL10 h5 file as pandas
dataframe. In order to assign the geographic coordinate information, we will convert this dataframe into geopandas
geodataframe.
track
lat | lon | seg_x | freeboard | type | |
---|---|---|---|---|---|
0 | -74.000104 | -165.856928 | 2.832969e+07 | 0.117155 | 8 |
1 | -74.000290 | -165.857027 | 2.832971e+07 | 0.052732 | 1 |
2 | -74.000306 | -165.857035 | 2.832972e+07 | 0.047613 | 1 |
3 | -74.000324 | -165.857044 | 2.832972e+07 | 0.022756 | 2 |
4 | -74.000351 | -165.857059 | 2.832972e+07 | 0.000000 | 4 |
... | ... | ... | ... | ... | ... |
50624 | -77.999665 | -168.575384 | 2.878204e+07 | 0.158924 | 1 |
50625 | -77.999727 | -168.575438 | 2.878204e+07 | 0.154560 | 1 |
50626 | -77.999795 | -168.575496 | 2.878205e+07 | 0.152328 | 1 |
50627 | -77.999877 | -168.575567 | 2.878206e+07 | 0.144256 | 1 |
50628 | -77.999941 | -168.575622 | 2.878207e+07 | 0.152755 | 1 |
50629 rows × 5 columns
# Convert pandas dataframe into geopandas geodataframe
gdf = geopandas.GeoDataFrame(track, geometry=geopandas.points_from_xy(track.lon, track.lat))
# Assign crs information into geodatafraem
gdf.crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
# Convert crs of the geodataframe into the crs of the Sentinel-2 image
gdf = gdf.to_crs(img.crs)
gdf
lat | lon | seg_x | freeboard | type | geometry | |
---|---|---|---|---|---|---|
0 | -74.000104 | -165.856928 | 2.832969e+07 | 0.117155 | 8 | POINT (473636.020 1787760.734) |
1 | -74.000290 | -165.857027 | 2.832971e+07 | 0.052732 | 1 | POINT (473633.297 1787739.941) |
2 | -74.000306 | -165.857035 | 2.832972e+07 | 0.047613 | 1 | POINT (473633.065 1787738.167) |
3 | -74.000324 | -165.857044 | 2.832972e+07 | 0.022756 | 2 | POINT (473632.809 1787736.199) |
4 | -74.000351 | -165.857059 | 2.832972e+07 | 0.000000 | 4 | POINT (473632.410 1787733.144) |
... | ... | ... | ... | ... | ... | ... |
50624 | -77.999665 | -168.575384 | 2.878204e+07 | 0.158924 | 1 | POINT (417062.908 1339135.748) |
50625 | -77.999727 | -168.575438 | 2.878204e+07 | 0.154560 | 1 | POINT (417062.096 1339128.752) |
50626 | -77.999795 | -168.575496 | 2.878205e+07 | 0.152328 | 1 | POINT (417061.201 1339121.077) |
50627 | -77.999877 | -168.575567 | 2.878206e+07 | 0.144256 | 1 | POINT (417060.120 1339111.843) |
50628 | -77.999941 | -168.575622 | 2.878207e+07 | 0.152755 | 1 | POINT (417059.272 1339104.632) |
50629 rows × 6 columns
# Save coordinate information as independent columns (x, y)
gdf['x'] = gdf.geometry.apply(lambda x: x.x)
gdf['y'] = gdf.geometry.apply(lambda x: x.y)
# Boundary of the image
x_min = xs.min()
x_max = xs.max()
y_min = ys.min()
y_max = ys.max()
# Clip geodataframe fitting to the image extent
gdf = gdf[(gdf["x"]>=x_min) & (gdf["x"]<=x_max) & (gdf["y"]>=y_min) & (gdf["y"]<=y_max)].reset_index(drop=True)
idx = img.index(gdf["x"], gdf["y"])
gdf["pix_x"] = idx[1]
gdf["pix_y"] = idx[0]
gdf
lat | lon | seg_x | freeboard | type | geometry | x | y | pix_x | pix_y | |
---|---|---|---|---|---|---|---|---|---|---|
0 | -76.561369 | -167.431310 | 2.861903e+07 | 0.745570 | 1 | POINT (436941.428 1500872.263) | 436941.427651 | 1.500872e+06 | 740 | 0 |
1 | -76.561430 | -167.431354 | 2.861903e+07 | 0.657554 | 1 | POINT (436940.555 1500865.391) | 436940.555274 | 1.500865e+06 | 740 | 0 |
2 | -76.561478 | -167.431389 | 2.861904e+07 | 0.594501 | 1 | POINT (436939.876 1500860.042) | 436939.875610 | 1.500860e+06 | 740 | 0 |
3 | -76.561538 | -167.431433 | 2.861905e+07 | 0.564235 | 1 | POINT (436939.025 1500853.352) | 436939.024725 | 1.500853e+06 | 740 | 0 |
4 | -76.561600 | -167.431478 | 2.861905e+07 | 0.575839 | 1 | POINT (436938.128 1500846.308) | 436938.127990 | 1.500846e+06 | 740 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14625 | -77.545536 | -168.188739 | 2.873052e+07 | 0.182069 | 1 | POINT (423267.294 1390262.450) | 423267.294167 | 1.390262e+06 | 467 | 2212 |
14626 | -77.545623 | -168.188810 | 2.873053e+07 | 0.507950 | 1 | POINT (423266.121 1390252.646) | 423266.120538 | 1.390253e+06 | 467 | 2212 |
14627 | -77.545683 | -168.188859 | 2.873053e+07 | 0.494939 | 1 | POINT (423265.301 1390245.835) | 423265.300710 | 1.390246e+06 | 467 | 2213 |
14628 | -77.545741 | -168.188907 | 2.873054e+07 | 0.526345 | 1 | POINT (423264.511 1390239.308) | 423264.510794 | 1.390239e+06 | 467 | 2213 |
14629 | -77.545798 | -168.188954 | 2.873055e+07 | 0.636491 | 1 | POINT (423263.733 1390232.911) | 423263.732709 | 1.390233e+06 | 467 | 2213 |
14630 rows × 10 columns
Draw overlapped optical image and ATL10 surface types (leads or not).
plt.figure(figsize = (8,7))
plt.pcolormesh(xs, ys, array, vmin = 0, vmax = 10000, cmap = "Blues_r")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['type'], s = 5, vmin = 0, vmax = 2, cmap = "jet")
<matplotlib.collections.PathCollection at 0x7fe5b15cd8a0>
Draw optical image and ATL10 freeboard.
plt.figure(figsize = (8,7))
plt.pcolormesh(xs, ys, array, vmin = 0, vmax = 10000, cmap = "Blues_r")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['freeboard'], s = 5, vmin = 0, vmax = 1, cmap = "jet")
<matplotlib.collections.PathCollection at 0x7fe5b159a080>
✅ Floe statistics from ICESat-2 and Sentinel-2#
Based on the overlapped Sentinel-2 optical image, we will calculate floe statistics. Since the optical image can be a good reference of lead detection, we compare the lead frequency and floe size distribution from the ATL10 data and Sentinel-2 optical image.
First, let’s do a simple lead detection with the Sentinel-2 brigtness value. The dark area should be lead, and the white area should be sea ice floes.
# Histogram of pixel value
plt.figure(figsize = (6,3))
plt.hist(array.flatten(), range = (0, 12000), bins = 400)
plt.grid(ls = ":", lw = 1);
Based on this histogram, which value is appropriate to separate sea ice floes and leads?
# Threshold value to separate sea ice floes and leads
th = 9000
# Binary lead classification (0: lead, 1: sea ice floe)
classified = np.zeros(np.shape(array))
classified[array >= th] = 1
# Draw the lead classification image
plt.figure(figsize = (7,6))
plt.pcolormesh(xs, ys, classified, cmap = "gray")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['freeboard'], s = 5, vmin = 0, vmax = 1, cmap = "jet")
plt.show()
# Resample the raster value to the ICESat-2 track geodataframe
idx = img.index(gdf["x"], gdf["y"])
gdf["b4"] = array[idx[0], idx[1]]
gdf["lead_S2"] = classified[idx[0], idx[1]]
Now we will calculate the floe size statistics.
# Function to calculate sea ice floe length
def get_floe_length(freeboard, lead_mask, seg_dist):
# INPUT:
# freeboard: along-track freeboard measurement of ICESat-2 ATL10 track
# lead_mask: along-track lead detection result (0: lead; 1: non-lead)
# seg_dist: along-track distance of ICESat-2 ATL10 track (unit: meters)
floe_length = np.array([]) # Floe length (unit: m)
lead_length = np.array([]) # Floe length (unit: m)
floe_fb = np.array([]) # np.zeros(len(ice_leads_msk)) # Making big enough array
# Floe starting index
ice_cnt_st = 0
# Floe ending index
ice_cnt_en = 0
# Lead starting index
lead_cnt_st = 0
# Lead ending index
lead_cnt_en = 0
for i in range(1,len(freeboard)):
if (lead_mask[i] == 1) and (lead_mask[i-1] == 0): # start floe & stop lead
# Initialize floe
ice_cnt_st = i
ice_cnt_en = i
# Complete lead
lead_length = np.append(lead_length, abs(seg_dist[lead_cnt_en] - seg_dist[lead_cnt_st]))
elif (lead_mask[i] == 1) and (lead_mask[i-1] == 1): # grow floe
ice_cnt_en += 1
elif (lead_mask[i] == 0) and (lead_mask[i-1] == 0): # grow lead
lead_cnt_en += 1
elif (lead_mask[i] == 0) and (lead_mask[i-1] == 1): # stop floe & start lead
# Complete floe
floe_length = np.append(floe_length, abs(seg_dist[ice_cnt_en] - seg_dist[ice_cnt_st]))
floe_fb = np.append(floe_fb, np.mean(freeboard[ice_cnt_st:ice_cnt_en+1]))
# Initialize lead
lead_cnt_st = i
lead_cnt_en = i
# Removing spurious floes (< 50m, > 10 km, fb < 0.1)
remove_idx = np.where((floe_length < 50) | (floe_length > 10000) | (floe_fb < 0.1))[0]
floe_fb = np.delete(floe_fb, remove_idx)
floe_length = np.delete(floe_length,remove_idx)
remove_idx = np.where(lead_length < 10)[0]
lead_length = np.delete(lead_length,remove_idx)
return floe_length, floe_fb, lead_length
# Freeboard lead mask (lead if freeboard < 0.15 m)
lead_mask = (gdf.freeboard >= 0.15).values # Lead should be 0
# Floe statistics from ATL10 (along-track)
floe_length1, floe_fb1, lead_length1 = get_floe_length(gdf.freeboard, lead_mask, gdf.seg_x)
# Overlapped Sentinel-2 lead mask (lead if pixel value < threshold)
lead_mask = gdf.lead_S2.values # Lead should be 0
# Floe statistics from Sentinel-2 (along-track)
floe_length2, floe_fb2, lead_length2 = get_floe_length(gdf.freeboard, lead_mask, gdf.seg_x)
# FLoe length distribution
plt.figure(figsize = (5,4))
# Histogram on log scale.
# Use non-equal bin sizes, such that they look equal on log scale.
logbins = np.logspace(np.log10(50),np.log10(10000), 20)
plt.hist(floe_length1, bins=logbins, density = True, histtype = "step", label = "ATL10")
plt.hist(floe_length2, bins=logbins, density = True, histtype = "step", label = "S2")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Floe length (m)")
plt.ylabel("Probability")
plt.title("Floe size distribution")
plt.legend()
plt.show()
# Lead length distribution
plt.figure(figsize = (5,4))
# Histogram on log scale.
# Use non-equal bin sizes, such that they look equal on log scale.
logbins = np.logspace(np.log10(50), np.log10(10000), 20)
plt.hist(lead_length1, bins=logbins, density = True, histtype = "step", label = "ATL10")
plt.hist(lead_length2, bins=logbins, density = True, histtype = "step", label = "S2")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Lead length (m)")
plt.ylabel("Probability")
plt.title("Lead length distribution")
plt.legend()
plt.show()
Credited by Younghyun Koo (kooala317@gmail.com)