Compare ATL10 freeboard profile with Sentinel-2 optical images#

⭐ Objectives#

  • Query optical images in Google Earth Engine and download images overlapped with ICESat-2 track

  • Compare sea ice features from optical images and ICESat-2 ATL10 product

✅ Setting computing environment#

We’ll be using the following Python libraries in this notebook:

import glob, os
import requests
import numpy as np

from datetime import datetime
from datetime import timedelta

import h5py
import pandas as pd

import os

import warnings
warnings.filterwarnings('ignore')

import geopandas
import rasterio

import ee
import geemap

%matplotlib widget
import matplotlib.pyplot as plt

✅ Google Earth Engine Authentication and Initialization#

Before you start Google Earth Engine (ee library), you need to authenticate your access with your own Google account. This authentication can be done by running ee.Authenticate(). Please go to the link and allow the access, and you will be able to get an verification code. Please copy and paste this verification code into the input field and hit enter.

try:
    ee.Initialize()
except: 
    ee.Authenticate()
    ee.Initialize()

✅ Query Sentinel-2 images via Google Earth Engine using ATL10 track#

# Read ATL10 track
tutorial_data = "/home/jovyan/shared-public/ICESat-2-Hackweek/sea_ice"
filename = f"{tutorial_data}/processed_ATL10-02_20191113181045_07310501_006_01.h5"

# Create dataframe from the ATL10 file
with h5py.File(filename, 'r') as f:
    beam = 'gt2r'
    track = pd.DataFrame(data={'lat': f[beam]['freeboard_segment/latitude'][:],
                               'lon': f[beam]['freeboard_segment/longitude'][:],
                               'seg_x': f[beam]['freeboard_segment/seg_dist_x'][:],
                               'freeboard': f[beam]['freeboard_segment/beam_fb_height'][:],
                               'type': f[beam]['freeboard_segment/heights/height_segment_type'][:]
                              })

# Create Earth Engine feature collection with the ATL10 track
track_coord = list(zip([x for x in track.lon[::100]], [x for x in track.lat[::100]])) 
feature_track = ee.FeatureCollection(ee.Geometry.LineString(coords=track_coord, proj='EPSG:4326', geodesic=True)) 

# Define region of interest (ROI) using the ICESat-2 track feature collection
roi = feature_track.geometry()
# Read acquisition time of the ATL10 track from filename
time_atl = datetime.strptime(filename[-33:-19], "%Y%m%d%H%M%S")
# Start date (should be the ICESat-2 acquisition date)
t1 = time_atl.strftime("%Y-%m-%d")

# End date (1 day after the ICESat-2 acquistion date)
t2 = (datetime.strptime(t1, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

print(t1, t2)
2019-11-13 2019-11-14
# Query Sentinel-2 images using ATL10 file information
S2 = ee.ImageCollection("COPERNICUS/S2_SR").filterBounds(roi)\
.filterDate(t1, t2)\
.filter(ee.Filter.lt('CLOUD_COVERAGE_ASSESSMENT', 20))\
.filter(ee.Filter.lt('NODATA_PIXEL_PERCENTAGE', 10))
num = S2.size().getInfo() # Number of available images
print(num)
3
# Image ID for all images
ids = [S2.toList(num).getInfo()[i]['id'] for i in range(0, num)] 
ids
['COPERNICUS/S2_SR/20191113T183459_20191113T183501_T02CNU',
 'COPERNICUS/S2_SR/20191113T183459_20191113T183501_T03CVP',
 'COPERNICUS/S2_SR/20191113T183459_20191113T183501_T03CVQ']
time_diff = 3600*3 # Target time difference between ATL10 and Sentinel-2 < 3 hours

Map = geemap.Map()

for i in range(0, num):
    time_start = S2.toList(num).getInfo()[i]['properties']['system:time_start']
    time_s2 = datetime(1970, 1, 1) + timedelta(seconds = time_start/1000)   
    
    img_name = os.path.basename(ids[i])

    # Time difference between IS2 and S2 < defined time_diff
    if abs(time_atl-time_s2).seconds <= time_diff:
        img = ee.Image(ids[i]).select('B4')
        dim = img.getInfo()['bands'][0]['dimensions']
        if dim[0] > 10000 and dim[1] > 0:
            # # Download image into the local folder
            # geemap.download_ee_image(img, f"S2_{img_name}.tif", scale=50)
            Map.addLayer(img, {'min': 0, 'max': 10000}, img_name)
        else:
            print(f"SKIP {img_name}: Not a full image")
    else:
        print(f"SKIP {img_name}: Time difference > {time_diff/3600} hours with ICESat-2 track")

Map.centerObject(img, zoom = 7)
Map.addLayer(feature_track, {}, "IS2 track")
Map
# !pip install geedim

✅ Comparison between ICESat-2 and Sentinel-2#

Now we will compare the Sentinel-2 optical image we downloaded with the ICESat-2 ATL10 track.

# File name of the downloaded Sentinel-2 image
tutorial_data = "/home/jovyan/shared-public/ICESat-2-Hackweek/sea_ice"
img_name = f"{tutorial_data}/S2_20191113T183459_20191113T183501_T03CVQ.tif"
# Read Sentinel-2 image using raterio
img = rasterio.open(img_name)
array = img.read(1)
array[array < 0] = 0

height = array.shape[0]
width = array.shape[1]
cols, rows = np.meshgrid(np.arange(width), np.arange(height))

# XY projected coordinate of the image
xs, ys = rasterio.transform.xy(img.transform, rows, cols)
xs = np.array(xs)
ys = np.array(ys)

img.close()
plt.figure(figsize = (8, 7))
plt.imshow(array, cmap = "Blues_r", vmin = 0, vmax = 10000)
plt.colorbar(shrink = 0.5)
<matplotlib.colorbar.Colorbar at 0x7fe5c5972da0>

Let’s take a look at the ATL10 track data. We read the ATL10 h5 file as pandas dataframe. In order to assign the geographic coordinate information, we will convert this dataframe into geopandas geodataframe.

track
lat lon seg_x freeboard type
0 -74.000104 -165.856928 2.832969e+07 0.117155 8
1 -74.000290 -165.857027 2.832971e+07 0.052732 1
2 -74.000306 -165.857035 2.832972e+07 0.047613 1
3 -74.000324 -165.857044 2.832972e+07 0.022756 2
4 -74.000351 -165.857059 2.832972e+07 0.000000 4
... ... ... ... ... ...
50624 -77.999665 -168.575384 2.878204e+07 0.158924 1
50625 -77.999727 -168.575438 2.878204e+07 0.154560 1
50626 -77.999795 -168.575496 2.878205e+07 0.152328 1
50627 -77.999877 -168.575567 2.878206e+07 0.144256 1
50628 -77.999941 -168.575622 2.878207e+07 0.152755 1

50629 rows × 5 columns

# Convert pandas dataframe into geopandas geodataframe
gdf = geopandas.GeoDataFrame(track, geometry=geopandas.points_from_xy(track.lon, track.lat))

# Assign crs information into geodatafraem
gdf.crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

# Convert crs of the geodataframe into the crs of the Sentinel-2 image
gdf = gdf.to_crs(img.crs)
gdf
lat lon seg_x freeboard type geometry
0 -74.000104 -165.856928 2.832969e+07 0.117155 8 POINT (473636.020 1787760.734)
1 -74.000290 -165.857027 2.832971e+07 0.052732 1 POINT (473633.297 1787739.941)
2 -74.000306 -165.857035 2.832972e+07 0.047613 1 POINT (473633.065 1787738.167)
3 -74.000324 -165.857044 2.832972e+07 0.022756 2 POINT (473632.809 1787736.199)
4 -74.000351 -165.857059 2.832972e+07 0.000000 4 POINT (473632.410 1787733.144)
... ... ... ... ... ... ...
50624 -77.999665 -168.575384 2.878204e+07 0.158924 1 POINT (417062.908 1339135.748)
50625 -77.999727 -168.575438 2.878204e+07 0.154560 1 POINT (417062.096 1339128.752)
50626 -77.999795 -168.575496 2.878205e+07 0.152328 1 POINT (417061.201 1339121.077)
50627 -77.999877 -168.575567 2.878206e+07 0.144256 1 POINT (417060.120 1339111.843)
50628 -77.999941 -168.575622 2.878207e+07 0.152755 1 POINT (417059.272 1339104.632)

50629 rows × 6 columns

# Save coordinate information as independent columns (x, y)
gdf['x'] = gdf.geometry.apply(lambda x: x.x)
gdf['y'] = gdf.geometry.apply(lambda x: x.y)

# Boundary of the image
x_min = xs.min()
x_max = xs.max()
y_min = ys.min()
y_max = ys.max()

# Clip geodataframe fitting to the image extent
gdf = gdf[(gdf["x"]>=x_min) & (gdf["x"]<=x_max) & (gdf["y"]>=y_min) & (gdf["y"]<=y_max)].reset_index(drop=True)

idx = img.index(gdf["x"], gdf["y"])
gdf["pix_x"] = idx[1]
gdf["pix_y"] = idx[0]
gdf
lat lon seg_x freeboard type geometry x y pix_x pix_y
0 -76.561369 -167.431310 2.861903e+07 0.745570 1 POINT (436941.428 1500872.263) 436941.427651 1.500872e+06 740 0
1 -76.561430 -167.431354 2.861903e+07 0.657554 1 POINT (436940.555 1500865.391) 436940.555274 1.500865e+06 740 0
2 -76.561478 -167.431389 2.861904e+07 0.594501 1 POINT (436939.876 1500860.042) 436939.875610 1.500860e+06 740 0
3 -76.561538 -167.431433 2.861905e+07 0.564235 1 POINT (436939.025 1500853.352) 436939.024725 1.500853e+06 740 0
4 -76.561600 -167.431478 2.861905e+07 0.575839 1 POINT (436938.128 1500846.308) 436938.127990 1.500846e+06 740 1
... ... ... ... ... ... ... ... ... ... ...
14625 -77.545536 -168.188739 2.873052e+07 0.182069 1 POINT (423267.294 1390262.450) 423267.294167 1.390262e+06 467 2212
14626 -77.545623 -168.188810 2.873053e+07 0.507950 1 POINT (423266.121 1390252.646) 423266.120538 1.390253e+06 467 2212
14627 -77.545683 -168.188859 2.873053e+07 0.494939 1 POINT (423265.301 1390245.835) 423265.300710 1.390246e+06 467 2213
14628 -77.545741 -168.188907 2.873054e+07 0.526345 1 POINT (423264.511 1390239.308) 423264.510794 1.390239e+06 467 2213
14629 -77.545798 -168.188954 2.873055e+07 0.636491 1 POINT (423263.733 1390232.911) 423263.732709 1.390233e+06 467 2213

14630 rows × 10 columns

Draw overlapped optical image and ATL10 surface types (leads or not).

plt.figure(figsize = (8,7))
plt.pcolormesh(xs, ys, array, vmin = 0, vmax = 10000, cmap = "Blues_r")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['type'], s = 5, vmin = 0, vmax = 2, cmap = "jet")
<matplotlib.collections.PathCollection at 0x7fe5b15cd8a0>

Draw optical image and ATL10 freeboard.

plt.figure(figsize = (8,7))
plt.pcolormesh(xs, ys, array, vmin = 0, vmax = 10000, cmap = "Blues_r")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['freeboard'], s = 5, vmin = 0, vmax = 1, cmap = "jet")
<matplotlib.collections.PathCollection at 0x7fe5b159a080>

✅ Floe statistics from ICESat-2 and Sentinel-2#

Based on the overlapped Sentinel-2 optical image, we will calculate floe statistics. Since the optical image can be a good reference of lead detection, we compare the lead frequency and floe size distribution from the ATL10 data and Sentinel-2 optical image.

First, let’s do a simple lead detection with the Sentinel-2 brigtness value. The dark area should be lead, and the white area should be sea ice floes.

# Histogram of pixel value
plt.figure(figsize = (6,3))
plt.hist(array.flatten(), range = (0, 12000), bins = 400)
plt.grid(ls = ":", lw = 1);

Based on this histogram, which value is appropriate to separate sea ice floes and leads?

# Threshold value to separate sea ice floes and leads
th = 9000
# Binary lead classification (0: lead, 1: sea ice floe)
classified = np.zeros(np.shape(array))
classified[array >= th] = 1

# Draw the lead classification image
plt.figure(figsize = (7,6))
plt.pcolormesh(xs, ys, classified, cmap = "gray")
plt.colorbar(shrink = 0.5)
plt.scatter(gdf['x'], gdf['y'], c = gdf['freeboard'], s = 5, vmin = 0, vmax = 1, cmap = "jet")
plt.show()
# Resample the raster value to the ICESat-2 track geodataframe
idx = img.index(gdf["x"], gdf["y"])
gdf["b4"] = array[idx[0], idx[1]]
gdf["lead_S2"] = classified[idx[0], idx[1]]

Now we will calculate the floe size statistics.

# Function to calculate sea ice floe length
def get_floe_length(freeboard, lead_mask, seg_dist):
    # INPUT:
    # freeboard: along-track freeboard measurement of ICESat-2 ATL10 track
    # lead_mask: along-track lead detection result (0: lead; 1: non-lead)
    # seg_dist: along-track distance of ICESat-2 ATL10 track (unit: meters)
    
    floe_length = np.array([]) # Floe length (unit: m)
    lead_length = np.array([]) # Floe length (unit: m)
    floe_fb = np.array([]) # np.zeros(len(ice_leads_msk)) # Making big enough array
    
    # Floe starting index
    ice_cnt_st = 0
    # Floe ending index
    ice_cnt_en = 0
    
    # Lead starting index
    lead_cnt_st = 0
    # Lead ending index
    lead_cnt_en = 0
    
    for i in range(1,len(freeboard)):
        if (lead_mask[i] == 1) and (lead_mask[i-1] == 0): # start floe & stop lead
            # Initialize floe
            ice_cnt_st = i
            ice_cnt_en = i
            
            # Complete lead
            lead_length = np.append(lead_length, abs(seg_dist[lead_cnt_en] - seg_dist[lead_cnt_st]))
                                    
        elif (lead_mask[i] == 1) and (lead_mask[i-1] == 1): # grow floe
            ice_cnt_en += 1
            
        elif (lead_mask[i] == 0) and (lead_mask[i-1] == 0): # grow lead
            lead_cnt_en += 1
            
        elif (lead_mask[i] == 0) and (lead_mask[i-1] == 1): # stop floe & start lead
            # Complete floe
            floe_length = np.append(floe_length, abs(seg_dist[ice_cnt_en] - seg_dist[ice_cnt_st]))
            floe_fb = np.append(floe_fb, np.mean(freeboard[ice_cnt_st:ice_cnt_en+1]))
            
            # Initialize lead
            lead_cnt_st = i
            lead_cnt_en = i
        
            
    # Removing spurious floes (< 50m, > 10 km, fb < 0.1)
    remove_idx = np.where((floe_length < 50) | (floe_length > 10000) | (floe_fb < 0.1))[0]  
    floe_fb = np.delete(floe_fb, remove_idx)
    floe_length = np.delete(floe_length,remove_idx)
    
    remove_idx = np.where(lead_length < 10)[0]
    lead_length = np.delete(lead_length,remove_idx)

    return floe_length, floe_fb, lead_length
# Freeboard lead mask (lead if freeboard < 0.15 m)
lead_mask = (gdf.freeboard >= 0.15).values # Lead should be 0

# Floe statistics from ATL10 (along-track)
floe_length1, floe_fb1, lead_length1 = get_floe_length(gdf.freeboard, lead_mask, gdf.seg_x)
# Overlapped Sentinel-2 lead mask (lead if pixel value < threshold)
lead_mask = gdf.lead_S2.values # Lead should be 0

# Floe statistics from Sentinel-2 (along-track)
floe_length2, floe_fb2, lead_length2 = get_floe_length(gdf.freeboard, lead_mask, gdf.seg_x)
# FLoe length distribution

plt.figure(figsize = (5,4))

# Histogram on log scale. 
# Use non-equal bin sizes, such that they look equal on log scale.
logbins = np.logspace(np.log10(50),np.log10(10000), 20)
plt.hist(floe_length1, bins=logbins, density = True, histtype = "step", label = "ATL10")
plt.hist(floe_length2, bins=logbins, density = True, histtype = "step", label = "S2")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Floe length (m)")
plt.ylabel("Probability")
plt.title("Floe size distribution")
plt.legend()
plt.show()
# Lead length distribution

plt.figure(figsize = (5,4))

# Histogram on log scale. 
# Use non-equal bin sizes, such that they look equal on log scale.
logbins = np.logspace(np.log10(50), np.log10(10000), 20)
plt.hist(lead_length1, bins=logbins, density = True, histtype = "step", label = "ATL10")
plt.hist(lead_length2, bins=logbins, density = True, histtype = "step", label = "S2")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Lead length (m)")
plt.ylabel("Probability")
plt.title("Lead length distribution")
plt.legend()
plt.show()

Credited by Younghyun Koo (kooala317@gmail.com)