Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

MSI Cell-Point Mapping at Depth 29

LifeWatch ERIC

Convert EarthCARE MSI Level 2 products to HEALPix at depth 29, following the Sentinel-3 cell-point approach used in legacy-converters.

Key difference from notebook 03: No aggregation. Each MSI pixel maps bijectively to a unique HEALPix cell at depth 29 (~12 mm cell edge). This is a lossless spatial index — every original observation is preserved at its exact location.

ApproachDepthCell sizeAggregation
Notebook 03 (resolution-matched)17~458 mmode/mean/rms
This notebook (cell-point)29~12 mmnone
import earthcarekit as eck
import healpix_geo as hpxg
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xdggs

from earthcare_dggs.convert import lonlat_to_healpix_cells
from earthcare_dggs.settings import ELLIPSOID

ORBIT = "06109D"
DEPTH = 29  # Cell-point: finest HEALPix level (~12 mm cells)

Load MSI products

products = {}
for file_type in ["MSI_CM__2A", "MSI_COP_2A", "MSI_AOT_2A"]:
    try:
        result = eck.search_product(file_type=file_type, orbit_and_frame=ORBIT)
        with eck.read_product(result.filepath[0]) as ds:
            products[file_type] = ds.load()
        print(f"{file_type}: loaded")
    except Exception as e:
        print(f"{file_type}: not available ({e})")

msi_cm = products["MSI_CM__2A"]
print(f"\nSwath shape: {msi_cm['latitude_swath'].shape}")
print(f"Total pixels: {msi_cm['latitude_swath'].size:,}")
MSI_CM__2A: loaded
MSI_COP_2A: loaded
MSI_AOT_2A: loaded

Swath shape: (11216, 357)
Total pixels: 4,004,112

Cell-point mapping at depth 29

Each MSI pixel (lon, lat) is assigned to a unique HEALPix cell at depth 29. At this depth, cells are ~12 mm — far smaller than the MSI ~500 m pixel footprint, so each pixel maps to exactly one cell (bijective). No aggregation is needed.

# Extract coordinates
lat = msi_cm["latitude_swath"].values
lon = msi_cm["longitude_swath"].values
valid = np.isfinite(lat) & np.isfinite(lon)

print(f"Valid pixels: {valid.sum():,} / {valid.size:,}")

# Map every valid pixel to a depth-29 cell
cell_ids = lonlat_to_healpix_cells(
    lon[valid], lat[valid], depth=DEPTH, ellipsoid=ELLIPSOID,
)

n_unique = len(np.unique(cell_ids))
print(f"\nCell IDs assigned: {len(cell_ids):,}")
print(f"Unique cells:      {n_unique:,}")
print(f"Bijective: {n_unique == len(cell_ids)}")
Valid pixels: 4,004,112 / 4,004,112

Cell IDs assigned: 4,004,112
Unique cells:      4,004,112
Bijective: True

Build cell-point dataset

Unlike notebook 03, we preserve each pixel value as-is, indexed by its depth-29 cell ID. Original dtypes are kept (int8, int16, float32) — no conversion to float.

from earthcare_dggs.convert import detect_fill_value
from earthcare_dggs.settings import MSI_VARIABLES

data_vars = {}

for var_name, config in MSI_VARIABLES.items():
    source_ds = products.get(config["source"])
    if source_ds is None or var_name not in source_ds:
        continue

    raw = source_ds[var_name].values.ravel()

    # Keep only pixels with valid coordinates
    raw_valid = raw[valid.ravel()]

    # Mask fill values with NaN (promote to float32 only when fill values exist)
    fill = config.get("fill") or detect_fill_value(raw_valid)
    if fill is not None:
        out = raw_valid.astype(np.float32)
        out[raw_valid == fill] = np.nan
    elif raw_valid.dtype in (np.float32, np.float64):
        out = raw_valid.astype(np.float32)
        out[~np.isfinite(raw_valid) | (raw_valid <= -1e30)] = np.nan
    else:
        out = raw_valid.astype(np.float32)

    data_vars[var_name] = out
    n_valid = np.isfinite(out).sum()
    print(f"  {var_name}: {n_valid:,} valid / {len(out):,} pixels")

print(f"\nTotal variables: {len(data_vars)}")
  cloud_mask: 3,987,502 valid / 4,004,112 pixels
  cloud_type: 3,981,555 valid / 4,004,112 pixels
  cloud_phase: 954,386 valid / 4,004,112 pixels
  surface_classification: 4,004,112 valid / 4,004,112 pixels
  quality_status: 4,004,112 valid / 4,004,112 pixels
  cloud_top_height: 741,865 valid / 4,004,112 pixels
  cloud_top_temperature: 741,865 valid / 4,004,112 pixels
  cloud_top_pressure: 741,865 valid / 4,004,112 pixels
  cloud_optical_thickness: 748,392 valid / 4,004,112 pixels
  cloud_effective_radius: 748,392 valid / 4,004,112 pixels
  cloud_water_path: 748,392 valid / 4,004,112 pixels
  cloud_top_temperature_error: 741,865 valid / 4,004,112 pixels
  isccp_cloud_type: 4,004,112 valid / 4,004,112 pixels
  aerosol_optical_thickness_670nm: 684,230 valid / 4,004,112 pixels
  aerosol_optical_thickness_865nm: 341,654 valid / 4,004,112 pixels
  angstrom_parameter_670nm_865nm: 341,603 valid / 4,004,112 pixels
  aerosol_optical_thickness_670nm_error: 684,230 valid / 4,004,112 pixels
  aerosol_optical_thickness_865nm_error: 341,654 valid / 4,004,112 pixels

Total variables: 18
# Build xarray Dataset indexed by depth-29 cell IDs
msi_cellpoint = xr.Dataset(
    {name: ("cell_ids", data) for name, data in data_vars.items()},
    coords={"cell_ids": cell_ids},
)

msi_cellpoint.attrs.update({
    "source": "EarthCARE MSI L2A",
    "healpix_level": DEPTH,
    "healpix_indexing": "nested",
    "healpix_ellipsoid": ELLIPSOID,
    "orbit_and_frame": ORBIT,
    "resampler": "cell-point (no aggregation)",
})

print(msi_cellpoint)
<xarray.Dataset> Size: 320MB
Dimensions:                                (cell_ids: 4004112)
Coordinates:
  * cell_ids                               (cell_ids) uint64 32MB 26284203768...
Data variables: (12/18)
    cloud_mask                             (cell_ids) float32 16MB 3.0 ... nan
    cloud_type                             (cell_ids) float32 16MB 7.0 ... nan
    cloud_phase                            (cell_ids) float32 16MB 1.0 ... nan
    surface_classification                 (cell_ids) float32 16MB -3.276e+04...
    quality_status                         (cell_ids) float32 16MB 0.0 ... 3.0
    cloud_top_height                       (cell_ids) float32 16MB 5.634e+03 ...
    ...                                     ...
    isccp_cloud_type                       (cell_ids) float32 16MB 8.0 ... -1...
    aerosol_optical_thickness_670nm        (cell_ids) float32 16MB nan ... nan
    aerosol_optical_thickness_865nm        (cell_ids) float32 16MB nan ... nan
    angstrom_parameter_670nm_865nm         (cell_ids) float32 16MB nan ... nan
    aerosol_optical_thickness_670nm_error  (cell_ids) float32 16MB nan ... nan
    aerosol_optical_thickness_865nm_error  (cell_ids) float32 16MB nan ... nan
Attributes:
    source:             EarthCARE MSI L2A
    healpix_level:      29
    healpix_indexing:   nested
    healpix_ellipsoid:  WGS84
    orbit_and_frame:    06109D
    resampler:          cell-point (no aggregation)

Comparison: depth 29 vs depth 17

Show the size difference between the cell-point (depth 29) and aggregated (depth 17) representations.

n_pixels = len(cell_ids)

# Compare with depth 17 (aggregated)
cell_ids_d17 = lonlat_to_healpix_cells(
    lon[valid], lat[valid], depth=17, ellipsoid=ELLIPSOID,
)
n_unique_d17 = len(np.unique(cell_ids_d17))

print(f"{'Approach':<25} {'Depth':>6} {'Cells':>12} {'Cell size':>12}")
print("-" * 60)
print(f"{'Cell-point (this nb)':<25} {DEPTH:>6} {n_pixels:>12,} {'~12 mm':>12}")
print(f"{'Aggregated (nb 03)':<25} {17:>6} {n_unique_d17:>12,} {'~458 m':>12}")
print(f"\nCompression ratio (aggregation): {n_pixels / n_unique_d17:.1f}x fewer cells at depth 17")
Approach                   Depth        Cells    Cell size
------------------------------------------------------------
Cell-point (this nb)          29    4,004,112       ~12 mm
Aggregated (nb 03)            17    4,004,112       ~458 m

Compression ratio (aggregation): 1.0x fewer cells at depth 17

Multi-resolution access

A key advantage of depth 29: we can zoom out to any coarser level on the fly using healpix_geo.nested.zoom_to. Here we demonstrate coarsening from depth 29 to depth 17 (equivalent to notebook 03) and depth 10 (for visualization).

from earthcare_dggs.convert import aggregate_to_healpix

def coarsen_cellpoint(ds, from_depth, to_depth, variables_config):
    """Coarsen a cell-point dataset to a coarser HEALPix depth with aggregation."""
    coarse_ids = hpxg.nested.zoom_to(
        ds.cell_ids.values, depth=from_depth, new_depth=to_depth,
    )
    unique_coarse = np.unique(coarse_ids)
    coarse_to_idx = {int(c): i for i, c in enumerate(unique_coarse)}

    coarse_data = {}
    for var_name in ds.data_vars:
        vals = ds[var_name].values
        mask = np.isfinite(vals)
        if mask.sum() == 0:
            continue

        group_idx = np.array([coarse_to_idx.get(int(c), -1) for c in coarse_ids[mask]])
        keep = group_idx >= 0

        agg = variables_config.get(var_name, {}).get("agg", "mean")
        coarse_data[var_name] = aggregate_to_healpix(
            vals[mask][keep], group_idx[keep], len(unique_coarse), method=agg,
        )

    return xr.Dataset(
        {name: ("cell_ids", data) for name, data in coarse_data.items()},
        coords={"cell_ids": unique_coarse},
    )


# Coarsen to depth 17 (same as notebook 03)
coarse_d17 = coarsen_cellpoint(msi_cellpoint, DEPTH, 17, MSI_VARIABLES)
coarse_d17 = xdggs.decode(
    coarse_d17,
    grid_info=xdggs.HealpixInfo(level=17, indexing_scheme="nested"),
)
print(f"Coarsened to depth 17: {len(coarse_d17.cell_ids):,} cells")

# Coarsen to depth 10 (for visualization)
coarse_d10 = coarsen_cellpoint(msi_cellpoint, DEPTH, 10, MSI_VARIABLES)
coarse_d10 = xdggs.decode(
    coarse_d10,
    grid_info=xdggs.HealpixInfo(level=10, indexing_scheme="nested"),
)
print(f"Coarsened to depth 10: {len(coarse_d10.cell_ids):,} cells")
Coarsened to depth 17: 4,004,112 cells
Coarsened to depth 10: 21,659 cells

Static visualization

Compare the same variable at depth 29 (raw pixels), depth 17, and depth 10.

import cartopy.crs as ccrs
import cartopy.feature as cfeature

var_name = "cloud_mask"

fig, axes = plt.subplots(
    1, 3, figsize=(18, 6),
    subplot_kw={"projection": ccrs.PlateCarree()},
)

panels = [
    (axes[0], msi_cellpoint, DEPTH, f"Depth {DEPTH} (cell-point, {len(cell_ids):,} pixels)"),
    (axes[1], coarse_d17, 17, f"Depth 17 (aggregated, {len(coarse_d17.cell_ids):,} cells)"),
    (axes[2], coarse_d10, 10, f"Depth 10 (aggregated, {len(coarse_d10.cell_ids):,} cells)"),
]

for ax, ds, depth, title in panels:
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.BORDERS, linewidth=0.3)
    ax.add_feature(cfeature.LAND, alpha=0.1)
    gl = ax.gridlines(draw_labels=True, linewidth=0.3)
    gl.top_labels = False
    gl.right_labels = False

    data = ds[var_name].values
    mask = np.isfinite(data)

    c_lon, c_lat = hpxg.nested.healpix_to_lonlat(
        ds.cell_ids.values[mask], depth=depth, ellipsoid=ELLIPSOID,
    )

    sc = ax.scatter(
        c_lon, c_lat, c=data[mask],
        s=0.3, cmap="Set1", vmin=0, vmax=3,
        transform=ccrs.PlateCarree(),
    )
    ax.set_title(title, fontsize=10)

plt.colorbar(sc, ax=axes, shrink=0.6, label=var_name)
plt.suptitle(
    f"EarthCARE MSI — {var_name} at three depths ({ELLIPSOID}) — Orbit {ORBIT}",
    fontsize=13,
)
plt.tight_layout()
plt.show()
/var/folders/zf/53jxd5nj2j3dmfjqpx41p24c0000gn/T/ipykernel_57219/135473460.py:44: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()
<Figure size 1800x600 with 4 Axes>

Interactive visualization

coarse_d10.dggs.explore()

Save as DGGS-Zarr

Save the cell-point dataset to Zarr. At depth 29, the cell IDs serve as a lossless spatial index — any coarser resolution can be derived on the fly.

import zarr

output_path = "earthcare_msi_cellpoint_06109D.zarr"

# Write to Zarr directly (skip xdggs.encode — no DGGSIndex on depth-29 dataset)
msi_cellpoint.to_zarr(output_path, mode="w")

# Add DGGS Zarr convention metadata
dggs_convention = {
    "uuid": "7b255807-140c-42ca-97f6-7a1cfecdbc38",
    "name": "dggs",
    "schema_url": "https://raw.githubusercontent.com/zarr-conventions/dggs/refs/tags/v1/schema.json",
    "spec_url": "https://github.com/zarr-conventions/dggs/blob/v1/README.md",
    "description": "Discrete Global Grid Systems convention for zarr",
}

dggs_meta = {
    "name": "healpix",
    "refinement_level": DEPTH,
    "indexing_scheme": "nested",
    "ellipsoid": {
        "name": "wgs84",
        "semimajor_axis": 6378137.0,
        "inverse_flattening": 298.257223563,
    },
    "spatial_dimension": "cell_ids",
    "coordinate": "cell_ids",
    "compression": "none",
}

root = zarr.open_group(output_path, mode="r+")
root.attrs["zarr_conventions"] = [dggs_convention]
root.attrs["dggs"] = dggs_meta

print(f"Saved to {output_path}")
print(f"Cells: {len(msi_cellpoint.cell_ids):,}")
print(f"Variables: {len(msi_cellpoint.data_vars)}")
print(f"\nDGGS metadata: {dggs_meta}")
Saved to earthcare_msi_cellpoint_06109D.zarr
Cells: 4,004,112
Variables: 18

DGGS metadata: {'name': 'healpix', 'refinement_level': 29, 'indexing_scheme': 'nested', 'ellipsoid': {'name': 'wgs84', 'semimajor_axis': 6378137.0, 'inverse_flattening': 298.257223563}, 'spatial_dimension': 'cell_ids', 'coordinate': 'cell_ids', 'compression': 'none'}
/Users/annef/Documents/FAIR2Adapt/DGGS/earthcare-dggs/.pixi/envs/default/lib/python3.13/site-packages/zarr/api/asynchronous.py:247: ZarrUserWarning: Consolidated metadata is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.
  warnings.warn(
# Verify round-trip: re-open and decode with xdggs
ds_check = xr.open_zarr(output_path)
ds_check = xdggs.decode(
    ds_check,
    grid_info=xdggs.HealpixInfo(level=DEPTH, indexing_scheme="nested"),
)
print(ds_check)
print(f"\nIndex type: {type(ds_check.xindexes['cell_ids'])}")
print(f"Cell IDs match: {np.array_equal(ds_check.cell_ids.values, msi_cellpoint.cell_ids.values)}")
<xarray.Dataset> Size: 320MB
Dimensions:                                (cell_ids: 4004112)
Coordinates:
  * cell_ids                               (cell_ids) uint64 32MB 26284203768...
Data variables: (12/18)
    aerosol_optical_thickness_670nm        (cell_ids) float32 16MB ...
    aerosol_optical_thickness_670nm_error  (cell_ids) float32 16MB ...
    aerosol_optical_thickness_865nm        (cell_ids) float32 16MB ...
    aerosol_optical_thickness_865nm_error  (cell_ids) float32 16MB ...
    angstrom_parameter_670nm_865nm         (cell_ids) float32 16MB ...
    cloud_effective_radius                 (cell_ids) float32 16MB ...
    ...                                     ...
    cloud_top_temperature_error            (cell_ids) float32 16MB ...
    cloud_type                             (cell_ids) float32 16MB ...
    cloud_water_path                       (cell_ids) float32 16MB ...
    isccp_cloud_type                       (cell_ids) float32 16MB ...
    quality_status                         (cell_ids) float32 16MB ...
    surface_classification                 (cell_ids) float32 16MB ...
Indexes:
    cell_ids  HealpixIndex(level=29, indexing_scheme=nested, kind=pandas)
Attributes:
    source:             EarthCARE MSI L2A
    healpix_level:      29
    healpix_indexing:   nested
    healpix_ellipsoid:  WGS84
    orbit_and_frame:    06109D
    resampler:          cell-point (no aggregation)
    zarr_conventions:   [{'uuid': '7b255807-140c-42ca-97f6-7a1cfecdbc38', 'na...
    dggs:               {'name': 'healpix', 'refinement_level': 29, 'indexing...

Index type: <class 'xdggs.healpix.HealpixIndex'>
Cell IDs match: True