Combines:
results/posterior_bambi_healpix.nc— joint posterior of the GLMM fixed effects from the Tier-1 HEALPix fit (Phase C; 2 chains × 2000 draws = 4 000 samples).healpix_port/outputs_iberia/climate_tei_pei_future_<horizon>_healpix.npz— DestinE-derived future TEI / PEI on the 110-cell Iberia HEALPix nside=64 NESTED grid.healpix_port/outputs_iberia/sampling_continent_healpix.npz— recent-period sampling effort per cell (held fixed for the projection).healpix_port/outputs_iberia/dataGLMM_extinction.parquet— Tier-1 HEALPix dataGLMM, used to recover the z-score standardisation constants applied to the future predictors.
For each (species × active cell × posterior draw) we compute the
linear predictor η = X · β + species_intercept and apply the
logistic link to get an extirpation probability. We aggregate per
species and write results/projection_headline.json with the
ranking + 95 % HDI per horizon.
Critical step — predictor scaling
The Tier-1 HEALPix GLMM was fit on z-scored predictors. The
standardisation constants (mean + ddof=1 SD on TEI_bs,
TEI_delta, PEI_bs, PEI_delta, sampling) are recovered from
the unscaled raw columns of the parquet. Apply the same mean / SD
to the future predictors before plugging into the design matrix;
using the future-period mean / SD instead would silently invalidate
the projection.
import json
from pathlib import Path
import arviz as az
import numpy as np
import pandas as pd
import xarray as xr
# Make the healpix_port package importable when run with cwd=notebooks/
import sys as _sys
_sys.path.insert(0, str(Path("..").resolve()))
from healpix_port._dggs_metadata import PROJECT_DGGS_ATTRS # noqa: E402
# scipy.special.expit intentionally NOT imported — we report the GLMM
# linear predictor η directly, not its logistic transform. See the
# "We REPORT η directly" comment in the per-species inner loop, and
# 05_outcome.md § Limitations.ROOT = Path("..").resolve()
HPORT = ROOT / "healpix_port"
OUT_DIR = HPORT / "outputs_iberia"
RESULTS_DIR = ROOT / "results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
HORIZONS = ["2020_2029", "2030_2039"]
N_DRAWS = 1000
RNG = np.random.default_rng(42)Recover Tier-1 standardisation constants from dataGLMM_extinction.parquet¶
Each sc_<col> column is (<col> - mean) / sd with sd computed as
the unbiased sample SD (ddof=1, matching R’s scale() and
healpix_port/05_regression_healpix.py). Recompute the mean + sd
from the raw columns; cross-check against the saved sc_ columns
to confirm.
parquet_path = OUT_DIR / "dataGLMM_extinction.parquet"
df = pd.read_parquet(parquet_path)
print(f"Loaded {parquet_path.name}: {df.shape[0]:,} rows")
SCALED_COLS = ["sampling", "TEI_bs", "TEI_delta", "PEI_bs", "PEI_delta"]
scaling = {}
for col in SCALED_COLS:
mean = float(df[col].mean())
sd = float(df[col].std(ddof=1))
rederived = (df[col] - mean) / sd
diff = float(np.max(np.abs(rederived - df[f"sc_{col}"])))
scaling[col] = {"mean": mean, "sd": sd, "max_check_diff": diff}
print(
f" {col}: mean={mean:.6f}, sd={sd:.6f}, "
f"max |sc_check - sc_orig| = {diff:.2e}"
)
assert all(v["max_check_diff"] < 1e-6 for v in scaling.values()), (
"Standardisation constants do not match the saved sc_ columns. "
"Tier-1 z-score recovery is invalid."
)
def _z(arr: np.ndarray, col: str) -> np.ndarray:
"""Apply the Tier-1 z-score for column `col` to a future-predictor array."""
return (arr - scaling[col]["mean"]) / scaling[col]["sd"]Load the Tier-1 HEALPix bambi posterior¶
idata = az.from_netcdf(RESULTS_DIR / "posterior_bambi_healpix.nc")
posterior = idata.posterior
PARAM_NAMES = [
"Intercept",
"sc_sampling",
"sc_TEI_bs",
"sc_TEI_delta",
"sc_TEI_bs:sc_TEI_delta",
"sc_PEI_bs",
"sc_PEI_delta",
"sc_PEI_bs:sc_PEI_delta",
"sc_TEI_bs:sc_PEI_bs",
"sc_TEI_delta:sc_PEI_delta",
]
missing = [p for p in PARAM_NAMES if p not in posterior.data_vars]
if missing:
raise KeyError(f"Posterior is missing fixed-effect terms: {missing}")
flat = posterior.stack(sample=("chain", "draw"))
n_samples_total = flat.sizes["sample"]
print(f"Total posterior samples (chain × draw): {n_samples_total}")
beta_full = np.column_stack(
[flat[name].values for name in PARAM_NAMES]
) # (n_samples_total, 10)
print(f"Beta matrix shape: {beta_full.shape} (samples × params)")
# Per-species random intercept (1|species).
re_da = flat["1|species"]
factor_dim = [d for d in re_da.dims if d != "sample"][0]
species_re_full = re_da.transpose("sample", factor_dim).values # (samples, 31)
species_factor_levels = [str(x) for x in posterior.coords[factor_dim].values]
print(
f"Random-intercept species levels ({len(species_factor_levels)}): "
f"{species_factor_levels[:3]} ..."
)
# Subsample to N_DRAWS draws (seeded for reproducibility).
draw_idx = RNG.choice(n_samples_total, size=N_DRAWS, replace=False)
beta = beta_full[draw_idx] # (N_DRAWS, 10)
species_re_draws = species_re_full[draw_idx] # (N_DRAWS, n_spp_re)
print(f"Subsampled to {N_DRAWS} draws.")Sampling effort term (held at recent-period mean per cell)¶
sc = xr.open_dataset(OUT_DIR / "sampling_continent_healpix.nc")
sampling_recent = sc["sampling_recent"].values # (110,)
sampling_total = sc["sampling_total"].values # (110,)
# Match Tier-1 contract: GLMM was fit with `sampling` = samp_total
# (see healpix_port/05_regression_healpix.py:99). For the projection
# we use the same column to keep the sampling z-score self-consistent.
sampling = sampling_total
active_mask = ~np.isnan(sampling)
n_active = int(active_mask.sum())
print(f"Active cells (sampled at least once): {n_active}/{len(sampling)}")
species_in_parquet = sorted(df["species"].unique().tolist())
n_spp_data = len(species_in_parquet)
print(f"Species in dataGLMM: {n_spp_data}")
spp_to_re_col = {
sp: species_factor_levels.index(sp)
for sp in species_in_parquet
if sp in species_factor_levels
}Design-matrix builder¶
Column order matches PARAM_NAMES exactly. eta = X · β.T then
p_extirp = expit(eta + species_intercept).
def build_design_row(
sc_sampling: np.ndarray,
sc_TEI_bs: np.ndarray,
sc_TEI_delta: np.ndarray,
sc_PEI_bs: np.ndarray,
sc_PEI_delta: np.ndarray,
) -> np.ndarray:
n = len(sc_sampling)
return np.column_stack([
np.ones(n),
sc_sampling,
sc_TEI_bs,
sc_TEI_delta,
sc_TEI_bs * sc_TEI_delta,
sc_PEI_bs,
sc_PEI_delta,
sc_PEI_bs * sc_PEI_delta,
sc_TEI_bs * sc_PEI_bs,
sc_TEI_delta * sc_PEI_delta,
])
sc_sampling_active = _z(sampling[active_mask], "sampling")
# (n_active,) — same for all horizons / species.
# Need the historical observation mask per species to restrict
# per-species summary to cells the species was observed in (matches
# how the GLMM was fit — only species-cell pairs with non-NaN
# extinction/colonisation enter the data).
pa = xr.open_dataset(OUT_DIR / "presence_absence_healpix.nc")
prab_baseline = pa["prab_baseline"].values # (31, 110)
prab_recent = pa["prab_recent"].values
prab_species_list = [str(s) for s in pa["species"].values]Project per horizon¶
projection_summary = {"horizons": {}, "method": {}}
for horizon in HORIZONS:
src = OUT_DIR / f"climate_tei_pei_future_{horizon}_healpix.nc"
if not src.exists():
print(f"[skip] {src.name} missing — run 06_destine_clean.py first.")
continue
fut = xr.open_dataset(src)
fut_species = [str(s) for s in fut["species"].values]
# Reorder future arrays to match dataGLMM species order.
if fut_species != species_in_parquet:
order = [fut_species.index(sp) for sp in species_in_parquet
if sp in fut_species]
else:
order = list(range(len(fut_species)))
TEI_bs_fut = fut["tei_bs"].values[order] # (n_spp, 110)
PEI_bs_fut = fut["pei_bs"].values[order]
TEI_delta_fut = fut["tei_delta"].values[order]
PEI_delta_fut = fut["pei_delta"].values[order]
# Map presence-absence rows into the same dataGLMM order.
pa_order = [prab_species_list.index(sp) for sp in species_in_parquet
if sp in prab_species_list]
prab_baseline_ord = prab_baseline[pa_order] # (n_spp, 110)
prab_recent_ord = prab_recent[pa_order]
# Per-cell community-mean η accumulator (across species historically
# observed in the cell). Linear-predictor units (log-odds) — kept
# un-transformed to avoid expit() saturation under future predictors.
eta_per_cell_sum = np.zeros(len(sampling), dtype=np.float64)
n_species_per_cell = np.zeros(len(sampling), dtype=np.int64)
species_records = []
for i, sp in enumerate(species_in_parquet):
# Species' historical range = baseline-occupied OR recent-
# observed cells (matches the GLMM data filter, where each
# row is a species-cell pair where the species was observed
# in at least one of the two periods).
observed_any = (prab_baseline_ord[i] == 1) | (prab_recent_ord[i] == 1)
# Future TEI/PEI for this species at active cells, restricted
# to historically observed cells.
cell_mask = active_mask & observed_any
if cell_mask.sum() == 0:
print(f" {sp}: no overlap of active × observed cells; skipped")
continue
tei_bs_v = TEI_bs_fut[i, cell_mask]
tei_dl_v = TEI_delta_fut[i, cell_mask]
pei_bs_v = PEI_bs_fut[i, cell_mask]
pei_dl_v = PEI_delta_fut[i, cell_mask]
sampling_v = sampling[cell_mask]
valid = (
np.isfinite(tei_bs_v)
& np.isfinite(tei_dl_v)
& np.isfinite(pei_bs_v)
& np.isfinite(pei_dl_v)
& np.isfinite(sampling_v)
)
if valid.sum() == 0:
print(f" {sp}: no valid cells; skipped")
continue
# Map cell_mask → index within the full (110,) cell vector for
# the per-cell raster accumulator.
cell_full_idx = np.flatnonzero(cell_mask)[valid]
sc_TEI_bs_v = _z(tei_bs_v[valid], "TEI_bs")
sc_TEI_delta_v = _z(tei_dl_v[valid], "TEI_delta")
sc_PEI_bs_v = _z(pei_bs_v[valid], "PEI_bs")
sc_PEI_delta_v = _z(pei_dl_v[valid], "PEI_delta")
sc_sampling_v = _z(sampling_v[valid], "sampling")
X = build_design_row(
sc_sampling_v, sc_TEI_bs_v, sc_TEI_delta_v,
sc_PEI_bs_v, sc_PEI_delta_v,
) # (n_valid, 10)
eta = X @ beta.T # (n_valid, N_DRAWS)
if sp in spp_to_re_col:
re_per_draw = species_re_draws[:, spp_to_re_col[sp]] # (N_DRAWS,)
eta = eta + re_per_draw[np.newaxis, :]
# We REPORT η directly, not p = expit(η). Reason: future TEI/PEI
# z-scores under SSP3-7.0 land 5–10× outside the Tier-1 training
# distribution (a multi-decade warming signal vs the 60-year
# training-period delta). expit() saturates near 1.0 in that
# extrapolation regime, making absolute probabilities
# uninterpretable. η preserves relative ranking and the linear
# predictor magnitude is the GLMM's authentic signal — it is
# also what the bambi NUTS posterior is actually sampled over.
# See nanopubs/drafts/05_outcome.md § Limitations.
# Per-species summary across cells × draws:
# - per-cell posterior-mean η (averaged over draws)
# - per-draw species-level mean η (averaged over cells)
eta_post_mean_per_cell = eta.mean(axis=1) # (n_valid,)
eta_post_per_draw = eta.mean(axis=0) # (N_DRAWS,)
post_mean_eta = float(eta_post_per_draw.mean())
# az.hdi on a 1-d array returns shape (2,) = [low, high]
hdi = az.hdi(eta_post_per_draw, hdi_prob=0.95)
hdi_low, hdi_high = float(hdi[0]), float(hdi[1])
# A non-saturating proxy for "fraction of cells projected to
# exceed the moderate-risk threshold of η = 0 (= log-odds 0,
# i.e. p > 0.5)". Robust to extrapolation at the per-cell level.
n_cells_eta_gt_0 = int((eta_post_mean_per_cell > 0).sum())
species_records.append({
"species": sp,
"post_mean_eta": post_mean_eta,
"eta_hdi95_low": hdi_low,
"eta_hdi95_high": hdi_high,
"n_cells": int(valid.sum()),
"n_cells_eta_gt_0": n_cells_eta_gt_0,
})
eta_per_cell_sum[cell_full_idx] += eta_post_mean_per_cell
n_species_per_cell[cell_full_idx] += 1
species_records.sort(
key=lambda r: r["post_mean_eta"], reverse=True,
)
# Per-cell community-mean η (gitignored CF NetCDF). Linear predictor
# in log-odds units; not transformed via expit because future
# predictors are far outside training distribution and expit()
# saturates uninformatively.
with np.errstate(invalid="ignore"):
eta_per_cell_mean = np.where(
n_species_per_cell > 0,
eta_per_cell_sum / np.maximum(n_species_per_cell, 1),
np.nan,
)
# Use Tier-1 cell-centre lon/lat from presence_absence_healpix.nc.
lon_cell = pa["lon"].values.astype(np.float32)
lat_cell = pa["lat"].values.astype(np.float32)
cell_idx = pa["cell_ids"].values.astype(np.int64)
decade_label = horizon.replace("_", "-")
raster_ds = xr.Dataset(
data_vars={
"community_mean_eta": (
('cells',),
eta_per_cell_mean.astype(np.float32),
{
"long_name": (
"per-cell community-mean GLMM linear predictor (η, "
"log-odds of extirpation) under SSP3-7.0"
),
"units": "1",
"grid_mapping": "crs_wgs84",
"comment": (
f"Mean η across {len(species_in_parquet)} species, "
f"each species' contribution averaged over {N_DRAWS} "
"posterior draws. η > 0 → projected p > 0.5; "
"η > +5 → near logit saturation (p ≈ 0.99). "
"Reported as η rather than expit(η) because future "
"predictors lie 5–10× outside training distribution "
"where expit() saturates uninformatively."
),
"_FillValue": np.float32(np.nan),
},
),
# CF grid_mapping container — declares the coordinate
# reference system. WGS84 (EPSG:4326) for the lon/lat
# cell-centre coords; analytical/visualisation projection
# is ETRS89 / LAEA Europe (EPSG:3035) via ccrs.epsg(3035).
"crs_wgs84": (
(),
np.int8(0),
{
"grid_mapping_name": "latitude_longitude",
"long_name": "CRS for cell-centre lon/lat coordinates",
"longitude_of_prime_meridian": 0.0,
"semi_major_axis": 6378137.0,
"inverse_flattening": 298.257223563,
"epsg_code": "EPSG:4326",
"crs_wkt": (
'GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",'
'ELLIPSOID["WGS 84",6378137,298.257223563]],'
'CS[ellipsoidal,2],AXIS["latitude",north],'
'AXIS["longitude",east],ID["EPSG",4326]]'
),
"comment": (
"Analytical / visualisation projection for figures "
"and downstream EU-biodiversity reporting is "
"ETRS89 / LAEA Europe (EPSG:3035) — the canonical "
"EEA / Natura 2000 / EUNIS / INSPIRE equal-area grid."
),
},
),
"lon": (
('cells',),
lon_cell,
{
"long_name": "HEALPix cell-centre longitude",
"standard_name": "longitude",
"units": "degrees_east",
},
),
"lat": (
('cells',),
lat_cell,
{
"long_name": "HEALPix cell-centre latitude",
"standard_name": "latitude",
"units": "degrees_north",
},
),
"n_species_in_cell": (
('cells',),
n_species_per_cell.astype(np.int32),
{
"long_name": (
"number of species contributing to the community-mean "
"extirpation probability for this cell"
),
"units": "species",
"_FillValue": np.int32(-1),
},
),
},
coords={"cell_ids": ("cells", cell_idx)},
attrs={
"Conventions": "CF-1.10",
"title": (
f"Per-cell community-mean Bombus extirpation probability "
f"({decade_label}) on Iberian HEALPix nside=64 NESTED"
),
"horizon": decade_label,
"tier1_posterior": "results/posterior_bambi_healpix.nc",
"n_posterior_draws": int(N_DRAWS),
"rng_seed": 42,
"license_note": (
"DestinE Climate DT redistribution restricted; per-cell "
"raster is gitignored under data licence"
),
**PROJECT_DGGS_ATTRS, # DGGS Zarr Convention v1 — see healpix_port/_dggs_metadata.py
"history": (
f"Created by notebooks/07_projection.py for horizon {horizon}"
),
},
)
raster_ds["cell_ids"].attrs.update({
"long_name": "HEALPix NESTED pixel index (nside=64)",
})
raster_ds.to_netcdf(
RESULTS_DIR / f"projection_{horizon}.nc",
engine="netcdf4",
encoding={
"community_mean_eta": {"zlib": True, "complevel": 4},
"lon": {"zlib": True, "complevel": 4},
"lat": {"zlib": True, "complevel": 4},
"n_species_in_cell": {"zlib": True, "complevel": 4},
},
)
print(
f" Saved per-cell raster -> "
f"results/projection_{horizon}.nc (gitignored)"
)
projection_summary["horizons"][horizon] = {
"n_draws": N_DRAWS,
"species_ranked": species_records,
}
print(f"\n--- Top 5 most-vulnerable species ({horizon}) — ranked by mean η ---")
for rec in species_records[:5]:
print(
f" B. {rec['species']:<14} "
f"post-mean η = {rec['post_mean_eta']:+.3f} "
f"95% HDI [{rec['eta_hdi95_low']:+.3f}, {rec['eta_hdi95_high']:+.3f}] "
f"n_cells = {rec['n_cells']} "
f"(n_cells with η>0: {rec['n_cells_eta_gt_0']})"
)Option B (proper) — nside=128 projection (DestinE-resolution maps)¶
Same GLMM coefficients (calibrated at nside=64), evaluated on the
nside=128 climate predictors. The Tier-1 nside=128 historical
baseline (CRU TS sampled at nside=128 cell centres) and its
standardisation constants are loaded from
healpix_port/outputs_iberia/climate_tei_pei_healpix_nside128.nc
— produced by healpix_port/04_climate_tei_pei_healpix.py’s
Option-B extension. This replaces the earlier parent-inheritance
z-scoring which inflated η for cold-adapted species at extreme
child cells (the within-parent climate variance was much larger
than σ_Tier1_64 expected).
Methodological note: the GLMM coefficients β were trained against
nside=64 z-scores. Applying β to nside=128 z-scores assumes β is
scale-invariant when the substrate scales match (~92 km vs ~46 km).
This is the Option-B trade-off: cleaner standardisation at nside=128
vs perfect coefficient interpretability with nside=64 z-scoring.
Headline ranking remains nside=64 (horizons); nside=128 ranking is
added under horizons_nside128 for transparency.
# Load nside=128 standardisation constants (computed in
# healpix_port/04_climate_tei_pei_healpix.py over (species × cell)
# entries where the species was historically observed).
_hist128_path = OUT_DIR / "climate_tei_pei_healpix_nside128.nc"
if _hist128_path.exists():
_hist128 = xr.open_dataset(_hist128_path)
scaling_128 = {
"TEI_bs": {"mean": float(_hist128.attrs["std_TEI_bs_mu"]),
"sd": float(_hist128.attrs["std_TEI_bs_sd"])},
"TEI_delta": {"mean": float(_hist128.attrs["std_TEI_delta_mu"]),
"sd": float(_hist128.attrs["std_TEI_delta_sd"])},
"PEI_bs": {"mean": float(_hist128.attrs["std_PEI_bs_mu"]),
"sd": float(_hist128.attrs["std_PEI_bs_sd"])},
"PEI_delta": {"mean": float(_hist128.attrs["std_PEI_delta_mu"]),
"sd": float(_hist128.attrs["std_PEI_delta_sd"])},
# sampling stays at Tier-1 nside=64 reference (sampling effort is
# cell-level and inherited; no new distribution at nside=128)
"sampling": scaling["sampling"],
}
print("nside=128 standardisation constants (vs nside=64 in parens):")
for col in ("TEI_bs", "TEI_delta", "PEI_bs", "PEI_delta"):
m128 = scaling_128[col]
m64 = scaling[col]
print(f" {col:<10} μ={m128['mean']:+.4f} (vs {m64['mean']:+.4f}) "
f"σ={m128['sd']:.4f} (vs {m64['sd']:.4f})")
else:
print("[warn] nside=128 baseline missing; will fall back to nside=64 z-scoring")
scaling_128 = scaling
def _z128(arr: np.ndarray, col: str) -> np.ndarray:
return (arr - scaling_128[col]["mean"]) / scaling_128[col]["sd"]
projection_summary["horizons_nside128"] = {}
for horizon in HORIZONS:
src128 = OUT_DIR / f"climate_tei_pei_future_{horizon}_healpix_nside128.nc"
if not src128.exists():
print(f"[skip] {src128.name} missing — run 06_destine_clean.py first.")
continue
fut128 = xr.open_dataset(src128)
parent_row = fut128["parent_row_in_nside64"].values.astype(np.int64)
n128 = fut128.sizes["cells"]
fut128_species = [str(s) for s in fut128["species"].values]
print(f"\n=== Option B nside=128 projection: {horizon} ({n128} cells) ===")
# Reorder future arrays to match dataGLMM species order.
spp_idx = [fut128_species.index(sp) for sp in species_in_parquet]
tei_bs128 = fut128["tei_bs"].values[spp_idx, :]
tei_dl128 = fut128["tei_delta"].values[spp_idx, :]
pei_bs128 = fut128["pei_bs"].values[spp_idx, :]
pei_dl128 = fut128["pei_delta"].values[spp_idx, :]
# Sampling effort per nside=128 cell: inherit from parent.
sampling_128 = sampling[parent_row]
active_128 = ~np.isnan(sampling_128)
n_active_128 = int(active_128.sum())
print(f" Active nside=128 cells (sampled at parent): {n_active_128}/{n128}")
# Historical observation mask per species, inherited.
prab_bs_128 = prab_baseline[:, parent_row] # (31, N_128)
prab_rc_128 = prab_recent[:, parent_row]
prab_bs_128_idx = np.array(
[prab_species_list.index(sp) for sp in species_in_parquet]
)
prab_bs_128 = prab_bs_128[prab_bs_128_idx, :]
prab_rc_128 = prab_rc_128[prab_bs_128_idx, :]
eta_per_cell_sum_128 = np.zeros(n128, dtype=np.float64)
n_species_per_cell_128 = np.zeros(n128, dtype=np.int64)
species_records_128 = []
for i, sp in enumerate(species_in_parquet):
observed = ((prab_bs_128[i] > 0) | (prab_rc_128[i] > 0))
cell_mask_128 = observed & active_128
if cell_mask_128.sum() == 0:
continue
tei_bs_v = tei_bs128[i, cell_mask_128]
tei_dl_v = tei_dl128[i, cell_mask_128]
pei_bs_v = pei_bs128[i, cell_mask_128]
pei_dl_v = pei_dl128[i, cell_mask_128]
sampling_v = sampling_128[cell_mask_128]
valid = (np.isfinite(tei_bs_v) & np.isfinite(tei_dl_v)
& np.isfinite(pei_bs_v) & np.isfinite(pei_dl_v)
& np.isfinite(sampling_v))
if valid.sum() == 0:
continue
cell_full_idx = np.flatnonzero(cell_mask_128)[valid]
sc_TEI_bs_v = _z128(tei_bs_v[valid], "TEI_bs")
sc_TEI_delta_v = _z128(tei_dl_v[valid], "TEI_delta")
sc_PEI_bs_v = _z128(pei_bs_v[valid], "PEI_bs")
sc_PEI_delta_v = _z128(pei_dl_v[valid], "PEI_delta")
sc_sampling_v = _z128(sampling_v[valid], "sampling")
X = build_design_row(
sc_sampling_v, sc_TEI_bs_v, sc_TEI_delta_v,
sc_PEI_bs_v, sc_PEI_delta_v,
)
eta = X @ beta.T
if sp in spp_to_re_col:
re_per_draw = species_re_draws[:, spp_to_re_col[sp]]
eta = eta + re_per_draw[np.newaxis, :]
eta_post_mean_per_cell = eta.mean(axis=1)
eta_post_per_draw = eta.mean(axis=0)
post_mean_eta = float(eta_post_per_draw.mean())
hdi = az.hdi(eta_post_per_draw, hdi_prob=0.95)
species_records_128.append({
"species": sp,
"post_mean_eta": post_mean_eta,
"eta_hdi95_low": float(hdi[0]),
"eta_hdi95_high": float(hdi[1]),
"n_cells": int(valid.sum()),
"n_cells_eta_gt_0": int((eta_post_mean_per_cell > 0).sum()),
})
eta_per_cell_sum_128[cell_full_idx] += eta_post_mean_per_cell
n_species_per_cell_128[cell_full_idx] += 1
species_records_128.sort(key=lambda r: -r["post_mean_eta"])
with np.errstate(invalid="ignore"):
eta_per_cell_mean_128 = np.where(
n_species_per_cell_128 > 0,
eta_per_cell_sum_128 / np.maximum(n_species_per_cell_128, 1),
np.nan,
)
# Cell-centre lon/lat at nside=128 — derive from healpix-geo (WGS84).
from healpix_port._dggs_metadata import HEALPIX_NSIDE # noqa
from healpix_geo import nested as hpg_nested
cell_idx_128 = fut128["cell_ids"].values.astype(np.int64)
lon128, lat128 = hpg_nested.healpix_to_lonlat(
cell_idx_128.astype(np.uint64), 7, "WGS84"
)
lon128 = np.where(lon128 > 180.0, lon128 - 360.0, lon128).astype(np.float32)
lat128 = lat128.astype(np.float32)
raster_128_ds = xr.Dataset(
data_vars={
"community_mean_eta": (
("cells",),
eta_per_cell_mean_128.astype(np.float32),
{
"long_name": (
"per-cell community-mean GLMM linear predictor (η, "
"log-odds of extirpation) under SSP3-7.0 — Option B "
"nside=128 (DestinE resolution; GLMM at nside=64)"
),
"units": "1",
"_FillValue": np.float32(np.nan),
},
),
"lon": (("cells",), lon128,
{"standard_name": "longitude", "units": "degrees_east"}),
"lat": (("cells",), lat128,
{"standard_name": "latitude", "units": "degrees_north"}),
"n_species_in_cell": (
("cells",),
n_species_per_cell_128.astype(np.int32),
{"units": "species", "_FillValue": np.int32(-1)},
),
},
coords={"cell_ids": ("cells", cell_idx_128)},
attrs={
"Conventions": "CF-1.10",
"title": (
f"Per-cell community-mean Bombus extirpation η ({horizon.replace('_', '-')}) "
"on Iberian HEALPix nside=128 NESTED — Option B"
),
"horizon": horizon.replace("_", "-"),
"method": "Option B (GLMM at nside=64, climate predictors at nside=128)",
"tier1_posterior": "results/posterior_bambi_healpix.nc",
"n_posterior_draws": int(N_DRAWS),
**{**PROJECT_DGGS_ATTRS, "dggs_grid_refinement_level": 7},
},
)
raster_128_ds.to_netcdf(
RESULTS_DIR / f"projection_{horizon}_nside128.nc",
engine="netcdf4",
encoding={
"community_mean_eta": {"zlib": True, "complevel": 4},
"lon": {"zlib": True, "complevel": 4},
"lat": {"zlib": True, "complevel": 4},
"n_species_in_cell": {"zlib": True, "complevel": 4},
},
)
print(f" Saved results/projection_{horizon}_nside128.nc (gitignored)")
projection_summary["horizons_nside128"][horizon] = {
"n_draws": N_DRAWS,
"species_ranked": species_records_128,
}
print(f" Top 3 (nside=128): "
+ ", ".join(f"B. {r['species']} (η={r['post_mean_eta']:+.2f})"
for r in species_records_128[:3]))Method block + write JSON¶
projection_summary["method"] = {
"n_posterior_draws": N_DRAWS,
"substrate": "HEALPix-NESTED nside=64 (~92 km cells)",
"scaling_source": (
"Tier-1 healpix_port/outputs_iberia/dataGLMM_extinction.parquet "
"z-score constants (mean + ddof=1 SD per raw column; cross-checked "
"against the saved sc_ columns to <= 1e-6)"
),
"scaling_constants": scaling,
"sampling_effort_assumption": (
"held at recent-period (2000-2014) mean per cell"
),
"data_source": (
"DestinE Climate DT SSP3-7.0 IFS-NEMO standard "
"(licence-restricted; access via DestinE Data Lake)"
),
"tier1_posterior": "results/posterior_bambi_healpix.nc",
"design_columns": PARAM_NAMES,
"posterior_total_samples": int(n_samples_total),
"rng_seed": 42,
"headline_metric": "post_mean_eta",
"headline_metric_comment": (
"Per-species posterior-mean of (mean η across cells), 95% HDI in "
"log-odds units. We report η — the GLMM linear predictor — rather "
"than p = expit(η) because future TEI/PEI z-scores under SSP3-7.0 "
"lie 5–10× outside the Tier-1 training distribution (the future "
"warming signal is ~2× the 1901–2014 training delta), where "
"expit() saturates near 1.0 uninformatively. Ranking by η "
"preserves the GLMM's authentic signal; relative-risk-style "
"comparisons (e.g. n_cells_eta_gt_0) are more substrate-stable "
"than absolute probabilities. See 05_outcome.md § Limitations."
),
}
out_json = RESULTS_DIR / "projection_headline.json"
with open(out_json, "w") as f:
json.dump(projection_summary, f, indent=2)
print(f"\nWrote {out_json}")