Skip to content
65 changes: 65 additions & 0 deletions configs/data/heat_guatemala_tessera_text.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Alignment data config for the Guatemala LST use case (Tessera + expert-legend captions).
#
# Requirements before running:
# 1. The model-ready CSV that the dataset loads from ${paths.data_dir} must contain the
#    aux_* columns produced by scripts/build_aux_from_original.py (numeric + *_label).
# 2. Tessera tiles must exist at
#    ${paths.data_dir}/heat_guatemala/eo/tessera/tessera_<name_loc>.npy
#    (10x10, 128-band) — see the tessera preprocessing step.
#
# NOTE(review): nesting indentation is not visible in this view; the grouping
# comments below describe the presumed structure — confirm against the file on disk.
_target_: src.data.base_datamodule.BaseDataModule

# Dataset definition (presumably every key down to cache_dir nests under `dataset`).
dataset:
_target_: src.data.heat_guatemala_dataset.HeatGuatemalaDataset
data_dir: ${paths.data_dir}
# Only the Tessera modality is requested here.
modalities:
tessera:
year: 2024
size: 10
format: npy
use_target_data: false # alignment is contrastive geo<->text; LST enters only as a concept
use_features: false # Tessera-only EO branch (fusion scored lower in the regression runs); also avoids circularity
# Two groups of auxiliary columns: numeric values (`aux`) and label strings (`top`).
use_aux_data:
aux: # numeric raw values -> concept theta_k / retrieval ground truth
columns:
- aux_ndvi_mean
- aux_ndwi_mean
- aux_forest_cover_perc
- aux_tree_cover_perc
- aux_builtup_age_years
- aux_slope_perc
- aux_socioeconomic
- aux_lst
top: # expert-legend label strings -> caption text
columns:
- aux_ndvi_label
- aux_ndwi_label
- aux_forest_label
- aux_age_label
- aux_slope_label
- aux_socio_label
- aux_height_label
- aux_density_label
- aux_landuse
- aux_blocktype
- aux_interzone
seed: ${seed}
cache_dir: ${paths.cache_dir}

# Caption builder: reads template and concept definitions (both named v1.json).
# NOTE(review): cannot tell from this view whether this block nests under `dataset`
# or sits at the datamodule level — confirm.
caption_builder:
_target_: src.data.heat_guatemala_caption_builder.HeatGuatemalaCaptionBuilder
templates_fname: v1.json
concepts_fname: v1.json
data_dir: ${paths.data_dir}/heat_guatemala
seed: ${seed}

# Data-loading settings.
batch_size: 64
num_workers: 8
pin_memory: true

# Split configuration. The commented-out lines are alternative split modes kept for reference.
#split_mode: "spatial_clusters" # honest val/test: hold out whole areas (city blocks autocorrelate)
#spatial_split_distance_m: 500
#split_mode: "random"
# Active mode: presumably loads a previously saved split from saved_split_file_name — verify.
split_mode: "from_file"
saved_split_file_name: "split_indices_heat_guatemala_2026-02-20-1148.pth"
# NOTE(review): presumably ignored when the split comes from a file — confirm.
train_val_test_split: [0.7, 0.15, 0.15]
save_split: false
seed: ${seed}
25 changes: 25 additions & 0 deletions configs/experiment/heat_alignment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# @package _global_
#
# Experiment preset: alignment run for the Guatemala heat-island data.
# Pairs the heat_tessera_alignment model with the heat_guatemala_tessera_text
# data config and the contrastive_similarities metrics.
#
# Usage:
#   python src/train.py experiment=heat_alignment
#
# Smoke test (weak results are expected):
#   python src/train.py experiment=heat_alignment \
#     model=geoclip_alignment data.dataset.modalities='{coords: {}}'

defaults:
  - override /model: heat_tessera_alignment
  - override /data: heat_guatemala_tessera_text
  - override /metrics: contrastive_similarities

tags:
  - alignment
  - heat_island
  - guatemala
  - tessera

seed: 12345

trainer:
  min_epochs: 10
  max_epochs: 100

data:
  batch_size: 64

# Both loggers file this run under "heat_alignment".
logger:
  wandb:
    tags: ${tags}
    group: heat_alignment
  aim:
    experiment: heat_alignment
34 changes: 34 additions & 0 deletions configs/experiment/heat_guatemala_full_fusion_avg_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_full_fusion_avg_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.672, RMSE=1.104, MAE=0.896

defaults:
  - override /model: heat_full_fusion_avg_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - full_fusion_avg
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64
  dataset:
    # Modalities requested from the dataset: coordinates and Tessera.
    modalities:
      coords: {}
      tessera:
        year: 2024
        size: 10
        format: npy

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
34 changes: 34 additions & 0 deletions configs/experiment/heat_guatemala_full_fusion_cnn_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_full_fusion_cnn_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.647, RMSE=1.144, MAE=0.931

defaults:
  - override /model: heat_full_fusion_cnn_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - full_fusion_cnn
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64
  dataset:
    # Modalities requested from the dataset: coordinates and Tessera.
    modalities:
      coords: {}
      tessera:
        year: 2024
        size: 10
        format: npy

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
27 changes: 27 additions & 0 deletions configs/experiment/heat_guatemala_fusion_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_fusion_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.555, RMSE=1.285, MAE=1.039

defaults:
  - override /model: heat_fusion_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - fusion
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
27 changes: 27 additions & 0 deletions configs/experiment/heat_guatemala_geoclip_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_geoclip_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.323, RMSE=1.607, MAE=1.344

defaults:
  - override /model: heat_geoclip_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - coords
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
27 changes: 27 additions & 0 deletions configs/experiment/heat_guatemala_tabular_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_tabular_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.562, RMSE=1.282, MAE=1.040

defaults:
  - override /model: heat_tabular_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - tabular
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
34 changes: 34 additions & 0 deletions configs/experiment/heat_guatemala_tessera_avg_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_tessera_avg_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.733, RMSE=1.011, MAE=0.814

defaults:
  - override /model: heat_tessera_avg_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - tessera_avg
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64
  dataset:
    # Modalities requested from the dataset: coordinates and Tessera.
    modalities:
      coords: {}
      tessera:
        year: 2024
        size: 10
        format: npy

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
34 changes: 34 additions & 0 deletions configs/experiment/heat_guatemala_tessera_cnn_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @package _global_
#
# Experiment preset: Guatemala heat-island regression with the
# heat_tessera_cnn_best model, the heat_guatemala data config and the
# guatemala_regression metrics.
# Reported best result: R2=0.694, RMSE=1.088, MAE=0.877

defaults:
  - override /model: heat_tessera_cnn_best
  - override /data: heat_guatemala
  - override /metrics: guatemala_regression

tags:
  - heat_island
  - guatemala
  - tessera_cnn
  - best
  - regression

seed: 12345

trainer:
  min_epochs: 1
  max_epochs: 100

data:
  batch_size: 64
  dataset:
    # Modalities requested from the dataset: coordinates and Tessera.
    modalities:
      coords: {}
      tessera:
        year: 2024
        size: 10
        format: npy

# Checkpointing and early stopping both monitor val_r2 in max mode.
callbacks:
  model_checkpoint:
    monitor: val_r2
    mode: max
  early_stopping:
    monitor: val_r2
    mode: max
    patience: 20

logger:
  wandb:
    tags: ${tags}
    group: heat_island
  aim:
    experiment: heat_island
34 changes: 34 additions & 0 deletions configs/model/heat_full_fusion_avg_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Model config: a PredictiveModel whose geo encoder wraps three encoder branches
# (GeoClipCoordinateEncoder, TabularEncoder, AverageEncoder) and whose prediction
# head is an MLP regression head.
# NOTE(review): nesting indentation is not visible in this view; the comments below
# describe the presumed structure — confirm against the file on disk.
_target_: src.models.predictive_model.PredictiveModel
geo_encoder:
_target_: src.models.components.geo_encoders.encoder_wrapper.EncoderWrapper
encoder_branches:
# First branch: coordinate encoder.
- encoder:
_target_: src.models.components.geo_encoders.geoclip.GeoClipCoordinateEncoder
# Second branch: tabular encoder (output_dim 64, geo_data_name tabular).
- encoder:
_target_: src.models.components.geo_encoders.tabular_encoder.TabularEncoder
output_dim: 64
geo_data_name: tabular
# Third branch: AverageEncoder with geo_data_name tessera.
- encoder:
_target_: src.models.components.geo_encoders.average_encoder.AverageEncoder
geo_data_name: tessera
# Strategy used to combine the branch outputs.
fusion_strategy: concat
prediction_head:
_target_: src.models.components.pred_heads.mlp_regression_head.MLPRegressionPredictionHead
nn_layers: 3
hidden_dim: 512
# Modules left trainable; assuming 0-based indexing, encoder_branches.1 is the
# tabular branch — TODO confirm.
trainable_modules: [geo_encoder.encoder_branches.1, prediction_head]
normalize_features: false
metrics: ${metrics}
# Optimizer and scheduler carry `_partial_: true`, so Hydra builds them as partials,
# presumably completed by the model at setup time.
optimizer:
_target_: torch.optim.Adam
_partial_: true
lr: 0.0001
weight_decay: 0.0
scheduler:
_target_: torch.optim.lr_scheduler.ReduceLROnPlateau
_partial_: true
# NOTE(review): the monitored quantity is not set here; `min` assumes a
# lower-is-better metric such as a loss — confirm it is not val_r2.
mode: min
factor: 0.1
patience: 10
loss_fn:
_target_: torch.nn.MSELoss
38 changes: 38 additions & 0 deletions configs/model/heat_full_fusion_cnn_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Model config: a PredictiveModel whose geo encoder wraps three encoder branches
# (GeoClipCoordinateEncoder, TabularEncoder, CNNEncoder) and whose prediction
# head is an MLP regression head.
# NOTE(review): nesting indentation is not visible in this view; the comments below
# describe the presumed structure — confirm against the file on disk.
_target_: src.models.predictive_model.PredictiveModel
geo_encoder:
_target_: src.models.components.geo_encoders.encoder_wrapper.EncoderWrapper
encoder_branches:
# First branch: coordinate encoder.
- encoder:
_target_: src.models.components.geo_encoders.geoclip.GeoClipCoordinateEncoder
# Second branch: tabular encoder (output_dim 64, geo_data_name tabular).
- encoder:
_target_: src.models.components.geo_encoders.tabular_encoder.TabularEncoder
output_dim: 64
geo_data_name: tabular
# Third branch: CNN encoder with geo_data_name tessera (ResNet version 18,
# imagenet-pretrained, output_dim 256).
- encoder:
_target_: src.models.components.geo_encoders.cnn_encoder.CNNEncoder
geo_data_name: tessera
resnet_version: 18
pretrained_cnn: imagenet
# NOTE(review): `all` here sits alongside encoder_branches.2 being listed in
# trainable_modules below — confirm which setting takes precedence.
freezing_strategy: all
output_dim: 256
# Strategy used to combine the branch outputs.
fusion_strategy: concat
prediction_head:
_target_: src.models.components.pred_heads.mlp_regression_head.MLPRegressionPredictionHead
nn_layers: 3
hidden_dim: 256
# Modules left trainable; assuming 0-based indexing, branches 1 and 2 are the
# tabular and CNN branches — TODO confirm.
trainable_modules: [geo_encoder.encoder_branches.1, geo_encoder.encoder_branches.2, prediction_head]
normalize_features: true
metrics: ${metrics}
# Optimizer and scheduler carry `_partial_: true`, so Hydra builds them as partials,
# presumably completed by the model at setup time.
optimizer:
_target_: torch.optim.Adam
_partial_: true
lr: 0.001
weight_decay: 0.0
scheduler:
_target_: torch.optim.lr_scheduler.ReduceLROnPlateau
_partial_: true
# NOTE(review): the monitored quantity is not set here; `min` assumes a
# lower-is-better metric such as a loss — confirm it is not val_r2.
mode: min
factor: 0.1
patience: 10
loss_fn:
_target_: torch.nn.MSELoss
Loading