diff --git a/CHANGELOG.md b/CHANGELOG.md index 98ce22e..edab6fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,23 @@ uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.1.1] - 2026-06-06 + +### Fixed + +- **`parse_probabilistic` probability scale (100x bug).** The day-1/2/3 + probabilistic-outlook parser divided every isopleth value by 100 + unconditionally, assuming `LABEL`/`dn` is an integer percent (`"2"`, + `"5"`). The live `www.spc.noaa.gov` and ArcGIS GeoJSON feeds actually + ship the value as an already-normalized fraction (`"0.02"`, `"0.30"`), + so a 2% tornado risk parsed to `0.0002` instead of `0.02`. Now + normalized like the day4-8 path (`pct > 1.0 ? pct / 100.0 : pct`): + integer percents are divided, fractions pass through. Both forms are + pinned by regression tests (`Parser.ProbabilisticFractionalLabel`, + `Corpus.ProbabilisticParsesAndScales`). Consumers that persist + probabilities (e.g. `spc-data`'s `spc.prob_outlooks`) get correct + `[0,1]` values after upgrading. + ## [0.1.0] - 2026-05-17 Initial public release. Open-source C++23 SDK for NOAA Storm Prediction diff --git a/CMakeLists.txt b/CMakeLists.txt index aa93431..8c7de44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.20) -project(spc-cpp VERSION 0.1.0 LANGUAGES CXX) +project(spc-cpp VERSION 0.1.1 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/src/models/outlook.cpp b/src/models/outlook.cpp index c90daac..d8bfd3b 100644 --- a/src/models/outlook.cpp +++ b/src/models/outlook.cpp @@ -118,8 +118,14 @@ ProbOutlookPayload parse_probabilistic(std::string_view body, std::int32_t day_o } ProbOutlookFeature pf; pf.hazard = hazard; - // SPC prob isopleths store percentage as LABEL ("2", "5", "10", ...) or - // as DN, occasionally as a numeric. + // SPC prob isopleths express the risk percentage two incompatible ways + // depending on the source: as an integer-percent (`LABEL`/`dn` = "2", + // "5", "30") or as an already-normalized fraction (`LABEL` = "0.02", + // "0.05", "0.30" — the form the live www.spc.noaa.gov + ArcGIS GeoJSON + // actually ship). Normalize like the day4-8 path (convective.cpp): only + // values > 1 are percents to divide by 100; fractions pass through. A + // bare `/ 100.0` here silently produced 100x-too-small probabilities + // (0.02 -> 0.0002) for every real feed. double pct = detail::json_number_or_numeric_string(*props, "LABEL"); if (pct == 0.0) { pct = detail::json_number_or_numeric_string(*props, "label"); @@ -127,7 +133,7 @@ ProbOutlookPayload parse_probabilistic(std::string_view body, std::int32_t day_o if (pct == 0.0) { pct = detail::json_number_or_numeric_string(*props, "dn"); } - pf.probability = pct / 100.0; + pf.probability = pct > 1.0 ? pct / 100.0 : pct; pf.issued_at = detail::as_spc_ts(*props, "ISSUE"); if (pf.issued_at.empty()) { pf.issued_at = detail::as_spc_ts(*props, "issue"); diff --git a/tests/test_corpus.cpp b/tests/test_corpus.cpp index 0d5a44d..6ccac4b 100644 --- a/tests/test_corpus.cpp +++ b/tests/test_corpus.cpp @@ -62,16 +62,18 @@ TEST(Corpus, ArcGISGeoJsonCategoricalParses) { } TEST(Corpus, ProbabilisticParsesAndScales) { - // ArcGIS f=geojson probabilistic tornado: label is "0.02"/"0.05"/"0.10". - // parse_probabilistic divides by 100 (verbatim spc-data semantics), so a - // "0.02" label -> 0.0002 probability. + // ArcGIS f=geojson probabilistic tornado: label is the already-normalized + // fraction "0.02"/"0.05"/"0.10". parse_probabilistic passes fractions + // through (only integer-percent forms are /100'd), so a "0.02" label -> + // 0.02 probability — NOT the 0.0002 a bare /100 used to produce. The + // >= 0.01 floor pins that fix (every real SPC isopleth is >= 2%). const ProbOutlookPayload p = parse_probabilistic(slurp("arcgis_day1_prob_tornado.geojson"), 1, "tornado"); ASSERT_GT(p.features.size(), 0u); for (const ProbOutlookFeature& f : p.features) { EXPECT_EQ(f.hazard, "tornado"); - EXPECT_GT(f.probability, 0.0); - EXPECT_LT(f.probability, 1.0); + EXPECT_GE(f.probability, 0.01); + EXPECT_LE(f.probability, 1.0); EXPECT_FALSE(f.rings.empty()); } } diff --git a/tests/test_parser.cpp b/tests/test_parser.cpp index c8b81fc..b5d3900 100644 --- a/tests/test_parser.cpp +++ b/tests/test_parser.cpp @@ -92,6 +92,26 @@ TEST(Parser, ProbabilisticTornado) { EXPECT_DOUBLE_EQ(p.features[1].probability, 0.05); } +// Regression: the live www.spc.noaa.gov + ArcGIS GeoJSON ship the probability +// as an already-normalized fraction ("0.02", "0.30") — NOT an integer percent. +// A bare `/ 100.0` turned a 2% risk into 0.0002. Both label forms must map to +// the same [0,1] probability. +TEST(Parser, ProbabilisticFractionalLabel) { + const ProbOutlookPayload p = parse_probabilistic(R"({ + "type": "FeatureCollection", + "features": [ + {"properties": {"LABEL": "0.02", "LABEL2": "2% Tornado Risk"}, + "geometry": {"type":"Polygon","coordinates":[[[-100,34],[-95,34],[-95,38],[-100,38],[-100,34]]]}}, + {"properties": {"LABEL": "0.30", "LABEL2": "30% Tornado Risk"}, + "geometry": {"type":"Polygon","coordinates":[[[-99,35],[-96,35],[-96,37],[-99,37],[-99,35]]]}} + ] + })", + 1, "tornado"); + ASSERT_EQ(p.features.size(), 2u); + EXPECT_DOUBLE_EQ(p.features[0].probability, 0.02); + EXPECT_DOUBLE_EQ(p.features[1].probability, 0.30); +} + TEST(Parser, UnknownLabelSkipped) { const CategoricalOutlookPayload p = parse_categorical(R"({ "type": "FeatureCollection",