From 4127f98cf4d116c05c93c0c68464dbeb5079b5c4 Mon Sep 17 00:00:00 2001 From: garo Date: Wed, 1 Oct 2025 13:04:28 -0700 Subject: [PATCH 1/2] fix: resolve URL mapping and non-deterministic ordering issues in US soil identification - Replace index-based URL storage with component key mapping to fix URL mismatches - Add deterministic sorting to groupby operations for consistent component ordering - Improve component name duplication handling with sorted processing - Fix Series URL generation logic to properly match components with their URLs Resolves issues where soil components received incorrect SDE/SEE URLs due to sorting misalignment between URL lists and component data ordering. --- ...il_location[33.81246789,-101.9733687].json | 124 ++++++------- ...t_soil_location[35.59918,-120.491439].json | 20 +-- .../test_soil_location[37.422,-122.084].json | 72 ++++---- ...il_location[37.48216451,-99.55016693].json | 154 ++++++++-------- ...il_location[39.26009312,-85.50621214].json | 168 +++++++++--------- ..._soil_location[42.494912,-123.064531].json | 98 +++++----- ...il_location[42.63413723,-94.31005777].json | 66 +++---- ...il_location[43.06450312,-119.4596489].json | 90 +++++----- ...il_location[45.88932423,-121.0347381].json | 8 +- ...soil_location[47.213922,-69.28246582].json | 44 ++--- soil_id/us_soil.py | 78 ++++---- 11 files changed, 458 insertions(+), 464 deletions(-) diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[33.81246789,-101.9733687].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[33.81246789,-101.9733687].json index e86f224..66bc6ab 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[33.81246789,-101.9733687].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[33.81246789,-101.9733687].json @@ -662,11 +662,11 @@ "componentData": "Missing Data", "componentID": 25623332, "name": "Randall", - "rank_data": "8", + "rank_data": "9", "rank_data_loc": "1", "rank_loc": "1", - "score_data": 0.427, - "score_data_loc": 0.451, + "score_data": 0.415, + "score_data_loc": 0.445, "score_loc": 0.474 }, { @@ -674,96 +674,96 @@ "componentData": "Data Complete", "componentID": 25623297, "name": "Acuff2", - "rank_data": "1", + "rank_data": "2", "rank_data_loc": "2", "rank_loc": "Not Displayed", - "score_data": 0.593, - "score_data_loc": 0.423, + "score_data": 0.617, + "score_data_loc": 0.435, "score_loc": 0.253 }, + { + "component": "Amarillo", + "componentData": "Data Complete", + "componentID": 25623212, + "name": "Amarillo", + "rank_data": "1", + "rank_data_loc": "3", + "rank_loc": "8", + "score_data": 0.645, + "score_data_loc": 0.328, + "score_loc": 0.011 + }, { "component": "Olton", "componentData": "Data Complete", "componentID": 25623299, "name": "Olton", "rank_data": "7", - "rank_data_loc": "3", + "rank_data_loc": "4", "rank_loc": "3", - "score_data": 0.46, - "score_data_loc": 0.293, + "score_data": 0.487, + "score_data_loc": 0.306, "score_loc": 0.126 }, + { + "component": "Mclean", + "componentData": "Missing Data", + "componentID": 25623333, + "name": "Mclean", + "rank_data": "5", + "rank_data_loc": "5", + "rank_loc": "4", + "score_data": 0.5, + "score_data_loc": 0.28, + "score_loc": 0.059 + }, { "component": "Friona", "componentData": "Data Complete", "componentID": 25623214, "name": "Friona", - "rank_data": "2", - "rank_data_loc": "4", + "rank_data": "3", + "rank_data_loc": "6", "rank_loc": "7", - "score_data": 0.57, - "score_data_loc": 0.292, + "score_data": 0.543, + "score_data_loc": 0.279, "score_loc": 0.014 }, - { - "component": "Amarillo", - "componentData": "Data Complete", - "componentID": 25623212, - "name": "Amarillo", - "rank_data": "3", - "rank_data_loc": "5", - "rank_loc": "8", - "score_data": 0.553, - "score_data_loc": 0.282, - "score_loc": 0.011 - }, { "component": "Lockney", "componentData": "Missing Data", "componentID": 25623334, "name": "Lockney", - "rank_data": "5", - "rank_data_loc": "6", - "rank_loc": "5", - "score_data": 0.533, - "score_data_loc": 0.281, - "score_loc": 0.03 - }, - { - "component": "Pullman", - "componentData": "Data Complete", - "componentID": 25623298, - "name": "Pullman", "rank_data": "4", "rank_data_loc": "7", - "rank_loc": "9", - "score_data": 0.539, - "score_data_loc": 0.275, - "score_loc": 0.011 + "rank_loc": "5", + "score_data": 0.504, + "score_data_loc": 0.267, + "score_loc": 0.03 }, { "component": "Estacado", "componentData": "Data Complete", - "componentID": 25623213, - "name": "Estacado", + "componentID": 25623296, + "name": "Estacado2", "rank_data": "6", "rank_data_loc": "8", - "rank_loc": "6", - "score_data": 0.492, - "score_data_loc": 0.257, + "rank_loc": "Not Displayed", + "score_data": 0.499, + "score_data_loc": 0.26, "score_loc": 0.021 }, { - "component": "Mclean", - "componentData": "Missing Data", - "componentID": 25623333, - "name": "Mclean", - "rank_data": "9", + "component": "Pullman", + "componentData": "Data Complete", + "componentID": 25623298, + "name": "Pullman", + "rank_data": "8", "rank_data_loc": "9", - "rank_loc": "4", - "score_data": 0.411, - "score_data_loc": 0.235, - "score_loc": 0.059 + "rank_loc": "9", + "score_data": 0.483, + "score_data_loc": 0.247, + "score_loc": 0.011 }, { "component": "Acuff", @@ -773,20 +773,20 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "2", - "score_data": 0.576, - "score_data_loc": 0.414, + "score_data": 0.49, + "score_data_loc": 0.372, "score_loc": 0.253 }, { "component": "Estacado", "componentData": "Data Complete", - "componentID": 25623296, - "name": "Estacado2", + "componentID": 25623213, + "name": "Estacado", "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", - "rank_loc": "Not Displayed", - "score_data": 0.441, - "score_data_loc": 0.231, + "rank_loc": "6", + "score_data": 0.414, + "score_data_loc": 0.218, "score_loc": 0.021 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[35.59918,-120.491439].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[35.59918,-120.491439].json index 7e31ea0..1cd4bb3 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[35.59918,-120.491439].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[35.59918,-120.491439].json @@ -134,8 +134,8 @@ "nirrcapscl": "e", "nirrcapunit": "nan", "rfvInfill": "No", - "sdeURL": "https://casoilresource.lawr.ucdavis.edu/sde/?series=balcom", - "seeURL": "https://casoilresource.lawr.ucdavis.edu/see/#balcom", + "sdeURL": "https://casoilresource.lawr.ucdavis.edu/sde/?series=los_osos", + "seeURL": "https://casoilresource.lawr.ucdavis.edu/see/#los_osos", "slope": 40.0, "taxsubgrp": "Typic Argixerolls", "textureInfill": "Yes" @@ -273,8 +273,8 @@ "rank_data": "1", "rank_data_loc": "1", "rank_loc": "Not Displayed", - "score_data": 0.552, - "score_data_loc": 0.402, + "score_data": 0.578, + "score_data_loc": 0.415, "score_loc": 0.252 }, { @@ -285,8 +285,8 @@ "rank_data": "2", "rank_data_loc": "2", "rank_loc": "Not Displayed", - "score_data": 0.535, - "score_data_loc": 0.33, + "score_data": 0.397, + "score_data_loc": 0.261, "score_loc": 0.125 }, { @@ -297,8 +297,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "1", - "score_data": 0.345, - "score_data_loc": 0.299, + "score_data": 0.383, + "score_data_loc": 0.317, "score_loc": 0.252 }, { @@ -309,8 +309,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "2", - "score_data": 0.37, - "score_data_loc": 0.248, + "score_data": 0.383, + "score_data_loc": 0.254, "score_loc": 0.125 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.422,-122.084].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.422,-122.084].json index c1822a3..63eeae5 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.422,-122.084].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.422,-122.084].json @@ -435,7 +435,7 @@ "soilRank": [ { "component": "Xerorthents", - "componentData": "Data Complete", + "componentData": "Missing Data", "componentID": 26038503, "name": "Xerorthents", "rank_data": "1", @@ -450,71 +450,71 @@ "componentData": "Data Complete", "componentID": 26038467, "name": "Hangerone", - "rank_data": "6", + "rank_data": "5", "rank_data_loc": "2", "rank_loc": "1", "score_data": 0.554, "score_data_loc": 0.472, "score_loc": 0.39 }, - { - "component": "Aquic xerorthents", - "componentData": "Missing Data", - "componentID": 26038456, - "name": "Aquic xerorthents", - "rank_data": "3", - "rank_data_loc": "3", - "rank_loc": "3", - "score_data": 0.626, - "score_data_loc": 0.395, - "score_loc": 0.165 - }, { "component": "Embarcadero", - "componentData": "Missing Data", + "componentData": "Data Complete", "componentID": 26038464, "name": "Embarcadero", "rank_data": "2", - "rank_data_loc": "4", + "rank_data_loc": "3", "rank_loc": "6", - "score_data": 0.712, - "score_data_loc": 0.362, + "score_data": 0.732, + "score_data_loc": 0.372, "score_loc": 0.012 }, { - "component": "Clear lake", + "component": "Bayshore", + "componentData": "Data Complete", + "componentID": 26038468, + "name": "Bayshore", + "rank_data": "3", + "rank_data_loc": "4", + "rank_loc": "5", + "score_data": 0.709, + "score_data_loc": 0.364, + "score_loc": 0.019 + }, + { + "component": "Aquic xerorthents", "componentData": "Missing Data", - "componentID": 26038466, - "name": "Clear lake", - "rank_data": "4", + "componentID": 26038456, + "name": "Aquic xerorthents", + "rank_data": "6", "rank_data_loc": "5", - "rank_loc": "4", - "score_data": 0.617, - "score_data_loc": 0.323, - "score_loc": 0.029 + "rank_loc": "3", + "score_data": 0.514, + "score_data_loc": 0.339, + "score_loc": 0.165 }, { - "component": "Bayshore", + "component": "Clear lake", "componentData": "Data Complete", - "componentID": 26038468, - "name": "Bayshore", - "rank_data": "5", + "componentID": 26038466, + "name": "Clear lake", + "rank_data": "4", "rank_data_loc": "6", - "rank_loc": "5", - "score_data": 0.607, + "rank_loc": "4", + "score_data": 0.597, "score_data_loc": 0.313, - "score_loc": 0.019 + "score_loc": 0.029 }, { "component": "Xerorthents", - "componentData": "Data Complete", + "componentData": "Missing Data", "componentID": 26038454, "name": "Xerorthents2", "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.652, - "score_data_loc": 0.519, + "score_data": 0.662, + "score_data_loc": 0.524, "score_loc": 0.386 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.48216451,-99.55016693].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.48216451,-99.55016693].json index 3267a5d..7e485f3 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.48216451,-99.55016693].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[37.48216451,-99.55016693].json @@ -881,16 +881,28 @@ "model": "v2" }, "soilRank": [ + { + "component": "Uly", + "componentData": "Data Complete", + "componentID": 25865522, + "name": "Uly2", + "rank_data": "6", + "rank_data_loc": "1", + "rank_loc": "Not Displayed", + "score_data": 0.612, + "score_data_loc": 0.453, + "score_loc": 0.295 + }, { "component": "Harney", "componentData": "Data Complete", "componentID": 25865346, "name": "Harney", "rank_data": "4", - "rank_data_loc": "1", + "rank_data_loc": "2", "rank_loc": "2", - "score_data": 0.649, - "score_data_loc": 0.456, + "score_data": 0.636, + "score_data_loc": 0.45, "score_loc": 0.263 }, { @@ -898,24 +910,24 @@ "componentData": "Data Complete", "componentID": 25865523, "name": "Coly2", - "rank_data": "1", - "rank_data_loc": "2", + "rank_data": "3", + "rank_data_loc": "3", "rank_loc": "Not Displayed", - "score_data": 0.677, - "score_data_loc": 0.446, + "score_data": 0.662, + "score_data_loc": 0.439, "score_loc": 0.215 }, { - "component": "Uly", + "component": "Canlon", "componentData": "Data Complete", - "componentID": 25865522, - "name": "Uly2", - "rank_data": "8", - "rank_data_loc": "3", - "rank_loc": "Not Displayed", - "score_data": 0.571, - "score_data_loc": 0.433, - "score_loc": 0.295 + "componentID": 25865520, + "name": "Canlon", + "rank_data": "1", + "rank_data_loc": "4", + "rank_loc": "11", + "score_data": 0.71, + "score_data_loc": 0.356, + "score_loc": 0.002 }, { "component": "Penden", @@ -923,23 +935,23 @@ "componentID": 25865344, "name": "Penden", "rank_data": "2", - "rank_data_loc": "4", + "rank_data_loc": "5", "rank_loc": "7", "score_data": 0.667, "score_data_loc": 0.341, "score_loc": 0.016 }, { - "component": "Canlon", + "component": "Wakeen", "componentData": "Data Complete", - "componentID": 25865520, - "name": "Canlon", - "rank_data": "3", - "rank_data_loc": "5", - "rank_loc": "11", - "score_data": 0.659, - "score_data_loc": 0.331, - "score_loc": 0.002 + "componentID": 25865551, + "name": "Wakeen", + "rank_data": "5", + "rank_data_loc": "6", + "rank_loc": "9", + "score_data": 0.625, + "score_data_loc": 0.318, + "score_loc": 0.01 }, { "component": "Case", @@ -947,46 +959,22 @@ "componentID": 25865495, "name": "Case", "rank_data": "7", - "rank_data_loc": "6", + "rank_data_loc": "7", "rank_loc": "6", - "score_data": 0.58, - "score_data_loc": 0.302, + "score_data": 0.593, + "score_data_loc": 0.308, "score_loc": 0.023 }, - { - "component": "Aquolls", - "componentData": "Data Complete", - "componentID": 25865552, - "name": "Aquolls", - "rank_data": "5", - "rank_data_loc": "7", - "rank_loc": "10", - "score_data": 0.594, - "score_data_loc": 0.298, - "score_loc": 0.003 - }, - { - "component": "Wakeen", - "componentData": "Missing Data", - "componentID": 25865551, - "name": "Wakeen", - "rank_data": "6", - "rank_data_loc": "8", - "rank_loc": "9", - "score_data": 0.585, - "score_data_loc": 0.298, - "score_loc": 0.01 - }, { "component": "Tobin", "componentData": "Data Complete", "componentID": 25865519, "name": "Tobin", "rank_data": "10", - "rank_data_loc": "9", + "rank_data_loc": "8", "rank_loc": "5", "score_data": 0.538, - "score_data_loc": 0.298, + "score_data_loc": 0.297, "score_loc": 0.057 }, { @@ -994,13 +982,25 @@ "componentData": "Data Complete", "componentID": 25865494, "name": "Bridgeport", - "rank_data": "9", - "rank_data_loc": "10", + "rank_data": "8", + "rank_data_loc": "9", "rank_loc": "8", - "score_data": 0.551, - "score_data_loc": 0.282, + "score_data": 0.563, + "score_data_loc": 0.288, "score_loc": 0.012 }, + { + "component": "Aquolls", + "componentData": "Missing Data", + "componentID": 25865552, + "name": "Aquolls", + "rank_data": "9", + "rank_data_loc": "10", + "rank_loc": "10", + "score_data": 0.563, + "score_data_loc": 0.283, + "score_loc": 0.003 + }, { "component": "Holdrege", "componentData": "Data Complete", @@ -1009,22 +1009,10 @@ "rank_data": "11", "rank_data_loc": "11", "rank_loc": "4", - "score_data": 0.44, - "score_data_loc": 0.272, + "score_data": 0.4, + "score_data_loc": 0.252, "score_loc": 0.104 }, - { - "component": "Harney", - "componentData": "Data Complete", - "componentID": 25865521, - "name": "Harney3", - "rank_data": "Not Displayed", - "rank_data_loc": "Not Displayed", - "rank_loc": "Not Displayed", - "score_data": 0.585, - "score_data_loc": 0.424, - "score_loc": 0.263 - }, { "component": "Uly", "componentData": "Data Complete", @@ -1033,8 +1021,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "1", - "score_data": 0.535, - "score_data_loc": 0.415, + "score_data": 0.547, + "score_data_loc": 0.421, "score_loc": 0.295 }, { @@ -1049,6 +1037,18 @@ "score_data_loc": 0.407, "score_loc": 0.263 }, + { + "component": "Harney", + "componentData": "Data Complete", + "componentID": 25865521, + "name": "Harney3", + "rank_data": "Not Displayed", + "rank_data_loc": "Not Displayed", + "rank_loc": "Not Displayed", + "score_data": 0.534, + "score_data_loc": 0.399, + "score_loc": 0.263 + }, { "component": "Coly", "componentData": "Data Complete", @@ -1057,8 +1057,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "3", - "score_data": 0.597, - "score_data_loc": 0.406, + "score_data": 0.578, + "score_data_loc": 0.396, "score_loc": 0.215 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[39.26009312,-85.50621214].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[39.26009312,-85.50621214].json index 23f7f2a..dc94449 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[39.26009312,-85.50621214].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[39.26009312,-85.50621214].json @@ -998,107 +998,95 @@ "componentData": "Data Complete", "componentID": 25325308, "name": "Fincastle", - "rank_data": "1", + "rank_data": "2", "rank_data_loc": "1", "rank_loc": "2", "score_data": 0.671, "score_data_loc": 0.363, "score_loc": 0.054 }, - { - "component": "Crosby", - "componentData": "Data Complete", - "componentID": 25325341, - "name": "Crosby", - "rank_data": "3", - "rank_data_loc": "2", - "rank_loc": "4", - "score_data": 0.662, - "score_data_loc": 0.355, - "score_loc": 0.048 - }, { "component": "Bonnell", "componentData": "Data Complete", "componentID": 25325330, "name": "Bonnell", - "rank_data": "2", - "rank_data_loc": "3", + "rank_data": "1", + "rank_data_loc": "2", "rank_loc": "9", - "score_data": 0.671, - "score_data_loc": 0.336, + "score_data": 0.689, + "score_data_loc": 0.345, "score_loc": 0.002 }, { - "component": "Holton", + "component": "Hennepin", "componentData": "Data Complete", - "componentID": 25325333, - "name": "Holton", - "rank_data": "4", - "rank_data_loc": "4", - "rank_loc": "8", - "score_data": 0.643, - "score_data_loc": 0.323, - "score_loc": 0.002 + "componentID": 25325355, + "name": "Hennepin3", + "rank_data": "6", + "rank_data_loc": "3", + "rank_loc": "Not Displayed", + "score_data": 0.624, + "score_data_loc": 0.343, + "score_loc": 0.061 }, { - "component": "Grayford", + "component": "Crosby", "componentData": "Data Complete", - "componentID": 25325329, - "name": "Grayford", - "rank_data": "5", - "rank_data_loc": "5", - "rank_loc": "11", + "componentID": 25325353, + "name": "Crosby3", + "rank_data": "4", + "rank_data_loc": "4", + "rank_loc": "Not Displayed", "score_data": 0.637, - "score_data_loc": 0.319, - "score_loc": 0.001 + "score_data_loc": 0.342, + "score_loc": 0.048 }, { "component": "Brookston", "componentData": "Data Complete", "componentID": 25325342, "name": "Brookston", - "rank_data": "6", - "rank_data_loc": "6", + "rank_data": "3", + "rank_data_loc": "5", "rank_loc": "6", - "score_data": 0.61, - "score_data_loc": 0.31, + "score_data": 0.652, + "score_data_loc": 0.33, "score_loc": 0.009 }, + { + "component": "Holton", + "componentData": "Data Complete", + "componentID": 25325333, + "name": "Holton", + "rank_data": "5", + "rank_data_loc": "6", + "rank_loc": "8", + "score_data": 0.637, + "score_data_loc": 0.32, + "score_loc": 0.002 + }, { "component": "Cyclone", "componentData": "Data Complete", "componentID": 25325348, "name": "Cyclone", - "rank_data": "7", + "rank_data": "8", "rank_data_loc": "7", "rank_loc": "5", - "score_data": 0.596, - "score_data_loc": 0.303, + "score_data": 0.618, + "score_data_loc": 0.314, "score_loc": 0.011 }, - { - "component": "Hennepin", - "componentData": "Data Complete", - "componentID": 25325355, - "name": "Hennepin3", - "rank_data": "10", - "rank_data_loc": "8", - "rank_loc": "Not Displayed", - "score_data": 0.54, - "score_data_loc": 0.301, - "score_loc": 0.061 - }, { "component": "Cincinnati", "componentData": "Data Complete", "componentID": 25325332, "name": "Cincinnati", - "rank_data": "8", - "rank_data_loc": "9", + "rank_data": "7", + "rank_data_loc": "8", "rank_loc": "10", - "score_data": 0.591, - "score_data_loc": 0.296, + "score_data": 0.62, + "score_data_loc": 0.311, "score_loc": 0.001 }, { @@ -1107,10 +1095,10 @@ "componentID": 25325406, "name": "Celina", "rank_data": "9", - "rank_data_loc": "10", + "rank_data_loc": "9", "rank_loc": "7", - "score_data": 0.555, - "score_data_loc": 0.279, + "score_data": 0.611, + "score_data_loc": 0.307, "score_loc": 0.003 }, { @@ -1119,12 +1107,24 @@ "componentID": 25325331, "name": "Hickory", "rank_data": "12", - "rank_data_loc": "11", + "rank_data_loc": "10", "rank_loc": "3", "score_data": 0.499, "score_data_loc": 0.275, "score_loc": 0.051 }, + { + "component": "Grayford", + "componentData": "Data Complete", + "componentID": 25325329, + "name": "Grayford", + "rank_data": "10", + "rank_data_loc": "11", + "rank_loc": "11", + "score_data": 0.524, + "score_data_loc": 0.263, + "score_loc": 0.001 + }, { "component": "Jessietown", "componentData": "Data Complete", @@ -1133,22 +1133,34 @@ "rank_data": "11", "rank_data_loc": "12", "rank_loc": "12", - "score_data": 0.522, - "score_data_loc": 0.262, + "score_data": 0.508, + "score_data_loc": 0.255, "score_loc": 0.001 }, { "component": "Crosby", "componentData": "Data Complete", - "componentID": 25325353, - "name": "Crosby3", + "componentID": 25325341, + "name": "Crosby", "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", - "rank_loc": "Not Displayed", - "score_data": 0.654, - "score_data_loc": 0.351, + "rank_loc": "4", + "score_data": 0.606, + "score_data_loc": 0.327, "score_loc": 0.048 }, + { + "component": "Hennepin", + "componentData": "Data Complete", + "componentID": 25325347, + "name": "Hennepin", + "rank_data": "Not Displayed", + "rank_data_loc": "Not Displayed", + "rank_loc": "1", + "score_data": 0.54, + "score_data_loc": 0.301, + "score_loc": 0.061 + }, { "component": "Crosby", "componentData": "Data Complete", @@ -1157,8 +1169,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.609, - "score_data_loc": 0.328, + "score_data": 0.552, + "score_data_loc": 0.3, "score_loc": 0.048 }, { @@ -1173,18 +1185,6 @@ "score_data_loc": 0.299, "score_loc": 0.061 }, - { - "component": "Hennepin", - "componentData": "Data Complete", - "componentID": 25325347, - "name": "Hennepin", - "rank_data": "Not Displayed", - "rank_data_loc": "Not Displayed", - "rank_loc": "1", - "score_data": 0.525, - "score_data_loc": 0.293, - "score_loc": 0.061 - }, { "component": "Cyclone", "componentData": "Data Complete", @@ -1193,8 +1193,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.555, - "score_data_loc": 0.283, + "score_data": 0.552, + "score_data_loc": 0.281, "score_loc": 0.011 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.494912,-123.064531].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.494912,-123.064531].json index ca74501..2eef609 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.494912,-123.064531].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.494912,-123.064531].json @@ -606,37 +606,37 @@ "componentData": "Data Complete", "componentID": 25443415, "name": "Ruch", - "rank_data": "3", + "rank_data": "5", "rank_data_loc": "1", "rank_loc": "1", - "score_data": 0.671, - "score_data_loc": 0.469, + "score_data": 0.618, + "score_data_loc": 0.442, "score_loc": 0.267 }, - { - "component": "Josephine", - "componentData": NaN, - "componentID": 25443848, - "name": "Josephine2", - "rank_data": "4", - "rank_data_loc": "2", - "rank_loc": "Not Displayed", - "score_data": 0.664, - "score_data_loc": 0.395, - "score_loc": 0.125 - }, { "component": "Abegg", "componentData": "Data Complete", "componentID": 25443548, "name": "Abegg", "rank_data": "2", - "rank_data_loc": "3", + "rank_data_loc": "2", "rank_loc": "3", - "score_data": 0.709, - "score_data_loc": 0.389, + "score_data": 0.76, + "score_data_loc": 0.414, "score_loc": 0.069 }, + { + "component": "Josephine", + "componentData": NaN, + "componentID": 25443848, + "name": "Josephine2", + "rank_data": "4", + "rank_data_loc": "3", + "rank_loc": "Not Displayed", + "score_data": 0.646, + "score_data_loc": 0.386, + "score_loc": 0.125 + }, { "component": "Gregory", "componentData": "Data Complete", @@ -645,8 +645,8 @@ "rank_data": "1", "rank_data_loc": "4", "rank_loc": "9", - "score_data": 0.746, - "score_data_loc": 0.376, + "score_data": 0.761, + "score_data_loc": 0.384, "score_loc": 0.007 }, { @@ -654,35 +654,23 @@ "componentData": NaN, "componentID": 25443850, "name": "Pollard", - "rank_data": "5", + "rank_data": "3", "rank_data_loc": "5", "rank_loc": "7", - "score_data": 0.626, - "score_data_loc": 0.326, + "score_data": 0.7, + "score_data_loc": 0.363, "score_loc": 0.025 }, - { - "component": "Offenbacher", - "componentData": "Data Complete", - "componentID": 25443597, - "name": "Offenbacher", - "rank_data": "6", - "rank_data_loc": "6", - "rank_loc": "8", - "score_data": 0.473, - "score_data_loc": 0.245, - "score_loc": 0.017 - }, { "component": "Caris", "componentData": NaN, "componentID": 25443598, "name": "Caris", - "rank_data": "7", - "rank_data_loc": "7", + "rank_data": "6", + "rank_data_loc": "6", "rank_loc": "4", - "score_data": 0.406, - "score_data_loc": 0.228, + "score_data": 0.546, + "score_data_loc": 0.298, "score_loc": 0.05 }, { @@ -690,11 +678,11 @@ "componentData": "Data Complete", "componentID": 25443287, "name": "Beekman", - "rank_data": "9", - "rank_data_loc": "8", + "rank_data": "8", + "rank_data_loc": "7", "rank_loc": "5", - "score_data": 0.388, - "score_data_loc": 0.217, + "score_data": 0.508, + "score_data_loc": 0.277, "score_loc": 0.046 }, { @@ -702,13 +690,25 @@ "componentData": "Data Complete", "componentID": 25443286, "name": "Colestine", - "rank_data": "8", - "rank_data_loc": "9", + "rank_data": "7", + "rank_data_loc": "8", "rank_loc": "6", - "score_data": 0.392, - "score_data_loc": 0.209, + "score_data": 0.52, + "score_data_loc": 0.273, "score_loc": 0.025 }, + { + "component": "Offenbacher", + "componentData": "Data Complete", + "componentID": 25443597, + "name": "Offenbacher", + "rank_data": "9", + "rank_data_loc": "9", + "rank_loc": "8", + "score_data": 0.367, + "score_data_loc": 0.192, + "score_loc": 0.017 + }, { "component": "Josephine", "componentData": "Data Complete", @@ -717,8 +717,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "2", - "score_data": 0.531, - "score_data_loc": 0.328, + "score_data": 0.599, + "score_data_loc": 0.362, "score_loc": 0.125 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.63413723,-94.31005777].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.63413723,-94.31005777].json index b700429..a80482a 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.63413723,-94.31005777].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[42.63413723,-94.31005777].json @@ -441,56 +441,56 @@ "rank_data": "1", "rank_data_loc": "1", "rank_loc": 3, - "score_data": 0.517, - "score_data_loc": 0.358, + "score_data": 0.48, + "score_data_loc": 0.34, "score_loc": 0.2 }, + { + "component": "Webster", + "componentData": "Data Complete", + "componentID": 25584954, + "name": "Webster", + "rank_data": "4", + "rank_data_loc": "2", + "rank_loc": 2, + "score_data": 0.336, + "score_data_loc": 0.333, + "score_loc": 0.331 + }, { "component": "Nicollet", "componentData": "Data Complete", "componentID": 25584956, "name": "Nicollet", "rank_data": "7", - "rank_data_loc": "2", + "rank_data_loc": "3", "rank_loc": 1, "score_data": 0.22, "score_data_loc": 0.311, "score_loc": 0.402 }, { - "component": "Webster", - "componentData": "Data Complete", - "componentID": 25584954, - "name": "Webster", - "rank_data": "6", - "rank_data_loc": "3", - "rank_loc": 2, - "score_data": 0.268, - "score_data_loc": 0.299, - "score_loc": 0.331 - }, - { - "component": "Glencoe", + "component": "Storden", "componentData": "Data Complete", - "componentID": 25584960, - "name": "Glencoe", + "componentID": 25584602, + "name": "Storden", "rank_data": "2", "rank_data_loc": "4", - "rank_loc": 5, - "score_data": 0.451, - "score_data_loc": 0.231, + "rank_loc": 6, + "score_data": 0.458, + "score_data_loc": 0.234, "score_loc": 0.011 }, { - "component": "Storden", + "component": "Glencoe", "componentData": "Data Complete", - "componentID": 25584602, - "name": "Storden", + "componentID": 25584960, + "name": "Glencoe", "rank_data": "3", "rank_data_loc": "5", - "rank_loc": 6, - "score_data": 0.433, - "score_data_loc": 0.222, + "rank_loc": 5, + "score_data": 0.402, + "score_data_loc": 0.206, "score_loc": 0.011 }, { @@ -498,11 +498,11 @@ "componentData": "Missing Data", "componentID": 25584952, "name": "Okoboji", - "rank_data": "4", + "rank_data": "5", "rank_data_loc": "6", "rank_loc": 4, - "score_data": 0.303, - "score_data_loc": 0.171, + "score_data": 0.315, + "score_data_loc": 0.177, "score_loc": 0.04 }, { @@ -510,11 +510,11 @@ "componentData": "Data Complete", "componentID": 25584962, "name": "Canisteo", - "rank_data": "5", + "rank_data": "6", "rank_data_loc": "7", "rank_loc": 7, - "score_data": 0.287, - "score_data_loc": 0.147, + "score_data": 0.268, + "score_data_loc": 0.137, "score_loc": 0.007 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[43.06450312,-119.4596489].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[43.06450312,-119.4596489].json index 44cb9c2..41f1635 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[43.06450312,-119.4596489].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[43.06450312,-119.4596489].json @@ -545,40 +545,40 @@ "model": "v2" }, "soilRank": [ - { - "component": "Lonely", - "componentData": "Data Complete", - "componentID": 25442849, - "name": "Lonely", - "rank_data": "6", - "rank_data_loc": "1", - "rank_loc": 1, - "score_data": 0.611, - "score_data_loc": 0.439, - "score_loc": 0.267 - }, { "component": "Robson", "componentData": "Data Complete", "componentID": 25442848, "name": "Robson", - "rank_data": "2", - "rank_data_loc": "2", + "rank_data": "1", + "rank_data_loc": "1", "rank_loc": 2, - "score_data": 0.69, - "score_data_loc": 0.439, + "score_data": 0.761, + "score_data_loc": 0.474, "score_loc": 0.187 }, + { + "component": "Lonely", + "componentData": "Data Complete", + "componentID": 25442849, + "name": "Lonely", + "rank_data": "5", + "rank_data_loc": "2", + "rank_loc": 1, + "score_data": 0.635, + "score_data_loc": 0.451, + "score_loc": 0.267 + }, { "component": "Actem", "componentData": "Data Complete", "componentID": 25443068, "name": "Actem", - "rank_data": "1", + "rank_data": "2", "rank_data_loc": "3", "rank_loc": 5, - "score_data": 0.695, - "score_data_loc": 0.404, + "score_data": 0.711, + "score_data_loc": 0.412, "score_loc": 0.113 }, { @@ -586,35 +586,23 @@ "componentData": "Data Complete", "componentID": 25442616, "name": "Rinconflat", - "rank_data": "5", + "rank_data": "7", "rank_data_loc": "4", "rank_loc": 4, - "score_data": 0.621, - "score_data_loc": 0.367, + "score_data": 0.626, + "score_data_loc": 0.37, "score_loc": 0.113 }, - { - "component": "Brace", - "componentData": "Data Complete", - "componentID": 25442637, - "name": "Brace", - "rank_data": "3", - "rank_data_loc": "5", - "rank_loc": 8, - "score_data": 0.656, - "score_data_loc": 0.351, - "score_loc": 0.046 - }, { "component": "Raz", "componentData": "Data Complete", "componentID": 25442636, "name": "Raz", "rank_data": "4", - "rank_data_loc": "6", + "rank_data_loc": "5", "rank_loc": 7, - "score_data": 0.629, - "score_data_loc": 0.348, + "score_data": 0.645, + "score_data_loc": 0.356, "score_loc": 0.067 }, { @@ -623,22 +611,34 @@ "componentID": 25442630, "name": "Reallis", "rank_data": "8", - "rank_data_loc": "7", + "rank_data_loc": "6", "rank_loc": 3, - "score_data": 0.579, - "score_data_loc": 0.346, + "score_data": 0.595, + "score_data_loc": 0.354, "score_loc": 0.113 }, + { + "component": "Brace", + "componentData": "Data Complete", + "componentID": 25442637, + "name": "Brace", + "rank_data": "3", + "rank_data_loc": "7", + "rank_loc": 8, + "score_data": 0.645, + "score_data_loc": 0.346, + "score_loc": 0.046 + }, { "component": "Ausmus", "componentData": "Data Complete", "componentID": 25442629, "name": "Ausmus", - "rank_data": "7", + "rank_data": "6", "rank_data_loc": "8", "rank_loc": 9, - "score_data": 0.602, - "score_data_loc": 0.304, + "score_data": 0.63, + "score_data_loc": 0.318, "score_loc": 0.005 }, { @@ -649,8 +649,8 @@ "rank_data": "9", "rank_data_loc": "9", "rank_loc": 6, - "score_data": 0.437, - "score_data_loc": 0.262, + "score_data": 0.469, + "score_data_loc": 0.278, "score_loc": 0.087 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[45.88932423,-121.0347381].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[45.88932423,-121.0347381].json index 849b4ce..5518e60 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[45.88932423,-121.0347381].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[45.88932423,-121.0347381].json @@ -144,8 +144,8 @@ "rank_data": "1", "rank_data_loc": "1", "rank_loc": "Not Displayed", - "score_data": 0.37, - "score_data_loc": 0.649, + "score_data": 0.481, + "score_data_loc": 0.705, "score_loc": 0.929 }, { @@ -156,8 +156,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "1", - "score_data": 0.312, - "score_data_loc": 0.62, + "score_data": 0.399, + "score_data_loc": 0.664, "score_loc": 0.929 } ] diff --git a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[47.213922,-69.28246582].json b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[47.213922,-69.28246582].json index a6b3b34..c56254b 100644 --- a/soil_id/tests/us/__snapshots__/test_us/test_soil_location[47.213922,-69.28246582].json +++ b/soil_id/tests/us/__snapshots__/test_us/test_soil_location[47.213922,-69.28246582].json @@ -786,7 +786,7 @@ "componentData": "Data Complete", "componentID": 25209556, "name": "Knob lock2", - "rank_data": "3", + "rank_data": "2", "rank_data_loc": "2", "rank_loc": "Not Displayed", "score_data": 0.667, @@ -798,7 +798,7 @@ "componentData": "Data Complete", "componentID": 25209557, "name": "Elliottsville", - "rank_data": "4", + "rank_data": "3", "rank_data_loc": "3", "rank_loc": "3", "score_data": 0.648, @@ -810,11 +810,11 @@ "componentData": "Data Complete", "componentID": 25209554, "name": "Monson", - "rank_data": "5", + "rank_data": "4", "rank_data_loc": "4", "rank_loc": "4", - "score_data": 0.616, - "score_data_loc": 0.354, + "score_data": 0.648, + "score_data_loc": 0.37, "score_loc": 0.092 }, { @@ -822,11 +822,11 @@ "componentData": "Data Complete", "componentID": 25209553, "name": "Abram", - "rank_data": "2", + "rank_data": "5", "rank_data_loc": "5", "rank_loc": "5", - "score_data": 0.671, - "score_data_loc": 0.353, + "score_data": 0.631, + "score_data_loc": 0.333, "score_loc": 0.036 }, { @@ -838,7 +838,7 @@ "rank_data_loc": "6", "rank_loc": "7", "score_data": 0.615, - "score_data_loc": 0.322, + "score_data_loc": 0.323, "score_loc": 0.03 }, { @@ -849,8 +849,8 @@ "rank_data": "7", "rank_data_loc": "7", "rank_loc": "6", - "score_data": 0.484, - "score_data_loc": 0.257, + "score_data": 0.497, + "score_data_loc": 0.264, "score_loc": 0.03 }, { @@ -861,7 +861,7 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.715, + "score_data": 0.714, "score_data_loc": 0.496, "score_loc": 0.277 }, @@ -873,8 +873,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "1", - "score_data": 0.558, - "score_data_loc": 0.441, + "score_data": 0.578, + "score_data_loc": 0.451, "score_loc": 0.325 }, { @@ -885,8 +885,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.539, - "score_data_loc": 0.432, + "score_data": 0.55, + "score_data_loc": 0.437, "score_loc": 0.325 }, { @@ -897,8 +897,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.546, - "score_data_loc": 0.379, + "score_data": 0.587, + "score_data_loc": 0.399, "score_loc": 0.211 }, { @@ -909,8 +909,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.598, - "score_data_loc": 0.345, + "score_data": 0.587, + "score_data_loc": 0.339, "score_loc": 0.092 }, { @@ -921,8 +921,8 @@ "rank_data": "Not Displayed", "rank_data_loc": "Not Displayed", "rank_loc": "Not Displayed", - "score_data": 0.469, - "score_data_loc": 0.252, + "score_data": 0.404, + "score_data_loc": 0.22, "score_loc": 0.036 } ] diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py index cdca332..c0c8071 100644 --- a/soil_id/us_soil.py +++ b/soil_id/us_soil.py @@ -217,7 +217,7 @@ def list_soils(lon, lat): # Add distance column from mucompdata_pd using cokey link muhorzdata_pd = pd.merge( muhorzdata_pd, - mucompdata_pd[["cokey", "distance", "distance_score"]], + mucompdata_pd[["cokey", "distance", "distance_score"]], on="cokey", how="left", ) @@ -234,9 +234,7 @@ def list_soils(lon, lat): mucompdata_pd = mucompdata_pd[mucompdata_pd["cokey"].isin(comp_key)] # Sort mucompdata_pd based on 'cond_prob' and 'distance' - mucompdata_pd.sort_values( - ["cond_prob", "distance", "compname"], ascending=[False, True, True], inplace=True - ) + mucompdata_pd.sort_values(["cond_prob", "distance", "compname"], ascending=[False, True, True], inplace=True) mucompdata_pd.reset_index(drop=True, inplace=True) # Duplicate the 'compname' column for grouping purposes @@ -260,13 +258,16 @@ def list_soils(lon, lat): component_names = mucompdata_pd["compname"].tolist() name_counts = collections.Counter(component_names) + # Track which indices have been processed for each name + processed_indices = {} + for name, count in sorted(name_counts.items()): # Sort for deterministic order if count > 1: # If a component name is duplicated # Find all indices for this name indices = [i for i, comp_name in enumerate(component_names) if comp_name == name] # Sort indices for deterministic order indices.sort() - + # Add suffixes to all occurrences except the first for i, idx in enumerate(indices): if i > 0: # Skip the first occurrence (keep original name) @@ -664,9 +665,7 @@ def list_soils(lon, lat): if mucompdata_pd["compkind"].isin(OSD_compkind).any(): # Group data by cokey - OSDhorzdata_group_cokey = [ - group for _, group in OSDhorzdata_pd.groupby("cokey", sort=False) - ] + OSDhorzdata_group_cokey = [group for _, group in OSDhorzdata_pd.groupby("cokey")] # Initialize empty lists lab_lyrs = [] @@ -975,39 +974,39 @@ def list_soils(lon, lat): munsell_lyrs.append(dict(zip(hzb_lyrs[index].keys(), munsell_dummy))) # Series URL Generation - # Initialize lists to store series URLs - SDE_URL = [] - SEE_URL = [] + # Create a mapping of cokey to URLs for safe lookup + cokey_to_urls = {} - # Group data by 'cokey' - OSDhorzdata_group_cokey = [g for _, g in OSDhorzdata_pd.groupby("cokey", sort=False)] + # Group data by 'cokey' - use sort=True for deterministic ordering + OSDhorzdata_group_cokey = [g for _, g in OSDhorzdata_pd.groupby("cokey", sort=True)] for index, group in enumerate(OSDhorzdata_group_cokey): + cokey = group["cokey"].iloc[0] # Get the cokey for this group + # Check if compkind is not in OSD_compkind or if series contains any null values if ( - mucompdata_pd.loc[index]["compkind"] not in OSD_compkind + mucompdata_pd[mucompdata_pd["cokey"] == cokey]["compkind"].iloc[0] not in OSD_compkind or group["series"].isnull().any() ): - SDE_URL.append("") - SEE_URL.append("") + cokey_to_urls[cokey] = {"sde": "", "see": ""} else: - # Extract compname, convert to lowercase, remove trailing numbers, and replace - # spaces with underscores + # Extract compname, convert to lowercase, remove trailing numbers, and replace spaces with underscores comp = group["compname"].iloc[0].lower() comp = re.sub(r"\d+$", "", comp) comp = comp.replace(" ", "_") - # Create and append URLs - SDE_URL.append(f"https://casoilresource.lawr.ucdavis.edu/sde/?series={comp}") - SEE_URL.append(f"https://casoilresource.lawr.ucdavis.edu/see/#{comp}") + # Create URLs + cokey_to_urls[cokey] = { + "sde": f"https://casoilresource.lawr.ucdavis.edu/sde/?series={comp}", + "see": f"https://casoilresource.lawr.ucdavis.edu/see/#{comp}" + } else: # Initialize lists to store data layers and URLs lab_lyrs = [] lab_intpl_lyrs = [] munsell_lyrs = [] - SDE_URL = [] - SEE_URL = [] + cokey_to_urls = {} # Iterate over each entry in mucompdata_pd for i in range(len(mucompdata_pd)): @@ -1028,17 +1027,16 @@ def list_soils(lon, lat): lab_lyrs.append(dict(zip(keys, lab_dummy))) munsell_lyrs.append(dict(zip(keys, munsell_dummy))) - # Append empty URLs - SDE_URL.append("") - SEE_URL.append("") + # Create empty URLs for each component + cokey = mucompdata_pd.iloc[i]["cokey"] + cokey_to_urls[cokey] = {"sde": "", "see": ""} else: # Initialize lists to store data layers and URLs lab_lyrs = [] lab_intpl_lyrs = [] munsell_lyrs = [] - SDE_URL = [] - SEE_URL = [] + cokey_to_urls = {} # Iterate over each entry in mucompdata_pd for i in range(len(mucompdata_pd)): @@ -1059,9 +1057,9 @@ def list_soils(lon, lat): lab_lyrs.append(dict(zip(keys, lab_dummy))) munsell_lyrs.append(dict(zip(keys, munsell_dummy))) - # Append empty URLs - SDE_URL.append("") - SEE_URL.append("") + # Create empty URLs for each component + cokey = mucompdata_pd.iloc[i]["cokey"] + cokey_to_urls[cokey] = {"sde": "", "see": ""} # Subset datasets to exclude pedons without any depth information cokeys_with_depth = mucompdata_pd[mucompdata_pd["comp_max_bottom"] > 0].cokey.unique() @@ -1422,7 +1420,7 @@ def list_soils(lon, lat): # Replace NaN values with an empty string mucompdata_cond_prob = mucompdata_cond_prob.fillna("") - # Generate the Site list + # Generate the Site list using cokey-based URL lookup Site = [ { "siteData": { @@ -1443,8 +1441,8 @@ def list_soils(lon, lat): "irrcapscl": row["irrcapscl"], "irrcapunit": row["irrcapunit"], "taxsubgrp": row["taxsubgrp"], - "sdeURL": SDE_URL[idx], - "seeURL": SEE_URL[idx], + "sdeURL": cokey_to_urls.get(row["cokey"], {"sde": ""})["sde"], + "seeURL": cokey_to_urls.get(row["cokey"], {"see": ""})["see"], }, "siteDescription": row["brief_narrative"], } @@ -1576,7 +1574,7 @@ def rank_soils( # Check if list_output_data is a string (error message) instead of expected object if isinstance(list_output_data, str): return {"error": f"Cannot rank soils: {list_output_data}"} - + # --------------------------------------------------------------------------------------- # ------ Load in user data --------# # Initialize the DataFrame from the input data @@ -2063,14 +2061,12 @@ def rank_soils( # Concatenate the sorted and ranked groups D_final = pd.concat(soilIDList_data).reset_index(drop=True) - + # Merge with the Rank_Filter data D_final = pd.merge(D_final, Rank_Filter, on="compname", how="left") # Sort dataframe to correctly assign Rank_Data - D_final = D_final.sort_values( - by=["soilID_rank_data", "Score_Data", "compname"], ascending=[False, False, True] - ) + D_final = D_final.sort_values(by=["soilID_rank_data", "Score_Data", "compname"], ascending=[False, False, True]) # Assigning rank based on the soilID rank and rank status rank_id = 1 @@ -2164,9 +2160,7 @@ def rank_soils( soilIDList_out = [] for _, group in D_final_loc.groupby("compname_grp", sort=True): - group = group.sort_values( - ["Score_Data_Loc", "compname"], ascending=[False, True] - ).reset_index(drop=True) + group = group.sort_values(["Score_Data_Loc", "compname"], ascending=[False, True]).reset_index(drop=True) group["soilID_rank_final"] = [True if idx == 0 else False for idx in range(len(group))] soilIDList_out.append(group) From 80cb2d266669d7e1c958c8afac2fa6d2337097d9 Mon Sep 17 00:00:00 2001 From: jjmaynard Date: Tue, 30 Sep 2025 16:43:45 -0700 Subject: [PATCH 2/2] fix: refactor sorting and formatting for readability Improves code readability by reformatting long sort_values and other function calls across the file. No functional changes were made; only code style and formatting were updated for clarity and consistency. --- soil_id/us_soil.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py index c0c8071..99408af 100644 --- a/soil_id/us_soil.py +++ b/soil_id/us_soil.py @@ -217,7 +217,7 @@ def list_soils(lon, lat): # Add distance column from mucompdata_pd using cokey link muhorzdata_pd = pd.merge( muhorzdata_pd, - mucompdata_pd[["cokey", "distance", "distance_score"]], + mucompdata_pd[["cokey", "distance", "distance_score"]], on="cokey", how="left", ) @@ -234,7 +234,9 @@ def list_soils(lon, lat): mucompdata_pd = mucompdata_pd[mucompdata_pd["cokey"].isin(comp_key)] # Sort mucompdata_pd based on 'cond_prob' and 'distance' - mucompdata_pd.sort_values(["cond_prob", "distance", "compname"], ascending=[False, True, True], inplace=True) + mucompdata_pd.sort_values( + ["cond_prob", "distance", "compname"], ascending=[False, True, True], inplace=True + ) mucompdata_pd.reset_index(drop=True, inplace=True) # Duplicate the 'compname' column for grouping purposes @@ -258,16 +260,13 @@ def list_soils(lon, lat): component_names = mucompdata_pd["compname"].tolist() name_counts = collections.Counter(component_names) - # Track which indices have been processed for each name - processed_indices = {} - for name, count in sorted(name_counts.items()): # Sort for deterministic order if count > 1: # If a component name is duplicated # Find all indices for this name indices = [i for i, comp_name in enumerate(component_names) if comp_name == name] # Sort indices for deterministic order indices.sort() - + # Add suffixes to all occurrences except the first for i, idx in enumerate(indices): if i > 0: # Skip the first occurrence (keep original name) @@ -982,10 +981,11 @@ def list_soils(lon, lat): for index, group in enumerate(OSDhorzdata_group_cokey): cokey = group["cokey"].iloc[0] # Get the cokey for this group - + # Check if compkind is not in OSD_compkind or if series contains any null values if ( - mucompdata_pd[mucompdata_pd["cokey"] == cokey]["compkind"].iloc[0] not in OSD_compkind + mucompdata_pd[mucompdata_pd["cokey"] == cokey]["compkind"].iloc[0] + not in OSD_compkind or group["series"].isnull().any() ): cokey_to_urls[cokey] = {"sde": "", "see": ""} @@ -998,7 +998,7 @@ def list_soils(lon, lat): # Create URLs cokey_to_urls[cokey] = { "sde": f"https://casoilresource.lawr.ucdavis.edu/sde/?series={comp}", - "see": f"https://casoilresource.lawr.ucdavis.edu/see/#{comp}" + "see": f"https://casoilresource.lawr.ucdavis.edu/see/#{comp}", } else: @@ -1574,7 +1574,7 @@ def rank_soils( # Check if list_output_data is a string (error message) instead of expected object if isinstance(list_output_data, str): return {"error": f"Cannot rank soils: {list_output_data}"} - + # --------------------------------------------------------------------------------------- # ------ Load in user data --------# # Initialize the DataFrame from the input data @@ -2061,12 +2061,14 @@ def rank_soils( # Concatenate the sorted and ranked groups D_final = pd.concat(soilIDList_data).reset_index(drop=True) - + # Merge with the Rank_Filter data D_final = pd.merge(D_final, Rank_Filter, on="compname", how="left") # Sort dataframe to correctly assign Rank_Data - D_final = D_final.sort_values(by=["soilID_rank_data", "Score_Data", "compname"], ascending=[False, False, True]) + D_final = D_final.sort_values( + by=["soilID_rank_data", "Score_Data", "compname"], ascending=[False, False, True] + ) # Assigning rank based on the soilID rank and rank status rank_id = 1 @@ -2160,7 +2162,9 @@ def rank_soils( soilIDList_out = [] for _, group in D_final_loc.groupby("compname_grp", sort=True): - group = group.sort_values(["Score_Data_Loc", "compname"], ascending=[False, True]).reset_index(drop=True) + group = group.sort_values( + ["Score_Data_Loc", "compname"], ascending=[False, True] + ).reset_index(drop=True) group["soilID_rank_final"] = [True if idx == 0 else False for idx in range(len(group))] soilIDList_out.append(group)