From ecf4202298100f73afb7881906c508ea60a4ed71 Mon Sep 17 00:00:00 2001 From: Jonathan Maynard Date: Tue, 20 Jan 2026 15:27:12 -0800 Subject: [PATCH 1/2] Update CEC infill method and rename column - Fixed CEC duplicate column issue in process_horizon_data by filling cec7_r in-place before rename - CEC values now return as numeric instead of strings --- soil_id/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/soil_id/utils.py b/soil_id/utils.py index 54a95a1..f5fb997 100644 --- a/soil_id/utils.py +++ b/soil_id/utils.py @@ -1611,10 +1611,9 @@ def process_horizon_data(muhorzdata_pd): ["cokey", "chkey", "hzname"] ].astype(str) - # Infill missing CEC values with ECEC - muhorzdata_pd["CEC"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"]) + # Infill missing CEC values with ECEC and rename columns for better clarity + muhorzdata_pd["cec7_r"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"]) - # Rename columns for better clarity muhorzdata_pd = muhorzdata_pd.rename( columns={"cec7_r": "CEC", "ph1to1h2o_r": "pH", "ec_r": "EC"} ) From bfe86d46ca5163b57bf95331ca16a308e0a56ccc Mon Sep 17 00:00:00 2001 From: Jonathan Maynard Date: Tue, 20 Jan 2026 15:34:41 -0800 Subject: [PATCH 2/2] fix/OSD data aggregation - Fixed array length inconsistencies in OSD infilling by using stored horizon depths from hzb_lyrs instead of muhorzdata_pd_group - Applied fix to sand/clay/texture aggregation (lines 919-942) - Applied fix to LAB/Munsell aggregation (lines 775-806) --- soil_id/us_soil.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py index 51a268c..ec99359 100644 --- a/soil_id/us_soil.py +++ b/soil_id/us_soil.py @@ -766,20 +766,29 @@ def list_soils(lon, lat): lab_lyrs.append(["", "", ""]) munsell_lyrs.append("") else: + # Use the horizon bottom depths that match the stored horizon structure + # Convert string values to float, filtering out empty 
strings + horizon_bottom_depths = [ + float(v) if v != "" else np.nan + for v in hzb_lyrs[index].values() + ] + # Filter out NaN values + horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + # Aggregate data for each color dimension l_d = aggregate_data( data=lab_intpl["l"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") a_d = aggregate_data( data=lab_intpl["a"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") b_d = aggregate_data( data=lab_intpl["b"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") @@ -909,16 +918,25 @@ def list_soils(lon, lat): getProfile_cokey[index] = getProfile_mod + # Use the horizon bottom depths that match the stored horizon structure + # Convert string values to float, filtering out empty strings + horizon_bottom_depths = [ + float(v) if v != "" else np.nan + for v in hzb_lyrs[index].values() + ] + # Filter out NaN values + horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + # Aggregate sand data snd_d_osd = aggregate_data( data=OSD_sand_intpl.iloc[:, 0], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Aggregate clay data cly_d_osd = aggregate_data( data=OSD_clay_intpl.iloc[:, 1], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Calculate texture data based on sand and clay data @@ -931,7 +949,7 @@ def list_soils(lon, lat): # Aggregate rock fragment data rf_d_osd = aggregate_data( data=OSD_rfv_intpl.c_cfpct_intpl, - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Fill NaN values @@ -952,9 +970,9 @@ def list_soils(lon, lat): # Update cec, ph, and ec layers if they contain only a single # empty string for lyr in [cec_lyrs, ph_lyrs, 
ec_lyrs]: - if len(lyr[index]) == 1 and lyr[index][0] == "": + if len(lyr[index]) == 1 and list(lyr[index].values())[0] == "": empty_values = [""] * len(hzb_lyrs[index]) - lyr[index] = dict(zip(hzb_lyrs[index], empty_values)) + lyr[index] = dict(zip(hzb_lyrs[index].keys(), empty_values)) else: OSDhorzdata_group_cokey[index] = group_sorted