From ecf4202298100f73afb7881906c508ea60a4ed71 Mon Sep 17 00:00:00 2001
From: Jonathan Maynard <jonathan.maynard@usda.gov>
Date: Tue, 20 Jan 2026 15:27:12 -0800
Subject: [PATCH 1/2] Update CEC infill method and rename column

- Fixed CEC duplicate column issue in process_horizon_data by filling cec7_r in-place before rename
- CEC values are now returned as numeric values instead of strings
---
 soil_id/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/soil_id/utils.py b/soil_id/utils.py
index 54a95a1..f5fb997 100644
--- a/soil_id/utils.py
+++ b/soil_id/utils.py
@@ -1611,10 +1611,9 @@ def process_horizon_data(muhorzdata_pd):
         ["cokey", "chkey", "hzname"]
     ].astype(str)
 
-    # Infill missing CEC values with ECEC
-    muhorzdata_pd["CEC"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"])
+    # Infill missing CEC values with ECEC and rename columns for better clarity
+    muhorzdata_pd["cec7_r"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"])
 
-    # Rename columns for better clarity
     muhorzdata_pd = muhorzdata_pd.rename(
         columns={"cec7_r": "CEC", "ph1to1h2o_r": "pH", "ec_r": "EC"}
     )

From bfe86d46ca5163b57bf95331ca16a308e0a56ccc Mon Sep 17 00:00:00 2001
From: Jonathan Maynard <jonathan.maynard@usda.gov>
Date: Tue, 20 Jan 2026 15:34:41 -0800
Subject: [PATCH 2/2] Fix OSD data aggregation

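- Fixed array length inconsistencies in OSD infilling by using the stored horizon bottom depths from hzb_lyrs instead of muhorzdata_pd_group
- Applied fix to sand/clay/texture aggregation (lines 919-942)
- Applied fix to LAB/Munsell aggregation (lines 775-806)
- Applied the same fix to rock fragment aggregation
- Fixed the empty cec/ph/ec layer check to read the first value of the layer dict instead of indexing it with 0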
---
 soil_id/us_soil.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py
index 51a268c..ec99359 100644
--- a/soil_id/us_soil.py
+++ b/soil_id/us_soil.py
@@ -766,20 +766,29 @@ def list_soils(lon, lat):
                         lab_lyrs.append(["", "", ""])
                         munsell_lyrs.append("")
                     else:
+                        # Use the horizon bottom depths that match the stored horizon structure
+                        # Convert string values to float, filtering out empty strings
+                        horizon_bottom_depths = [
+                            float(v) if v != "" else np.nan 
+                            for v in hzb_lyrs[index].values()
+                        ]
+                        # Filter out NaN values
+                        horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)]
+
                         # Aggregate data for each color dimension
                         l_d = aggregate_data(
                             data=lab_intpl["l"],
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                             sd=2,
                         ).fillna("")
                         a_d = aggregate_data(
                             data=lab_intpl["a"],
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                             sd=2,
                         ).fillna("")
                         b_d = aggregate_data(
                             data=lab_intpl["b"],
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                             sd=2,
                         ).fillna("")
 
@@ -909,16 +918,25 @@ def list_soils(lon, lat):
 
                         getProfile_cokey[index] = getProfile_mod
 
+                        # Use the horizon bottom depths that match the stored horizon structure
+                        # Convert string values to float, filtering out empty strings
+                        horizon_bottom_depths = [
+                            float(v) if v != "" else np.nan 
+                            for v in hzb_lyrs[index].values()
+                        ]
+                        # Filter out NaN values
+                        horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)]
+
                         # Aggregate sand data
                         snd_d_osd = aggregate_data(
                             data=OSD_sand_intpl.iloc[:, 0],
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                         )
 
                         # Aggregate clay data
                         cly_d_osd = aggregate_data(
                             data=OSD_clay_intpl.iloc[:, 1],
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                         )
 
                         # Calculate texture data based on sand and clay data
@@ -931,7 +949,7 @@ def list_soils(lon, lat):
                         # Aggregate rock fragment data
                         rf_d_osd = aggregate_data(
                             data=OSD_rfv_intpl.c_cfpct_intpl,
-                            bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(),
+                            bottom_depths=horizon_bottom_depths,
                         )
 
                         # Fill NaN values
@@ -952,9 +970,9 @@ def list_soils(lon, lat):
                         # Update cec, ph, and ec layers if they contain only a single
                         # empty string
                         for lyr in [cec_lyrs, ph_lyrs, ec_lyrs]:
-                            if len(lyr[index]) == 1 and lyr[index][0] == "":
+                            if len(lyr[index]) == 1 and list(lyr[index].values())[0] == "":
                                 empty_values = [""] * len(hzb_lyrs[index])
-                                lyr[index] = dict(zip(hzb_lyrs[index], empty_values))
+                                lyr[index] = dict(zip(hzb_lyrs[index].keys(), empty_values))
 
                 else:
                     OSDhorzdata_group_cokey[index] = group_sorted