diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 1166fef..a4ff161 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -13,3 +13,4 @@ chrono = { version = "0.4.39", features = ["serde"] } codelist-rs = { path = "../../rust/codelist-rs" } codelist-validator-rs = { path = "../../rust/codelist-validator-rs" } indexmap = "2.9.0" +regex = "1.11.1" diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index b99441b..6a2041e 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -20,6 +20,7 @@ use pyo3::{ types::{PyDict, PySet}, PyErr, PyResult, }; +use regex::Regex; /// Python wrapper for the CodeList struct /// @@ -452,8 +453,21 @@ impl PyCodeList { } /// Validate the codelist based on the codelist type - fn validate_codes(&self) -> PyResult<()> { - self.inner.validate_codes().map_err(|e| PyValueError::new_err(e.to_string())) + #[pyo3(signature = (custom_regex=None))] + fn validate_codes(&self, custom_regex: Option<String>) -> PyResult<()> { + match custom_regex { + Some(regex_str) => { + let regex = Regex::new(&regex_str) + .map_err(|e| PyValueError::new_err(format!("Invalid regex: {}", e)))?; + self.inner + .validate_codes(Some(&regex)) + .map_err(|e| PyValueError::new_err(e.to_string()))? + } + None => { + self.inner.validate_codes(None).map_err(|e| PyValueError::new_err(e.to_string()))? 
+ } + } + Ok(()) } /// Add a comment to the codelist diff --git a/bindings/python/tests/test_codelist.py b/bindings/python/tests/test_codelist.py index b75987d..429d8cc 100644 --- a/bindings/python/tests/test_codelist.py +++ b/bindings/python/tests/test_codelist.py @@ -296,7 +296,18 @@ def test_x_code_not_added_snomed(self): codelist.add_x_codes() self.assertEqual(str(e.exception), "SNOMED cannot be transformed by having X added to the end of it") - + def test_validate_codes_with_custom_regex(self): + codelist = CodeList( + name="Test Codelist", + codelist_type="ICD10", + source="Manually created", + ) + codelist.add_entry("A11", "Valid test code") + codelist.validate_codes("^A") + codelist.add_entry("B112", "Invalid test code") + with self.assertRaises(ValueError) as e: + codelist.validate_codes("^A") + self.assertIn("Code B112 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern", str(e.exception)) diff --git a/rust/codelist-validator-rs/src/ctv3_validator.rs b/rust/codelist-validator-rs/src/ctv3_validator.rs index 2d3d943..0f9c80c 100644 --- a/rust/codelist-validator-rs/src/ctv3_validator.rs +++ b/rust/codelist-validator-rs/src/ctv3_validator.rs @@ -88,7 +88,7 @@ mod tests { fn test_validate_codelist_with_valid_code() -> Result<(), CodeListError> { let mut codelist = create_test_codelist()?; let _ = codelist.add_entry("A9f..".to_string(), None, None); - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -154,7 +154,7 @@ mod tests { codelist.add_entry("Me...".to_string(), None, None)?; codelist.add_entry("99999".to_string(), None, None)?; codelist.add_entry(".....".to_string(), None, None)?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -169,7 +169,7 @@ mod tests { codelist.add_entry("*unf.".to_string(), None, None)?; codelist.add_entry("..j..".to_string(), None, None)?; codelist.add_entry("9874ji".to_string(), 
None, None)?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); @@ -199,7 +199,7 @@ mod tests { codelist.add_entry("A00.l".to_string(), None, None)?; codelist.add_entry("Q90.....".to_string(), None, None)?; codelist.add_entry("A..9k".to_string(), None, None)?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); diff --git a/rust/codelist-validator-rs/src/icd10_validator.rs b/rust/codelist-validator-rs/src/icd10_validator.rs index 3a9a087..f7d6b95 100644 --- a/rust/codelist-validator-rs/src/icd10_validator.rs +++ b/rust/codelist-validator-rs/src/icd10_validator.rs @@ -84,7 +84,7 @@ mod tests { fn test_validate_code_with_valid_code() -> Result<(), CodeListError> { let mut codelist = create_test_codelist()?; let _ = codelist.add_entry("A100".to_string(), Some("test".to_string()), None); - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -197,7 +197,7 @@ mod tests { codelist.add_entry("M10".to_string(), Some("Gout".to_string()), None)?; codelist.add_entry("Q90".to_string(), Some("Down Syndrome".to_string()), None)?; codelist.add_entry("K02".to_string(), Some("Dental caries".to_string()), None)?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -216,7 +216,7 @@ mod tests { codelist.add_entry("A00.A".to_string(), Some("Gout".to_string()), None)?; codelist.add_entry("A00X12".to_string(), Some("Down Syndrome".to_string()), None)?; codelist.add_entry("A00.4AA".to_string(), Some("Dental caries".to_string()), None)?; - let error = codelist.validate_codes().unwrap_err(); + let error = 
codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); @@ -250,7 +250,7 @@ mod tests { codelist.add_entry("A00.A".to_string(), Some("Gout".to_string()), None)?; codelist.add_entry("Q90".to_string(), Some("Down Syndrome".to_string()), None)?; codelist.add_entry("A00.4AA".to_string(), Some("Dental caries".to_string()), None)?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); diff --git a/rust/codelist-validator-rs/src/opcs_validator.rs b/rust/codelist-validator-rs/src/opcs_validator.rs index 20d4ec9..71989dc 100644 --- a/rust/codelist-validator-rs/src/opcs_validator.rs +++ b/rust/codelist-validator-rs/src/opcs_validator.rs @@ -89,7 +89,7 @@ mod tests { #[test] fn test_validate_code_with_valid_code() -> Result<(), CodeListError> { let codelist = create_test_codelist()?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -228,7 +228,7 @@ mod tests { Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()), None, )?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -271,7 +271,7 @@ mod tests { Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()), None, )?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. 
Details:")); @@ -329,7 +329,7 @@ mod tests { Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()), None, )?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); diff --git a/rust/codelist-validator-rs/src/snomed_validator.rs b/rust/codelist-validator-rs/src/snomed_validator.rs index 0ba0aa8..c16cccf 100644 --- a/rust/codelist-validator-rs/src/snomed_validator.rs +++ b/rust/codelist-validator-rs/src/snomed_validator.rs @@ -71,7 +71,7 @@ mod tests { #[test] fn test_validate_code_with_valid_code_default_max_min_lengths() -> Result<(), CodeListError> { let codelist = create_test_codelist()?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } #[test] @@ -179,7 +179,7 @@ mod tests { None, )?; - assert!(codelist.validate_codes().is_ok()); + assert!(codelist.validate_codes(None).is_ok()); Ok(()) } @@ -223,7 +223,7 @@ mod tests { None, )?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); @@ -283,7 +283,7 @@ mod tests { None, )?; - let error = codelist.validate_codes().unwrap_err(); + let error = codelist.validate_codes(None).unwrap_err(); let error_string = error.to_string(); assert!(error_string.contains("Some codes in the list are invalid. Details:")); diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 25a5eb2..8b6f8ac 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -1,4 +1,6 @@ //! 
Generic trait for validating a codelist +use regex::Regex; + use codelist_rs::{codelist::CodeList, types::CodeListType}; use crate::{ @@ -17,16 +19,183 @@ pub(crate) trait CodeValidator { /// Validator trait pub trait Validator { - fn validate_codes(&self) -> Result<(), CodeListValidatorError>; + fn validate_codes(&self, custom_regex: Option<&Regex>) -> Result<(), CodeListValidatorError>; } impl Validator for CodeList { - fn validate_codes(&self) -> Result<(), CodeListValidatorError> { - match self.codelist_type { - CodeListType::ICD10 => IcdValidator(self).validate_all_code(), - CodeListType::SNOMED => SnomedValidator(self).validate_all_code(), - CodeListType::OPCS => OpcsValidator(self).validate_all_code(), - CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(), + fn validate_codes(&self, custom_regex: Option<&Regex>) -> Result<(), CodeListValidatorError> { + match custom_regex { + Some(regex) => custom_validate_all_code(self, regex), + None => match self.codelist_type { + CodeListType::ICD10 => IcdValidator(self).validate_all_code(), + CodeListType::SNOMED => SnomedValidator(self).validate_all_code(), + CodeListType::OPCS => OpcsValidator(self).validate_all_code(), + CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(), + }, + } + } +} + +/// Validate all codes in the codelist using a custom regex +/// +/// # Arguments +/// * `codelist` - The codelist to validate +/// * `regex` - The regex to use to validate the codes +/// +/// # Returns +/// * `Result<(), CodeListValidatorError>` - Ok(()) if all codes match the custom regex pattern, Err(CodeListValidatorError) otherwise +fn custom_validate_all_code(codelist: &CodeList, re: &Regex) -> Result<(), CodeListValidatorError> { + let mut reasons = Vec::new(); + for (code, _) in codelist.entries.iter() { + if !re.is_match(code) { + reasons.push( + CodeListValidatorError::invalid_code_contents( + code, + "Code does not match the custom regex pattern", + codelist.codelist_type.to_string(), + ) + 
.to_string(), + ); } } + + if reasons.is_empty() { + Ok(()) + } else { + Err(CodeListValidatorError::invalid_codelist(reasons)) + } +} + +#[cfg(test)] +mod tests { + use codelist_rs::{ + codelist::CodeList, codelist_options::CodeListOptions, errors::CodeListError, + metadata::Metadata, types::CodeListType, + }; + + use super::*; + use crate::validator::Validator; + use regex::Regex; + use std::sync::LazyLock; + + static TEST_REGEX: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r"^B\d{2}$").expect("Failed to compile test regex")); + + // Helper function to create a test codelist with two entries, default options + // and test metadata + fn create_test_codelist() -> CodeList { + let options = CodeListOptions { + allow_duplicates: true, + code_column_name: "test_code".to_string(), + term_column_name: "test_term".to_string(), + code_field_name: "test_code".to_string(), + term_field_name: "test_term".to_string(), + }; + + CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + Metadata::default(), + Some(options), + ) + } + + #[test] + fn test_validate_code_with_valid_code() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("B11".to_string(), None, None)?; + assert!(codelist.validate_codes(Some(&TEST_REGEX)).is_ok()); + Ok(()) + } + + #[test] + fn test_validate_code_with_invalid_code_length_too_long() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("B1123".to_string(), None, None)?; + let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err().to_string(); + assert!(error.contains("Some codes in the list are invalid. Details:")); + assert!(error.contains("Code B1123 contents is invalid for type ICD10. 
Reason: Code does not match the custom regex pattern")); + Ok(()) + } + + #[test] + fn test_validate_invalid_code_invalid_contents() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("!!!".to_string(), None, None)?; + let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err().to_string(); + assert!(error.contains("Some codes in the list are invalid. Details:")); + assert!(error.contains("Code !!! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + Ok(()) + } + + #[test] + fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("B01".to_string(), None, None)?; + codelist.add_entry("B02".to_string(), None, None)?; + codelist.add_entry("B03".to_string(), None, None)?; + codelist.add_entry("B04".to_string(), None, None)?; + codelist.add_entry("B05".to_string(), None, None)?; + codelist.add_entry("B06".to_string(), None, None)?; + codelist.add_entry("B07".to_string(), None, None)?; + codelist.add_entry("B08".to_string(), None, None)?; + assert!(codelist.validate_codes(Some(&TEST_REGEX)).is_ok()); + Ok(()) + } + + #[test] + fn test_validate_codelist_with_all_invalid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("B011!".to_string(), None, None)?; + codelist.add_entry("B0A".to_string(), None, None)?; + codelist.add_entry("A03".to_string(), None, None)?; + codelist.add_entry("BK4".to_string(), None, None)?; + codelist.add_entry("B".to_string(), None, None)?; + codelist.add_entry("BA907".to_string(), None, None)?; + codelist.add_entry("B07x".to_string(), None, None)?; + codelist.add_entry("b08".to_string(), None, None)?; + let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err(); + let error_string = error.to_string(); + + assert!(error_string.contains("Some codes in the list are invalid. 
Details:")); + assert!(error_string.contains("Code B011! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code B0A contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code A03 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code BK4 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code B contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code BA907 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code B07x contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code b08 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + + assert!( + matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 8) + ); + Ok(()) + } + + #[test] + fn test_validate_codelist_with_mixed_invalid_and_valid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist(); + codelist.add_entry("B01".to_string(), None, None)?; + codelist.add_entry("B02".to_string(), None, None)?; + codelist.add_entry("B03".to_string(), None, None)?; + codelist.add_entry("B04".to_string(), None, None)?; + codelist.add_entry("B".to_string(), None, None)?; + codelist.add_entry("BA907".to_string(), None, None)?; + codelist.add_entry("B07x".to_string(), None, None)?; + codelist.add_entry("b08".to_string(), None, None)?; + let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err(); + let error_string = error.to_string(); + + assert!(error_string.contains("Some codes in the list are invalid. 
Details:")); + assert!(error_string.contains("Code B contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code BA907 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code B07x contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code b08 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + + assert!( + matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 4) + ); + Ok(()) + } }