From 15ad24fd74349e3b75864021f49082f4af7f503b Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Sat, 14 Jun 2025 20:47:32 +0100 Subject: [PATCH 1/5] creating custom validator --- rust/codelist-rs/Cargo.toml | 1 + rust/codelist-rs/src/codelist_options.rs | 5 +- .../src/custom_validator.rs | 78 +++++++++++++++++++ rust/codelist-validator-rs/src/errors.rs | 3 + rust/codelist-validator-rs/src/lib.rs | 1 + 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 rust/codelist-validator-rs/src/custom_validator.rs diff --git a/rust/codelist-rs/Cargo.toml b/rust/codelist-rs/Cargo.toml index 25b3b51..facc7f1 100644 --- a/rust/codelist-rs/Cargo.toml +++ b/rust/codelist-rs/Cargo.toml @@ -14,3 +14,4 @@ thiserror = "2.0.9" thiserror-ext = "0.3.0" chrono = { version = "0.4", features = ["serde"] } indexmap = { version = "2.9.0", features = ["serde"] } +regex = = "1.11.1" diff --git a/rust/codelist-rs/src/codelist_options.rs b/rust/codelist-rs/src/codelist_options.rs index 9e9a9f7..0332205 100644 --- a/rust/codelist-rs/src/codelist_options.rs +++ b/rust/codelist-rs/src/codelist_options.rs @@ -1,6 +1,7 @@ //! This file contains the codelist options for the codelist use serde::{Deserialize, Serialize}; +use regex::Regex; /// Struct to represent a codelist options /// @@ -14,7 +15,8 @@ pub struct CodeListOptions { pub code_column_name: String, // for csv files pub term_column_name: String, // for csv files pub code_field_name: String, // for json files - pub term_field_name: String, // for json files + pub term_field_name: String, + pub custom_regex: Option, // for custom validation } impl Default for CodeListOptions { @@ -29,6 +31,7 @@ impl Default for CodeListOptions { term_column_name: "term".to_string(), code_field_name: "code".to_string(), term_field_name: "term".to_string(), + custom_regex: None, } } } diff --git a/rust/codelist-validator-rs/src/custom_validator.rs b/rust/codelist-validator-rs/src/custom_validator.rs new file mode 100644 index 0000000..f137947 --- /dev/null +++ b/rust/codelist-validator-rs/src/custom_validator.rs @@ -0,0 +1,78 @@ +//! Trait for custom validation of a codelist + +use codelist_rs::{codelist::CodeList, types::CodeListType}; + +use crate::{ + errors::CodeListValidatorError, icd10_validator::IcdValidator, opcs_validator::OpcsValidator, + snomed_validator::SnomedValidator, +}; + +pub(crate) trait CustomCodeValidator { + fn custom_validate_code(&self, code: &str) -> Result<(), CodeListValidatorError>; // for 1 code + fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError>; +} + +impl CustomCodeValidator for CodeList { + fn custom_validate_code(&self, code: &str) -> Result<(), CodeListValidatorError> { + Ok(()) + //TODO + } + + fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError> { + Ok(()) + //TODO + } +} + +/// Custom validator trait +pub trait CustomValidator { + fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError>; +} + +impl CustomValidator for CodeList { + fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError> { + match self.codelist_options.custom_regex { + Some(_) => { + self.custom_validate_all_code() + } + None => { + Err(CodeListValidatorError::CustomValidationFailed(format!("No regex provided for custom validation"))) + } + } + } +} + + + +// fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError> { + // let mut reasons = Vec::new(); + + // for (code, _) in self.entries.iter() { + // if let Err(err) = self.custom_validate_code(code) { + // reasons.push(err.to_string()); + // } + // } + + // if reasons.is_empty() { + // Ok(()) + // } else { + // Err(CodeListValidatorError::invalid_codelist(reasons)) + // } // for (code, _) in self.entries.iter() { + // // if re.is_match(code) { + // // return Ok(()); + // // } + // // } + // // Err(CodeListValidatorError::CustomValidationFailed(format!("No codes matched the regex: {}", re.to_string()))) + // } + + // fn custom_validate_code(&self, code: &str, re: Regex) -> Result<(), CodeListValidatorError> { + // if !re.is_match(code) { + // return Err(CodeListValidatorError::invalid_code_contents( + // code, + // "Code does not match the inputted regex", + // self.codelist_type.to_string(), + // )); + // } + + // Ok(()) + // } \ No newline at end of file diff --git a/rust/codelist-validator-rs/src/errors.rs b/rust/codelist-validator-rs/src/errors.rs index 23b9861..a6e072d 100644 --- a/rust/codelist-validator-rs/src/errors.rs +++ b/rust/codelist-validator-rs/src/errors.rs @@ -27,4 +27,7 @@ pub enum CodeListValidatorError { #[error("CodeType {code_type} is not supported")] UnsupportedCodeType { code_type: String }, + + #[error("Custom validation failed. Reason: {reason}")] + CustomValidationFailed { reason: String }, } diff --git a/rust/codelist-validator-rs/src/lib.rs b/rust/codelist-validator-rs/src/lib.rs index 24f7bc0..97a5f7a 100644 --- a/rust/codelist-validator-rs/src/lib.rs +++ b/rust/codelist-validator-rs/src/lib.rs @@ -5,3 +5,4 @@ pub mod icd10_validator; pub mod opcs_validator; pub mod snomed_validator; pub mod validator; +pub mod custom_validator; From 4a7452c1a4ab3b1e348b04d8d6008aa3449542e4 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Sun, 15 Jun 2025 17:44:36 +0100 Subject: [PATCH 2/5] added custom_validator, fixed other files and added tests --- bindings/python/src/codelist.rs | 2 +- rust/codelist-rs/Cargo.toml | 2 +- rust/codelist-rs/examples/sandbox.rs | 2 +- rust/codelist-rs/src/codelist.rs | 78 ++++-- rust/codelist-rs/src/codelist_factory.rs | 12 +- rust/codelist-rs/src/codelist_options.rs | 4 +- rust/codelist-rs/src/errors.rs | 5 + .../src/custom_validator.rs | 227 +++++++++++++----- rust/codelist-validator-rs/src/errors.rs | 6 +- .../src/icd10_validator.rs | 2 +- .../src/opcs_validator.rs | 2 +- .../src/snomed_validator.rs | 2 +- rust/codelist-validator-rs/src/validator.rs | 19 +- 13 files changed, 256 insertions(+), 107 deletions(-) diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index b99441b..a1f3b9d 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -80,7 +80,7 @@ impl PyCodeList { let codelist_options = CodeListOptions::default(); // Create codelist - let codelist = CodeList::new(name, codelist_type, metadata, Some(codelist_options)); + let codelist = CodeList::new(name, codelist_type, metadata, Some(codelist_options))?; Ok(PyCodeList { inner: codelist }) } diff --git a/rust/codelist-rs/Cargo.toml b/rust/codelist-rs/Cargo.toml index facc7f1..dbe631c 100644 --- a/rust/codelist-rs/Cargo.toml +++ b/rust/codelist-rs/Cargo.toml @@ -14,4 +14,4 @@ thiserror = "2.0.9" thiserror-ext = "0.3.0" chrono = { version = "0.4", features = ["serde"] } indexmap = { version = "2.9.0", features = ["serde"] } -regex = = "1.11.1" +regex = "1.11.1" diff --git a/rust/codelist-rs/examples/sandbox.rs b/rust/codelist-rs/examples/sandbox.rs index 869769d..7d3f7cd 100644 --- a/rust/codelist-rs/examples/sandbox.rs +++ b/rust/codelist-rs/examples/sandbox.rs @@ -6,7 +6,7 @@ use codelist_rs::{ fn main() -> Result<(), CodeListError> { // Create a new codelist let mut codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, Metadata::default(), None); + CodeList::new("test_codelist".to_string(), CodeListType::ICD10, Metadata::default(), None)?; codelist.add_entry("A00".to_string(), Some("Cholera".to_string()), None)?; codelist.add_entry( diff --git a/rust/codelist-rs/src/codelist.rs b/rust/codelist-rs/src/codelist.rs index ab32311..5423c9b 100644 --- a/rust/codelist-rs/src/codelist.rs +++ b/rust/codelist-rs/src/codelist.rs @@ -8,6 +8,7 @@ use std::{ }; use csv::Writer; +use regex::Regex; use serde::{Deserialize, Serialize}; // Internal imports @@ -52,15 +53,22 @@ impl CodeList { codelist_type: CodeListType, metadata: Metadata, options: Option, - ) -> Self { - CodeList { + ) -> Result { + let options = options.unwrap_or_default(); + + // Validate custom regex if it has been set + if let Some(regex_str) = &options.custom_regex { + Regex::new(regex_str)?; + } + + Ok(CodeList { name, entries: BTreeMap::new(), codelist_type, metadata, logs: Vec::new(), - codelist_options: options.unwrap_or_default(), - } + codelist_options: options, + }) } /// Get the type of the codelist @@ -520,7 +528,7 @@ mod tests { CodeListType::ICD10, Metadata::default(), None, - ); + )?; codelist.add_entry("R65.2".to_string(), None, None)?; codelist.add_entry( @@ -581,6 +589,7 @@ mod tests { term_column_name: "test_term".to_string(), code_field_name: "test_code".to_string(), term_field_name: "test_term".to_string(), + custom_regex: None, }; let codelist = CodeList::new( @@ -588,13 +597,14 @@ mod tests { CodeListType::ICD10, Default::default(), Some(codelist_options), - ); + )?; assert!(codelist.codelist_options.allow_duplicates); assert_eq!(codelist.codelist_options.code_field_name, "test_code".to_string()); assert_eq!(codelist.codelist_options.term_field_name, "test_term".to_string()); assert_eq!(codelist.codelist_options.code_column_name, "test_code".to_string()); assert_eq!(codelist.codelist_options.term_column_name, "test_term".to_string()); + assert_eq!(codelist.codelist_options.custom_regex, None); assert_eq!(codelist.metadata().provenance.source, Source::ManuallyCreated); let time_difference = get_time_difference(codelist.metadata().provenance.created_date); @@ -632,7 +642,7 @@ mod tests { CodeListType::ICD10, Default::default(), None, - ); + )?; codelist.add_entry("R65.2".to_string(), Some("Severe sepsis".to_string()), None)?; codelist.add_entry("R65.2".to_string(), Some("Severe sepsis".to_string()), None)?; @@ -807,12 +817,14 @@ mod tests { } #[test] - fn test_get_metadata() { + fn test_get_metadata() -> Result<(), CodeListError> { let metadata: Metadata = Default::default(); let codelist = - CodeList::new("test".to_string(), CodeListType::ICD10, metadata.clone(), None); + CodeList::new("test".to_string(), CodeListType::ICD10, metadata.clone(), None)?; assert_eq!(codelist.metadata(), &metadata); + + Ok(()) } #[test] @@ -989,7 +1001,7 @@ mod tests { CodeListType::SNOMED, Default::default(), None, - ); + )?; // A SNOMED list is not truncatable assert!(snomed_codelist.truncate_to_3_digits(TermManagement::First).is_err()); @@ -1001,8 +1013,12 @@ mod tests { fn test_truncate_to_3_digits_icd10_4_digits_drop_term() -> Result<(), CodeListError> { let metadata: Metadata = Default::default(); - let mut expected_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata.clone(), None); + let mut expected_codelist = CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + metadata.clone(), + None, + )?; expected_codelist.add_entry( "B01".to_string(), None, @@ -1010,7 +1026,7 @@ mod tests { )?; let mut observed_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None); + CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None)?; observed_codelist.add_entry( "B012".to_string(), @@ -1029,8 +1045,12 @@ mod tests { fn test_truncate_to_3_digits_3_and_4_digits_drop_term() -> Result<(), CodeListError> { let metadata: Metadata = Default::default(); - let mut expected_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata.clone(), None); + let mut expected_codelist = CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + metadata.clone(), + None, + )?; expected_codelist.add_entry( "B01".to_string(), Some("Varicella [chickenpox]".to_string()), @@ -1038,7 +1058,7 @@ mod tests { )?; let mut observed_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None); + CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None)?; observed_codelist.add_entry( "B01".to_string(), @@ -1062,8 +1082,12 @@ mod tests { fn test_truncate_to_3_digits_icd10_4_digits_first() -> Result<(), CodeListError> { let metadata: Metadata = Default::default(); - let mut expected_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata.clone(), None); + let mut expected_codelist = CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + metadata.clone(), + None, + )?; expected_codelist.add_entry( "B01".to_string(), Some("Varicella pneumonia".to_string()), @@ -1071,7 +1095,7 @@ mod tests { )?; let mut observed_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None); + CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None)?; observed_codelist.add_entry( "B012".to_string(), @@ -1090,8 +1114,12 @@ mod tests { fn test_truncate_to_3_digits_3_and_4_digits_first() -> Result<(), CodeListError> { let metadata: Metadata = Default::default(); - let mut expected_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata.clone(), None); + let mut expected_codelist = CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + metadata.clone(), + None, + )?; expected_codelist.add_entry( "B01".to_string(), Some("Varicella [chickenpox]".to_string()), @@ -1099,7 +1127,7 @@ mod tests { )?; let mut observed_codelist = - CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None); + CodeList::new("test_codelist".to_string(), CodeListType::ICD10, metadata, None)?; observed_codelist.add_entry( "B01".to_string(), @@ -1126,7 +1154,7 @@ mod tests { CodeListType::ICD10, Default::default(), None, - ); + )?; expected_codelist.add_entry("A10".to_string(), Some("Cholera".to_string()), None)?; expected_codelist.add_entry( @@ -1161,7 +1189,7 @@ mod tests { CodeListType::ICD10, Default::default(), None, - ); + )?; expected_codelist.add_entry("A10".to_string(), Some("Cholera".to_string()), None)?; expected_codelist.add_entry( @@ -1196,7 +1224,7 @@ mod tests { CodeListType::SNOMED, Default::default(), None, - ); + )?; // A SNOMED list is not x_appendable assert!(snomed_codelist.add_x_codes().is_err()); diff --git a/rust/codelist-rs/src/codelist_factory.rs b/rust/codelist-rs/src/codelist_factory.rs index 270cab9..3a7a3b0 100644 --- a/rust/codelist-rs/src/codelist_factory.rs +++ b/rust/codelist-rs/src/codelist_factory.rs @@ -68,7 +68,7 @@ impl CodeListFactory { self.codelist_type.clone(), self.metadata.clone(), Some(self.codelist_options.clone()), - ); + )?; let code_column: Vec<_> = headers .iter() @@ -179,7 +179,7 @@ impl CodeListFactory { self.codelist_type.clone(), self.metadata.clone(), Some(self.codelist_options.clone()), - ); + )?; let file = std::fs::File::open(file_path)?; let reader = std::io::BufReader::new(file); @@ -446,13 +446,13 @@ mod tests { CodeListType::ICD10, factory.metadata.clone(), Some(factory.codelist_options.clone()), - ); + )?; let codelist2 = CodeList::new( "test_codelist2".to_string(), CodeListType::ICD10, factory.metadata.clone(), Some(factory.codelist_options.clone()), - ); + )?; let codelists = factory.load_codelists(Some(vec![codelist1, codelist2]), None)?; Ok(codelists) } @@ -1016,13 +1016,13 @@ B02,Test Disease 2,Description 2"; CodeListType::ICD10, factory.metadata.clone(), Some(factory.codelist_options.clone()), - ); + )?; let codelist2 = CodeList::new( "test_codelist2".to_string(), CodeListType::ICD10, factory.metadata.clone(), Some(factory.codelist_options.clone()), - ); + )?; let codelists = factory.load_codelists(Some(vec![codelist1, codelist2]), None)?; // load codelists from folder diff --git a/rust/codelist-rs/src/codelist_options.rs b/rust/codelist-rs/src/codelist_options.rs index 0332205..f21b3cc 100644 --- a/rust/codelist-rs/src/codelist_options.rs +++ b/rust/codelist-rs/src/codelist_options.rs @@ -1,7 +1,6 @@ //! This file contains the codelist options for the codelist use serde::{Deserialize, Serialize}; -use regex::Regex; /// Struct to represent a codelist options /// @@ -16,7 +15,7 @@ pub struct CodeListOptions { pub term_column_name: String, // for csv files pub code_field_name: String, // for json files pub term_field_name: String, - pub custom_regex: Option, // for custom validation + pub custom_regex: Option, // for custom validation } impl Default for CodeListOptions { @@ -48,5 +47,6 @@ mod tests { assert_eq!(options.term_column_name, "term"); assert_eq!(options.code_field_name, "code"); assert_eq!(options.term_field_name, "term"); + assert_eq!(options.custom_regex, None); } } diff --git a/rust/codelist-rs/src/errors.rs b/rust/codelist-rs/src/errors.rs index 25c4f40..945ba15 100644 --- a/rust/codelist-rs/src/errors.rs +++ b/rust/codelist-rs/src/errors.rs @@ -3,6 +3,7 @@ use std::io; use csv; +use regex; use serde_json; /// Enum to represent the different types of errors that can occur in the @@ -138,4 +139,8 @@ pub enum CodeListError { #[error("{codelist_type} cannot be transformed by having X added to the end of it")] CodeListNotXAddable { codelist_type: String }, + + #[error("Invalid custom regex pattern: {0}")] + #[construct(skip)] + InvalidRegexPattern(#[from] regex::Error), } diff --git a/rust/codelist-validator-rs/src/custom_validator.rs b/rust/codelist-validator-rs/src/custom_validator.rs index f137947..c4695dc 100644 --- a/rust/codelist-validator-rs/src/custom_validator.rs +++ b/rust/codelist-validator-rs/src/custom_validator.rs @@ -1,78 +1,181 @@ -//! Trait for custom validation of a codelist +use regex::Regex; +use codelist_rs::codelist::CodeList; +use crate::errors::CodeListValidatorError; +use crate::validator::CustomCodeValidator; -use codelist_rs::{codelist::CodeList, types::CodeListType}; +impl CustomCodeValidator for CodeList { + fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError> { + let mut reasons = Vec::new(); + + let re_str = self.codelist_options.custom_regex.as_ref() + .ok_or_else(|| CodeListValidatorError::custom_validation_failed("Custom regex pattern not provided"))?; -use crate::{ - errors::CodeListValidatorError, icd10_validator::IcdValidator, opcs_validator::OpcsValidator, - snomed_validator::SnomedValidator, -}; + // regex is compiled once when this method is called and used for validation of all codes + let re = Regex::new(re_str)?; -pub(crate) trait CustomCodeValidator { - fn custom_validate_code(&self, code: &str) -> Result<(), CodeListValidatorError>; // for 1 code - fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError>; + for (code, _) in self.entries.iter() { + if !re.is_match(code) { + reasons.push( + CodeListValidatorError::invalid_code_contents( + code, + "Code does not match the custom regex pattern", + self.codelist_type.to_string(), + ) + .to_string(), + ); + } + } + + if reasons.is_empty() { + Ok(()) + } else { + Err(CodeListValidatorError::invalid_codelist(reasons)) + } + } } -impl CustomCodeValidator for CodeList { - fn custom_validate_code(&self, code: &str) -> Result<(), CodeListValidatorError> { +#[cfg(test)] +mod tests { + use codelist_rs::{ + codelist::CodeList, + errors::CodeListError, + metadata::{ + categorisation_and_usage::CategorisationAndUsage, metadata_source::Source, + provenance::Provenance, purpose_and_context::PurposeAndContext, + validation_and_review::ValidationAndReview, Metadata, + }, + types::CodeListType, + codelist_options::CodeListOptions, + }; + + use super::*; + use crate::validator::Validator; + + // Helper function to create test metadata + fn create_test_metadata() -> Metadata { + Metadata::new( + Provenance::new(Source::ManuallyCreated, None), + CategorisationAndUsage::new(None, None, None), + PurposeAndContext::new(None, None, None), + ValidationAndReview::new(None, None, None, None, None), + ) + } + + // Helper function to create a test codelist with two entries, default options + // and test metadata + fn create_test_codelist() -> Result { + let options = CodeListOptions { + allow_duplicates: true, + code_column_name: "test_code".to_string(), + term_column_name: "test_term".to_string(), + code_field_name: "test_code".to_string(), + term_field_name: "test_term".to_string(), + custom_regex: Some("^[A-Z]{3}[!]{1}$".to_string()), + }; + + let codelist = CodeList::new( + "test_codelist".to_string(), + CodeListType::ICD10, + create_test_metadata(), + Some(options), + )?; + Ok(codelist) + } + + #[test] + fn test_validate_code_with_valid_code() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("ABC!".to_string(), None, None)?; + assert!(codelist.validate_codes().is_ok()); Ok(()) - //TODO } - fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError> { + #[test] + fn test_validate_code_with_invalid_code_length_too_long() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("ABC!L".to_string(), None, None)?; + let error = codelist.validate_codes().unwrap_err().to_string(); + assert_eq!(error, "Some codes in the list are invalid. Details: Code ABC!L contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern"); Ok(()) - //TODO } -} -/// Custom validator trait -pub trait CustomValidator { - fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError>; -} + #[test] + fn test_validate_invalid_code_invalid_contents() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("100!".to_string(), None, None)?; + let error = codelist.validate_codes().unwrap_err().to_string(); + assert_eq!(error, "Some codes in the list are invalid. Details: Code 100! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern"); + Ok(()) + } -impl CustomValidator for CodeList { - fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError> { - match self.codelist_options.custom_regex { - Some(_) => { - self.custom_validate_all_code() - } - None => { - Err(CodeListValidatorError::CustomValidationFailed(format!("No regex provided for custom validation"))) - } - } + #[test] + fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("ABC!".to_string(), None, None)?; + codelist.add_entry("CDE!".to_string(), None, None)?; + codelist.add_entry("ZOE!".to_string(), None, None)?; + codelist.add_entry("FQH!".to_string(), None, None)?; + codelist.add_entry("OKL!".to_string(), None, None)?; + codelist.add_entry("MYP!".to_string(), None, None)?; + codelist.add_entry("QNM!".to_string(), None, None)?; + codelist.add_entry("KPL!".to_string(), None, None)?; + assert!(codelist.validate_codes().is_ok()); + Ok(()) + } + + #[test] + fn test_validate_codelist_with_all_invalid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("A0P!".to_string(), Some("Gonorrhoea".to_string()), None)?; + codelist.add_entry("AaB!".to_string(), Some("Pertussis".to_string()), None)?; + codelist.add_entry("AAAAAAA!".to_string(), Some("Measles".to_string()), None)?; + codelist.add_entry("AB".to_string(), Some("Lymphatic filariasis".to_string()), None)?; + codelist.add_entry("abcd".to_string(), None, None)?; + codelist.add_entry("abC!".to_string(), Some("Gout".to_string()), None)?; + codelist.add_entry("OPP!!".to_string(), Some("Down Syndrome".to_string()), None)?; + codelist.add_entry("!!PP".to_string(), Some("Dental caries".to_string()), None)?; + let error = codelist.validate_codes().unwrap_err(); + let error_string = error.to_string(); + + assert!(error_string.contains("Some codes in the list are invalid. Details:")); + assert!(error_string.contains("Code A0P! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code AaB! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code AAAAAAA! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code AB contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code abcd contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code abC! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code OPP!! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code !!PP contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + + assert!( + matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 8) + ); + Ok(()) } -} + #[test] + fn test_validate_codelist_with_mixed_invalid_and_valid_codes() -> Result<(), CodeListError> { + let mut codelist = create_test_codelist()?; + codelist.add_entry("A54!p".to_string(), None, None)?; + codelist.add_entry("1009!".to_string(), None, None)?; + codelist.add_entry("A0p5!".to_string(), None, None)?; + codelist.add_entry("aab!".to_string(), None, None)?; + codelist.add_entry("ABC!".to_string(), None, None)?; + codelist.add_entry("LPK!".to_string(), None, None)?; + codelist.add_entry("FLP!".to_string(), None, None)?; + codelist.add_entry("GVM!".to_string(), None, None)?; + let error = codelist.validate_codes().unwrap_err(); + let error_string = error.to_string(); + assert!(error_string.contains("Some codes in the list are invalid. Details:")); + assert!(error_string.contains("Code A54!p contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code 1009! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code A0p5! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); + assert!(error_string.contains("Code aab! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern")); -// fn custom_validate_codes(&self) -> Result<(), CodeListValidatorError> { - // let mut reasons = Vec::new(); - - // for (code, _) in self.entries.iter() { - // if let Err(err) = self.custom_validate_code(code) { - // reasons.push(err.to_string()); - // } - // } - - // if reasons.is_empty() { - // Ok(()) - // } else { - // Err(CodeListValidatorError::invalid_codelist(reasons)) - // } // for (code, _) in self.entries.iter() { - // // if re.is_match(code) { - // // return Ok(()); - // // } - // // } - // // Err(CodeListValidatorError::CustomValidationFailed(format!("No codes matched the regex: {}", re.to_string()))) - // } - - // fn custom_validate_code(&self, code: &str, re: Regex) -> Result<(), CodeListValidatorError> { - // if !re.is_match(code) { - // return Err(CodeListValidatorError::invalid_code_contents( - // code, - // "Code does not match the inputted regex", - // self.codelist_type.to_string(), - // )); - // } - - // Ok(()) - // } \ No newline at end of file + assert!( + matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 4) + ); + Ok(()) + } +} \ No newline at end of file diff --git a/rust/codelist-validator-rs/src/errors.rs b/rust/codelist-validator-rs/src/errors.rs index a6e072d..e99df0f 100644 --- a/rust/codelist-validator-rs/src/errors.rs +++ b/rust/codelist-validator-rs/src/errors.rs @@ -22,7 +22,7 @@ pub enum CodeListValidatorError { #[error("Code {code} contents is invalid for type {codelist_type}. Reason: {reason}")] InvalidCodeContents { code: String, reason: String, codelist_type: String }, - #[error("Some codes in the list are invalid. Details: {reasons:?}")] + #[error("Some codes in the list are invalid. Details: {}", reasons.join(", "))] InvalidCodelist { reasons: Vec }, #[error("CodeType {code_type} is not supported")] @@ -30,4 +30,8 @@ pub enum CodeListValidatorError { #[error("Custom validation failed. Reason: {reason}")] CustomValidationFailed { reason: String }, + + #[error("Invalid custom regex pattern: {0}")] + #[construct(skip)] + InvalidRegexPattern(#[from] regex::Error), } diff --git a/rust/codelist-validator-rs/src/icd10_validator.rs b/rust/codelist-validator-rs/src/icd10_validator.rs index 88666a8..9b3e46f 100644 --- a/rust/codelist-validator-rs/src/icd10_validator.rs +++ b/rust/codelist-validator-rs/src/icd10_validator.rs @@ -83,7 +83,7 @@ mod tests { CodeListType::ICD10, create_test_metadata(), None, - ); + )?; Ok(codelist) } diff --git a/rust/codelist-validator-rs/src/opcs_validator.rs b/rust/codelist-validator-rs/src/opcs_validator.rs index 20f4d90..bf12b07 100644 --- a/rust/codelist-validator-rs/src/opcs_validator.rs +++ b/rust/codelist-validator-rs/src/opcs_validator.rs @@ -91,7 +91,7 @@ mod tests { CodeListType::OPCS, create_test_metadata(), None, - ); + )?; Ok(codelist) } diff --git a/rust/codelist-validator-rs/src/snomed_validator.rs b/rust/codelist-validator-rs/src/snomed_validator.rs index 63f15e4..6a47412 100644 --- a/rust/codelist-validator-rs/src/snomed_validator.rs +++ b/rust/codelist-validator-rs/src/snomed_validator.rs @@ -77,7 +77,7 @@ mod tests { CodeListType::SNOMED, create_test_metadata(), None, - ); + )?; Ok(codelist) } diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 646672d..3411adc 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -15,6 +15,12 @@ pub(crate) trait CodeValidator { fn validate_all_code(&self) -> Result<(), CodeListValidatorError>; } +/// Custom validator trait for validating a codelist with a custom regex pattern defined in the CodelistOptions +/// +/// `custom_validate_all_code`: validates all codes in the codelist with the custom regex pattern +pub(crate) trait CustomCodeValidator { + fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError>; +} /// Validator trait pub trait Validator { fn validate_codes(&self) -> Result<(), CodeListValidatorError>; @@ -22,10 +28,13 @@ pub trait Validator { impl Validator for CodeList { fn validate_codes(&self) -> Result<(), CodeListValidatorError> { - match self.codelist_type { - CodeListType::ICD10 => IcdValidator(self).validate_all_code(), - CodeListType::SNOMED => SnomedValidator(self).validate_all_code(), - CodeListType::OPCS => OpcsValidator(self).validate_all_code(), + match &self.codelist_options.custom_regex { + Some(_) => self.custom_validate_all_code(), + None => match self.codelist_type { + CodeListType::ICD10 => IcdValidator(self).validate_all_code(), + CodeListType::SNOMED => SnomedValidator(self).validate_all_code(), + CodeListType::OPCS => OpcsValidator(self).validate_all_code(), + }, } } -} +} \ No newline at end of file From 831088d48420351d66bcf456d31cdba3daaad9d8 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Sun, 15 Jun 2025 21:43:27 +0100 Subject: [PATCH 3/5] fix bindings --- bindings/python/src/codelist.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index a1f3b9d..50e4527 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -80,7 +80,9 @@ impl PyCodeList { let codelist_options = CodeListOptions::default(); // Create codelist - let codelist = CodeList::new(name, codelist_type, metadata, Some(codelist_options))?; + let codelist = CodeList::new(name, codelist_type, metadata, Some(codelist_options)) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(PyCodeList { inner: codelist }) } From 44c94d353de3aca33bcbbf8c4d6b057dd9a8394b Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Sun, 15 Jun 2025 21:58:42 +0100 Subject: [PATCH 4/5] edit rust validation code --- rust/codelist-validator-rs/src/custom_validator.rs | 13 +++++++------ rust/codelist-validator-rs/src/errors.rs | 2 +- rust/codelist-validator-rs/src/lib.rs | 2 +- rust/codelist-validator-rs/src/validator.rs | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/rust/codelist-validator-rs/src/custom_validator.rs b/rust/codelist-validator-rs/src/custom_validator.rs index c4695dc..0cdb3a9 100644 --- a/rust/codelist-validator-rs/src/custom_validator.rs +++ b/rust/codelist-validator-rs/src/custom_validator.rs @@ -1,14 +1,15 @@ -use regex::Regex; -use codelist_rs::codelist::CodeList; use crate::errors::CodeListValidatorError; use crate::validator::CustomCodeValidator; +use codelist_rs::codelist::CodeList; +use regex::Regex; impl CustomCodeValidator for CodeList { fn custom_validate_all_code(&self) -> Result<(), CodeListValidatorError> { let mut reasons = Vec::new(); - let re_str = self.codelist_options.custom_regex.as_ref() - .ok_or_else(|| CodeListValidatorError::custom_validation_failed("Custom regex pattern not provided"))?; + let re_str = self.codelist_options.custom_regex.as_ref().ok_or_else(|| { + CodeListValidatorError::custom_validation_failed("Custom regex pattern not provided") + })?; // regex is compiled once when this method is called and used for validation of all codes let re = Regex::new(re_str)?; @@ -38,6 +39,7 @@ impl CustomCodeValidator for CodeList { mod tests { use codelist_rs::{ codelist::CodeList, + codelist_options::CodeListOptions, errors::CodeListError, metadata::{ categorisation_and_usage::CategorisationAndUsage, metadata_source::Source, @@ -45,7 +47,6 @@ mod tests { validation_and_review::ValidationAndReview, Metadata, }, types::CodeListType, - codelist_options::CodeListOptions, }; use super::*; @@ -178,4 +179,4 @@ mod tests { ); Ok(()) } -} \ No newline at end of file +} diff --git a/rust/codelist-validator-rs/src/errors.rs b/rust/codelist-validator-rs/src/errors.rs index e99df0f..a8650e9 100644 --- a/rust/codelist-validator-rs/src/errors.rs +++ b/rust/codelist-validator-rs/src/errors.rs @@ -31,7 +31,7 @@ pub enum CodeListValidatorError { #[error("Custom validation failed. Reason: {reason}")] CustomValidationFailed { reason: String }, - #[error("Invalid custom regex pattern: {0}")] + #[error("Invalid custom regex pattern: {0}")] #[construct(skip)] InvalidRegexPattern(#[from] regex::Error), } diff --git a/rust/codelist-validator-rs/src/lib.rs b/rust/codelist-validator-rs/src/lib.rs index 97a5f7a..6eb341d 100644 --- a/rust/codelist-validator-rs/src/lib.rs +++ b/rust/codelist-validator-rs/src/lib.rs @@ -1,8 +1,8 @@ extern crate core; +pub mod custom_validator; pub mod errors; pub mod icd10_validator; pub mod opcs_validator; pub mod snomed_validator; pub mod validator; -pub mod custom_validator; diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 3411adc..5439602 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -37,4 +37,4 @@ impl Validator for CodeList { }, } } -} \ No newline at end of file +} From 3b2bd5f1202db53bcf761362b76aa235223b576c Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Mon, 16 Jun 2025 20:31:47 +0100 Subject: [PATCH 5/5] fix rust tests and formatting --- rust/codelist-validator-rs/src/ctv3_validator.rs | 3 ++- rust/codelist-validator-rs/src/lib.rs | 2 +- rust/codelist-validator-rs/src/validator.rs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rust/codelist-validator-rs/src/ctv3_validator.rs b/rust/codelist-validator-rs/src/ctv3_validator.rs index 2d3d943..dd51c27 100644 --- a/rust/codelist-validator-rs/src/ctv3_validator.rs +++ b/rust/codelist-validator-rs/src/ctv3_validator.rs @@ -80,7 +80,8 @@ mod tests { CodeListType::CTV3, Metadata::default(), None, - ); + )?; + Ok(codelist) } diff --git a/rust/codelist-validator-rs/src/lib.rs b/rust/codelist-validator-rs/src/lib.rs index 93f05b6..d2f596d 100644 --- a/rust/codelist-validator-rs/src/lib.rs +++ b/rust/codelist-validator-rs/src/lib.rs @@ -1,7 +1,7 @@ extern crate core; -pub mod custom_validator; pub mod ctv3_validator; +pub mod custom_validator; pub mod errors; pub mod icd10_validator; pub mod opcs_validator; diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 71fecfa..066991e 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -35,7 +35,7 @@ impl Validator for CodeList { CodeListType::SNOMED => SnomedValidator(self).validate_all_code(), CodeListType::OPCS => OpcsValidator(self).validate_all_code(), CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(), - } + }, } } }