Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ chrono = { version = "0.4.39", features = ["serde"] }
codelist-rs = { path = "../../rust/codelist-rs" }
codelist-validator-rs = { path = "../../rust/codelist-validator-rs" }
indexmap = "2.9.0"
regex = "1.11.1"
18 changes: 16 additions & 2 deletions bindings/python/src/codelist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use pyo3::{
types::{PyDict, PySet},
PyErr, PyResult,
};
use regex::Regex;

/// Python wrapper for the CodeList struct
///
Expand Down Expand Up @@ -452,8 +453,21 @@ impl PyCodeList {
}

/// Validate the codelist based on the codelist type
fn validate_codes(&self) -> PyResult<()> {
self.inner.validate_codes().map_err(|e| PyValueError::new_err(e.to_string()))
// Python-facing validation entry point. `custom_regex` is optional and defaults
// to `None`; when present it is compiled and handed to the wrapped codelist's
// validator. Both failure modes (a pattern that does not compile, or a
// validation error from the wrapped codelist) are raised as `ValueError`.
#[pyo3(signature = (custom_regex=None))]
fn validate_codes(&self, custom_regex: Option<String>) -> PyResult<()> {
    // Compile the pattern, if one was given, before touching the codelist so a
    // malformed regex is reported on its own.
    let compiled = custom_regex
        .map(|pattern| Regex::new(&pattern))
        .transpose()
        .map_err(|e| PyValueError::new_err(format!("Invalid regex: {}", e)))?;

    // `Option<Regex>` -> `Option<&Regex>`, the shape the inner validator takes.
    self.inner
        .validate_codes(compiled.as_ref())
        .map_err(|e| PyValueError::new_err(e.to_string()))
}

/// Add a comment to the codelist
Expand Down
13 changes: 12 additions & 1 deletion bindings/python/tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,18 @@ def test_x_code_not_added_snomed(self):
codelist.add_x_codes()
self.assertEqual(str(e.exception), "SNOMED cannot be transformed by having X added to the end of it")


def test_validate_codes_with_custom_regex(self):
codelist = CodeList(
name="Test Codelist",
codelist_type="ICD10",
source="Manually created",
)
codelist.add_entry("A11", "Valid test code")
codelist.validate_codes("^A")
codelist.add_entry("B112", "Invalid test code")
Comment on lines +305 to +307
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome, thanks

with self.assertRaises(ValueError) as e:
codelist.validate_codes("^A")
self.assertIn("Code B112 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern", str(e.exception))



Expand Down
8 changes: 4 additions & 4 deletions rust/codelist-validator-rs/src/ctv3_validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ mod tests {
/// Adds the entry `A9f..` to the helper-built codelist and expects validation
/// to succeed.
fn test_validate_codelist_with_valid_code() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
// The result of `add_entry` is discarded (`let _ =`), so a failed insert
// would not fail the test at this line.
let _ = codelist.add_entry("A9f..".to_string(), None, None);
assert!(codelist.validate_codes().is_ok());
// Passing `None` supplies no custom regex.
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand Down Expand Up @@ -154,7 +154,7 @@ mod tests {
codelist.add_entry("Me...".to_string(), None, None)?;
codelist.add_entry("99999".to_string(), None, None)?;
codelist.add_entry(".....".to_string(), None, None)?;
assert!(codelist.validate_codes().is_ok());
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand All @@ -169,7 +169,7 @@ mod tests {
codelist.add_entry("*unf.".to_string(), None, None)?;
codelist.add_entry("..j..".to_string(), None, None)?;
codelist.add_entry("9874ji".to_string(), None, None)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down Expand Up @@ -199,7 +199,7 @@ mod tests {
codelist.add_entry("A00.l".to_string(), None, None)?;
codelist.add_entry("Q90.....".to_string(), None, None)?;
codelist.add_entry("A..9k".to_string(), None, None)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down
8 changes: 4 additions & 4 deletions rust/codelist-validator-rs/src/icd10_validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ mod tests {
/// Adds the entry `A100` (term `test`) to the helper-built codelist and expects
/// validation to succeed.
fn test_validate_code_with_valid_code() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
// The result of `add_entry` is discarded (`let _ =`), so a failed insert
// would not fail the test at this line.
let _ = codelist.add_entry("A100".to_string(), Some("test".to_string()), None);
assert!(codelist.validate_codes().is_ok());
// Passing `None` supplies no custom regex.
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand Down Expand Up @@ -197,7 +197,7 @@ mod tests {
codelist.add_entry("M10".to_string(), Some("Gout".to_string()), None)?;
codelist.add_entry("Q90".to_string(), Some("Down Syndrome".to_string()), None)?;
codelist.add_entry("K02".to_string(), Some("Dental caries".to_string()), None)?;
assert!(codelist.validate_codes().is_ok());
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand All @@ -216,7 +216,7 @@ mod tests {
codelist.add_entry("A00.A".to_string(), Some("Gout".to_string()), None)?;
codelist.add_entry("A00X12".to_string(), Some("Down Syndrome".to_string()), None)?;
codelist.add_entry("A00.4AA".to_string(), Some("Dental caries".to_string()), None)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down Expand Up @@ -250,7 +250,7 @@ mod tests {
codelist.add_entry("A00.A".to_string(), Some("Gout".to_string()), None)?;
codelist.add_entry("Q90".to_string(), Some("Down Syndrome".to_string()), None)?;
codelist.add_entry("A00.4AA".to_string(), Some("Dental caries".to_string()), None)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down
8 changes: 4 additions & 4 deletions rust/codelist-validator-rs/src/opcs_validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ mod tests {
/// Expects the codelist returned by `create_test_codelist()` to pass validation
/// as-is; this test adds no further entries.
#[test]
fn test_validate_code_with_valid_code() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
assert!(codelist.validate_codes().is_ok());
// Passing `None` supplies no custom regex.
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand Down Expand Up @@ -228,7 +228,7 @@ mod tests {
Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()),
None,
)?;
assert!(codelist.validate_codes().is_ok());
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}

Expand Down Expand Up @@ -271,7 +271,7 @@ mod tests {
Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()),
None,
)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down Expand Up @@ -329,7 +329,7 @@ mod tests {
Some("Insertion Artery Coeliac Stent Transluminal Percutaneous".to_string()),
None,
)?;
let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down
8 changes: 4 additions & 4 deletions rust/codelist-validator-rs/src/snomed_validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ mod tests {
/// Expects the codelist returned by `create_test_codelist()` to pass validation
/// as-is; this test adds no further entries.
#[test]
fn test_validate_code_with_valid_code_default_max_min_lengths() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
assert!(codelist.validate_codes().is_ok());
// Passing `None` supplies no custom regex.
assert!(codelist.validate_codes(None).is_ok());
Ok(())
}
#[test]
Expand Down Expand Up @@ -179,7 +179,7 @@ mod tests {
None,
)?;

assert!(codelist.validate_codes().is_ok());
assert!(codelist.validate_codes(None).is_ok());

Ok(())
}
Expand Down Expand Up @@ -223,7 +223,7 @@ mod tests {
None,
)?;

let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down Expand Up @@ -283,7 +283,7 @@ mod tests {
None,
)?;

let error = codelist.validate_codes().unwrap_err();
let error = codelist.validate_codes(None).unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
Expand Down
183 changes: 176 additions & 7 deletions rust/codelist-validator-rs/src/validator.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Generic trait for validating a codelist
use regex::Regex;

use codelist_rs::{codelist::CodeList, types::CodeListType};

use crate::{
Expand All @@ -17,16 +19,183 @@ pub(crate) trait CodeValidator {

/// Validator trait
///
/// Exposes `validate_codes`, which reports failure as a
/// `CodeListValidatorError`.
pub trait Validator {
fn validate_codes(&self) -> Result<(), CodeListValidatorError>;
/// Validate the codes held by the implementor.
///
/// `custom_regex` is optional. The `CodeList` implementation in this file
/// checks every code against it when it is `Some`, and otherwise dispatches
/// to the validator selected by the codelist type.
fn validate_codes(&self, custom_regex: Option<&Regex>) -> Result<(), CodeListValidatorError>;
}

impl Validator for CodeList {
fn validate_codes(&self) -> Result<(), CodeListValidatorError> {
match self.codelist_type {
CodeListType::ICD10 => IcdValidator(self).validate_all_code(),
CodeListType::SNOMED => SnomedValidator(self).validate_all_code(),
CodeListType::OPCS => OpcsValidator(self).validate_all_code(),
CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(),
/// Validate every code in this codelist.
///
/// When `custom_regex` is `Some`, the codes are checked against that pattern
/// only; when it is `None`, the validator matching `self.codelist_type` runs.
fn validate_codes(&self, custom_regex: Option<&Regex>) -> Result<(), CodeListValidatorError> {
    // A caller-supplied pattern replaces the type-specific rules entirely.
    if let Some(pattern) = custom_regex {
        return custom_validate_all_code(self, pattern);
    }

    // No pattern given: dispatch on the codelist type.
    match self.codelist_type {
        CodeListType::ICD10 => IcdValidator(self).validate_all_code(),
        CodeListType::SNOMED => SnomedValidator(self).validate_all_code(),
        CodeListType::OPCS => OpcsValidator(self).validate_all_code(),
        CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(),
    }
}
}

/// Check every code in a codelist against a caller-supplied regex.
///
/// # Arguments
/// * `codelist` - The codelist whose entries are checked
/// * `re` - The compiled regex each code is tested against
///
/// # Returns
/// * `Result<(), CodeListValidatorError>` - `Ok(())` when every code matches
///   `re`; otherwise an invalid-codelist error built from one message per
///   non-matching code
fn custom_validate_all_code(codelist: &CodeList, re: &Regex) -> Result<(), CodeListValidatorError> {
    // Collect one rendered error message for each code the pattern rejects.
    let failures: Vec<String> = codelist
        .entries
        .iter()
        .filter_map(|(code, _)| {
            if re.is_match(code) {
                return None;
            }
            Some(
                CodeListValidatorError::invalid_code_contents(
                    code,
                    "Code does not match the custom regex pattern",
                    codelist.codelist_type.to_string(),
                )
                .to_string(),
            )
        })
        .collect();

    if failures.is_empty() {
        return Ok(());
    }
    Err(CodeListValidatorError::invalid_codelist(failures))
}

#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use codelist_rs::{
        codelist::CodeList, codelist_options::CodeListOptions, errors::CodeListError,
        metadata::Metadata, types::CodeListType,
    };
    use regex::Regex;

    use super::*;
    use crate::validator::Validator;

    /// Pattern shared by every test below: a capital `B` followed by exactly
    /// two digits, anchored at both ends.
    static TEST_REGEX: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^B\d{2}$").expect("Failed to compile test regex"));

    /// Build an ICD10 codelist named `test_codelist` with default metadata and
    /// explicit test column/field names. No entries are added here; each test
    /// adds its own.
    fn create_test_codelist() -> CodeList {
        let options = CodeListOptions {
            allow_duplicates: true,
            code_column_name: "test_code".to_string(),
            term_column_name: "test_term".to_string(),
            code_field_name: "test_code".to_string(),
            term_field_name: "test_term".to_string(),
        };

        CodeList::new(
            "test_codelist".to_string(),
            CodeListType::ICD10,
            Metadata::default(),
            Some(options),
        )
    }

    /// A single code matching the pattern validates cleanly.
    #[test]
    fn test_validate_code_with_valid_code() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        codelist.add_entry("B11".to_string(), None, None)?;

        let outcome = codelist.validate_codes(Some(&TEST_REGEX));
        assert!(outcome.is_ok());
        Ok(())
    }

    /// A code with too many digits is rejected by the pattern.
    #[test]
    fn test_validate_code_with_invalid_code_length_too_long() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        codelist.add_entry("B1123".to_string(), None, None)?;

        let failure = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err();
        let error = failure.to_string();
        assert!(error.contains("Some codes in the list are invalid. Details:"));
        assert!(error.contains("Code B1123 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern"));
        Ok(())
    }

    /// A code made only of punctuation is rejected by the pattern.
    #[test]
    fn test_validate_invalid_code_invalid_contents() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        codelist.add_entry("!!!".to_string(), None, None)?;

        let failure = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err();
        let error = failure.to_string();
        assert!(error.contains("Some codes in the list are invalid. Details:"));
        assert!(error.contains("Code !!! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern"));
        Ok(())
    }

    /// A codelist whose codes all match the pattern validates cleanly.
    #[test]
    fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        for code in ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08"] {
            codelist.add_entry(code.to_string(), None, None)?;
        }

        assert!(codelist.validate_codes(Some(&TEST_REGEX)).is_ok());
        Ok(())
    }

    /// Every code fails the pattern, so all eight are reported.
    #[test]
    fn test_validate_codelist_with_all_invalid_codes() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        for code in ["B011!", "B0A", "A03", "BK4", "B", "BA907", "B07x", "b08"] {
            codelist.add_entry(code.to_string(), None, None)?;
        }

        let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err();
        let error_string = error.to_string();

        assert!(error_string.contains("Some codes in the list are invalid. Details:"));
        let expected_messages = [
            "Code B011! contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code B0A contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code A03 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code BK4 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code B contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code BA907 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code B07x contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code b08 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
        ];
        for expected in expected_messages {
            assert!(error_string.contains(expected));
        }

        assert!(
            matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 8)
        );
        Ok(())
    }

    /// Four matching and four non-matching codes: only the latter are reported.
    #[test]
    fn test_validate_codelist_with_mixed_invalid_and_valid_codes() -> Result<(), CodeListError> {
        let mut codelist = create_test_codelist();
        for code in ["B01", "B02", "B03", "B04", "B", "BA907", "B07x", "b08"] {
            codelist.add_entry(code.to_string(), None, None)?;
        }

        let error = codelist.validate_codes(Some(&TEST_REGEX)).unwrap_err();
        let error_string = error.to_string();

        assert!(error_string.contains("Some codes in the list are invalid. Details:"));
        let expected_messages = [
            "Code B contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code BA907 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code B07x contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
            "Code b08 contents is invalid for type ICD10. Reason: Code does not match the custom regex pattern",
        ];
        for expected in expected_messages {
            assert!(error_string.contains(expected));
        }

        assert!(
            matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 4)
        );
        Ok(())
    }
}
Loading