From 7716cb4a20371205e2dd3ba20bbd601c7a67cae9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:49:05 +0000 Subject: [PATCH 1/7] Initial plan From ec8a621e83029c9b1d42185c31876674ce626072 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:53:31 +0000 Subject: [PATCH 2/7] Add status field and HTML cleaning to location models Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/choices.py | 9 ++ location/migrations/0004_add_status_field.py | 67 +++++++++++ location/models.py | 109 ++++++++++++++++- location/tests.py | 118 +++++++++++++++++++ 4 files changed, 299 insertions(+), 4 deletions(-) create mode 100644 location/migrations/0004_add_status_field.py diff --git a/location/choices.py b/location/choices.py index 21f53c658..e68470aed 100755 --- a/location/choices.py +++ b/location/choices.py @@ -6,3 +6,12 @@ ("Sudeste", "Sudeste"), ("Sul", "Sul"), ) + +# Processing status for canonical location data +LOCATION_STATUS = ( + ("RAW", "RAW"), # Raw data, no processing + ("CLEANED", "CLEANED"), # Pre-cleaned data + ("MATCHED", "MATCHED"), # Matched to canonical record + ("VERIFIED", "VERIFIED"), # Officially validated + ("REJECTED", "REJECTED"), # Invalid or unresolvable +) diff --git a/location/migrations/0004_add_status_field.py b/location/migrations/0004_add_status_field.py new file mode 100644 index 000000000..41a80cc40 --- /dev/null +++ b/location/migrations/0004_add_status_field.py @@ -0,0 +1,67 @@ +# Generated manually for location data normalization + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('location', '0003_alter_city_options_alter_country_options_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='city', + name='status', + field=models.CharField( + blank=True, + choices=[ + ('RAW', 
'RAW'), + ('CLEANED', 'CLEANED'), + ('MATCHED', 'MATCHED'), + ('VERIFIED', 'VERIFIED'), + ('REJECTED', 'REJECTED') + ], + default='RAW', + max_length=10, + null=True, + verbose_name='Status' + ), + ), + migrations.AddField( + model_name='state', + name='status', + field=models.CharField( + blank=True, + choices=[ + ('RAW', 'RAW'), + ('CLEANED', 'CLEANED'), + ('MATCHED', 'MATCHED'), + ('VERIFIED', 'VERIFIED'), + ('REJECTED', 'REJECTED') + ], + default='RAW', + max_length=10, + null=True, + verbose_name='Status' + ), + ), + migrations.AddField( + model_name='country', + name='status', + field=models.CharField( + blank=True, + choices=[ + ('RAW', 'RAW'), + ('CLEANED', 'CLEANED'), + ('MATCHED', 'MATCHED'), + ('VERIFIED', 'VERIFIED'), + ('REJECTED', 'REJECTED') + ], + default='RAW', + max_length=10, + null=True, + verbose_name='Status' + ), + ), + ] diff --git a/location/models.py b/location/models.py index 015e84b2f..946a412f6 100755 --- a/location/models.py +++ b/location/models.py @@ -4,6 +4,7 @@ from django.db import models, IntegrityError from django.db.models import Q +from django.utils.html import strip_tags from django.utils.translation import gettext_lazy as _ from modelcluster.fields import ParentalKey from modelcluster.models import ClusterableModel @@ -15,6 +16,7 @@ from core.forms import CoreAdminModelForm from core.models import CommonControlField, Language, TextWithLang from core.utils.standardizer import standardize_name, standardize_code_and_name, remove_extra_spaces +from .choices import LOCATION_STATUS class City(CommonControlField): @@ -23,12 +25,21 @@ class City(CommonControlField): Fields: name + status: Processing status (RAW, CLEANED, MATCHED, VERIFIED, REJECTED) """ name = models.TextField(_("Name of the city"), unique=True) + status = models.CharField( + _("Status"), + max_length=10, + choices=LOCATION_STATUS, + default="RAW", + blank=True, + null=True, + ) base_form_class = CoreAdminModelForm - panels = [FieldPanel("name")] + panels = 
[FieldPanel("name"), FieldPanel("status")] autocomplete_search_field = "name" def autocomplete_label(self): @@ -76,19 +87,33 @@ def get(cls, name): return cls.objects.filter(name__iexact=name).first() @classmethod - def create(cls, user=None, name=None): + def create(cls, user=None, name=None, status="RAW"): name = remove_extra_spaces(name) if not name: raise ValueError("City.get_or_create requires name") try: city = City() city.name = name + city.status = status city.creator = user city.save() return city except IntegrityError: return cls.get(name) + @classmethod + def clean_data(cls, name): + """ + Pre-clean city name data: remove HTML, extra spaces, etc. + """ + if not name: + return name + # Remove HTML tags + cleaned = strip_tags(name) + # Remove extra spaces + cleaned = remove_extra_spaces(cleaned) + return cleaned + @staticmethod def standardize(text, user=None): """ @@ -110,13 +135,22 @@ class State(CommonControlField): Fields: name acronym + status: Processing status (RAW, CLEANED, MATCHED, VERIFIED, REJECTED) """ name = models.TextField(_("State name"), null=True, blank=True) acronym = models.CharField(_("State Acronym"), max_length=2, null=True, blank=True) + status = models.CharField( + _("Status"), + max_length=10, + choices=LOCATION_STATUS, + default="RAW", + blank=True, + null=True, + ) base_form_class = CoreAdminModelForm - panels = [FieldPanel("name"), FieldPanel("acronym")] + panels = [FieldPanel("name"), FieldPanel("acronym"), FieldPanel("status")] @staticmethod def autocomplete_custom_queryset_filter(search_term): @@ -181,7 +215,7 @@ def get(cls, name=None, acronym=None): raise ValueError("State.get requires name or acronym") @classmethod - def create(cls, user, name=None, acronym=None): + def create(cls, user, name=None, acronym=None, status="RAW"): name = remove_extra_spaces(name) acronym = remove_extra_spaces(acronym) if name or acronym: @@ -189,6 +223,7 @@ def create(cls, user, name=None, acronym=None): obj = cls() obj.name = name 
obj.acronym = acronym + obj.status = status obj.creator = user obj.save() return obj @@ -196,6 +231,29 @@ def create(cls, user, name=None, acronym=None): return cls.get(name, acronym) raise ValueError("State.create requires name or acronym") + @classmethod + def clean_data(cls, name=None, acronym=None): + """ + Pre-clean state data: remove HTML, extra spaces, etc. + Returns tuple (cleaned_name, cleaned_acronym) + """ + cleaned_name = name + cleaned_acronym = acronym + + if name: + # Remove HTML tags + cleaned_name = strip_tags(name) + # Remove extra spaces + cleaned_name = remove_extra_spaces(cleaned_name) + + if acronym: + # Remove HTML tags + cleaned_acronym = strip_tags(acronym) + # Remove extra spaces + cleaned_acronym = remove_extra_spaces(cleaned_acronym) + + return cleaned_name, cleaned_acronym + @classmethod def create_or_update(cls, user, name=None, acronym=None): name = remove_extra_spaces(name) @@ -332,6 +390,8 @@ class Country(CommonControlField, ClusterableModel): Fields: name acronym + acron3 + status: Processing status (RAW, CLEANED, MATCHED, VERIFIED, REJECTED) """ name = models.CharField(_("Country Name"), blank=True, null=True, max_length=255) @@ -341,12 +401,21 @@ class Country(CommonControlField, ClusterableModel): acron3 = models.CharField( _("Country Acronym (3 char)"), blank=True, null=True, max_length=3 ) + status = models.CharField( + _("Status"), + max_length=10, + choices=LOCATION_STATUS, + default="RAW", + blank=True, + null=True, + ) base_form_class = CoreAdminModelForm panels = [ FieldPanel("name"), FieldPanel("acronym"), FieldPanel("acron3"), + FieldPanel("status"), InlinePanel("country_name", label=_("Country names")), ] @@ -433,6 +502,7 @@ def create_or_update( acron3=None, country_names=None, lang_code2=None, + status="RAW", ): name = remove_extra_spaces(name) acronym = remove_extra_spaces(acronym) @@ -445,6 +515,7 @@ def create_or_update( except cls.DoesNotExist: obj = cls() obj.creator = user + obj.status = status obj.name = name 
or obj.name obj.acronym = acronym or obj.acronym @@ -463,6 +534,36 @@ def create_or_update( ) return obj + @classmethod + def clean_data(cls, name=None, acronym=None, acron3=None): + """ + Pre-clean country data: remove HTML, extra spaces, etc. + Returns tuple (cleaned_name, cleaned_acronym, cleaned_acron3) + """ + cleaned_name = name + cleaned_acronym = acronym + cleaned_acron3 = acron3 + + if name: + # Remove HTML tags + cleaned_name = strip_tags(name) + # Remove extra spaces + cleaned_name = remove_extra_spaces(cleaned_name) + + if acronym: + # Remove HTML tags + cleaned_acronym = strip_tags(acronym) + # Remove extra spaces + cleaned_acronym = remove_extra_spaces(cleaned_acronym) + + if acron3: + # Remove HTML tags + cleaned_acron3 = strip_tags(acron3) + # Remove extra spaces + cleaned_acron3 = remove_extra_spaces(cleaned_acron3) + + return cleaned_name, cleaned_acronym, cleaned_acron3 + @staticmethod def standardize(text, user=None): """ diff --git a/location/tests.py b/location/tests.py index df02b59de..4baffc594 100755 --- a/location/tests.py +++ b/location/tests.py @@ -207,3 +207,121 @@ def test_standardize_returns_dict_original(self): self.assertIsInstance(item["country"], dict) self.assertEqual("BR, MX, Chile", item["country"].get("name")) self.assertEqual(None, item["country"].get("code")) + + +class CityStatusTest(TestCase): + def test_city_default_status_is_raw(self): + """Test that City objects have RAW status by default""" + user, created = User.objects.get_or_create(username="adm") + city = models.City.create(user=user, name="Test City") + self.assertEqual(city.status, "RAW") + + def test_city_status_can_be_set(self): + """Test that City status can be set to different values""" + user, created = User.objects.get_or_create(username="adm") + city = models.City.create(user=user, name="Verified City", status="VERIFIED") + self.assertEqual(city.status, "VERIFIED") + + def test_city_clean_data_removes_html(self): + """Test that clean_data removes HTML 
tags""" + dirty_name = "
City Name
" + cleaned = models.City.clean_data(dirty_name) + self.assertEqual(cleaned, "City Name") + + def test_city_clean_data_removes_extra_spaces(self): + """Test that clean_data removes extra spaces""" + dirty_name = "City Name with spaces" + cleaned = models.City.clean_data(dirty_name) + self.assertEqual(cleaned, "City Name with spaces") + + def test_city_clean_data_handles_none(self): + """Test that clean_data handles None values""" + cleaned = models.City.clean_data(None) + self.assertIsNone(cleaned) + + +class StateStatusTest(TestCase): + def test_state_default_status_is_raw(self): + """Test that State objects have RAW status by default""" + user, created = User.objects.get_or_create(username="adm") + state = models.State.create(user=user, name="Test State", acronym="TS") + self.assertEqual(state.status, "RAW") + + def test_state_status_can_be_set(self): + """Test that State status can be set to different values""" + user, created = User.objects.get_or_create(username="adm") + state = models.State.create(user=user, name="Verified State", acronym="VS", status="VERIFIED") + self.assertEqual(state.status, "VERIFIED") + + def test_state_clean_data_removes_html(self): + """Test that clean_data removes HTML tags from name and acronym""" + dirty_name = "State Name" + dirty_acronym = "ST" + cleaned_name, cleaned_acronym = models.State.clean_data(dirty_name, dirty_acronym) + self.assertEqual(cleaned_name, "State Name") + self.assertEqual(cleaned_acronym, "ST") + + def test_state_clean_data_removes_extra_spaces(self): + """Test that clean_data removes extra spaces""" + dirty_name = "State Name" + dirty_acronym = "S T" + cleaned_name, cleaned_acronym = models.State.clean_data(dirty_name, dirty_acronym) + self.assertEqual(cleaned_name, "State Name") + self.assertEqual(cleaned_acronym, "S T") + + def test_state_clean_data_handles_none(self): + """Test that clean_data handles None values""" + cleaned_name, cleaned_acronym = models.State.clean_data(None, None) + 
self.assertIsNone(cleaned_name) + self.assertIsNone(cleaned_acronym) + + +class CountryStatusTest(TestCase): + def test_country_default_status_is_raw(self): + """Test that Country objects have RAW status by default""" + user, created = User.objects.get_or_create(username="adm") + country = models.Country.create_or_update( + user=user, name="Test Country", acronym="TC" + ) + self.assertEqual(country.status, "RAW") + + def test_country_status_can_be_set(self): + """Test that Country status can be set to different values""" + user, created = User.objects.get_or_create(username="adm") + country = models.Country.create_or_update( + user=user, name="Verified Country", acronym="VC", status="VERIFIED" + ) + self.assertEqual(country.status, "VERIFIED") + + def test_country_clean_data_removes_html(self): + """Test that clean_data removes HTML tags""" + dirty_name = "Country Name" + dirty_acronym = "CN" + dirty_acron3 = "CNT" + cleaned_name, cleaned_acronym, cleaned_acron3 = models.Country.clean_data( + dirty_name, dirty_acronym, dirty_acron3 + ) + self.assertEqual(cleaned_name, "Country Name") + self.assertEqual(cleaned_acronym, "CN") + self.assertEqual(cleaned_acron3, "CNT") + + def test_country_clean_data_removes_extra_spaces(self): + """Test that clean_data removes extra spaces""" + dirty_name = "Country Name" + dirty_acronym = "C N" + dirty_acron3 = "C N T" + cleaned_name, cleaned_acronym, cleaned_acron3 = models.Country.clean_data( + dirty_name, dirty_acronym, dirty_acron3 + ) + self.assertEqual(cleaned_name, "Country Name") + self.assertEqual(cleaned_acronym, "C N") + self.assertEqual(cleaned_acron3, "C N T") + + def test_country_clean_data_handles_none(self): + """Test that clean_data handles None values""" + cleaned_name, cleaned_acronym, cleaned_acron3 = models.Country.clean_data( + None, None, None + ) + self.assertIsNone(cleaned_name) + self.assertIsNone(cleaned_acronym) + self.assertIsNone(cleaned_acron3) From 0f085a4576c531da7f43bd135a578220d8064368 Mon Sep 17 
00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:54:44 +0000 Subject: [PATCH 3/7] Add documentation for location status field Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/STATUS_FIELD.md | 136 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 location/STATUS_FIELD.md diff --git a/location/STATUS_FIELD.md b/location/STATUS_FIELD.md new file mode 100644 index 000000000..0139db504 --- /dev/null +++ b/location/STATUS_FIELD.md @@ -0,0 +1,136 @@ +# Location Data Normalization - Status Field + +## Overview + +The location models (Country, State, City) now include a `status` field to track the processing state of location data. This implements a canonical data architecture that ensures data quality and traceability. + +## Status Values + +The `status` field can have one of the following values: + +| Status | Description | +|--------|-------------| +| **RAW** | Raw data, no processing. Default value for new records. | +| **CLEANED** | Pre-cleaned data. HTML removed, spaces normalized. | +| **MATCHED** | Matched to a canonical record from reference databases. | +| **VERIFIED** | Officially validated against authoritative sources. | +| **REJECTED** | Invalid or unresolvable data that cannot be matched. | + +## Data Cleaning + +Each model now includes a `clean_data()` class method for pre-cleaning operations: + +### City.clean_data(name) +Removes HTML tags and normalizes spaces in city names. + +```python +cleaned_name = City.clean_data("<p>São Paulo</p>") +# Returns: "São Paulo" +``` + +### State.clean_data(name, acronym) +Removes HTML tags and normalizes spaces in state names and acronyms. + +```python +cleaned_name, cleaned_acronym = State.clean_data("<b>São Paulo</b>", "<i>SP</i>") +# Returns: ("São Paulo", "SP") +``` + +### Country.clean_data(name, acronym, acron3) +Removes HTML tags and normalizes spaces in country names and acronyms. + +```python +cleaned_name, cleaned_acronym, cleaned_acron3 = Country.clean_data( + "<b>Brazil</b>", + "<i>BR</i>", + "<i>BRA</i>" +) +# Returns: ("Brazil", "BR", "BRA") +``` + +## Usage Example + +### Creating records with status + +```python +from django.contrib.auth import get_user_model + +User = get_user_model() +user = User.objects.first() + +# Create a city with RAW status (default) +city = City.create(user=user, name="São Paulo") +# city.status == "RAW" + +# Create a city with VERIFIED status +verified_city = City.create(user=user, name="Rio de Janeiro", status="VERIFIED") +# verified_city.status == "VERIFIED" +``` + +### Cleaning data before creation + +```python +# Dirty data from external source +dirty_name = "<p>São Paulo City</p>" + +# Clean the data +cleaned_name = City.clean_data(dirty_name) +# cleaned_name == "São Paulo City" + +# Create with CLEANED status +city = City.create(user=user, name=cleaned_name, status="CLEANED") +``` + +## Workflow + +The typical workflow for location data is: + +1. **RAW** → Data is initially created/imported in raw form +2. **CLEANED** → HTML is removed, spaces normalized +3. **MATCHED** → Data is matched to canonical reference (e.g., GeoNames) +4. **VERIFIED** → Data is validated against authoritative source +5. **REJECTED** → Data that cannot be verified or matched + +## Database Migration + +The status field was added via migration `0004_add_status_field.py`: +- Adds nullable `status` field to City, State, and Country models +- Default value is "RAW" +- Max length: 10 characters +- Choices: RAW, CLEANED, MATCHED, VERIFIED, REJECTED + +## Testing + +Comprehensive tests have been added to verify: +- Default status is RAW +- Status can be set to any valid value +- `clean_data()` methods remove HTML +- `clean_data()` methods normalize spaces +- `clean_data()` methods handle None values + +Run tests with: +```bash +python manage.py test location +``` + +## Reference Data Sources + +The canonical location data should be sourced from: + +1. **Countries States Cities Database** + - GitHub: https://github.com/dr5hn/countries-states-cities-database + - Comprehensive database of countries, states, and cities + +2. **GeoNames** + - Website: https://www.geonames.org/ + - Official geographical database + +## Future Enhancements + +Potential improvements for the location normalization system: + +1. Add methods to transition between states +2. Implement automatic matching against reference databases +3. Add validation rules for each status transition +4. Create admin views to bulk-update status +5.
Add logging/audit trail for status changes From 4c05efe4bd40e3d9516017d0969abe6d65020619 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:56:12 +0000 Subject: [PATCH 4/7] Fix test expectations for acronym space handling Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/tests.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/location/tests.py b/location/tests.py index 4baffc594..e4f1f1ec1 100755 --- a/location/tests.py +++ b/location/tests.py @@ -264,10 +264,10 @@ def test_state_clean_data_removes_html(self): def test_state_clean_data_removes_extra_spaces(self): """Test that clean_data removes extra spaces""" dirty_name = "State Name" - dirty_acronym = "S T" + dirty_acronym = "SP " cleaned_name, cleaned_acronym = models.State.clean_data(dirty_name, dirty_acronym) self.assertEqual(cleaned_name, "State Name") - self.assertEqual(cleaned_acronym, "S T") + self.assertEqual(cleaned_acronym, "SP") def test_state_clean_data_handles_none(self): """Test that clean_data handles None values""" @@ -308,14 +308,14 @@ def test_country_clean_data_removes_html(self): def test_country_clean_data_removes_extra_spaces(self): """Test that clean_data removes extra spaces""" dirty_name = "Country Name" - dirty_acronym = "C N" - dirty_acron3 = "C N T" + dirty_acronym = "CN " + dirty_acron3 = "CNT " cleaned_name, cleaned_acronym, cleaned_acron3 = models.Country.clean_data( dirty_name, dirty_acronym, dirty_acron3 ) self.assertEqual(cleaned_name, "Country Name") - self.assertEqual(cleaned_acronym, "C N") - self.assertEqual(cleaned_acron3, "C N T") + self.assertEqual(cleaned_acronym, "CN") + self.assertEqual(cleaned_acron3, "CNT") def test_country_clean_data_handles_none(self): """Test that clean_data handles None values""" From 43c4553ccdde171d2bd1ddd8b0b722eb993206f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" 
<198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:58:11 +0000 Subject: [PATCH 5/7] Fix status parameter handling in create_or_update methods Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/models.py | 12 ++++++++---- location/tests.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/location/models.py b/location/models.py index 946a412f6..22aff7e9f 100755 --- a/location/models.py +++ b/location/models.py @@ -255,7 +255,7 @@ def clean_data(cls, name=None, acronym=None): return cleaned_name, cleaned_acronym @classmethod - def create_or_update(cls, user, name=None, acronym=None): + def create_or_update(cls, user, name=None, acronym=None, status=None): name = remove_extra_spaces(name) acronym = remove_extra_spaces(acronym) try: @@ -263,9 +263,11 @@ def create_or_update(cls, user, name=None, acronym=None): obj.updated_by = user obj.name = name or obj.name obj.acronym = acronym or obj.acronym + if status is not None: + obj.status = status obj.save() except cls.DoesNotExist: - obj = cls.create(user, name, acronym) + obj = cls.create(user, name, acronym, status or "RAW") return obj @staticmethod @@ -502,7 +504,7 @@ def create_or_update( acron3=None, country_names=None, lang_code2=None, - status="RAW", + status=None, ): name = remove_extra_spaces(name) acronym = remove_extra_spaces(acronym) @@ -512,10 +514,12 @@ def create_or_update( try: obj = cls.get(name, acronym, acron3) obj.updated_by = user + if status is not None: + obj.status = status except cls.DoesNotExist: obj = cls() obj.creator = user - obj.status = status + obj.status = status or "RAW" obj.name = name or obj.name obj.acronym = acronym or obj.acronym diff --git a/location/tests.py b/location/tests.py index e4f1f1ec1..7f0307420 100755 --- a/location/tests.py +++ b/location/tests.py @@ -253,6 +253,21 @@ def test_state_status_can_be_set(self): state = models.State.create(user=user, 
name="Verified State", acronym="VS", status="VERIFIED") self.assertEqual(state.status, "VERIFIED") + def test_state_create_or_update_sets_status_on_new(self): + """Test that create_or_update sets status on new objects""" + user, created = User.objects.get_or_create(username="adm") + state = models.State.create_or_update(user=user, name="New State", acronym="NS", status="CLEANED") + self.assertEqual(state.status, "CLEANED") + + def test_state_create_or_update_updates_status_on_existing(self): + """Test that create_or_update updates status on existing objects""" + user, created = User.objects.get_or_create(username="adm") + state = models.State.create_or_update(user=user, name="Existing State", acronym="ES", status="RAW") + self.assertEqual(state.status, "RAW") + # Update the same state with a new status + state = models.State.create_or_update(user=user, name="Existing State", acronym="ES", status="VERIFIED") + self.assertEqual(state.status, "VERIFIED") + def test_state_clean_data_removes_html(self): """Test that clean_data removes HTML tags from name and acronym""" dirty_name = "State Name" @@ -293,6 +308,27 @@ def test_country_status_can_be_set(self): ) self.assertEqual(country.status, "VERIFIED") + def test_country_create_or_update_sets_status_on_new(self): + """Test that create_or_update sets status on new objects""" + user, created = User.objects.get_or_create(username="adm") + country = models.Country.create_or_update( + user=user, name="New Country", acronym="NC", status="CLEANED" + ) + self.assertEqual(country.status, "CLEANED") + + def test_country_create_or_update_updates_status_on_existing(self): + """Test that create_or_update updates status on existing objects""" + user, created = User.objects.get_or_create(username="adm") + country = models.Country.create_or_update( + user=user, name="Existing Country", acronym="EC", status="RAW" + ) + self.assertEqual(country.status, "RAW") + # Update the same country with a new status + country = 
models.Country.create_or_update( + user=user, name="Existing Country", acronym="EC", status="MATCHED" + ) + self.assertEqual(country.status, "MATCHED") + def test_country_clean_data_removes_html(self): """Test that clean_data removes HTML tags""" dirty_name = "Country Name" From 8e95b87573bf214cc3f201f07abd6e65e177dc6a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 14:00:01 +0000 Subject: [PATCH 6/7] Refactor to use shared clean_text_data utility function Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/migrations/0004_add_status_field.py | 2 - location/models.py | 70 ++++++++------------ 2 files changed, 26 insertions(+), 46 deletions(-) diff --git a/location/migrations/0004_add_status_field.py b/location/migrations/0004_add_status_field.py index 41a80cc40..b1546f1c3 100644 --- a/location/migrations/0004_add_status_field.py +++ b/location/migrations/0004_add_status_field.py @@ -1,5 +1,3 @@ -# Generated manually for location data normalization - from django.db import migrations, models diff --git a/location/models.py b/location/models.py index 22aff7e9f..c75af78bc 100755 --- a/location/models.py +++ b/location/models.py @@ -19,6 +19,26 @@ from .choices import LOCATION_STATUS +def clean_text_data(text): + """ + Utility function to clean location text data. + Removes HTML tags and normalizes whitespace. + + Args: + text: The text to clean, can be None + + Returns: + Cleaned text or None if input was None + """ + if not text: + return text + # Remove HTML tags + cleaned = strip_tags(text) + # Remove extra spaces + cleaned = remove_extra_spaces(cleaned) + return cleaned + + class City(CommonControlField): """ Represent a list of cities @@ -106,13 +126,7 @@ def clean_data(cls, name): """ Pre-clean city name data: remove HTML, extra spaces, etc. 
""" - if not name: - return name - # Remove HTML tags - cleaned = strip_tags(name) - # Remove extra spaces - cleaned = remove_extra_spaces(cleaned) - return cleaned + return clean_text_data(name) @staticmethod def standardize(text, user=None): @@ -237,21 +251,8 @@ def clean_data(cls, name=None, acronym=None): Pre-clean state data: remove HTML, extra spaces, etc. Returns tuple (cleaned_name, cleaned_acronym) """ - cleaned_name = name - cleaned_acronym = acronym - - if name: - # Remove HTML tags - cleaned_name = strip_tags(name) - # Remove extra spaces - cleaned_name = remove_extra_spaces(cleaned_name) - - if acronym: - # Remove HTML tags - cleaned_acronym = strip_tags(acronym) - # Remove extra spaces - cleaned_acronym = remove_extra_spaces(cleaned_acronym) - + cleaned_name = clean_text_data(name) + cleaned_acronym = clean_text_data(acronym) return cleaned_name, cleaned_acronym @classmethod @@ -544,28 +545,9 @@ def clean_data(cls, name=None, acronym=None, acron3=None): Pre-clean country data: remove HTML, extra spaces, etc. 
Returns tuple (cleaned_name, cleaned_acronym, cleaned_acron3) """ - cleaned_name = name - cleaned_acronym = acronym - cleaned_acron3 = acron3 - - if name: - # Remove HTML tags - cleaned_name = strip_tags(name) - # Remove extra spaces - cleaned_name = remove_extra_spaces(cleaned_name) - - if acronym: - # Remove HTML tags - cleaned_acronym = strip_tags(acronym) - # Remove extra spaces - cleaned_acronym = remove_extra_spaces(cleaned_acronym) - - if acron3: - # Remove HTML tags - cleaned_acron3 = strip_tags(acron3) - # Remove extra spaces - cleaned_acron3 = remove_extra_spaces(cleaned_acron3) - + cleaned_name = clean_text_data(name) + cleaned_acronym = clean_text_data(acronym) + cleaned_acron3 = clean_text_data(acron3) return cleaned_name, cleaned_acronym, cleaned_acron3 @staticmethod From c484dedfbadb7837b9e3448f8c5c2e17eef1da4e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 14:01:30 +0000 Subject: [PATCH 7/7] Add status parameter to get_or_create methods for consistency Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- location/models.py | 8 ++++---- location/tests.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/location/models.py b/location/models.py index c75af78bc..ef549229b 100755 --- a/location/models.py +++ b/location/models.py @@ -90,11 +90,11 @@ def load(cls, user, file_path=None): logging.exception(e) @classmethod - def get_or_create(cls, user=None, name=None): + def get_or_create(cls, user=None, name=None, status="RAW"): try: return cls.get(name) except cls.DoesNotExist: - return cls.create(user, name) + return cls.create(user, name, status) @classmethod def get(cls, name): @@ -214,8 +214,8 @@ def load(cls, user, file_path=None): ) @classmethod - def get_or_create(cls, user=None, name=None, acronym=None): - return cls.create_or_update(user, name=name, acronym=acronym) + def get_or_create(cls, user=None, 
name=None, acronym=None, status=None): + return cls.create_or_update(user, name=name, acronym=acronym, status=status) @classmethod def get(cls, name=None, acronym=None): diff --git a/location/tests.py b/location/tests.py index 7f0307420..ccb846bb4 100755 --- a/location/tests.py +++ b/location/tests.py @@ -239,6 +239,12 @@ def test_city_clean_data_handles_none(self): cleaned = models.City.clean_data(None) self.assertIsNone(cleaned) + def test_city_get_or_create_with_status(self): + """Test that get_or_create accepts status parameter""" + user, created = User.objects.get_or_create(username="adm") + city = models.City.get_or_create(user=user, name="Test City", status="CLEANED") + self.assertEqual(city.status, "CLEANED") + class StateStatusTest(TestCase): def test_state_default_status_is_raw(self): @@ -290,6 +296,12 @@ def test_state_clean_data_handles_none(self): self.assertIsNone(cleaned_name) self.assertIsNone(cleaned_acronym) + def test_state_get_or_create_with_status(self): + """Test that get_or_create accepts status parameter""" + user, created = User.objects.get_or_create(username="adm") + state = models.State.get_or_create(user=user, name="Test State", acronym="TS", status="MATCHED") + self.assertEqual(state.status, "MATCHED") + class CountryStatusTest(TestCase): def test_country_default_status_is_raw(self):