Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions events/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,7 @@ def clean_new_password(self):
if errors:
raise ValidationError(errors)
return new_password


class AcademicCenterCSVUploadForm(forms.Form):
    """Form with a single file field used to upload an academic-center CSV."""

    csv_file = forms.FileField(
        label="Upload Academic Center CSV",
    )
1 change: 1 addition & 0 deletions events/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
url(r'^ac/$', ac, name='ac'),
url(r'^ac/new/$', new_ac, name='new_ac'),
url(r'^ac/(\d+)/edit/$', edit_ac, name='edit_ac'),
url(r'^ac/upload/$', upload_ac_csv, name='upload_ac_csv'),

#url(r'^xmlparse/$', xmlparse', name='xmlparse'),
#url(r'^pdf/$', pdf', name='pdf'),
Expand Down
239 changes: 237 additions & 2 deletions events/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import xml.etree.cElementTree as etree
from django.conf import settings
import json
import os,time, csv, random, string
import os,time, csv, random, string, io
from validate_email import validate_email

import os.path
Expand Down Expand Up @@ -60,7 +60,7 @@
from django.template.context_processors import csrf

from io import StringIO, BytesIO

from config import MAX_ROWS, ACADEMIC_CSV_TEMPLATE, MAX_ERROR_COUNT

# random string
import string
Expand Down Expand Up @@ -675,7 +675,242 @@ def new_ac(request):
context = {}
context.update(csrf(request))
context['form'] = AcademicForm(user=request.user)
context['form_csv'] = AcademicCenterCSVUploadForm()
context['academic_csv_template'] = ACADEMIC_CSV_TEMPLATE
return render(request, 'events/templates/ac/form.html', context)

# Column headers an academic-center CSV must contain (compared as a set, so
# the order of the columns in the file does not matter).
_AC_CSV_COLUMNS = (
    'institution_name', 'state', 'district', 'city', 'address', 'pincode',
    'institution_type', 'institute_category', 'university', 'contact_person',
    'resource_center', 'ratings', 'remarks',
)
_AC_RATING_CHOICES = frozenset({1, 2, 3, 4, 5})


def _clean_csv_value(row, key, default='', to_lower=True):
    """Return the stripped value of ``row[key]``, or ``default`` when blank.

    ``csv.DictReader`` fills missing trailing fields with ``None``; those are
    treated the same as an empty cell. The value is lowercased unless
    ``to_lower`` is false.
    """
    value = row.get(key)
    if value is None:
        return default
    value = value.strip()
    if not value:
        return default
    return value.lower() if to_lower else value


def _read_ac_csv(csv_file, max_rows):
    """Stream-parse the uploaded CSV.

    Returns ``(fieldnames, numbered_rows, row_count)`` where ``fieldnames`` is
    the stripped header (``None`` for an empty file), ``numbered_rows`` is a
    list of ``(line_number, row_dict)`` holding at most ``max_rows`` rows, and
    ``row_count`` is the total number of data rows in the file.

    Raises ``UnicodeDecodeError`` when the file is not UTF-8 and ``csv.Error``
    when it cannot be parsed.
    """
    # utf-8-sig transparently drops the BOM that spreadsheet exports add,
    # which would otherwise corrupt the first header name.
    text_stream = io.TextIOWrapper(csv_file, encoding='utf-8-sig', newline='')
    try:
        reader = csv.DictReader(text_stream)
        if reader.fieldnames is not None:
            reader.fieldnames = [name.strip() for name in reader.fieldnames]

        numbered_rows = []
        row_count = 0
        for row in reader:
            row_count += 1
            # Rows beyond the limit are counted (for the error message) but
            # never stored, so memory use stays bounded by max_rows.
            if row_count <= max_rows:
                # line_num is the physical line in the file, so reported row
                # numbers stay correct even when the file has blank lines.
                numbered_rows.append((reader.line_num, row))
        return reader.fieldnames, numbered_rows, row_count
    finally:
        # Detach so that discarding the wrapper does not close the upload.
        text_stream.detach()


def _distinct_column_values(numbered_rows, column):
    """Return the set of non-blank, stripped values found in ``column``."""
    values = (
        _clean_csv_value(row, column, to_lower=False)
        for _, row in numbered_rows
    )
    return {value for value in values if value}


@login_required
def upload_ac_csv(request):
    """Bulk-create academic centers from an uploaded CSV file.

    The file is parsed once. Every data row is validated against the State,
    District, City, InstituteType and InstituteCategory tables (matched
    case-insensitively on name); rows that pass are saved as AcademicCenter
    records, and a University is created on demand when the named one does
    not exist for the row's state. Per-row problems are reported through the
    messages framework; processing stops once MAX_ERROR_COUNT rows have
    failed. The view always redirects back to ``events:new_ac``.
    """
    if request.method != "POST":
        return redirect('events:new_ac')

    form = AcademicCenterCSVUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        messages.error(request, f"Form is not valid: {form.errors}")
        return redirect('events:new_ac')

    try:
        header, numbered_rows, row_count = _read_ac_csv(
            form.cleaned_data['csv_file'], MAX_ROWS
        )
    except UnicodeDecodeError:
        messages.error(request, "File is not UTF-8 encoded. Please upload a valid CSV.")
        return redirect('events:new_ac')
    except Exception:
        # Deliberate catch-all at the user-facing boundary: any parsing
        # failure becomes a friendly message instead of a server error.
        messages.error(request, "CSV parsing error: An unknown error occurred")
        return redirect('events:new_ac')

    if row_count > MAX_ROWS:
        messages.error(request, f"CSV has too many rows ({row_count}). Limit is {MAX_ROWS}.")
        return redirect('events:new_ac')

    if not header:
        messages.error(request, "CSV is empty.")
        return redirect('events:new_ac')

    if set(header) != set(_AC_CSV_COLUMNS):
        messages.error(request, f"CSV columns do not match the expected format. Expected columns: {', '.join(_AC_CSV_COLUMNS)}")
        return redirect('events:new_ac')

    # One query per lookup table, keyed by lowercased name so that matching
    # against the (lowercased) CSV values is case-insensitive.
    states = {
        s.name.lower(): s
        for s in State.objects.filter(
            name__in=_distinct_column_values(numbered_rows, 'state'))
    }
    universities = {
        (u.name.lower(), u.state.name.lower()): u
        for u in University.objects.filter(
            name__in=_distinct_column_values(numbered_rows, 'university')
        ).select_related('state')
    }
    districts = {
        (d.name.lower(), d.state.name.lower()): d
        for d in District.objects.filter(
            name__in=_distinct_column_values(numbered_rows, 'district')
        ).select_related('state')
    }
    cities = {
        (c.name.lower(), c.state.name.lower()): c
        for c in City.objects.filter(
            name__in=_distinct_column_values(numbered_rows, 'city')
        ).select_related('state')
    }
    institution_types = {
        i.name.lower(): i
        for i in InstituteType.objects.filter(
            name__in=_distinct_column_values(numbered_rows, 'institution_type'))
    }
    institute_categories = {
        c.name.lower(): c for c in InstituteCategory.objects.all()
    }

    success_count, failure_count = 0, 0
    success_institutes, error_rows, validation_errors, duplicate_val = [], [], [], []

    for idx, row in numbered_rows:
        if failure_count >= MAX_ERROR_COUNT:
            messages.error(request, "Too many errors. Stopping CSV processing.")
            break

        # Extract fields
        institution_name = _clean_csv_value(row, 'institution_name', to_lower=False)
        state_val = _clean_csv_value(row, 'state')
        city_val = _clean_csv_value(row, 'city')
        district_val = _clean_csv_value(row, 'district')
        # Keep the original casing: it is stored if the university is created.
        university_name = _clean_csv_value(row, 'university', to_lower=False)
        institution_type_val = _clean_csv_value(row, 'institution_type')
        category_name = _clean_csv_value(
            row, 'institute_category', default='Uncategorised', to_lower=False)
        raw_rating = _clean_csv_value(row, 'ratings', to_lower=False)
        is_resource_center = 1 if _clean_csv_value(row, 'resource_center') == 'yes' else 0
        pincode = _clean_csv_value(row, 'pincode', to_lower=False, default=None)
        address = _clean_csv_value(row, 'address', to_lower=False)

        row_errors = []

        if institution_name == "":
            row_errors.append(f"Error in row: {idx} : Please add institute name.")

        state = states.get(state_val) if state_val else None
        if state_val == "":
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please select state.")
        elif state is None:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Unknown state - {state_val}")
        if state is None:
            # City, district and university are resolved per state, so the
            # remaining checks cannot be evaluated for this row.
            validation_errors.extend(row_errors)
            failure_count += 1
            continue

        # validate city
        city = cities.get((city_val, state_val)) if city_val else None
        if city_val == "":
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please select city.")
        elif city is None:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Unknown city - {city_val} for state {state_val}")

        # validate district
        district = districts.get((district_val, state_val)) if district_val else None
        if district_val == "":
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please select district.")
        elif district is None:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Unknown district - {district_val} for state {state_val}")

        # validate university (looked up / created only once the row is valid)
        if university_name == "":
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please select university.")

        # validate institution_type
        institution_type = (
            institution_types.get(institution_type_val)
            if institution_type_val else None
        )
        if institution_type_val == "":
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please select institute type.")
        elif institution_type is None:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Unknown institute type.")

        # validate rating: a blank cell defaults to 1, anything that is not
        # an integer in the allowed range is rejected
        if raw_rating == "":
            rating = 1
        else:
            try:
                rating = int(raw_rating)
            except ValueError:
                rating = None
        if rating not in _AC_RATING_CHOICES:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Ratings should be between 1 & 5")

        # validate institute_category
        institute_category_obj = institute_categories.get(category_name.lower())
        if institute_category_obj is None:
            row_errors.append(f"Error in row: {idx} : {category_name} --> invalid. Options are: Govt, Private, NGO, Uncategorised")

        # validate pincode
        if pincode is None:
            row_errors.append(f"Error in row: {idx} : {institution_name} --> Please enter pincode. ")

        # try to save only if there is no field error in the row
        if row_errors:
            validation_errors.extend(row_errors)
            failure_count += 1
            continue

        try:
            academic_code = get_academic_code(state)
            filters = {
                'institution_name': institution_name,
                'state_id': state.id,
                'district_id': district.id,
                'city_id': city.id,
            }
            if AcademicCenter.objects.filter(**filters).exists():
                duplicate_val.append((idx, institution_name))
                continue

            university_key = (university_name.lower(), state_val)
            university = universities.get(university_key)
            if university is None:
                university = University.objects.create(
                    state=state, name=university_name, user=request.user)
                # Cache it so later rows naming the same university reuse it.
                universities[university_key] = university

            AcademicCenter.objects.create(
                user=request.user,
                state_id=state.id,
                institution_type_id=institution_type.id,
                university_id=university.id,
                academic_code=academic_code,
                institution_name=institution_name,
                district_id=district.id,
                city_id=city.id,
                address=address,
                pincode=pincode,
                resource_center=is_resource_center,
                rating=rating,
                contact_person=_clean_csv_value(row, 'contact_person', to_lower=False),
                remarks=_clean_csv_value(row, 'remarks', to_lower=False),
                status=1,
                institute_category=institute_category_obj,
            )
        except Exception as exc:
            # Keep going: one row failing to save must not abort the upload.
            failure_count += 1
            error_rows.append((idx, institution_name, str(exc)))
        else:
            success_count += 1
            success_institutes.append(institution_name)

    # Display messages
    if success_count > 0:  # display institutes saved successfully
        messages.success(request, f"{success_count} entries uploaded successfully.\n{' | '.join(success_institutes)}")
    else:
        messages.error(request, "Academic centers not added.")  # none of the institutes was added

    if duplicate_val:
        messages.error(request, f"{len(duplicate_val)} Duplicate enteries detected as listed below. These academic centers already exist for given state, district & city. ")
        for dup_row, dup_name in duplicate_val:
            messages.warning(request, f"Duplicate entry : Row {dup_row} - {dup_name}")
    if failure_count > 0:
        messages.error(request, f"{failure_count} rows failed to upload.")
    for failed_row, failed_name, err in error_rows:
        messages.error(request, f"Row {failed_row} : {failed_name} : {err}")
    for error in validation_errors:
        messages.warning(request, error)

    return redirect('events:new_ac')


@login_required
def edit_ac(request, rid = None):
Expand Down
56 changes: 56 additions & 0 deletions static/events/templates/ac/form.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,66 @@
{% block title %}New Institution{% endblock %}
{% block heading %}
<i class="fa fa-list-ul"></i> New Institution
{% endblock %}
{% block cssblock %}
<style type="text/css">

</style>

{% endblock %}
{% block content %}
<div class="row well">
<div class='col-12'>
<form action="{% url 'events:upload_ac_csv' %}" method="post" enctype="multipart/form-data" class="form-horizontal">
{% csrf_token %}
{{ form_csv }}
<button type="submit" class="btn btn-primary m-4" style="margin-top: 12px;">upload csv</button>
</form>
</div>
<div class="alert alert-info col-12" style="margin-top: 12px;">
<h4>CSV Upload Instructions</h4>
<ol>
<li>
Use the following column headers, in the <strong>exact order and with exact spellings</strong>:<br>
<code>
institution_name,state,district,city,address,pincode,institution_type,institute_category,university,<br>
contact_person,resource_center,ratings,remarks<br>

</code>
</li>
<li>
<strong>Do not change column names or their sequence.</strong>
</li>
<li>
If a value contains a comma <code>(,)</code>, enclose it in <strong>double quotes</strong> <code>("")</code> and do not put a space before the opening quote.<br>
<b>Example:</b><br>
<code>
"Institute of Physics, Pune",Maharashtra,Pune,Pune,"Plot 21, Industrial Area, MIDC",411001,School,Uncategorised,Central Board of Secondary education,
Ankita 999999999 ankita@samtafoundation.com,no,1,no remarks
</code>
</li>
<li>
Save your file with a <code>.csv</code> extension and ensure it's UTF-8 encoded.
</li>
</ol>
<div>
<div class="alert alert-warning col-12" style="margin-top: 12px;">
<p>
To ensure that the fields — State, District, City, Institute Type, Institute Category, and University — exactly match the values in our database, please <strong>use the provided Google Sheet</strong> (link below). The sheet includes dropdowns for these fields to maintain correct data entry.<br/>
Make a copy of the sheet (ensure you are logged into Google), fill in your data, download it as a CSV file, and then upload it here.
</p>

<p><strong><a href="{{ academic_csv_template }}" target="_blank">CSV Template Link</a></strong>
</p>
</div>
</div>

</div>

</div>
<div class="row well">
<div class='col-sm-8'>
<p><strong>Add Individual Academic Center</strong></p>
<form action="{{ action }}" method="post" class='form-horizontal'>{% csrf_token %}
{% with WIDGET_ERROR_CLASS='field-error' WIDGET_REQUIRED_CLASS='field-required' %}
{% if form.non_field_errors %}
Expand Down