Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions Connecting Python to SQL.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "521d4783",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy.engine import URL\n",
"\n",
"# Connection settings for the MySQL Sakila database (local by default).\n",
"# No secret is hardcoded: every value can be overridden through the environment,\n",
"# and the password is prompted for when SAKILA_DB_PASSWORD is not set.\n",
"connection_url = URL.create(\n",
"    drivername='mysql+pymysql',\n",
"    username=os.environ.get('SAKILA_DB_USER', 'root'),\n",
"    password=os.environ.get('SAKILA_DB_PASSWORD') or getpass('MySQL password: '),\n",
"    host=os.environ.get('SAKILA_DB_HOST', 'localhost'),\n",
"    database=os.environ.get('SAKILA_DB_NAME', 'sakila'),\n",
")\n",
"\n",
"# URL.create takes each component separately and escapes it when rendering,\n",
"# so special characters in the password (such as '#' or '@') are handled safely.\n",
"engine = create_engine(connection_url)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2ab775b4",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"\n",
"    Retrieves rental data for a specific month and year from the Sakila database.\n",
"\n",
"    The month and year are sent to the database as bound parameters rather than\n",
"    being formatted into the SQL text, so a caller-supplied value can never be\n",
"    executed as SQL.\n",
"\n",
"    Parameters:\n",
"    engine : SQLAlchemy engine object\n",
"    month  : int, month number (e.g., 5 for May)\n",
"    year   : int, year number (e.g., 2005)\n",
"\n",
"    Returns:\n",
"    pandas DataFrame with columns [rental_id, rental_date, customer_id]\n",
"    for that month and year\n",
"\n",
"    Raises:\n",
"    TypeError or ValueError if month or year cannot be converted to int\n",
"    \"\"\"\n",
"    # Local import: only create_engine is imported at the top of the notebook.\n",
"    from sqlalchemy import text\n",
"\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            rental_date,\n",
"            customer_id\n",
"        FROM rental\n",
"        WHERE MONTH(rental_date) = :month\n",
"          AND YEAR(rental_date) = :year\n",
"        \"\"\"\n",
"    )\n",
"\n",
"    # int() normalises numpy integers / numeric strings and rejects anything else\n",
"    # before it reaches the database driver.\n",
"    bound_values = {'month': int(month), 'year': int(year)}\n",
"\n",
"    return pd.read_sql(query, engine, params=bound_values)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8a9e7496",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"\n",
"    Tallies how many rentals each customer made in the given month/year.\n",
"\n",
"    Parameters:\n",
"    df    : pandas DataFrame with a 'customer_id' column (one row per rental),\n",
"            e.g. the result of rentals_month()\n",
"    month : int, used only to build the count column's name (zero-padded)\n",
"    year  : int, used only to build the count column's name\n",
"\n",
"    Returns:\n",
"    pandas DataFrame with columns [customer_id, rentals_MM_YYYY],\n",
"    one row per customer\n",
"    \"\"\"\n",
"    # Number of rows per customer, indexed by customer_id.\n",
"    per_customer = df.groupby('customer_id').size()\n",
"\n",
"    # Name the Series after the month/year, then turn the index back into a column.\n",
"    return per_customer.rename(f\"rentals_{month:02d}_{year}\").reset_index()\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "99229755",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"\n",
"    Puts two monthly rental-count tables side by side and measures the change.\n",
"\n",
"    Parameters:\n",
"    df1, df2 : DataFrames from rental_count_month(), each with a 'customer_id'\n",
"               column and one 'rentals_*' count column\n",
"\n",
"    Returns:\n",
"    DataFrame restricted to customers present in both inputs (inner join),\n",
"    holding both count columns plus 'difference' = second count - first count.\n",
"    \"\"\"\n",
"    combined = df1.merge(df2, on='customer_id', how='inner')\n",
"\n",
"    # The count columns are located by prefix; df1's column precedes df2's\n",
"    # in the merged frame.\n",
"    count_cols = [name for name in combined.columns if name.startswith('rentals_')]\n",
"    change = combined[count_cols[1]] - combined[count_cols[0]]\n",
"\n",
"    return combined.assign(difference=change)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e1db19d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"0 1 2 7 5\n",
"1 2 1 1 0\n",
"2 3 2 4 2\n",
"3 5 3 5 2\n",
"4 6 3 4 1\n"
]
}
],
"source": [
"# Raw rentals for May and June 2005 (queried in that order)\n",
"may_df, june_df = (rentals_month(engine, m, 2005) for m in (5, 6))\n",
"\n",
"# Per-customer rental counts for each month\n",
"may_counts = rental_count_month(may_df, 5, 2005)\n",
"june_counts = rental_count_month(june_df, 6, 2005)\n",
"\n",
"# Side-by-side comparison; 'difference' is June minus May\n",
"comparison = compare_rentals(may_counts, june_counts)\n",
"\n",
"print(comparison.head())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c1caa42",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}