Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions sql_python_connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "f6644b47",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
}
],
"source": [
"# 1. Connect to a MySQL Database and Query Data\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine, text\n",
"from urllib.parse import quote_plus\n",
"import getpass\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Connection settings for the Sakila database on the local MySQL server.\n",
"bd = \"sakila\"\n",
"user = \"root\"\n",
"host = \"127.0.0.1\"\n",
"port = 3306\n",
"\n",
"# Prompt for the password instead of hard-coding it in the notebook, then\n",
"# URL-encode it so special characters cannot break the connection URL.\n",
"pw_raw = getpass.getpass(\"MySQL password: \")\n",
"pw = quote_plus(pw_raw)\n",
"\n",
"url = f\"mysql+pymysql://{user}:{pw}@{host}:{port}/{bd}?charset=utf8mb4\"\n",
"\n",
"engine = create_engine(url, pool_pre_ping=True)\n",
"\n",
"# Smoke test: run a trivial query and print the single value it returns.\n",
"with engine.begin() as conn:\n",
"    ping = conn.exec_driver_sql(\"SELECT 1\").scalar()\n",
"    print(ping)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f0626989",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n",
"    \"\"\"Return the rows of the Sakila ``rental`` table for one month.\n",
"\n",
"    Args:\n",
"        engine: SQLAlchemy engine connected to the Sakila database.\n",
"        month: Calendar month number to select.\n",
"        year: Calendar year to select.\n",
"\n",
"    Returns:\n",
"        DataFrame with every ``rental`` column, ordered by ``rental_date``.\n",
"\n",
"    Raises:\n",
"        TypeError, ValueError: If ``month`` or ``year`` cannot be converted\n",
"            to ``int``.\n",
"    \"\"\"\n",
"    # Bind the values as parameters instead of formatting them into the SQL\n",
"    # text, so a non-integer argument can never alter the statement.\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT *\n",
"        FROM rental\n",
"        WHERE YEAR(rental_date) = :year\n",
"          AND MONTH(rental_date) = :month\n",
"        ORDER BY rental_date\n",
"        \"\"\"\n",
"    )\n",
"    params = {\"year\": int(year), \"month\": int(month)}\n",
"\n",
"    with engine.connect() as conn:\n",
"        return pd.read_sql(query, conn, params=params)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9738131b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" rental_id rental_date inventory_id customer_id \\\n",
"0 1158 2005-06-14 22:53:33 1632 416 \n",
"1 1159 2005-06-14 22:55:13 4395 516 \n",
"2 1160 2005-06-14 23:00:34 2795 239 \n",
"3 1161 2005-06-14 23:07:08 1690 285 \n",
"4 1162 2005-06-14 23:09:38 987 310 \n",
"\n",
" return_date staff_id last_update \n",
"0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 \n",
"1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 \n",
"2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 \n",
"3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 \n",
"4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 \n"
]
}
],
"source": [
"# Smoke-test rentals_month with June 2005 and preview the first rows.\n",
"\n",
"df = rentals_month(engine, 6, 2005)\n",
"\n",
"print(df.head())\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2c2bafd6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def rental_count_month(df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:\n",
"    \"\"\"Count rentals per customer and label the count with the month/year.\n",
"\n",
"    The rows of ``df`` are counted as given; ``month`` and ``year`` are only\n",
"    used to build the name of the count column, so ``df`` should already be\n",
"    restricted to that period.\n",
"\n",
"    Returns a DataFrame with a ``customer_id`` column and a\n",
"    ``rentals_MM_YYYY`` column holding the number of rows per customer.\n",
"    \"\"\"\n",
"    count_label = f\"rentals_{month:02d}_{year}\"\n",
"\n",
"    # One entry per customer_id holding the number of rows in its group.\n",
"    per_customer = df.groupby(\"customer_id\").size()\n",
"\n",
"    # Naming the Series first makes reset_index() emit the labelled column.\n",
"    return per_customer.rename(count_label).reset_index()\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "41fb3a70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005\n",
"0 1 7\n",
"1 2 1\n",
"2 3 4\n",
"3 4 6\n",
"4 5 5\n"
]
}
],
"source": [
"\n",
"# Count rentals per customer for the month currently loaded in df.\n",
"# df holds the June 2005 rentals, so the label must be month 6; passing 5\n",
"# would name the column rentals_05_2005 while the counts are June's.\n",
"df_counts = rental_count_month(df, 6, 2005)\n",
"\n",
"print(df_counts.head())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "416299c4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:\n",
"    \"\"\"Merge two per-customer rental counts and add their difference.\n",
"\n",
"    Args:\n",
"        df1: Frame with ``customer_id`` and one rental-count column.\n",
"        df2: Frame with ``customer_id`` and one rental-count column.\n",
"\n",
"    Returns:\n",
"        One row per customer found in either frame, with both count columns\n",
"        (missing counts filled with 0) and a ``difference`` column equal to\n",
"        the ``df2`` count minus the ``df1`` count. If both frames use the\n",
"        same count-column name, the columns carry the ``_x`` (``df1``) and\n",
"        ``_y`` (``df2``) suffixes added by the merge.\n",
"\n",
"    Raises:\n",
"        IndexError: If a frame has no column besides ``customer_id``.\n",
"    \"\"\"\n",
"    # The count column is the first column that is not the join key.\n",
"    col1 = [c for c in df1.columns if c != \"customer_id\"][0]\n",
"    col2 = [c for c in df2.columns if c != \"customer_id\"][0]\n",
"\n",
"    # Outer join keeps customers that rented in only one of the periods;\n",
"    # their missing count becomes NaN and is replaced by 0.\n",
"    combined = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\").fillna(0)\n",
"\n",
"    # When both frames share the column name, merge renames the overlapping\n",
"    # columns with its default suffixes, so look them up under those names.\n",
"    if col1 == col2:\n",
"        out1, out2 = f\"{col1}_x\", f\"{col2}_y\"\n",
"    else:\n",
"        out1, out2 = col1, col2\n",
"\n",
"    # NaN forces the merged counts to float; restore the integer dtype of\n",
"    # the inputs so the counts and their difference stay whole numbers.\n",
"    for out_col, source in ((out1, df1[col1]), (out2, df2[col2])):\n",
"        if pd.api.types.is_integer_dtype(source.dtype):\n",
"            combined[out_col] = combined[out_col].astype(source.dtype)\n",
"\n",
"    combined[\"difference\"] = combined[out2] - combined[out1]\n",
"\n",
"    return combined\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}