Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
196 changes: 196 additions & 0 deletions Connecting Python to SQL.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "f9878474",
"metadata": {},
"outputs": [],
"source": [
"from sqlalchemy import create_engine\n",
"\n",
"engine = create_engine(\n",
" \"mysql+mysqlconnector://root:Reynold451!!@localhost/sakila\"\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b8eddb12",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import text\n",
"\n",
"# 2. rentals_month(engine, month, year)\n",
"def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n",
" \"\"\"\n",
" Return all rentals for a given month/year from sakila.rental as a DataFrame.\n",
" \"\"\"\n",
" query = text(\n",
" \"\"\"\n",
" SELECT\n",
" rental_id,\n",
" rental_date,\n",
" inventory_id,\n",
" customer_id,\n",
" return_date,\n",
" staff_id,\n",
" last_update\n",
" FROM rental\n",
" WHERE\n",
" MONTH(rental_date) = :month\n",
" AND YEAR(rental_date) = :year\n",
" \"\"\"\n",
" )\n",
" df = pd.read_sql(query, con=engine, params={\"month\": month, \"year\": year})\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2428798c",
"metadata": {},
"outputs": [],
"source": [
"# 3. rental_count_month(df_rentals, month, year)\n",
"def rental_count_month(df_rentals: pd.DataFrame, month: int, year: int) -> pd.DataFrame:\n",
" \"\"\"\n",
" From a rentals DataFrame, return a DataFrame with number of rentals\n",
" per customer_id for the given month/year.\n",
" Column name should be rentals_MM_YYYY.\n",
" \"\"\"\n",
" col_name = f\"rentals_{month:02d}_{year}\"\n",
"\n",
" # group by customer_id and count rentals\n",
" counts = (\n",
" df_rentals.groupby(\"customer_id\")[\"rental_id\"]\n",
" .count()\n",
" .reset_index()\n",
" .rename(columns={\"rental_id\": col_name})\n",
" )\n",
" return counts\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4b9bead9",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"from sqlalchemy import create_engine, text\n",
"import getpass\n",
"import urllib.parse\n",
"\n",
"# 1. Create SQLAlchemy engine (prompt for user/password/host/db)\n",
"# Example for a local MySQL server with the Sakila sample database:\n",
"# pip install sqlalchemy mysql-connector-python\n",
"\n",
"# Prompt for credentials at run time so that no username or password is hard-coded in the notebook.\n",
"# Enter the username and password for your own MySQL instance.\n",
"user = input(\"MySQL user (e.g. 'root' or your user'): \").strip()\n",
"host = input(\"MySQL host (default 'localhost'): \").strip() or \"localhost\"\n",
"db = input(\"Database name (default 'sakila'): \").strip() or \"sakila\"\n",
"password = getpass.getpass(f\"Password for {user}@{host}: \")\n",
"\n",
"# URL-encode the password so special characters in it cannot break the connection URL\n",
"password_esc = urllib.parse.quote_plus(password)\n",
"\n",
"engine = create_engine(f\"mysql+mysqlconnector://{user}:{password_esc}@{host}/{db}\")\n",
"\n",
"# quick connection test with helpful error message\n",
"try:\n",
" with engine.connect() as conn:\n",
" conn.execute(text(\"SELECT 1\"))\n",
"except Exception as e:\n",
" print(\"Failed to connect to the database. Check username/password/host/database.\")\n",
" print(\"Connection error:\", e)"
]
},
{
"cell_type": "markdown",
"id": "0c4b1703",
"metadata": {},
"source": [
"# Create `compare_rentals` to compare rental counts between two months"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a50f45cd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Empty DataFrame\n",
"Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference]\n",
"Index: []\n"
]
}
],
"source": [
"# 4. compare_rentals(df1, df2)\n",
"def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:\n",
" \"\"\"\n",
" Merge two DataFrames (counts per customer) and compute a 'difference' column\n",
" = rentals in df2 minus rentals in df1.\n",
" Assumes both have 'customer_id' and one rentals_* column each.\n",
" \"\"\"\n",
" # An inner join keeps only customers who rented in both months, which is what the\n",
" # lab usually wants; use an outer join instead if the requirement differs:\n",
" df_merged = df1.merge(df2, on=\"customer_id\", how=\"inner\")\n",
"\n",
" # Identify the two rentals columns\n",
" rental_cols = [c for c in df_merged.columns if c.startswith(\"rentals_\")]\n",
" if len(rental_cols) != 2:\n",
" raise ValueError(\"Each input DataFrame must have exactly one rentals_* column.\")\n",
"\n",
" col1, col2 = rental_cols\n",
" df_merged[\"difference\"] = df_merged[col2] - df_merged[col1]\n",
" return df_merged\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" # Example usage for May and June 2005\n",
" may_df = rentals_month(engine, 5, 2005)\n",
" june_df = rentals_month(engine, 6, 2005)\n",
"\n",
" may_counts = rental_count_month(may_df, 5, 2005)\n",
" june_counts = rental_count_month(june_df, 6, 2005)\n",
"\n",
" comparison = compare_rentals(may_counts, june_counts)\n",
" print(comparison.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}