From 65a5931eb983f1cf18d7677ec6a51e9415fd3245 Mon Sep 17 00:00:00 2001
From: Marisa Oliveira <163911161+HR-Freak@users.noreply.github.com>
Date: Sun, 4 Jan 2026 11:37:45 +0000
Subject: [PATCH] Solved lab: SQL to Python connection
---
sql-python-connection.ipynb | 487 ++++++++++++++++++++++++++++++++++++
1 file changed, 487 insertions(+)
create mode 100644 sql-python-connection.ipynb
diff --git a/sql-python-connection.ipynb b/sql-python-connection.ipynb
new file mode 100644
index 0000000..adae7cb
--- /dev/null
+++ b/sql-python-connection.ipynb
@@ -0,0 +1,487 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c3d411ce",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: sqlalchemy in /opt/anaconda3/lib/python3.13/site-packages (2.0.39)\n",
+ "Requirement already satisfied: pymysql in /opt/anaconda3/lib/python3.13/site-packages (1.1.2)\n",
+ "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.13/site-packages (2.2.3)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.0 in /opt/anaconda3/lib/python3.13/site-packages (from sqlalchemy) (4.12.2)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2.1.3)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2025.2)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install sqlalchemy pymysql pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d9d299cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "from sqlalchemy import create_engine\n",
+ "from sqlalchemy.engine import URL\n",
+ "\n",
+ "# Credentials are read from the environment so they never get committed with the notebook.\n",
+ "USER, PASSWORD = os.getenv(\"SAKILA_USER\", \"\"), os.getenv(\"SAKILA_PASSWORD\", \"\")\n",
+ "HOST, PORT, DB = \"127.0.0.1\", 3306, \"sakila\"\n",
+ "# URL.create takes each part as-is, so @ : / in a password cannot corrupt the URL as f-string interpolation can.\n",
+ "engine = create_engine(URL.create(\"mysql+pymysql\", username=USER, password=PASSWORD, host=HOST, port=PORT, database=DB))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "74c82b84",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
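+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>1</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"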
+ ],
+ "text/plain": [
+ " 1\n",
+ "0 1"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.read_sql(sql=\"SELECT 1;\", con=engine)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "d0e81744",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n",
+ "    \"\"\"Return sakila.rental rows (ordered by rental_id) rented in the given month/year.\n",
+ "\n",
+ "    Uses a half-open date range instead of MONTH()/YEAR() so an index on rental_date\n",
+ "    stays usable; raises ValueError when month is outside 1-12.\n",
+ "    \"\"\"\n",
+ "    from datetime import date\n",
+ "    start, end = date(year, month, 1), date(year + month // 12, month % 12 + 1, 1)\n",
+ "    query = (\"SELECT rental_id, rental_date, inventory_id, customer_id, staff_id, return_date FROM rental \"\n",
+ "             \"WHERE rental_date >= %(start)s AND rental_date < %(end)s ORDER BY rental_id;\")\n",
+ "    return pd.read_sql(query, engine, params={\"start\": start, \"end\": end})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "2a37550d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "( rental_id rental_date inventory_id customer_id staff_id \\\n",
+ " 0 1 2005-05-24 22:53:30 367 130 1 \n",
+ " 1 2 2005-05-24 22:54:33 1525 459 1 \n",
+ " 2 3 2005-05-24 23:03:39 1711 408 1 \n",
+ " 3 4 2005-05-24 23:04:41 2452 333 2 \n",
+ " 4 5 2005-05-24 23:05:21 2079 222 1 \n",
+ " \n",
+ " return_date \n",
+ " 0 2005-05-26 22:04:30 \n",
+ " 1 2005-05-28 19:40:33 \n",
+ " 2 2005-06-01 22:12:39 \n",
+ " 3 2005-06-03 01:43:41 \n",
+ " 4 2005-06-02 04:33:21 ,\n",
+ " rental_id rental_date inventory_id customer_id staff_id \\\n",
+ " 0 1158 2005-06-14 22:53:33 1632 416 2 \n",
+ " 1 1159 2005-06-14 22:55:13 4395 516 1 \n",
+ " 2 1160 2005-06-14 23:00:34 2795 239 2 \n",
+ " 3 1161 2005-06-14 23:07:08 1690 285 1 \n",
+ " 4 1162 2005-06-14 23:09:38 987 310 1 \n",
+ " \n",
+ " return_date \n",
+ " 0 2005-06-18 21:37:33 \n",
+ " 1 2005-06-17 02:11:13 \n",
+ " 2 2005-06-18 01:58:34 \n",
+ " 3 2005-06-21 17:12:08 \n",
+ " 4 2005-06-23 22:00:38 )"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pull the May and June 2005 rentals, then preview both (the cell result is a tuple of two heads).\n",
+ "may_df, june_df = (rentals_month(engine, month=m, year=2005) for m in (5, 6))\n",
+ "\n",
+ "(may_df.head(), june_df.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "9d2f5782",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rental_count_month(rentals_df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:\n",
+ "    \"\"\"Count rentals per customer for one month.\n",
+ "\n",
+ "    rentals_df needs a customer_id column; month and year are only used to label the\n",
+ "    count column. Returns one row per customer_id with columns customer_id and\n",
+ "    rentals_MM_YYYY (month zero-padded, e.g. rentals_05_2005).\n",
+ "    \"\"\"\n",
+ "    # Number of rows per customer_id, as a Series indexed by customer_id.\n",
+ "    per_customer = rentals_df.groupby(\"customer_id\").size()\n",
+ "\n",
+ "    # Move customer_id back into a column and give the counts their month label.\n",
+ "    label = f\"rentals_{month:02d}_{year}\"\n",
+ "    return per_customer.reset_index(name=label)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "5df92360",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "( customer_id rentals_05_2005\n",
+ " 0 1 2\n",
+ " 1 2 1\n",
+ " 2 3 2\n",
+ " 3 5 3\n",
+ " 4 6 3,\n",
+ " customer_id rentals_06_2005\n",
+ " 0 1 7\n",
+ " 1 2 1\n",
+ " 2 3 4\n",
+ " 3 4 6\n",
+ " 4 5 5)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-customer rental counts for each month; preview both as a tuple of two heads.\n",
+ "may_counts, june_counts = rental_count_month(may_df, 5, 2005), rental_count_month(june_df, 6, 2005)\n",
+ "\n",
+ "(may_counts.head(), june_counts.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "07f62be3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def compare_rentals(counts_df_1: pd.DataFrame, counts_df_2: pd.DataFrame) -> pd.DataFrame:\n",
+ "    \"\"\"Inner-join two per-customer count frames on customer_id and add difference = df2 - df1.\n",
+ "    Customers missing from either frame are dropped. Columns named alike in both frames are\n",
+ "    suffixed _1/_2 by the merge, so comparing identically named counts no longer raises KeyError.\n",
+ "    \"\"\"\n",
+ "    col_1 = [c for c in counts_df_1.columns if c != \"customer_id\"][0]\n",
+ "    col_2 = [c for c in counts_df_2.columns if c != \"customer_id\"][0]\n",
+ "    merged = counts_df_1.merge(counts_df_2, on=\"customer_id\", how=\"inner\", suffixes=(\"_1\", \"_2\"))\n",
+ "    left = f\"{col_1}_1\" if col_1 in counts_df_2.columns else col_1  # suffix exists only on a name clash\n",
+ "    right = f\"{col_2}_2\" if col_2 in counts_df_1.columns else col_2\n",
+ "    merged[\"difference\"] = merged[right] - merged[left]\n",
+ "    return merged"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "8e17ba0b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
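+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>customer_id</th>\n",
+ "      <th>rentals_05_2005</th>\n",
+ "      <th>rentals_06_2005</th>\n",
+ "      <th>difference</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "      <td>2</td>\n",
+ "      <td>7</td>\n",
+ "      <td>5</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>2</td>\n",
+ "      <td>1</td>\n",
+ "      <td>1</td>\n",
+ "      <td>0</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>3</td>\n",
+ "      <td>2</td>\n",
+ "      <td>4</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>5</td>\n",
+ "      <td>3</td>\n",
+ "      <td>5</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>6</td>\n",
+ "      <td>3</td>\n",
+ "      <td>4</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"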
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "0 1 2 7 5\n",
+ "1 2 1 1 0\n",
+ "2 3 2 4 2\n",
+ "3 5 3 5 2\n",
+ "4 6 3 4 1"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "comparison = compare_rentals(counts_df_1=may_counts, counts_df_2=june_counts)  # difference = June - May\n",
+ "comparison.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "01da1e3f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
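+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>customer_id</th>\n",
+ "      <th>first_name</th>\n",
+ "      <th>last_name</th>\n",
+ "      <th>email</th>\n",
+ "      <th>rentals_05_2005</th>\n",
+ "      <th>rentals_06_2005</th>\n",
+ "      <th>difference</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "      <td>MARY</td>\n",
+ "      <td>SMITH</td>\n",
+ "      <td>MARY.SMITH@sakilacustomer.org</td>\n",
+ "      <td>2</td>\n",
+ "      <td>7</td>\n",
+ "      <td>5</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>2</td>\n",
+ "      <td>PATRICIA</td>\n",
+ "      <td>JOHNSON</td>\n",
+ "      <td>PATRICIA.JOHNSON@sakilacustomer.org</td>\n",
+ "      <td>1</td>\n",
+ "      <td>1</td>\n",
+ "      <td>0</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>3</td>\n",
+ "      <td>LINDA</td>\n",
+ "      <td>WILLIAMS</td>\n",
+ "      <td>LINDA.WILLIAMS@sakilacustomer.org</td>\n",
+ "      <td>2</td>\n",
+ "      <td>4</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>5</td>\n",
+ "      <td>ELIZABETH</td>\n",
+ "      <td>BROWN</td>\n",
+ "      <td>ELIZABETH.BROWN@sakilacustomer.org</td>\n",
+ "      <td>3</td>\n",
+ "      <td>5</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>6</td>\n",
+ "      <td>JENNIFER</td>\n",
+ "      <td>DAVIS</td>\n",
+ "      <td>JENNIFER.DAVIS@sakilacustomer.org</td>\n",
+ "      <td>3</td>\n",
+ "      <td>4</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"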
+ ],
+ "text/plain": [
+ " customer_id first_name last_name email \\\n",
+ "0 1 MARY SMITH MARY.SMITH@sakilacustomer.org \n",
+ "1 2 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org \n",
+ "2 3 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org \n",
+ "3 5 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org \n",
+ "4 6 JENNIFER DAVIS JENNIFER.DAVIS@sakilacustomer.org \n",
+ "\n",
+ " rentals_05_2005 rentals_06_2005 difference \n",
+ "0 2 7 5 \n",
+ "1 1 1 0 \n",
+ "2 2 4 2 \n",
+ "3 3 5 2 \n",
+ "4 3 4 1 "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Customer contact details to attach to the May/June comparison.\n",
+ "customer_info = pd.read_sql(\"SELECT customer_id, first_name, last_name, email FROM customer;\", engine)\n",
+ "\n",
+ "# Left join keeps every comparison row; afterwards the identity columns are moved to the front.\n",
+ "identity_cols = [\"customer_id\", \"first_name\", \"last_name\", \"email\"]\n",
+ "metric_cols = [col for col in comparison.columns if col != \"customer_id\"]\n",
+ "final = comparison.merge(customer_info, on=\"customer_id\", how=\"left\")\n",
+ "final = final[identity_cols + metric_cols]\n",
+ "\n",
+ "final.head()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}