# 2. rentals_month(engine, month, year)
def rentals_month(engine, month: int, year: int) -> pd.DataFrame:
    """Return all rentals for a given month/year from the ``rental`` table.

    Args:
        engine: Connectable handed to ``pandas.read_sql`` as ``con``.
        month: Calendar month number, 1 through 12.
        year: Calendar year, bound to the ``:year`` query parameter.

    Returns:
        A DataFrame with the columns ``rental_id``, ``rental_date``,
        ``inventory_id``, ``customer_id``, ``return_date``, ``staff_id``
        and ``last_update`` for every row whose ``rental_date`` falls in
        the requested month and year.

    Raises:
        ValueError: If ``month`` is outside 1..12.
    """
    # Reject impossible months up front: the query would otherwise run and
    # quietly return an empty frame, hiding the caller's mistake.
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month!r}")

    # Month and year are bound parameters, never interpolated into the SQL.
    query = text(
        """
        SELECT
            rental_id,
            rental_date,
            inventory_id,
            customer_id,
            return_date,
            staff_id,
            last_update
        FROM rental
        WHERE
            MONTH(rental_date) = :month
            AND YEAR(rental_date) = :year
        """
    )
    df = pd.read_sql(query, con=engine, params={"month": month, "year": year})
    return df
# 3. rental_count_month(df_rentals, month, year)
def rental_count_month(df_rentals: pd.DataFrame, month: int, year: int) -> pd.DataFrame:
    """Count rentals per customer for one month/year.

    Args:
        df_rentals: Rentals frame with at least ``customer_id`` and
            ``rental_id`` columns. When a ``rental_date`` column is present
            it is used to keep only rows from the requested period.
        month: Calendar month number; zero-padded to two digits in the
            output column name.
        year: Calendar year used in the output column name.

    Returns:
        A DataFrame with one row per ``customer_id`` and a count column
        named ``rentals_MM_YYYY``.
    """
    col_name = f"rentals_{month:02d}_{year}"

    # Without this filter the month/year arguments would only label the
    # column, and rows from other periods would be counted under that label.
    # Frames already restricted to the period pass through unchanged.
    if "rental_date" in df_rentals.columns:
        rental_dates = pd.to_datetime(df_rentals["rental_date"], errors="coerce")
        in_period = (rental_dates.dt.month == month) & (rental_dates.dt.year == year)
        # Rebinding the local name leaves the caller's frame untouched.
        df_rentals = df_rentals[in_period]

    # Group by customer_id and count non-null rental_id values.
    counts = (
        df_rentals.groupby("customer_id")["rental_id"]
        .count()
        .reset_index()
        .rename(columns={"rental_id": col_name})
    )
    return counts
'root' or your user'): \").strip()\n", + "host = input(\"MySQL host (default 'localhost'): \").strip() or \"localhost\"\n", + "db = input(\"Database name (default 'sakila'): \").strip() or \"sakila\"\n", + "password = getpass.getpass(f\"Password for {user}@{host}: \")\n", + "\n", + "# escape password to be safe in a URL\n", + "password_esc = urllib.parse.quote_plus(password)\n", + "\n", + "engine = create_engine(f\"mysql+mysqlconnector://{user}:{password_esc}@{host}/{db}\")\n", + "\n", + "# quick connection test with helpful error message\n", + "try:\n", + " with engine.connect() as conn:\n", + " conn.execute(text(\"SELECT 1\"))\n", + "except Exception as e:\n", + " print(\"Failed to connect to the database. Check username/password/host/database.\")\n", + " print(\"Connection error:\", e)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 47c45e437e2cd9288e35d6403cd695dcb11b0e0b Mon Sep 17 00:00:00 2001 From: REYNOLD TAKURA CHORUMA Date: Tue, 30 Dec 2025 14:40:25 +0100 Subject: [PATCH 2/2] Edited Lab - Connecting Python to SQL --- .DS_Store | Bin 0 -> 6148 bytes Connecting Python to SQL.ipynb | 77 ++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c857cc1f4836f49a470105a6ff4016951c2028f7 GIT binary patch literal 6148 zcmeHLyH3ME5S)V)L1|J_p=)R@@dqY21rkU=egHW^5GmLcP|*2@_(qtyJx~m8K`7dl z_Ga&P_Z}np9tIF*FRK$^3SdkX1{njQ=0Vq94i<*g#~qe<#u^Ro2RizXO`7`=&d_qs z4YubWafMf|<{|katrigrT;l=-*Z4$lgZg~aF6&KOvmcT8Mm>H{LJK&+4dysSg-c?A 
zN36bO%otT{Wu!DOoB?OR8E^)ifnQ^QXSQTKG4#?Ia0Z-#F9x(fM5?gN*cqm+gN3fo z@{fcP+PVe{W5zOLXGjkvSSry{Lw;feOJ{q`ab?EN(9#jo$m}PN{P}nZX>_*76pm0D zdg%-}1APYetvc2DKj$Yi_{i_4c+DAb26l{rFsc@nenaK6^~dsb)<)_rRid~~28DL- j62OY~k;CLPf0WI*GGk|`Ueun_iGC4Cgm~c$`~d?WgN!aw literal 0 HcmV?d00001 diff --git a/Connecting Python to SQL.ipynb b/Connecting Python to SQL.ipynb index 3d518be..ba79135 100644 --- a/Connecting Python to SQL.ipynb +++ b/Connecting Python to SQL.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "id": "f9878474", "metadata": {}, "outputs": [], @@ -16,11 +16,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "b8eddb12", "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "from sqlalchemy import text\n", + "\n", "# 2. rentals_month(engine, month, year)\n", "def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n", " \"\"\"\n", @@ -48,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "2428798c", "metadata": {}, "outputs": [], @@ -75,15 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b658ebde", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "4b9bead9", "metadata": {}, "outputs": [], @@ -117,6 +112,64 @@ " print(\"Failed to connect to the database. Check username/password/host/database.\")\n", " print(\"Connection error:\", e)" ] + }, + { + "cell_type": "markdown", + "id": "0c4b1703", + "metadata": {}, + "source": [ + "# Create compare_rentals to compare between two months " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a50f45cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference]\n", + "Index: []\n" + ] + } + ], + "source": [ + "# 4. 
# 4. compare_rentals(df1, df2)
def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Merge two per-customer rental counts and add a ``difference`` column.

    Args:
        df1: Frame with ``customer_id`` and exactly one ``rentals_*`` column.
        df2: Frame with ``customer_id`` and exactly one ``rentals_*`` column.

    Returns:
        The inner join of ``df1`` and ``df2`` on ``customer_id`` (only
        customers present in both frames), plus ``difference`` = the
        ``rentals_*`` value from ``df2`` minus the one from ``df1``.

    Raises:
        ValueError: If either input does not have exactly one
            ``rentals_*`` column.
    """
    # Validate each input on its own. Counting on the merged frame would
    # accept one frame with two rentals_* columns and another with none,
    # then subtract two columns that both came from the same input.
    cols1 = [c for c in df1.columns if str(c).startswith("rentals_")]
    cols2 = [c for c in df2.columns if str(c).startswith("rentals_")]
    if len(cols1) != 1 or len(cols2) != 1:
        raise ValueError("Each input DataFrame must have exactly one rentals_* column.")
    col1, col2 = cols1[0], cols2[0]

    # Inner join: keep customers active in both periods.
    df_merged = df1.merge(df2, on="customer_id", how="inner")

    # When both inputs use the same column name, merge disambiguates them
    # with its default "_x" (left) and "_y" (right) suffixes.
    if col1 == col2:
        col1, col2 = f"{col1}_x", f"{col2}_y"

    df_merged["difference"] = df_merged[col2] - df_merged[col1]
    return df_merged


if __name__ == "__main__":
    # Example usage for May and June 2005
    may_df = rentals_month(engine, 5, 2005)
    june_df = rentals_month(engine, 6, 2005)

    may_counts = rental_count_month(may_df, 5, 2005)
    june_counts = rental_count_month(june_df, 6, 2005)

    comparison = compare_rentals(may_counts, june_counts)
    print(comparison.head())