"""Compare per-customer rental activity between two months of the Sakila database.

Python source of the notebook's code cells: build a SQLAlchemy engine for a
MySQL Sakila database, pull the rentals of a given month, count them per
customer, and compare two months side by side.
"""

import getpass
import os

import pandas as pd


def make_engine():
    """Build a SQLAlchemy engine for the Sakila MySQL database.

    Connection settings are read from the environment so that no credential
    is stored in the source:

    * ``SAKILA_DB_USER``     (default ``"root"``)
    * ``SAKILA_DB_PASSWORD`` (prompted for interactively when unset)
    * ``SAKILA_DB_HOST``     (default ``"localhost"``)
    * ``SAKILA_DB_NAME``     (default ``"sakila"``)

    Returns:
        SQLAlchemy engine using the ``mysql+pymysql`` driver.
    """
    # Imported lazily so the pandas-only helpers below stay importable on a
    # machine that has no SQLAlchemy / database driver installed.
    from sqlalchemy import create_engine
    from sqlalchemy.engine import URL

    password = os.environ.get("SAKILA_DB_PASSWORD")
    if password is None:
        password = getpass.getpass("MySQL password: ")

    # URL.create takes the components unescaped, so passwords containing
    # characters such as '#', '@' or '/' need no manual percent-encoding.
    url = URL.create(
        drivername="mysql+pymysql",
        username=os.environ.get("SAKILA_DB_USER", "root"),
        password=password,
        host=os.environ.get("SAKILA_DB_HOST", "localhost"),
        database=os.environ.get("SAKILA_DB_NAME", "sakila"),
    )
    return create_engine(url)


def rentals_month(engine, month, year):
    """Retrieve rental data for a specific month and year.

    Parameters:
        engine : SQLAlchemy engine object
        month  : int, month number (e.g., 5 for May)
        year   : int, year number (e.g., 2005)

    Returns:
        pandas DataFrame with the columns rental_id, rental_date and
        customer_id for every row of ``rental`` dated in that month and year.
    """
    from sqlalchemy import text

    # Bound parameters instead of string interpolation: the values are sent
    # to the driver separately from the SQL text, so they cannot alter it.
    query = text(
        """
        SELECT
            rental_id,
            rental_date,
            customer_id
        FROM rental
        WHERE MONTH(rental_date) = :month
          AND YEAR(rental_date) = :year
        """
    )
    # int() also normalises numpy integers, which DB drivers may not accept.
    params = {"month": int(month), "year": int(year)}
    return pd.read_sql(query, engine, params=params)


def rental_count_month(df, month, year):
    """Count the number of rentals per customer for a given month/year.

    The month and year are only used to name the count column; ``df`` is
    expected to already hold the rentals of that period.

    Parameters:
        df    : pandas DataFrame from rentals_month() (needs 'customer_id')
        month : int
        year  : int

    Returns:
        pandas DataFrame with columns [customer_id, rentals_MM_YYYY],
        one row per customer present in ``df``.
    """
    col_name = f"rentals_{month:02d}_{year}"
    per_customer = df.groupby("customer_id").size()
    return per_customer.reset_index(name=col_name)


def compare_rentals(df1, df2, how="inner"):
    """Compare rental activity between two months.

    Parameters:
        df1, df2 : DataFrames from rental_count_month()
        how      : join type passed to pandas.merge. The default 'inner'
                   keeps only customers present in both frames; with
                   'outer', 'left' or 'right' a customer missing from one
                   frame gets a count of 0 for that month.

    Returns:
        Combined DataFrame with the rental counts of both months and a
        'difference' column (second month minus first month).

    Raises:
        ValueError: if the merged frame does not contain exactly two
            'rentals_*' columns.
    """
    merged = pd.merge(df1, df2, on="customer_id", how=how)

    # Identify the two rental-count columns dynamically, in merge order
    # (df1's column first, df2's column second).
    count_cols = [c for c in merged.columns if c.startswith("rentals_")]
    if len(count_cols) != 2:
        raise ValueError(
            "expected exactly two 'rentals_*' columns after merging, "
            f"found {count_cols}"
        )

    if how != "inner":
        # Non-inner joins leave NaN where a customer had no rentals in one
        # of the months; a missing count means zero rentals.
        merged[count_cols] = merged[count_cols].fillna(0).astype("int64")

    first, second = count_cols
    merged["difference"] = merged[second] - merged[first]
    return merged


if __name__ == "__main__":
    # MySQL Sakila database (connection settings come from the environment).
    engine = make_engine()

    # Get data for May and June 2005
    may_df = rentals_month(engine, 5, 2005)
    june_df = rentals_month(engine, 6, 2005)

    # Count rentals per customer
    may_counts = rental_count_month(may_df, 5, 2005)
    june_counts = rental_count_month(june_df, 6, 2005)

    # Compare customer activity
    comparison = compare_rentals(may_counts, june_counts)

    print(comparison.head())