diff --git a/lab_connecting_python_to_sql.ipynb b/lab_connecting_python_to_sql.ipynb
new file mode 100644
index 0000000..2987d46
--- /dev/null
+++ b/lab_connecting_python_to_sql.ipynb
@@ -0,0 +1,1266 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "275bb141",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: sqlalchemy in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.0.44)\n",
+ "Requirement already satisfied: greenlet>=1 in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from sqlalchemy) (3.3.0)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from sqlalchemy) (4.15.0)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 25.2 -> 25.3\n",
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install sqlalchemy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "e5c29f30",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pymysql in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.1.2)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 25.2 -> 25.3\n",
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install pymysql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "b1e3e2bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import pymysql\n",
+ "from sqlalchemy import create_engine\n",
+ "import getpass # To get the password without showing the input\n",
+ "password = getpass.getpass()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "d2808e0f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "('sakila',)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Establish a connection between Python and the Sakila database.\n",
+ "import getpass\n",
+ "from sqlalchemy import create_engine, text # <- import text\n",
+ "\n",
+ "password = getpass.getpass(\"Enter MySQL root password: \")\n",
+ "\n",
+ "bd = \"Sakila\"\n",
+ "connection_string = f\"mysql+pymysql://root:{password}@localhost/{bd}\"\n",
+ "\n",
+ "engine = create_engine(connection_string)\n",
+ "\n",
+ "# Test connection\n",
+ "with engine.connect() as connection:\n",
+ " result = connection.execute(text(\"SELECT DATABASE();\")) # <- wrap SQL in text()\n",
+ " print(result.fetchone())\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "6cd5b106",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 2. Write a Python function called `rentals_month` that retrieves rental data for a given month and year (passed as parameters) from the Sakila database as a Pandas DataFrame. The function should take in three parameters:\n",
+ "#\t- `engine`: an object representing the database connection engine to be used to establish a connection to the Sakila database.\n",
+ "#\t- `month`: an integer representing the month for which rental data is to be retrieved.\n",
+ "#\t- `year`: an integer representing the year for which rental data is to be retrieved.\n",
+ "#\tThe function should execute a SQL query to retrieve the rental data for the specified month and year from the rental table in the Sakila database,\n",
+ "# and return it as a pandas DataFrame. NOTE: the function is defined below as `rental_month` (singular).\n",
+ " \n",
+ "def rental_month(engine, month, year):\n",
+ " \"\"\"\n",
+ " Retrieves rental data for a given month and year from the Sakila database.\n",
+ "\n",
+ " Parameters:\n",
+ " engine : SQLAlchemy engine\n",
+ " Database connection engine to the Sakila database.\n",
+ " month : int\n",
+ " Month (1-12) for which to retrieve rental data.\n",
+ " year : int\n",
+ " Year (e.g., 2023) for which to retrieve rental data.\n",
+ "\n",
+ " Returns:\n",
+ " pd.DataFrame\n",
+ " DataFrame containing rental data for the specified month and year.\n",
+ " \"\"\"\n",
+ " # Build the month/year query with text() so that :month and :year are bound as parameters\n",
+ " query = text(\"\"\" \n",
+ " SELECT *\n",
+ " FROM rental\n",
+ " WHERE MONTH(rental_date) = :month\n",
+ " AND YEAR(rental_date) = :year\n",
+ " \"\"\")\n",
+ " # Execute query and return result as DataFrame\n",
+ " with engine.connect() as connection:\n",
+ " df = pd.read_sql(query, connection, params={\"month\": month, \"year\": year}) # :month and :year are placeholders to avoid SQL injection\n",
+ " \n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "896df784",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 3. Develop a Python function called `rental_count_month` that takes the DataFrame provided by `rentals_month` as input\n",
+ "# along with the month and year and returns a new DataFrame containing the number of rentals made by each customer_id\n",
+ "# during the selected month and year. \n",
+ "#\tThe function should also take the month and year as parameters and use them to name the new column.\n",
+ "# For example, if the input month is 05 and the year is 2005, the column name should be \"rentals_05_2005\".\n",
+ "#\tHint: consider using pandas `DataFrame.groupby()`.\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "def rental_count_month(df, month, year):\n",
+ " \"\"\"\n",
+ " Takes the DataFrame from rentals_month and returns a new DataFrame \n",
+ " containing the number of rentals made by each customer_id during the selected month and year.\n",
+ " \n",
+ " Parameters:\n",
+ " df : pd.DataFrame\n",
+ " DataFrame from rentals_month function.\n",
+ " month : int\n",
+ " Month for which rental counts are calculated.\n",
+ " year : int\n",
+ " Year for which rental counts are calculated.\n",
+ " \n",
+ " Returns:\n",
+ " pd.DataFrame\n",
+ " DataFrame with 'customer_id' and rental count column named according to month and year.\n",
+ " \"\"\"\n",
+ " # Group by customer_id and count rentals\n",
+ " rental_counts = df.groupby('customer_id').size().reset_index(name=f\"rentals_{month:02d}_{year}\")\n",
+ " \n",
+ " return rental_counts\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "e20fd15a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4. Create a Python function called compare_rentals that takes two DataFrames as input\n",
+ "# containing the number of rentals made by each customer in different months and years.\n",
+ "# The function should return a combined DataFrame with a new 'difference' column, \n",
+ "# which is the difference between the number of rentals in the two months.\n",
+ "import pandas as pd\n",
+ "\n",
+ "def compare_rentals(df1, df2):\n",
+ " \"\"\"\n",
+ " Compare rental counts between two DataFrames for different months/years.\n",
+ "\n",
+ " Parameters:\n",
+ " df1 : pd.DataFrame\n",
+ " DataFrame containing 'customer_id' and rental counts for the first month/year.\n",
+ " df2 : pd.DataFrame\n",
+ " DataFrame containing 'customer_id' and rental counts for the second month/year.\n",
+ "\n",
+ " Returns:\n",
+ " pd.DataFrame\n",
+ " Combined DataFrame with rental counts from both months and a 'difference' column\n",
+ " (df2 count - df1 count).\n",
+ " \"\"\"\n",
+ " # Merge the two DataFrames on customer_id (outer join to include all customers)\n",
+ " # Fill NaN values with 0 (customers who didn't rent in one of the months)\n",
+ " combined_df = pd.merge(df1, df2, on='customer_id', how='outer').fillna(0)\n",
+ "\n",
+ " # Identify the rental count columns (assumes only one column besides customer_id)\n",
+ " col1 = [c for c in df1.columns if c != 'customer_id'][0]\n",
+ " col2 = [c for c in df2.columns if c != 'customer_id'][0]\n",
+ "\n",
+ " # Create the difference column\n",
+ " combined_df['difference'] = combined_df[col2] - combined_df[col1]\n",
+ "\n",
+ " return combined_df\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "796fed32",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total rentals in May 2005: 1156\n",
+ "\n",
+ "First 5 rows of May rental data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2005-05-24 22:53:30 | \n",
+ " 367 | \n",
+ " 130 | \n",
+ " 2005-05-26 22:04:30 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2005-05-24 22:54:33 | \n",
+ " 1525 | \n",
+ " 459 | \n",
+ " 2005-05-28 19:40:33 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2005-05-24 23:03:39 | \n",
+ " 1711 | \n",
+ " 408 | \n",
+ " 2005-06-01 22:12:39 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 2005-05-24 23:04:41 | \n",
+ " 2452 | \n",
+ " 333 | \n",
+ " 2005-06-03 01:43:41 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 2005-05-24 23:05:21 | \n",
+ " 2079 | \n",
+ " 222 | \n",
+ " 2005-06-02 04:33:21 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1 2005-05-24 22:53:30 367 130 \n",
+ "1 2 2005-05-24 22:54:33 1525 459 \n",
+ "2 3 2005-05-24 23:03:39 1711 408 \n",
+ "3 4 2005-05-24 23:04:41 2452 333 \n",
+ "4 5 2005-05-24 23:05:21 2079 222 \n",
+ "\n",
+ " return_date staff_id last_update \n",
+ "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n",
+ "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n",
+ "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n",
+ "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n",
+ "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Analysis: Get rental data for May 2005\n",
+ "may_rentals = rental_month(engine, 5, 2005)\n",
+ "print(f\"Total rentals in May 2005: {len(may_rentals)}\")\n",
+ "print(\"\\nFirst 5 rows of May rental data:\")\n",
+ "display(may_rentals.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "efaefc57",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total rentals in June 2005: 2311\n",
+ "\n",
+ "First 5 rows of June rental data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1158 | \n",
+ " 2005-06-14 22:53:33 | \n",
+ " 1632 | \n",
+ " 416 | \n",
+ " 2005-06-18 21:37:33 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1159 | \n",
+ " 2005-06-14 22:55:13 | \n",
+ " 4395 | \n",
+ " 516 | \n",
+ " 2005-06-17 02:11:13 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1160 | \n",
+ " 2005-06-14 23:00:34 | \n",
+ " 2795 | \n",
+ " 239 | \n",
+ " 2005-06-18 01:58:34 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1161 | \n",
+ " 2005-06-14 23:07:08 | \n",
+ " 1690 | \n",
+ " 285 | \n",
+ " 2005-06-21 17:12:08 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1162 | \n",
+ " 2005-06-14 23:09:38 | \n",
+ " 987 | \n",
+ " 310 | \n",
+ " 2005-06-23 22:00:38 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1158 2005-06-14 22:53:33 1632 416 \n",
+ "1 1159 2005-06-14 22:55:13 4395 516 \n",
+ "2 1160 2005-06-14 23:00:34 2795 239 \n",
+ "3 1161 2005-06-14 23:07:08 1690 285 \n",
+ "4 1162 2005-06-14 23:09:38 987 310 \n",
+ "\n",
+ " return_date staff_id last_update \n",
+ "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 \n",
+ "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 \n",
+ "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 \n",
+ "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 \n",
+ "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Analysis: Get rental data for June 2005\n",
+ "june_rentals = rental_month(engine, 6, 2005)\n",
+ "print(f\"Total rentals in June 2005: {len(june_rentals)}\")\n",
+ "print(\"\\nFirst 5 rows of June rental data:\")\n",
+ "display(june_rentals.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "caadbd81",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of customers who rented in May 2005: 520\n",
+ "Number of customers who rented in June 2005: 590\n",
+ "\n",
+ "Top 5 customers in May:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 168 | \n",
+ " 197 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " 109 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 441 | \n",
+ " 506 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 19 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 53 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005\n",
+ "168 197 8\n",
+ "92 109 7\n",
+ "441 506 7\n",
+ "15 19 6\n",
+ "43 53 6"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Top 5 customers in June:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_06_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " | 445 | \n",
+ " 454 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | 209 | \n",
+ " 213 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 263 | \n",
+ " 267 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 291 | \n",
+ " 295 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_06_2005\n",
+ "30 31 11\n",
+ "445 454 10\n",
+ "209 213 9\n",
+ "263 267 9\n",
+ "291 295 9"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Get rental counts per customer for each month\n",
+ "may_counts = rental_count_month(may_rentals, 5, 2005)\n",
+ "june_counts = rental_count_month(june_rentals, 6, 2005)\n",
+ "\n",
+ "print(f\"Number of customers who rented in May 2005: {len(may_counts)}\")\n",
+ "print(f\"Number of customers who rented in June 2005: {len(june_counts)}\")\n",
+ "\n",
+ "print(\"\\nTop 5 customers in May:\")\n",
+ "display(may_counts.nlargest(5, 'rentals_05_2005'))\n",
+ "\n",
+ "print(\"\\nTop 5 customers in June:\")\n",
+ "display(june_counts.nlargest(5, 'rentals_06_2005'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "b911a76e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total customers active in either month: 598\n",
+ "Customers active in BOTH May and June: 512\n",
+ "\n",
+ "Comparison DataFrame (first 10 rows):\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " 6.0 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 3.0 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 7 | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 8 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 9 | \n",
+ " 3.0 | \n",
+ " 2.0 | \n",
+ " -1.0 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 10 | \n",
+ " 1.0 | \n",
+ " 5.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "0 1 2.0 7.0 5.0\n",
+ "1 2 1.0 1.0 0.0\n",
+ "2 3 2.0 4.0 2.0\n",
+ "3 4 0.0 6.0 6.0\n",
+ "4 5 3.0 5.0 2.0\n",
+ "5 6 3.0 4.0 1.0\n",
+ "6 7 5.0 5.0 0.0\n",
+ "7 8 1.0 3.0 2.0\n",
+ "8 9 3.0 2.0 -1.0\n",
+ "9 10 1.0 5.0 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Compare the two months\n",
+ "comparison = compare_rentals(may_counts, june_counts)\n",
+ "print(f\"Total customers active in either month: {len(comparison)}\")\n",
+ "\n",
+ "# Show customers who were active in both months\n",
+ "active_both_months = comparison[(comparison['rentals_05_2005'] > 0) & (comparison['rentals_06_2005'] > 0)]\n",
+ "print(f\"Customers active in BOTH May and June: {len(active_both_months)}\")\n",
+ "\n",
+ "print(\"\\nComparison DataFrame (first 10 rows):\")\n",
+ "display(comparison.head(10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "b0e52bc2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers by total activity:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ " total_rentals | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 195 | \n",
+ " 197 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ " 0.0 | \n",
+ " 16.0 | \n",
+ "
\n",
+ " \n",
+ " | 175 | \n",
+ " 176 | \n",
+ " 5.0 | \n",
+ " 8.0 | \n",
+ " 3.0 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " | 369 | \n",
+ " 371 | \n",
+ " 6.0 | \n",
+ " 7.0 | \n",
+ " 1.0 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " | 108 | \n",
+ " 109 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ " -2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 194 | \n",
+ " 196 | \n",
+ " 4.0 | \n",
+ " 8.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 254 | \n",
+ " 256 | \n",
+ " 5.0 | \n",
+ " 7.0 | \n",
+ " 2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 265 | \n",
+ " 267 | \n",
+ " 3.0 | \n",
+ " 9.0 | \n",
+ " 6.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 504 | \n",
+ " 506 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ " -2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 524 | \n",
+ " 526 | \n",
+ " 3.0 | \n",
+ " 9.0 | \n",
+ " 6.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 0.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference total_rentals\n",
+ "195 197 8.0 8.0 0.0 16.0\n",
+ "175 176 5.0 8.0 3.0 13.0\n",
+ "369 371 6.0 7.0 1.0 13.0\n",
+ "108 109 7.0 5.0 -2.0 12.0\n",
+ "194 196 4.0 8.0 4.0 12.0\n",
+ "254 256 5.0 7.0 2.0 12.0\n",
+ "265 267 3.0 9.0 6.0 12.0\n",
+ "504 506 7.0 5.0 -2.0 12.0\n",
+ "524 526 3.0 9.0 6.0 12.0\n",
+ "30 31 0.0 11.0 11.0 11.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "# Total rentals per customer across both months\n",
+ "comparison['total_rentals'] = comparison['rentals_05_2005'] + comparison['rentals_06_2005']\n",
+ "\n",
+ "print(\"Top 10 customers by total activity:\")\n",
+ "top_customers = comparison.nlargest(10, 'total_rentals')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference', 'total_rentals']]\n",
+ "display(top_customers)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "b386f8e4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers with biggest increase from May to June:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 0.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ "
\n",
+ " \n",
+ " | 327 | \n",
+ " 329 | \n",
+ " 0.0 | \n",
+ " 9.0 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " | 452 | \n",
+ " 454 | \n",
+ " 1.0 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " 178 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 211 | \n",
+ " 213 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 266 | \n",
+ " 268 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 293 | \n",
+ " 295 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 334 | \n",
+ " 336 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 338 | \n",
+ " 340 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 455 | \n",
+ " 457 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "30 31 0.0 11.0 11.0\n",
+ "327 329 0.0 9.0 9.0\n",
+ "452 454 1.0 10.0 9.0\n",
+ "177 178 0.0 8.0 8.0\n",
+ "211 213 1.0 9.0 8.0\n",
+ "266 268 0.0 8.0 8.0\n",
+ "293 295 1.0 9.0 8.0\n",
+ "334 336 0.0 8.0 8.0\n",
+ "338 340 0.0 8.0 8.0\n",
+ "455 457 1.0 9.0 8.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Customers with biggest increase from May to June\n",
+ "print(\"Top 10 customers with biggest increase from May to June:\")\n",
+ "biggest_increase = comparison.nlargest(10, 'difference')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n",
+ "display(biggest_increase)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "a64fe03e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers with biggest decrease from May to June:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 205 | \n",
+ " 207 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " -5.0 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 160 | \n",
+ " 161 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 196 | \n",
+ " 198 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 248 | \n",
+ " 250 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 272 | \n",
+ " 274 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 594 | \n",
+ " 596 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 19 | \n",
+ " 6.0 | \n",
+ " 3.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ " | 123 | \n",
+ " 124 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ " | 220 | \n",
+ " 222 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "205 207 6.0 1.0 -5.0\n",
+ "13 14 5.0 1.0 -4.0\n",
+ "160 161 6.0 2.0 -4.0\n",
+ "196 198 5.0 1.0 -4.0\n",
+ "248 250 5.0 1.0 -4.0\n",
+ "272 274 6.0 2.0 -4.0\n",
+ "594 596 6.0 2.0 -4.0\n",
+ "18 19 6.0 3.0 -3.0\n",
+ "123 124 4.0 1.0 -3.0\n",
+ "220 222 5.0 2.0 -3.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Customers with biggest decrease from May to June\n",
+ "print(\"Top 10 customers with biggest decrease from May to June:\")\n",
+ "biggest_decrease = comparison.nsmallest(10, 'difference')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n",
+ "display(biggest_decrease)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}