diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..04d89c7
Binary files /dev/null and b/.DS_Store differ
diff --git a/lab18-notebook.ipynb b/lab18-notebook.ipynb
new file mode 100644
index 0000000..9c22951
--- /dev/null
+++ b/lab18-notebook.ipynb
@@ -0,0 +1,2113 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "c02a7dec",
+ "metadata": {},
+ "source": [
+ "# Lab 18: SQL-Python Connection - Sakila Database Analysis\n",
+ "\n",
+ "This lab analyzes customer rental activity between May and June 2005 in the Sakila database."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "61472f8a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: sqlalchemy in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (2.0.44)\n",
+ "Requirement already satisfied: greenlet>=1 in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (from sqlalchemy) (3.3.0)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.0 in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (from sqlalchemy) (4.15.0)\n",
+ "Requirement already satisfied: greenlet>=1 in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (from sqlalchemy) (3.3.0)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.0 in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (from sqlalchemy) (4.15.0)\n",
+ "Requirement already satisfied: pymysql in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (1.1.2)\n",
+ "Requirement already satisfied: pymysql in /Users/borjarubiomartin/Desktop/data_analytics/labs/18lab-sql-python-connection/.venv/lib/python3.13/site-packages (1.1.2)\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Install required packages.\n",
+    "# %pip (unlike !pip) always installs into the environment of the running kernel.\n",
+    "%pip install sqlalchemy pymysql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "f9dd9212",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import libraries\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import pymysql\n",
+ "from sqlalchemy import create_engine\n",
+ "import getpass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "0a99e167",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ā
MySQL client found!\n",
+ "Path: /usr/local/mysql/bin/mysql\n",
+ "Version: /usr/local/mysql/bin/mysql Ver 9.4.0 for macos15 on x86_64 (MySQL Community Server - GPL)\n",
+ "\n",
+ "š Great! MySQL is available at: /usr/local/mysql/bin/mysql\n",
+ "Now let's test the connection to Sakila database...\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test MySQL connection (works with your existing MySQL installation)\n",
+ "import subprocess\n",
+ "import sys\n",
+ "\n",
+    "def check_mysql_status():\n",
+    "    \"\"\"Locate a runnable MySQL command-line client.\n",
+    "\n",
+    "    Tries a list of common macOS install locations (and finally the bare\n",
+    "    ``mysql`` name, resolved through PATH) by running ``<path> --version``.\n",
+    "    This only shows that the *client* binary can be executed; it does not\n",
+    "    check whether a MySQL server is running or accepting connections.\n",
+    "\n",
+    "    Returns:\n",
+    "        The first candidate path whose ``--version`` call exits with code 0,\n",
+    "        or None when no candidate works.\n",
+    "    \"\"\"\n",
+    "    # Common MySQL installation paths on macOS\n",
+    "    mysql_paths = [\n",
+    "        '/usr/local/mysql/bin/mysql',  # MySQL Installer\n",
+    "        '/opt/homebrew/bin/mysql',     # Homebrew (Apple Silicon)\n",
+    "        '/usr/local/bin/mysql',        # Homebrew (Intel)\n",
+    "        'mysql'                        # If in PATH\n",
+    "    ]\n",
+    "\n",
+    "    for mysql_path in mysql_paths:\n",
+    "        try:\n",
+    "            result = subprocess.run([mysql_path, '--version'],\n",
+    "                                    capture_output=True, text=True, timeout=5)\n",
+    "        except (subprocess.TimeoutExpired, OSError):\n",
+    "            # OSError covers FileNotFoundError as well as PermissionError and\n",
+    "            # similar launch failures, so one unusable candidate cannot abort\n",
+    "            # the scan of the remaining paths.\n",
+    "            continue\n",
+    "\n",
+    "        if result.returncode == 0:\n",
+    "            print(\"✅ MySQL client found!\")\n",
+    "            print(f\"Path: {mysql_path}\")\n",
+    "            print(f\"Version: {result.stdout.strip()}\")\n",
+    "            return mysql_path\n",
+    "\n",
+    "    print(\"❌ MySQL client not found in common locations\")\n",
+    "    return None\n",
+ "\n",
+ "# Run the check\n",
+ "mysql_path = check_mysql_status()\n",
+ "\n",
+ "if mysql_path:\n",
+ " print(f\"\\nš Great! MySQL is available at: {mysql_path}\")\n",
+ " print(\"Now let's test the connection to Sakila database...\")\n",
+ "else:\n",
+ " print(\"\\nš§ MySQL not found. Please check installation.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "8a23564d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ā ļø Sakila database test needs authentication\n",
+ "This is normal - you'll need to provide your MySQL password\n",
+ "\n",
+ "š Ready to proceed with the lab!\n",
+ "Continue to the next cell to establish connection with password.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test connection to Sakila database\n",
+    "def test_sakila_connection(mysql_path='/usr/local/mysql/bin/mysql'):\n",
+    "    \"\"\"Probe the Sakila schema with the MySQL command-line client, without a password.\n",
+    "\n",
+    "    Parameters:\n",
+    "        mysql_path: Path of the mysql client executable to run. Defaults to the\n",
+    "            MySQL Installer location; pass the value returned by\n",
+    "            check_mysql_status() to reuse the client that was actually found.\n",
+    "\n",
+    "    Returns:\n",
+    "        True when the query succeeds, or when the server rejects the\n",
+    "        password-less login (the password is supplied in a later cell).\n",
+    "        False for every other failure, including a client that cannot be\n",
+    "        started or a non-authentication error reported by the client.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # Test connection without password (if MySQL allows it)\n",
+    "        result = subprocess.run([mysql_path,\n",
+    "                                 '-e', 'USE sakila; SELECT COUNT(*) as table_count FROM information_schema.tables WHERE table_schema=\"sakila\";'],\n",
+    "                                capture_output=True, text=True, timeout=10)\n",
+    "    except Exception as e:\n",
+    "        print(f\"❌ Connection test failed: {e}\")\n",
+    "        return False\n",
+    "\n",
+    "    if result.returncode == 0 and 'table_count' in result.stdout:\n",
+    "        print(\"✅ Sakila database found!\")\n",
+    "        print(\"Sample output:\", result.stdout.strip())\n",
+    "        return True\n",
+    "\n",
+    "    if 'Access denied' in result.stderr:\n",
+    "        # Only a rejected login is expected at this point; it is not an error.\n",
+    "        print(\"⚠️ Sakila database test needs authentication\")\n",
+    "        print(\"This is normal - you'll need to provide your MySQL password\")\n",
+    "        return True\n",
+    "\n",
+    "    # Anything else (unknown database, server not reachable, ...) is a real problem\n",
+    "    # and must not be reported as a harmless authentication prompt.\n",
+    "    print(f\"❌ Connection test failed: {result.stderr.strip() or result.stdout.strip()}\")\n",
+    "    return False\n",
+ "\n",
+ "# Run the test\n",
+ "sakila_ok = test_sakila_connection()\n",
+ "\n",
+ "if sakila_ok:\n",
+ " print(\"\\nš Ready to proceed with the lab!\")\n",
+ " print(\"Continue to the next cell to establish connection with password.\")\n",
+ "else:\n",
+ " print(\"\\nā There might be an issue with your MySQL/Sakila setup.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9558bff",
+ "metadata": {},
+ "source": [
+    "## ✅ MySQL Setup Status\n",
+ "\n",
+ "**Great news! MySQL and Sakila are already installed and working on your system.**\n",
+ "\n",
+ "### Current Status:\n",
+    "- ✅ MySQL Server: Installed and accessible\n",
+    "- ✅ Sakila Database: Available\n",
+    "- ✅ Python packages: Installed\n",
+ "\n",
+ "### Next Steps:\n",
+ "1. **Continue to the connection cell below** \n",
+ "2. **Enter your MySQL password** when prompted\n",
+ "3. **Run the analysis cells** to complete the lab\n",
+ "\n",
+ "### Optional: Add MySQL to PATH (for convenience)\n",
+ "If you want to use `mysql` command directly in Terminal, add this to your `~/.zshrc`:\n",
+ "```bash\n",
+ "# Add MySQL to PATH\n",
+ "export PATH=\"/usr/local/mysql/bin:$PATH\"\n",
+ "```\n",
+ "Then run: `source ~/.zshrc`\n",
+ "\n",
+ "**You're ready to proceed with the lab! š**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "3972b4fa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Connection established successfully!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Database connection setup\n",
+    "def create_sakila_connection(password, host='localhost', user='root', database='sakila'):\n",
+    "    \"\"\"\n",
+    "    Create a SQLAlchemy engine for the Sakila database (MySQL via the PyMySQL driver).\n",
+    "\n",
+    "    The URL is assembled with URL.create() instead of string formatting so that\n",
+    "    a password containing characters such as '@', ':' or '/' is passed through\n",
+    "    intact rather than corrupting the connection string.\n",
+    "\n",
+    "    Parameters:\n",
+    "        password: MySQL password for `user`\n",
+    "        host: Server host name (default 'localhost')\n",
+    "        user: MySQL user name (default 'root')\n",
+    "        database: Schema to connect to (default 'sakila')\n",
+    "\n",
+    "    Returns:\n",
+    "        SQLAlchemy engine. Creating the engine does not contact the server;\n",
+    "        the first connection is made when the engine is first used.\n",
+    "    \"\"\"\n",
+    "    from sqlalchemy.engine import URL  # local import keeps this cell self-contained\n",
+    "\n",
+    "    connection_url = URL.create(\n",
+    "        'mysql+pymysql',\n",
+    "        username=user,\n",
+    "        password=password,\n",
+    "        host=host,\n",
+    "        database=database,\n",
+    "    )\n",
+    "    return create_engine(connection_url)\n",
+ "\n",
+    "# Get password and create connection\n",
+    "password = getpass.getpass(\"Enter your MySQL password: \")\n",
+    "engine = create_sakila_connection(password)\n",
+    "\n",
+    "# create_engine() is lazy, so open (and immediately release) one real connection:\n",
+    "# a wrong password or an unreachable server now fails here instead of the\n",
+    "# success message being printed unconditionally.\n",
+    "with engine.connect():\n",
+    "    pass\n",
+    "print(\"Connection established successfully!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "28468b82",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function 1: rentals_month\n",
+    "def rentals_month(engine, month, year):\n",
+    "    \"\"\"\n",
+    "    Retrieves rental data for a given month and year from the Sakila database.\n",
+    "\n",
+    "    Rows are selected with a half-open date range\n",
+    "    (first day of the month <= rental_date < first day of the next month)\n",
+    "    rather than MONTH()/YEAR() calls on the column, so the filter can make use\n",
+    "    of an index on rental_date where one exists.\n",
+    "\n",
+    "    Parameters:\n",
+    "        engine: Database connection engine\n",
+    "        month: Integer representing the month (1-12)\n",
+    "        year: Integer representing the year\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame containing rental data for the specified month and year\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If month is not between 1 and 12\n",
+    "    \"\"\"\n",
+    "    from datetime import date  # local import: only this function needs it\n",
+    "\n",
+    "    if not 1 <= month <= 12:\n",
+    "        raise ValueError(f\"month must be between 1 and 12, got {month}\")\n",
+    "\n",
+    "    period_start = date(year, month, 1)\n",
+    "    # December rolls over to January of the following year\n",
+    "    period_end = date(year + month // 12, month % 12 + 1, 1)\n",
+    "\n",
+    "    query = \"\"\"\n",
+    "    SELECT rental_id, rental_date, customer_id, inventory_id, staff_id, return_date, last_update\n",
+    "    FROM rental\n",
+    "    WHERE rental_date >= %(start)s AND rental_date < %(end)s\n",
+    "    \"\"\"\n",
+    "\n",
+    "    df = pd.read_sql(query, engine, params={'start': period_start, 'end': period_end})\n",
+    "    return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "38561a7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function 2: rental_count_month\n",
+    "def rental_count_month(df, month, year):\n",
+    "    \"\"\"\n",
+    "    Counts the rentals each customer made in one month's rental data.\n",
+    "\n",
+    "    Parameters:\n",
+    "        df: DataFrame from rentals_month (must contain a customer_id column)\n",
+    "        month: Integer month; used only to build the name of the count column\n",
+    "        year: Integer year; used only to build the name of the count column\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with two columns: customer_id and rentals_MM_YYYY, the number\n",
+    "        of rows df holds for that customer\n",
+    "    \"\"\"\n",
+    "    # Count column is named rentals_MM_YYYY, e.g. rentals_05_2005\n",
+    "    count_label = f\"rentals_{month:02d}_{year}\"\n",
+    "\n",
+    "    # One row per customer_id; the group sizes become the named count column\n",
+    "    per_customer = df.groupby('customer_id').size()\n",
+    "    return per_customer.reset_index(name=count_label)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "22973696",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function 3: compare_rentals\n",
+    "def compare_rentals(df1, df2):\n",
+    "    \"\"\"\n",
+    "    Combines two per-customer rental-count DataFrames and adds a 'difference' column.\n",
+    "\n",
+    "    Parameters:\n",
+    "        df1: DataFrame with customer_id and the rental counts for the first month\n",
+    "        df2: DataFrame with customer_id and the rental counts for the second month\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with customer_id, both rental count columns (integers, with 0\n",
+    "        for a customer who has no rentals in that month) and 'difference',\n",
+    "        computed as second month minus first month\n",
+    "    \"\"\"\n",
+    "    # Outer join keeps customers who appear in only one of the two months\n",
+    "    merged_df = pd.merge(df1, df2, on='customer_id', how='outer')\n",
+    "\n",
+    "    # Every column except the join key holds rental counts\n",
+    "    rental_cols = [col for col in merged_df.columns if col != 'customer_id']\n",
+    "\n",
+    "    # The outer join marks a missing month with NaN, which turns the counts into\n",
+    "    # floats (2.0, 7.0, ...). Fill with 0 and cast back so counts stay integers.\n",
+    "    merged_df = merged_df.fillna(0).astype({col: int for col in rental_cols})\n",
+    "\n",
+    "    # Calculate the difference between the two rental count columns\n",
+    "    if len(rental_cols) >= 2:\n",
+    "        merged_df['difference'] = merged_df[rental_cols[1]] - merged_df[rental_cols[0]]\n",
+    "\n",
+    "    return merged_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "a3c7f776",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total rentals in May 2005: 1156\n",
+ "\n",
+ "First 5 rows of May rental data:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rental_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rental_date",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "inventory_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "staff_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "return_date",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ },
+ {
+ "name": "last_update",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ }
+ ],
+ "ref": "56bfadfa-c51f-42f1-82ce-61ea6cf059d9",
+ "rows": [
+ [
+ "0",
+ "1",
+ "2005-05-24 22:53:30",
+ "130",
+ "367",
+ "1",
+ "2005-05-26 22:04:30",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "1",
+ "2",
+ "2005-05-24 22:54:33",
+ "459",
+ "1525",
+ "1",
+ "2005-05-28 19:40:33",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "2",
+ "3",
+ "2005-05-24 23:03:39",
+ "408",
+ "1711",
+ "1",
+ "2005-06-01 22:12:39",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "3",
+ "4",
+ "2005-05-24 23:04:41",
+ "333",
+ "2452",
+ "2",
+ "2005-06-03 01:43:41",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "4",
+ "5",
+ "2005-05-24 23:05:21",
+ "222",
+ "2079",
+ "1",
+ "2005-06-02 04:33:21",
+ "2006-02-15 21:30:53"
+ ]
+ ],
+ "shape": {
+ "columns": 7,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " customer_id | \n",
+ " inventory_id | \n",
+ " staff_id | \n",
+ " return_date | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2005-05-24 22:53:30 | \n",
+ " 130 | \n",
+ " 367 | \n",
+ " 1 | \n",
+ " 2005-05-26 22:04:30 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2005-05-24 22:54:33 | \n",
+ " 459 | \n",
+ " 1525 | \n",
+ " 1 | \n",
+ " 2005-05-28 19:40:33 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2005-05-24 23:03:39 | \n",
+ " 408 | \n",
+ " 1711 | \n",
+ " 1 | \n",
+ " 2005-06-01 22:12:39 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 2005-05-24 23:04:41 | \n",
+ " 333 | \n",
+ " 2452 | \n",
+ " 2 | \n",
+ " 2005-06-03 01:43:41 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 2005-05-24 23:05:21 | \n",
+ " 222 | \n",
+ " 2079 | \n",
+ " 1 | \n",
+ " 2005-06-02 04:33:21 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date customer_id inventory_id staff_id \\\n",
+ "0 1 2005-05-24 22:53:30 130 367 1 \n",
+ "1 2 2005-05-24 22:54:33 459 1525 1 \n",
+ "2 3 2005-05-24 23:03:39 408 1711 1 \n",
+ "3 4 2005-05-24 23:04:41 333 2452 2 \n",
+ "4 5 2005-05-24 23:05:21 222 2079 1 \n",
+ "\n",
+ " return_date last_update \n",
+ "0 2005-05-26 22:04:30 2006-02-15 21:30:53 \n",
+ "1 2005-05-28 19:40:33 2006-02-15 21:30:53 \n",
+ "2 2005-06-01 22:12:39 2006-02-15 21:30:53 \n",
+ "3 2005-06-03 01:43:41 2006-02-15 21:30:53 \n",
+ "4 2005-06-02 04:33:21 2006-02-15 21:30:53 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Analysis: Get rental data for May 2005\n",
+ "may_rentals = rentals_month(engine, 5, 2005)\n",
+ "print(f\"Total rentals in May 2005: {len(may_rentals)}\")\n",
+ "print(\"\\nFirst 5 rows of May rental data:\")\n",
+ "display(may_rentals.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "008ff95e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total rentals in June 2005: 2311\n",
+ "\n",
+ "First 5 rows of June rental data:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rental_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rental_date",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "inventory_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "staff_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "return_date",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ },
+ {
+ "name": "last_update",
+ "rawType": "datetime64[ns]",
+ "type": "datetime"
+ }
+ ],
+ "ref": "a8d9f2da-1940-49f5-8be4-97a72ffeb81a",
+ "rows": [
+ [
+ "0",
+ "1158",
+ "2005-06-14 22:53:33",
+ "416",
+ "1632",
+ "2",
+ "2005-06-18 21:37:33",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "1",
+ "1159",
+ "2005-06-14 22:55:13",
+ "516",
+ "4395",
+ "1",
+ "2005-06-17 02:11:13",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "2",
+ "1160",
+ "2005-06-14 23:00:34",
+ "239",
+ "2795",
+ "2",
+ "2005-06-18 01:58:34",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "3",
+ "1161",
+ "2005-06-14 23:07:08",
+ "285",
+ "1690",
+ "1",
+ "2005-06-21 17:12:08",
+ "2006-02-15 21:30:53"
+ ],
+ [
+ "4",
+ "1162",
+ "2005-06-14 23:09:38",
+ "310",
+ "987",
+ "1",
+ "2005-06-23 22:00:38",
+ "2006-02-15 21:30:53"
+ ]
+ ],
+ "shape": {
+ "columns": 7,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " customer_id | \n",
+ " inventory_id | \n",
+ " staff_id | \n",
+ " return_date | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1158 | \n",
+ " 2005-06-14 22:53:33 | \n",
+ " 416 | \n",
+ " 1632 | \n",
+ " 2 | \n",
+ " 2005-06-18 21:37:33 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1159 | \n",
+ " 2005-06-14 22:55:13 | \n",
+ " 516 | \n",
+ " 4395 | \n",
+ " 1 | \n",
+ " 2005-06-17 02:11:13 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1160 | \n",
+ " 2005-06-14 23:00:34 | \n",
+ " 239 | \n",
+ " 2795 | \n",
+ " 2 | \n",
+ " 2005-06-18 01:58:34 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1161 | \n",
+ " 2005-06-14 23:07:08 | \n",
+ " 285 | \n",
+ " 1690 | \n",
+ " 1 | \n",
+ " 2005-06-21 17:12:08 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1162 | \n",
+ " 2005-06-14 23:09:38 | \n",
+ " 310 | \n",
+ " 987 | \n",
+ " 1 | \n",
+ " 2005-06-23 22:00:38 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date customer_id inventory_id staff_id \\\n",
+ "0 1158 2005-06-14 22:53:33 416 1632 2 \n",
+ "1 1159 2005-06-14 22:55:13 516 4395 1 \n",
+ "2 1160 2005-06-14 23:00:34 239 2795 2 \n",
+ "3 1161 2005-06-14 23:07:08 285 1690 1 \n",
+ "4 1162 2005-06-14 23:09:38 310 987 1 \n",
+ "\n",
+ " return_date last_update \n",
+ "0 2005-06-18 21:37:33 2006-02-15 21:30:53 \n",
+ "1 2005-06-17 02:11:13 2006-02-15 21:30:53 \n",
+ "2 2005-06-18 01:58:34 2006-02-15 21:30:53 \n",
+ "3 2005-06-21 17:12:08 2006-02-15 21:30:53 \n",
+ "4 2005-06-23 22:00:38 2006-02-15 21:30:53 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Analysis: Get rental data for June 2005\n",
+ "june_rentals = rentals_month(engine, 6, 2005)\n",
+ "print(f\"Total rentals in June 2005: {len(june_rentals)}\")\n",
+ "print(\"\\nFirst 5 rows of June rental data:\")\n",
+ "display(june_rentals.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "38a29783",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of customers who rented in May 2005: 520\n",
+ "Number of customers who rented in June 2005: 590\n",
+ "\n",
+ "Top 5 customers in May:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_05_2005",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "e2059d70-fff1-43a0-a49e-cae808364444",
+ "rows": [
+ [
+ "168",
+ "197",
+ "8"
+ ],
+ [
+ "92",
+ "109",
+ "7"
+ ],
+ [
+ "441",
+ "506",
+ "7"
+ ],
+ [
+ "15",
+ "19",
+ "6"
+ ],
+ [
+ "43",
+ "53",
+ "6"
+ ]
+ ],
+ "shape": {
+ "columns": 2,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 168 | \n",
+ " 197 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " 109 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 441 | \n",
+ " 506 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 19 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 53 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005\n",
+ "168 197 8\n",
+ "92 109 7\n",
+ "441 506 7\n",
+ "15 19 6\n",
+ "43 53 6"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Top 5 customers in June:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_06_2005",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "b251ba73-dd85-4905-9dcf-0171acf69e25",
+ "rows": [
+ [
+ "30",
+ "31",
+ "11"
+ ],
+ [
+ "445",
+ "454",
+ "10"
+ ],
+ [
+ "209",
+ "213",
+ "9"
+ ],
+ [
+ "263",
+ "267",
+ "9"
+ ],
+ [
+ "291",
+ "295",
+ "9"
+ ]
+ ],
+ "shape": {
+ "columns": 2,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_06_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " | 445 | \n",
+ " 454 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | 209 | \n",
+ " 213 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 263 | \n",
+ " 267 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 291 | \n",
+ " 295 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_06_2005\n",
+ "30 31 11\n",
+ "445 454 10\n",
+ "209 213 9\n",
+ "263 267 9\n",
+ "291 295 9"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Get rental counts per customer for each month\n",
+ "may_counts = rental_count_month(may_rentals, 5, 2005)\n",
+ "june_counts = rental_count_month(june_rentals, 6, 2005)\n",
+ "\n",
+ "print(f\"Number of customers who rented in May 2005: {len(may_counts)}\")\n",
+ "print(f\"Number of customers who rented in June 2005: {len(june_counts)}\")\n",
+ "\n",
+ "print(\"\\nTop 5 customers in May:\")\n",
+ "display(may_counts.nlargest(5, 'rentals_05_2005'))\n",
+ "\n",
+ "print(\"\\nTop 5 customers in June:\")\n",
+ "display(june_counts.nlargest(5, 'rentals_06_2005'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "5e8120d8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total customers active in either month: 598\n",
+ "Customers active in BOTH May and June: 512\n",
+ "\n",
+ "Comparison DataFrame (first 10 rows):\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_05_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "rentals_06_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "difference",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "e784c49d-37c6-43ef-84a7-b695032c4b62",
+ "rows": [
+ [
+ "0",
+ "1",
+ "2.0",
+ "7.0",
+ "5.0"
+ ],
+ [
+ "1",
+ "2",
+ "1.0",
+ "1.0",
+ "0.0"
+ ],
+ [
+ "2",
+ "3",
+ "2.0",
+ "4.0",
+ "2.0"
+ ],
+ [
+ "3",
+ "4",
+ "0.0",
+ "6.0",
+ "6.0"
+ ],
+ [
+ "4",
+ "5",
+ "3.0",
+ "5.0",
+ "2.0"
+ ],
+ [
+ "5",
+ "6",
+ "3.0",
+ "4.0",
+ "1.0"
+ ],
+ [
+ "6",
+ "7",
+ "5.0",
+ "5.0",
+ "0.0"
+ ],
+ [
+ "7",
+ "8",
+ "1.0",
+ "3.0",
+ "2.0"
+ ],
+ [
+ "8",
+ "9",
+ "3.0",
+ "2.0",
+ "-1.0"
+ ],
+ [
+ "9",
+ "10",
+ "1.0",
+ "5.0",
+ "4.0"
+ ]
+ ],
+ "shape": {
+ "columns": 4,
+ "rows": 10
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " 6.0 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 3.0 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 7 | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 8 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 9 | \n",
+ " 3.0 | \n",
+ " 2.0 | \n",
+ " -1.0 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 10 | \n",
+ " 1.0 | \n",
+ " 5.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "0 1 2.0 7.0 5.0\n",
+ "1 2 1.0 1.0 0.0\n",
+ "2 3 2.0 4.0 2.0\n",
+ "3 4 0.0 6.0 6.0\n",
+ "4 5 3.0 5.0 2.0\n",
+ "5 6 3.0 4.0 1.0\n",
+ "6 7 5.0 5.0 0.0\n",
+ "7 8 1.0 3.0 2.0\n",
+ "8 9 3.0 2.0 -1.0\n",
+ "9 10 1.0 5.0 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Compare the two months\n",
+ "comparison = compare_rentals(may_counts, june_counts)\n",
+ "print(f\"Total customers active in either month: {len(comparison)}\")\n",
+ "\n",
+ "# Show customers who were active in both months\n",
+ "active_both_months = comparison[(comparison['rentals_05_2005'] > 0) & (comparison['rentals_06_2005'] > 0)]\n",
+ "print(f\"Customers active in BOTH May and June: {len(active_both_months)}\")\n",
+ "\n",
+ "print(\"\\nComparison DataFrame (first 10 rows):\")\n",
+ "display(comparison.head(10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "1e7c0276",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers by total activity:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_05_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "rentals_06_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "difference",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "total_rentals",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "02474e76-96f4-4f37-92cd-65e6127edc7e",
+ "rows": [
+ [
+ "195",
+ "197",
+ "8.0",
+ "8.0",
+ "0.0",
+ "16.0"
+ ],
+ [
+ "175",
+ "176",
+ "5.0",
+ "8.0",
+ "3.0",
+ "13.0"
+ ],
+ [
+ "369",
+ "371",
+ "6.0",
+ "7.0",
+ "1.0",
+ "13.0"
+ ],
+ [
+ "108",
+ "109",
+ "7.0",
+ "5.0",
+ "-2.0",
+ "12.0"
+ ],
+ [
+ "194",
+ "196",
+ "4.0",
+ "8.0",
+ "4.0",
+ "12.0"
+ ],
+ [
+ "254",
+ "256",
+ "5.0",
+ "7.0",
+ "2.0",
+ "12.0"
+ ],
+ [
+ "265",
+ "267",
+ "3.0",
+ "9.0",
+ "6.0",
+ "12.0"
+ ],
+ [
+ "504",
+ "506",
+ "7.0",
+ "5.0",
+ "-2.0",
+ "12.0"
+ ],
+ [
+ "524",
+ "526",
+ "3.0",
+ "9.0",
+ "6.0",
+ "12.0"
+ ],
+ [
+ "30",
+ "31",
+ "0.0",
+ "11.0",
+ "11.0",
+ "11.0"
+ ]
+ ],
+ "shape": {
+ "columns": 5,
+ "rows": 10
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ " total_rentals | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 195 | \n",
+ " 197 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ " 0.0 | \n",
+ " 16.0 | \n",
+ "
\n",
+ " \n",
+ " | 175 | \n",
+ " 176 | \n",
+ " 5.0 | \n",
+ " 8.0 | \n",
+ " 3.0 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " | 369 | \n",
+ " 371 | \n",
+ " 6.0 | \n",
+ " 7.0 | \n",
+ " 1.0 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " | 108 | \n",
+ " 109 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ " -2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 194 | \n",
+ " 196 | \n",
+ " 4.0 | \n",
+ " 8.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 254 | \n",
+ " 256 | \n",
+ " 5.0 | \n",
+ " 7.0 | \n",
+ " 2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 265 | \n",
+ " 267 | \n",
+ " 3.0 | \n",
+ " 9.0 | \n",
+ " 6.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 504 | \n",
+ " 506 | \n",
+ " 7.0 | \n",
+ " 5.0 | \n",
+ " -2.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 524 | \n",
+ " 526 | \n",
+ " 3.0 | \n",
+ " 9.0 | \n",
+ " 6.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 0.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference total_rentals\n",
+ "195 197 8.0 8.0 0.0 16.0\n",
+ "175 176 5.0 8.0 3.0 13.0\n",
+ "369 371 6.0 7.0 1.0 13.0\n",
+ "108 109 7.0 5.0 -2.0 12.0\n",
+ "194 196 4.0 8.0 4.0 12.0\n",
+ "254 256 5.0 7.0 2.0 12.0\n",
+ "265 267 3.0 9.0 6.0 12.0\n",
+ "504 506 7.0 5.0 -2.0 12.0\n",
+ "524 526 3.0 9.0 6.0 12.0\n",
+ "30 31 0.0 11.0 11.0 11.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Statistical analysis\n",
+ "comparison['total_rentals'] = comparison['rentals_05_2005'] + comparison['rentals_06_2005']\n",
+ "\n",
+ "print(\"Top 10 customers by total activity:\")\n",
+ "top_customers = comparison.nlargest(10, 'total_rentals')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference', 'total_rentals']]\n",
+ "display(top_customers)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "b5bb1eb1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers with biggest increase from May to June:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_05_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "rentals_06_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "difference",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "dec4a1d9-a832-4a69-bb1e-0a0d446836e2",
+ "rows": [
+ [
+ "30",
+ "31",
+ "0.0",
+ "11.0",
+ "11.0"
+ ],
+ [
+ "327",
+ "329",
+ "0.0",
+ "9.0",
+ "9.0"
+ ],
+ [
+ "452",
+ "454",
+ "1.0",
+ "10.0",
+ "9.0"
+ ],
+ [
+ "177",
+ "178",
+ "0.0",
+ "8.0",
+ "8.0"
+ ],
+ [
+ "211",
+ "213",
+ "1.0",
+ "9.0",
+ "8.0"
+ ],
+ [
+ "266",
+ "268",
+ "0.0",
+ "8.0",
+ "8.0"
+ ],
+ [
+ "293",
+ "295",
+ "1.0",
+ "9.0",
+ "8.0"
+ ],
+ [
+ "334",
+ "336",
+ "0.0",
+ "8.0",
+ "8.0"
+ ],
+ [
+ "338",
+ "340",
+ "0.0",
+ "8.0",
+ "8.0"
+ ],
+ [
+ "455",
+ "457",
+ "1.0",
+ "9.0",
+ "8.0"
+ ]
+ ],
+ "shape": {
+ "columns": 4,
+ "rows": 10
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 30 | \n",
+ " 31 | \n",
+ " 0.0 | \n",
+ " 11.0 | \n",
+ " 11.0 | \n",
+ "
\n",
+ " \n",
+ " | 327 | \n",
+ " 329 | \n",
+ " 0.0 | \n",
+ " 9.0 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " | 452 | \n",
+ " 454 | \n",
+ " 1.0 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " 178 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 211 | \n",
+ " 213 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 266 | \n",
+ " 268 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 293 | \n",
+ " 295 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 334 | \n",
+ " 336 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 338 | \n",
+ " 340 | \n",
+ " 0.0 | \n",
+ " 8.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " | 455 | \n",
+ " 457 | \n",
+ " 1.0 | \n",
+ " 9.0 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "30 31 0.0 11.0 11.0\n",
+ "327 329 0.0 9.0 9.0\n",
+ "452 454 1.0 10.0 9.0\n",
+ "177 178 0.0 8.0 8.0\n",
+ "211 213 1.0 9.0 8.0\n",
+ "266 268 0.0 8.0 8.0\n",
+ "293 295 1.0 9.0 8.0\n",
+ "334 336 0.0 8.0 8.0\n",
+ "338 340 0.0 8.0 8.0\n",
+ "455 457 1.0 9.0 8.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Ten customers whose `difference` value is highest, i.e. the largest May -> June gains.\n",
+ "print(\"Top 10 customers with biggest increase from May to June:\")\n",
+ "biggest_increase = (\n",
+ "    comparison\n",
+ "    # keep='first' is the pandas default: ties at the cutoff are kept in row order\n",
+ "    .nlargest(n=10, columns='difference', keep='first')\n",
+ "    # restrict the report to the id, both monthly counts and the change\n",
+ "    .loc[:, ['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n",
+ ")\n",
+ "display(biggest_increase)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "b2017504",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 10 customers with biggest decrease from May to June:\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "customer_id",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "rentals_05_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "rentals_06_2005",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "difference",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "3e378ba6-8c50-42f9-ad99-caf3da230262",
+ "rows": [
+ [
+ "205",
+ "207",
+ "6.0",
+ "1.0",
+ "-5.0"
+ ],
+ [
+ "13",
+ "14",
+ "5.0",
+ "1.0",
+ "-4.0"
+ ],
+ [
+ "160",
+ "161",
+ "6.0",
+ "2.0",
+ "-4.0"
+ ],
+ [
+ "196",
+ "198",
+ "5.0",
+ "1.0",
+ "-4.0"
+ ],
+ [
+ "248",
+ "250",
+ "5.0",
+ "1.0",
+ "-4.0"
+ ],
+ [
+ "272",
+ "274",
+ "6.0",
+ "2.0",
+ "-4.0"
+ ],
+ [
+ "594",
+ "596",
+ "6.0",
+ "2.0",
+ "-4.0"
+ ],
+ [
+ "18",
+ "19",
+ "6.0",
+ "3.0",
+ "-3.0"
+ ],
+ [
+ "123",
+ "124",
+ "4.0",
+ "1.0",
+ "-3.0"
+ ],
+ [
+ "220",
+ "222",
+ "5.0",
+ "2.0",
+ "-3.0"
+ ]
+ ],
+ "shape": {
+ "columns": 4,
+ "rows": 10
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 205 | \n",
+ " 207 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " -5.0 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 160 | \n",
+ " 161 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 196 | \n",
+ " 198 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 248 | \n",
+ " 250 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 272 | \n",
+ " 274 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 594 | \n",
+ " 596 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " -4.0 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 19 | \n",
+ " 6.0 | \n",
+ " 3.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ " | 123 | \n",
+ " 124 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ " | 220 | \n",
+ " 222 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ " -3.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "205 207 6.0 1.0 -5.0\n",
+ "13 14 5.0 1.0 -4.0\n",
+ "160 161 6.0 2.0 -4.0\n",
+ "196 198 5.0 1.0 -4.0\n",
+ "248 250 5.0 1.0 -4.0\n",
+ "272 274 6.0 2.0 -4.0\n",
+ "594 596 6.0 2.0 -4.0\n",
+ "18 19 6.0 3.0 -3.0\n",
+ "123 124 4.0 1.0 -3.0\n",
+ "220 222 5.0 2.0 -3.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Ten customers whose `difference` value is lowest, i.e. the largest May -> June drops.\n",
+ "print(\"Top 10 customers with biggest decrease from May to June:\")\n",
+ "biggest_decrease = (\n",
+ "    comparison\n",
+ "    # keep='first' is the pandas default: ties at the cutoff are kept in row order\n",
+ "    .nsmallest(n=10, columns='difference', keep='first')\n",
+ "    # restrict the report to the id, both monthly counts and the change\n",
+ "    .loc[:, ['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n",
+ ")\n",
+ "display(biggest_decrease)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8dbfc444",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "This analysis shows:\n",
+ "1. The total number of rentals in each month\n",
+ "2. Which customers were active in both months\n",
+ "3. How customer activity changed between May and June 2005\n",
+ "4. The customers with the biggest increases and decreases in rental activity"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}