From f5b88ff144c39ac18028cd9d35d54fe1610983a0 Mon Sep 17 00:00:00 2001 From: Claudia Palladino Date: Sat, 6 Dec 2025 12:02:16 +0000 Subject: [PATCH] Solved lab --- lab_connecting_python_to_sql.ipynb | 1266 ++++++++++++++++++++++++++++ 1 file changed, 1266 insertions(+) create mode 100644 lab_connecting_python_to_sql.ipynb diff --git a/lab_connecting_python_to_sql.ipynb b/lab_connecting_python_to_sql.ipynb new file mode 100644 index 0000000..2987d46 --- /dev/null +++ b/lab_connecting_python_to_sql.ipynb @@ -0,0 +1,1266 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "275bb141", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sqlalchemy in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.0.44)\n", + "Requirement already satisfied: greenlet>=1 in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from sqlalchemy) (3.3.0)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from sqlalchemy) (4.15.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 25.2 -> 25.3\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "!pip install sqlalchemy" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e5c29f30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pymysql in c:\\users\\cpall\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.1.2)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 25.2 -> 25.3\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "!pip install pymysql" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b1e3e2bf", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import getpass # To get the password without showing the input\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d2808e0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('sakila',)\n" + ] + } + ], + "source": [ + "# Establish a connection between Python and the Sakila database.\n", + "import getpass\n", + "from sqlalchemy import create_engine, text # <- import text\n", + "\n", + "password = getpass.getpass(\"Enter MySQL root password: \")\n", + "\n", + "bd = \"Sakila\"\n", + "connection_string = f\"mysql+pymysql://root:{password}@localhost/{bd}\"\n", + "\n", + "engine = create_engine(connection_string)\n", + "\n", + "# Test connection\n", + "with engine.connect() as connection:\n", + " result = connection.execute(text(\"SELECT DATABASE();\")) # <- wrap SQL in text()\n", + " print(result.fetchone())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6cd5b106", + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Write a Python function called `rentals_month` that retrieves rental data for a given month and year (passed as parameters) from the Sakila database as a Pandas DataFrame. The function should take in three parameters:\n", + "#\t- `engine`: an object representing the database connection engine to be used to establish a connection to the Sakila database.\n", + "#\t- `month`: an integer representing the month for which rental data is to be retrieved.\n", + "#\t- `year`: an integer representing the year for which rental data is to be retrieved.\n", + "#\tThe function should execute a SQL query to retrieve the rental data for the specified month and year from the rental table in the Sakila database,\n", + "# and return it as a pandas DataFrame.\n", + " \n", + "def rental_month(engine, month, year):\n", + " \"\"\"\n", + " Retrieves rental data for a given month and year from the Sakila database.\n", + "\n", + " Parameters:\n", + " engine : SQLAlchemy engine\n", + " Database connection engine to the Sakila database.\n", + " month : int\n", + " Month (1-12) for which to retrieve rental data.\n", + " year : int\n", + " Year (e.g., 2023) for which to retrieve rental data.\n", + "\n", + " Returns:\n", + " pd.DataFrame\n", + " DataFrame containing rental data for the specified month and year.\n", + " \"\"\"\n", + " # SQL query to get rentals for the given month and year # Use text() to safely pass SQL with parameters\n", + " query = text(\"\"\" \n", + " SELECT *\n", + " FROM rental\n", + " WHERE MONTH(rental_date) = :month\n", + " AND YEAR(rental_date) = :year\n", + " \"\"\")\n", + " # Execute query and return result as DataFrame\n", + " with engine.connect() as connection:\n", + " df = pd.read_sql(query, connection, params={\"month\": month, \"year\": year}) # :month and :year are placeholders to avoid SQL injection\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "896df784", + "metadata": {}, + "outputs": [], + "source": [ + "# 3. Develop a Python function called `rental_count_month` that takes the DataFrame provided by `rentals_month` as input\n", + "# along with the month and year and returns a new DataFrame containing the number of rentals made by each customer_id\n", + "# during the selected month and year. \n", + "#\tThe function should also include the month and year as parameters and use them to name the new column according to the month and year,\n", + "# for example, if the input month is 05 and the year is 2005, the column name should be \"rentals_05_2005\".\n", + "#\t*Hint: Consider making use of pandas [groupby()]\n", + "\n", + "import pandas as pd\n", + "\n", + "def rental_count_month(df, month, year):\n", + " \"\"\"\n", + " Takes the DataFrame from rentals_month and returns a new DataFrame \n", + " containing the number of rentals made by each customer_id during the selected month and year.\n", + " \n", + " Parameters:\n", + " df : pd.DataFrame\n", + " DataFrame from rentals_month function.\n", + " month : int\n", + " Month for which rental counts are calculated.\n", + " year : int\n", + " Year for which rental counts are calculated.\n", + " \n", + " Returns:\n", + " pd.DataFrame\n", + " DataFrame with 'customer_id' and rental count column named according to month and year.\n", + " \"\"\"\n", + " # Group by customer_id and count rentals\n", + " rental_counts = df.groupby('customer_id').size().reset_index(name=f\"rentals_{month:02d}_{year}\")\n", + " \n", + " return rental_counts\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e20fd15a", + "metadata": {}, + "outputs": [], + "source": [ + "# 4. Create a Python function called compare_rentals that takes two DataFrames as input\n", + "# containing the number of rentals made by each customer in different months and years.\n", + "# The function should return a combined DataFrame with a new 'difference' column, \n", + "# which is the difference between the number of rentals in the two months.\n", + "import pandas as pd\n", + "\n", + "def compare_rentals(df1, df2):\n", + " \"\"\"\n", + " Compare rental counts between two DataFrames for different months/years.\n", + "\n", + " Parameters:\n", + " df1 : pd.DataFrame\n", + " DataFrame containing 'customer_id' and rental counts for the first month/year.\n", + " df2 : pd.DataFrame\n", + " DataFrame containing 'customer_id' and rental counts for the second month/year.\n", + "\n", + " Returns:\n", + " pd.DataFrame\n", + " Combined DataFrame with rental counts from both months and a 'difference' column\n", + " (df2 count - df1 count).\n", + " \"\"\"\n", + " # Merge the two DataFrames on customer_id (outer join to include all customers)\n", + " # Fill NaN values with 0 (customers who didn't rent in one of the months)\n", + " combined_df = pd.merge(df1, df2, on='customer_id', how='outer').fillna(0)\n", + "\n", + " # Identify the rental count columns (assumes only one column besides customer_id)\n", + " col1 = [c for c in df1.columns if c != 'customer_id'][0]\n", + " col2 = [c for c in df2.columns if c != 'customer_id'][0]\n", + "\n", + " # Create the difference column\n", + " combined_df['difference'] = combined_df[col2] - combined_df[col1]\n", + "\n", + " return combined_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "796fed32", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rentals in May 2005: 1156\n", + "\n", + "First 5 rows of May rental data:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Analysis: Get rental data for May 2005\n", + "may_rentals = rental_month(engine, 5, 2005)\n", + "print(f\"Total rentals in May 2005: {len(may_rentals)}\")\n", + "print(\"\\nFirst 5 rows of May rental data:\")\n", + "display(may_rentals.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "efaefc57", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rentals in June 2005: 2311\n", + "\n", + "First 5 rows of June rental data:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
011582005-06-14 22:53:3316324162005-06-18 21:37:3322006-02-15 21:30:53
111592005-06-14 22:55:1343955162005-06-17 02:11:1312006-02-15 21:30:53
211602005-06-14 23:00:3427952392005-06-18 01:58:3422006-02-15 21:30:53
311612005-06-14 23:07:0816902852005-06-21 17:12:0812006-02-15 21:30:53
411622005-06-14 23:09:389873102005-06-23 22:00:3812006-02-15 21:30:53
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1158 2005-06-14 22:53:33 1632 416 \n", + "1 1159 2005-06-14 22:55:13 4395 516 \n", + "2 1160 2005-06-14 23:00:34 2795 239 \n", + "3 1161 2005-06-14 23:07:08 1690 285 \n", + "4 1162 2005-06-14 23:09:38 987 310 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 \n", + "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 \n", + "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 \n", + "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 \n", + "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Analysis: Get rental data for June 2005\n", + "june_rentals = rental_month(engine, 6, 2005)\n", + "print(f\"Total rentals in June 2005: {len(june_rentals)}\")\n", + "print(\"\\nFirst 5 rows of June rental data:\")\n", + "display(june_rentals.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "caadbd81", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of customers who rented in May 2005: 520\n", + "Number of customers who rented in June 2005: 590\n", + "\n", + "Top 5 customers in May:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005
1681978
921097
4415067
15196
43536
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005\n", + "168 197 8\n", + "92 109 7\n", + "441 506 7\n", + "15 19 6\n", + "43 53 6" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Top 5 customers in June:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_06_2005
303111
44545410
2092139
2632679
2912959
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_06_2005\n", + "30 31 11\n", + "445 454 10\n", + "209 213 9\n", + "263 267 9\n", + "291 295 9" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Get rental counts per customer for each month\n", + "may_counts = rental_count_month(may_rentals, 5, 2005)\n", + "june_counts = rental_count_month(june_rentals, 6, 2005)\n", + "\n", + "print(f\"Number of customers who rented in May 2005: {len(may_counts)}\")\n", + "print(f\"Number of customers who rented in June 2005: {len(june_counts)}\")\n", + "\n", + "print(\"\\nTop 5 customers in May:\")\n", + "display(may_counts.nlargest(5, 'rentals_05_2005'))\n", + "\n", + "print(\"\\nTop 5 customers in June:\")\n", + "display(june_counts.nlargest(5, 'rentals_06_2005'))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "b911a76e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total customers active in either month: 598\n", + "Customers active in BOTH May and June: 512\n", + "\n", + "Comparison DataFrame (first 10 rows):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
012.07.05.0
121.01.00.0
232.04.02.0
340.06.06.0
453.05.02.0
563.04.01.0
675.05.00.0
781.03.02.0
893.02.0-1.0
9101.05.04.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "0 1 2.0 7.0 5.0\n", + "1 2 1.0 1.0 0.0\n", + "2 3 2.0 4.0 2.0\n", + "3 4 0.0 6.0 6.0\n", + "4 5 3.0 5.0 2.0\n", + "5 6 3.0 4.0 1.0\n", + "6 7 5.0 5.0 0.0\n", + "7 8 1.0 3.0 2.0\n", + "8 9 3.0 2.0 -1.0\n", + "9 10 1.0 5.0 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Compare the two months\n", + "comparison = compare_rentals(may_counts, june_counts)\n", + "print(f\"Total customers active in either month: {len(comparison)}\")\n", + "\n", + "# Show customers who were active in both months\n", + "active_both_months = comparison[(comparison['rentals_05_2005'] > 0) & (comparison['rentals_06_2005'] > 0)]\n", + "print(f\"Customers active in BOTH May and June: {len(active_both_months)}\")\n", + "\n", + "print(\"\\nComparison DataFrame (first 10 rows):\")\n", + "display(comparison.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "b0e52bc2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top 10 customers by total activity:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005differencetotal_rentals
1951978.08.00.016.0
1751765.08.03.013.0
3693716.07.01.013.0
1081097.05.0-2.012.0
1941964.08.04.012.0
2542565.07.02.012.0
2652673.09.06.012.0
5045067.05.0-2.012.0
5245263.09.06.012.0
30310.011.011.011.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference total_rentals\n", + "195 197 8.0 8.0 0.0 16.0\n", + "175 176 5.0 8.0 3.0 13.0\n", + "369 371 6.0 7.0 1.0 13.0\n", + "108 109 7.0 5.0 -2.0 12.0\n", + "194 196 4.0 8.0 4.0 12.0\n", + "254 256 5.0 7.0 2.0 12.0\n", + "265 267 3.0 9.0 6.0 12.0\n", + "504 506 7.0 5.0 -2.0 12.0\n", + "524 526 3.0 9.0 6.0 12.0\n", + "30 31 0.0 11.0 11.0 11.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Statistical analysis\n", + "comparison['total_rentals'] = comparison['rentals_05_2005'] + comparison['rentals_06_2005']\n", + "\n", + "print(\"Top 10 customers by total activity:\")\n", + "top_customers = comparison.nlargest(10, 'total_rentals')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference', 'total_rentals']]\n", + "display(top_customers)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b386f8e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top 10 customers with biggest increase from May to June:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
30310.011.011.0
3273290.09.09.0
4524541.010.09.0
1771780.08.08.0
2112131.09.08.0
2662680.08.08.0
2932951.09.08.0
3343360.08.08.0
3383400.08.08.0
4554571.09.08.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "30 31 0.0 11.0 11.0\n", + "327 329 0.0 9.0 9.0\n", + "452 454 1.0 10.0 9.0\n", + "177 178 0.0 8.0 8.0\n", + "211 213 1.0 9.0 8.0\n", + "266 268 0.0 8.0 8.0\n", + "293 295 1.0 9.0 8.0\n", + "334 336 0.0 8.0 8.0\n", + "338 340 0.0 8.0 8.0\n", + "455 457 1.0 9.0 8.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Customers with biggest increase from May to June\n", + "print(\"Top 10 customers with biggest increase from May to June:\")\n", + "biggest_increase = comparison.nlargest(10, 'difference')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n", + "display(biggest_increase)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "a64fe03e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top 10 customers with biggest decrease from May to June:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
2052076.01.0-5.0
13145.01.0-4.0
1601616.02.0-4.0
1961985.01.0-4.0
2482505.01.0-4.0
2722746.02.0-4.0
5945966.02.0-4.0
18196.03.0-3.0
1231244.01.0-3.0
2202225.02.0-3.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "205 207 6.0 1.0 -5.0\n", + "13 14 5.0 1.0 -4.0\n", + "160 161 6.0 2.0 -4.0\n", + "196 198 5.0 1.0 -4.0\n", + "248 250 5.0 1.0 -4.0\n", + "272 274 6.0 2.0 -4.0\n", + "594 596 6.0 2.0 -4.0\n", + "18 19 6.0 3.0 -3.0\n", + "123 124 4.0 1.0 -3.0\n", + "220 222 5.0 2.0 -3.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Customers with biggest decrease from May to June\n", + "print(\"Top 10 customers with biggest decrease from May to June:\")\n", + "biggest_decrease = comparison.nsmallest(10, 'difference')[['customer_id', 'rentals_05_2005', 'rentals_06_2005', 'difference']]\n", + "display(biggest_decrease)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}