diff --git a/SQL_python_connection.ipynb b/SQL_python_connection.ipynb
new file mode 100644
index 0000000..745a7cb
--- /dev/null
+++ b/SQL_python_connection.ipynb
@@ -0,0 +1,1033 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "59ab424e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: sqlalchemy in /opt/anaconda3/lib/python3.13/site-packages (2.0.39)\n",
+ "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.13/site-packages (2.2.3)\n",
+ "Requirement already satisfied: mysql-connector-python in /opt/anaconda3/lib/python3.13/site-packages (9.5.0)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.0 in /opt/anaconda3/lib/python3.13/site-packages (from sqlalchemy) (4.15.0)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2.1.3)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.13/site-packages (from pandas) (2025.2)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# %pip installs into the environment of the running kernel; versions are pinned for reproducibility.\n",
+ "%pip install sqlalchemy==2.0.39 pandas==2.2.3 mysql-connector-python==9.5.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "05ae9bda",
+ "metadata": {},
+ "source": [
+ "Import pandas and SQLAlchemy's `create_engine` to connect to MySQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a98c9515",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from getpass import getpass\n",
+ "\n",
+ "import pandas as pd\n",
+ "from sqlalchemy import create_engine, text\n",
+ "from sqlalchemy.engine import URL"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "86f9c6fc",
+ "metadata": {},
+ "source": [
+ "Challenge - 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "e123eb11",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Credentials come from the environment (or an interactive prompt) so that\n",
+ "# no password is stored in the notebook or its version history.\n",
+ "# URL.create also escapes special characters that may appear in the password.\n",
+ "engine = create_engine(\n",
+ "    URL.create(\n",
+ "        \"mysql+mysqlconnector\",\n",
+ "        username=os.environ.get(\"MYSQL_USER\", \"root\"),\n",
+ "        password=os.environ.get(\"MYSQL_PASSWORD\") or getpass(\"MySQL password: \"),\n",
+ "        host=os.environ.get(\"MYSQL_HOST\", \"localhost\"),\n",
+ "        port=int(os.environ.get(\"MYSQL_PORT\", \"3306\")),\n",
+ "        database=os.environ.get(\"MYSQL_DATABASE\", \"sakila\"),\n",
+ "    )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "476cb1ed",
+ "metadata": {},
+ "source": [
+ "Testing the connection"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "fa01a84b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2005-05-24 22:53:30 | \n",
+ " 367 | \n",
+ " 130 | \n",
+ " 2005-05-26 22:04:30 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2005-05-24 22:54:33 | \n",
+ " 1525 | \n",
+ " 459 | \n",
+ " 2005-05-28 19:40:33 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2005-05-24 23:03:39 | \n",
+ " 1711 | \n",
+ " 408 | \n",
+ " 2005-06-01 22:12:39 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 2005-05-24 23:04:41 | \n",
+ " 2452 | \n",
+ " 333 | \n",
+ " 2005-06-03 01:43:41 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 2005-05-24 23:05:21 | \n",
+ " 2079 | \n",
+ " 222 | \n",
+ " 2005-06-02 04:33:21 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1 2005-05-24 22:53:30 367 130 \n",
+ "1 2 2005-05-24 22:54:33 1525 459 \n",
+ "2 3 2005-05-24 23:03:39 1711 408 \n",
+ "3 4 2005-05-24 23:04:41 2452 333 \n",
+ "4 5 2005-05-24 23:05:21 2079 222 \n",
+ "\n",
+ " return_date staff_id last_update \n",
+ "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n",
+ "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n",
+ "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n",
+ "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n",
+ "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Smoke test: fetch five rows from `rental` to confirm the engine can reach the database.\n",
+ "query = \"SELECT * FROM rental LIMIT 5;\"\n",
+ "df_test = pd.read_sql(sql=query, con=engine)\n",
+ "df_test"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36d092b2",
+ "metadata": {},
+ "source": [
+ "Loading the dataframe for functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ef5b5b10",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Runs the five-row `query` from the connection test again and keeps the result as `df`.\n",
+ "df = pd.read_sql(sql=query, con=engine)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d7f28e0",
+ "metadata": {},
+ "source": [
+ "Challenge - 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d4164e13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rentals_month(engine, month, year):\n",
+ "    \"\"\"Return every row of ``rental`` whose ``rental_date`` falls in the given month and year.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    engine\n",
+ "        Database engine (or connection) handed to ``pd.read_sql``.\n",
+ "    month\n",
+ "        Month number compared against ``MONTH(rental_date)``; coerced with ``int``.\n",
+ "    year\n",
+ "        Year compared against ``YEAR(rental_date)``; coerced with ``int``.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        The matching rows, with all columns of ``rental``.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    ValueError, TypeError\n",
+ "        If ``month`` or ``year`` cannot be converted to an integer.\n",
+ "    \"\"\"\n",
+ "    # Bound parameters (instead of f-string interpolation) keep caller-supplied\n",
+ "    # values out of the SQL text, which rules out SQL injection.\n",
+ "    query = text(\n",
+ "        \"\"\"\n",
+ "        SELECT * FROM rental\n",
+ "        WHERE MONTH(rental_date) = :month\n",
+ "        AND YEAR(rental_date) = :year\n",
+ "        \"\"\"\n",
+ "    )\n",
+ "    params = {\"month\": int(month), \"year\": int(year)}\n",
+ "\n",
+ "    # The frame is loaded inside the function to avoid a global variable.\n",
+ "    return pd.read_sql(query, engine, params=params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "d7356a96",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2005-05-24 22:53:30 | \n",
+ " 367 | \n",
+ " 130 | \n",
+ " 2005-05-26 22:04:30 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2005-05-24 22:54:33 | \n",
+ " 1525 | \n",
+ " 459 | \n",
+ " 2005-05-28 19:40:33 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2005-05-24 23:03:39 | \n",
+ " 1711 | \n",
+ " 408 | \n",
+ " 2005-06-01 22:12:39 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 2005-05-24 23:04:41 | \n",
+ " 2452 | \n",
+ " 333 | \n",
+ " 2005-06-03 01:43:41 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 2005-05-24 23:05:21 | \n",
+ " 2079 | \n",
+ " 222 | \n",
+ " 2005-06-02 04:33:21 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1151 | \n",
+ " 1153 | \n",
+ " 2005-05-31 21:36:44 | \n",
+ " 2725 | \n",
+ " 506 | \n",
+ " 2005-06-10 01:26:44 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1152 | \n",
+ " 1154 | \n",
+ " 2005-05-31 21:42:09 | \n",
+ " 2732 | \n",
+ " 59 | \n",
+ " 2005-06-08 16:40:09 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1153 | \n",
+ " 1155 | \n",
+ " 2005-05-31 22:17:11 | \n",
+ " 2048 | \n",
+ " 251 | \n",
+ " 2005-06-04 20:27:11 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1154 | \n",
+ " 1156 | \n",
+ " 2005-05-31 22:37:34 | \n",
+ " 460 | \n",
+ " 106 | \n",
+ " 2005-06-01 23:02:34 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1155 | \n",
+ " 1157 | \n",
+ " 2005-05-31 22:47:45 | \n",
+ " 1449 | \n",
+ " 61 | \n",
+ " 2005-06-02 18:01:45 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1156 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1 2005-05-24 22:53:30 367 130 \n",
+ "1 2 2005-05-24 22:54:33 1525 459 \n",
+ "2 3 2005-05-24 23:03:39 1711 408 \n",
+ "3 4 2005-05-24 23:04:41 2452 333 \n",
+ "4 5 2005-05-24 23:05:21 2079 222 \n",
+ "... ... ... ... ... \n",
+ "1151 1153 2005-05-31 21:36:44 2725 506 \n",
+ "1152 1154 2005-05-31 21:42:09 2732 59 \n",
+ "1153 1155 2005-05-31 22:17:11 2048 251 \n",
+ "1154 1156 2005-05-31 22:37:34 460 106 \n",
+ "1155 1157 2005-05-31 22:47:45 1449 61 \n",
+ "\n",
+ " return_date staff_id last_update \n",
+ "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n",
+ "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n",
+ "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n",
+ "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n",
+ "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 \n",
+ "... ... ... ... \n",
+ "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 \n",
+ "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 \n",
+ "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 \n",
+ "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 \n",
+ "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 \n",
+ "\n",
+ "[1156 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rentals made in May 2005.\n",
+ "may_df = rentals_month(engine, month=5, year=2005)\n",
+ "may_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "65958955",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ " last_update | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1158 | \n",
+ " 2005-06-14 22:53:33 | \n",
+ " 1632 | \n",
+ " 416 | \n",
+ " 2005-06-18 21:37:33 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1159 | \n",
+ " 2005-06-14 22:55:13 | \n",
+ " 4395 | \n",
+ " 516 | \n",
+ " 2005-06-17 02:11:13 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1160 | \n",
+ " 2005-06-14 23:00:34 | \n",
+ " 2795 | \n",
+ " 239 | \n",
+ " 2005-06-18 01:58:34 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1161 | \n",
+ " 2005-06-14 23:07:08 | \n",
+ " 1690 | \n",
+ " 285 | \n",
+ " 2005-06-21 17:12:08 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1162 | \n",
+ " 2005-06-14 23:09:38 | \n",
+ " 987 | \n",
+ " 310 | \n",
+ " 2005-06-23 22:00:38 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2306 | \n",
+ " 3465 | \n",
+ " 2005-06-21 22:10:01 | \n",
+ " 1488 | \n",
+ " 510 | \n",
+ " 2005-06-30 21:35:01 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2307 | \n",
+ " 3466 | \n",
+ " 2005-06-21 22:13:33 | \n",
+ " 371 | \n",
+ " 226 | \n",
+ " 2005-06-25 21:01:33 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2308 | \n",
+ " 3467 | \n",
+ " 2005-06-21 22:19:25 | \n",
+ " 729 | \n",
+ " 543 | \n",
+ " 2005-06-27 00:03:25 | \n",
+ " 2 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2309 | \n",
+ " 3468 | \n",
+ " 2005-06-21 22:43:45 | \n",
+ " 2899 | \n",
+ " 100 | \n",
+ " 2005-06-30 01:49:45 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ " | 2310 | \n",
+ " 3469 | \n",
+ " 2005-06-21 22:48:59 | \n",
+ " 4087 | \n",
+ " 181 | \n",
+ " 2005-06-28 19:32:59 | \n",
+ " 1 | \n",
+ " 2006-02-15 21:30:53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2311 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1158 2005-06-14 22:53:33 1632 416 \n",
+ "1 1159 2005-06-14 22:55:13 4395 516 \n",
+ "2 1160 2005-06-14 23:00:34 2795 239 \n",
+ "3 1161 2005-06-14 23:07:08 1690 285 \n",
+ "4 1162 2005-06-14 23:09:38 987 310 \n",
+ "... ... ... ... ... \n",
+ "2306 3465 2005-06-21 22:10:01 1488 510 \n",
+ "2307 3466 2005-06-21 22:13:33 371 226 \n",
+ "2308 3467 2005-06-21 22:19:25 729 543 \n",
+ "2309 3468 2005-06-21 22:43:45 2899 100 \n",
+ "2310 3469 2005-06-21 22:48:59 4087 181 \n",
+ "\n",
+ " return_date staff_id last_update \n",
+ "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 \n",
+ "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 \n",
+ "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 \n",
+ "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 \n",
+ "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 \n",
+ "... ... ... ... \n",
+ "2306 2005-06-30 21:35:01 1 2006-02-15 21:30:53 \n",
+ "2307 2005-06-25 21:01:33 2 2006-02-15 21:30:53 \n",
+ "2308 2005-06-27 00:03:25 2 2006-02-15 21:30:53 \n",
+ "2309 2005-06-30 01:49:45 1 2006-02-15 21:30:53 \n",
+ "2310 2005-06-28 19:32:59 1 2006-02-15 21:30:53 \n",
+ "\n",
+ "[2311 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rentals made in June 2005.\n",
+ "june_df = rentals_month(engine, month=6, year=2005)\n",
+ "june_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5021ad98",
+ "metadata": {},
+ "source": [
+ "Challenge - 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "471c0cea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rental_count_month(df, month, year):\n",
+ "    \"\"\"Count the rentals of each customer in ``df``.\n",
+ "\n",
+ "    ``month`` and ``year`` are used only to label the count column as\n",
+ "    ``rentals_<MM>_<YYYY>`` (month zero-padded to two digits); ``df`` itself\n",
+ "    is not filtered here.\n",
+ "\n",
+ "    Returns a DataFrame with one row per ``customer_id`` and the labelled\n",
+ "    count of ``rental_id`` values.\n",
+ "    \"\"\"\n",
+ "    label = f\"rentals_{month:02d}_{year}\"\n",
+ "\n",
+ "    per_customer = df.groupby(\"customer_id\")[\"rental_id\"].count()\n",
+ "    # Move customer_id out of the index and give the counts their period label.\n",
+ "    counts = per_customer.reset_index(name=label)\n",
+ "    return counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "1b80daf3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 515 | \n",
+ " 594 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 516 | \n",
+ " 595 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 517 | \n",
+ " 596 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 518 | \n",
+ " 597 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 519 | \n",
+ " 599 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
520 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005\n",
+ "0 1 2\n",
+ "1 2 1\n",
+ "2 3 2\n",
+ "3 5 3\n",
+ "4 6 3\n",
+ ".. ... ...\n",
+ "515 594 4\n",
+ "516 595 1\n",
+ "517 596 6\n",
+ "518 597 2\n",
+ "519 599 1\n",
+ "\n",
+ "[520 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-customer rental counts for May 2005.\n",
+ "may_count = rental_count_month(may_df, month=5, year=2005)\n",
+ "may_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "fc45ae1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_06_2005 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 585 | \n",
+ " 595 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 586 | \n",
+ " 596 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 587 | \n",
+ " 597 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 588 | \n",
+ " 598 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 589 | \n",
+ " 599 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
590 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_06_2005\n",
+ "0 1 7\n",
+ "1 2 1\n",
+ "2 3 4\n",
+ "3 4 6\n",
+ "4 5 5\n",
+ ".. ... ...\n",
+ "585 595 2\n",
+ "586 596 2\n",
+ "587 597 3\n",
+ "588 598 1\n",
+ "589 599 4\n",
+ "\n",
+ "[590 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-customer rental counts for June 2005.\n",
+ "june_count = rental_count_month(june_df, month=6, year=2005)\n",
+ "june_count"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1275699b",
+ "metadata": {},
+ "source": [
+ "Create a Python function called compare_rentals that takes two DataFrames as input containing the number of rentals made by each customer in different months and years. The function should return a combined DataFrame with a new 'difference' column, which is the difference between the number of rentals in the two months."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "769fc709",
+ "metadata": {},
+ "source": [
+ "Challenge - 4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "cb19f7a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def compare_rentals(df1, df2, how=\"inner\"):\n",
+ "    \"\"\"Merge two per-customer rental-count frames and add a ``difference`` column.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    df1, df2 : pandas.DataFrame\n",
+ "        Frames with a ``customer_id`` column and one rental-count column each.\n",
+ "    how : str, default \"inner\"\n",
+ "        Join type passed to ``DataFrame.merge``. The default keeps only the\n",
+ "        customers present in both frames. With any other join type, counts\n",
+ "        missing on one side are filled with 0 so that customers who rented in\n",
+ "        only one of the two periods are kept.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        The merged frame plus ``difference``, computed as the second count\n",
+ "        column minus the first.\n",
+ "    \"\"\"\n",
+ "    merged = df1.merge(df2, on=\"customer_id\", how=how)\n",
+ "    rental_columns = merged.columns.drop(\"customer_id\")\n",
+ "    first_col, second_col = rental_columns[0], rental_columns[1]\n",
+ "\n",
+ "    if how != \"inner\":\n",
+ "        # A customer absent from one frame made no rentals in that period.\n",
+ "        count_cols = [first_col, second_col]\n",
+ "        merged[count_cols] = merged[count_cols].fillna(0).astype(int)\n",
+ "\n",
+ "    merged[\"difference\"] = merged[second_col] - merged[first_col]\n",
+ "    return merged\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "9c765aad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " rentals_05_2005 | \n",
+ " rentals_06_2005 | \n",
+ " difference | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 7 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id rentals_05_2005 rentals_06_2005 difference\n",
+ "0 1 2 7 5\n",
+ "1 2 1 1 0\n",
+ "2 3 2 4 2\n",
+ "3 5 3 5 2\n",
+ "4 6 3 4 1"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rebuild both monthly frames and their per-customer counts, then compare June against May.\n",
+ "may_df = rentals_month(engine, month=5, year=2005)\n",
+ "june_df = rentals_month(engine, month=6, year=2005)\n",
+ "\n",
+ "may_counts = rental_count_month(may_df, month=5, year=2005)\n",
+ "june_counts = rental_count_month(june_df, month=6, year=2005)\n",
+ "\n",
+ "comparison = compare_rentals(df1=may_counts, df2=june_counts)\n",
+ "comparison.head()\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}