diff --git a/lab-sql-python-connection.ipynb b/lab-sql-python-connection.ipynb
new file mode 100644
index 0000000..eab2c36
--- /dev/null
+++ b/lab-sql-python-connection.ipynb
@@ -0,0 +1,344 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "47ed2e29",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pymysql in /opt/anaconda3/lib/python3.13/site-packages (1.1.2)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install pymysql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "c5060bba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import pymysql\n",
+ "from sqlalchemy import create_engine, text\n",
+ "from sqlalchemy.engine import URL\n",
+ "password=getpass.getpass()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "4582723a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bd= \"sakila\"\n",
+ "connection_string= ''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "494b90ff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Engine(mysql+pymysql://root:***@localhost/sakila)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bd = \"sakila\"\n",
+ "# Build the URL from its components rather than by string concatenation:\n",
+ "# URL.create escapes each part, so a password containing characters such as\n",
+ "# @ / : or # can no longer corrupt the connection string.\n",
+ "connection_string = URL.create(\n",
+ "    \"mysql+pymysql\",\n",
+ "    username=\"root\",\n",
+ "    password=password,\n",
+ "    host=\"localhost\",\n",
+ "    database=bd,\n",
+ ")\n",
+ "engine = create_engine(connection_string)\n",
+ "engine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "6b47e247",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rentals_month(engine, month, year):\n",
+ "    \"\"\"Return the rows of the ``rental`` table dated in the given month and year.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    engine\n",
+ "        SQLAlchemy engine (or connection) handed to ``pd.read_sql_query``.\n",
+ "    month, year\n",
+ "        Values convertible to ``int``; compared against ``MONTH(rental_date)``\n",
+ "        and ``YEAR(rental_date)`` respectively.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        Columns: rental_id, rental_date, inventory_id, customer_id,\n",
+ "        return_date, staff_id.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    ValueError, TypeError\n",
+ "        If ``month`` or ``year`` cannot be converted to ``int``.\n",
+ "    \"\"\"\n",
+ "    # Bind parameters keep caller-supplied values out of the SQL text, so they\n",
+ "    # can never be interpreted as SQL (the previous f-string allowed that).\n",
+ "    query = text(\n",
+ "        \"\"\"\n",
+ "        SELECT\n",
+ "            rental_id,\n",
+ "            rental_date,\n",
+ "            inventory_id,\n",
+ "            customer_id,\n",
+ "            return_date,\n",
+ "            staff_id\n",
+ "        FROM rental\n",
+ "        WHERE MONTH(rental_date) = :month\n",
+ "          AND YEAR(rental_date) = :year\n",
+ "        \"\"\"\n",
+ "    )\n",
+ "    # int() rejects non-numeric input early and normalises numpy integers.\n",
+ "    params = {\"month\": int(month), \"year\": int(year)}\n",
+ "    return pd.read_sql_query(query, engine, params=params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4ad87897",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rental_id | \n",
+ " rental_date | \n",
+ " inventory_id | \n",
+ " customer_id | \n",
+ " return_date | \n",
+ " staff_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2005-05-24 22:53:30 | \n",
+ " 367 | \n",
+ " 130 | \n",
+ " 2005-05-26 22:04:30 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2005-05-24 22:54:33 | \n",
+ " 1525 | \n",
+ " 459 | \n",
+ " 2005-05-28 19:40:33 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 2005-05-24 23:03:39 | \n",
+ " 1711 | \n",
+ " 408 | \n",
+ " 2005-06-01 22:12:39 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 2005-05-24 23:04:41 | \n",
+ " 2452 | \n",
+ " 333 | \n",
+ " 2005-06-03 01:43:41 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 2005-05-24 23:05:21 | \n",
+ " 2079 | \n",
+ " 222 | \n",
+ " 2005-06-02 04:33:21 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rental_id rental_date inventory_id customer_id \\\n",
+ "0 1 2005-05-24 22:53:30 367 130 \n",
+ "1 2 2005-05-24 22:54:33 1525 459 \n",
+ "2 3 2005-05-24 23:03:39 1711 408 \n",
+ "3 4 2005-05-24 23:04:41 2452 333 \n",
+ "4 5 2005-05-24 23:05:21 2079 222 \n",
+ "\n",
+ " return_date staff_id \n",
+ "0 2005-05-26 22:04:30 1 \n",
+ "1 2005-05-28 19:40:33 1 \n",
+ "2 2005-06-01 22:12:39 1 \n",
+ "3 2005-06-03 01:43:41 2 \n",
+ "4 2005-06-02 04:33:21 1 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rentals = rentals_month(engine, 5, 2005)\n",
+ "df_rentals.head()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c5a4eeef",
+ "metadata": {},
+ "source": [
+ "Develop a Python function called rental_count_month that takes the DataFrame provided by rentals_month as input along with the month and year and returns a new DataFrame containing the number of rentals made by each customer_id during the selected month and year."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "9d6101c4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rental_count_month(df, month, year):\n",
+ "    \"\"\"Count the rentals made by each customer during one month.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    df : pandas.DataFrame\n",
+ "        Rentals with at least ``customer_id`` and ``rental_id`` columns. When a\n",
+ "        ``rental_date`` column is present, rows outside the requested period\n",
+ "        are ignored.\n",
+ "    month, year\n",
+ "        Period to count; also written to the ``month`` and ``year`` columns.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        Columns: customer_id, rental_count, month, year (one row per customer).\n",
+ "    \"\"\"\n",
+ "    rentals = df\n",
+ "    if \"rental_date\" in df.columns:\n",
+ "        # Keep only rentals from the requested period so the month/year labels\n",
+ "        # are true even if the frame holds other months. A frame produced by\n",
+ "        # rentals_month for the same period passes through unchanged.\n",
+ "        rental_dates = pd.to_datetime(df[\"rental_date\"])\n",
+ "        in_period = (rental_dates.dt.month == int(month)) & (\n",
+ "            rental_dates.dt.year == int(year)\n",
+ "        )\n",
+ "        rentals = df.loc[in_period]\n",
+ "\n",
+ "    result = (\n",
+ "        rentals.groupby(\"customer_id\")\n",
+ "        .agg(rental_count=(\"rental_id\", \"count\"))\n",
+ "        .reset_index()\n",
+ "    )\n",
+ "    result[\"month\"] = month\n",
+ "    result[\"year\"] = year\n",
+ "    return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3702dc53",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " customer_id rental_count month year\n",
+ "0 1 2 5 2005\n",
+ "1 2 1 5 2005\n",
+ "2 3 2 5 2005\n",
+ "3 5 3 5 2005\n",
+ "4 6 3 5 2005\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_rentals = rentals_month(engine, 5, 2005)\n",
+ "df_counts = rental_count_month(df_rentals, 5, 2005)\n",
+ "print(df_counts.head())\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff3dd271",
+ "metadata": {},
+ "source": [
+ "Create a Python function called compare_rentals that takes two DataFrames as input containing the number of rentals made by each customer in different months and years. The function should return a combined DataFrame with a new 'difference' column, which is the difference between the number of rentals in the two months."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "a5a57a25",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def compare_rentals(df1, df2):\n",
+ "    \"\"\"Combine two per-customer rental counts and add their difference.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    df1, df2 : pandas.DataFrame\n",
+ "        Frames with ``customer_id`` and ``rental_count`` columns; ``month`` and\n",
+ "        ``year`` columns, when present in both, are carried along with suffixes.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        Outer join on ``customer_id`` with ``_1`` / ``_2`` suffixed columns and\n",
+ "        a ``difference`` column equal to ``rental_count_2 - rental_count_1``.\n",
+ "    \"\"\"\n",
+ "    merged = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\", suffixes=(\"_1\", \"_2\"))\n",
+ "\n",
+ "    # A customer missing from one period made zero rentals in it. Only the\n",
+ "    # counts are filled: a blanket fillna(0) would also invent month 0 / year 0.\n",
+ "    count_cols = [\"rental_count_1\", \"rental_count_2\"]\n",
+ "    merged = merged.fillna({col: 0 for col in count_cols})\n",
+ "    if all(pd.api.types.is_integer_dtype(frame[\"rental_count\"]) for frame in (df1, df2)):\n",
+ "        # The outer join promoted the counts to float to hold NaN; restore integers.\n",
+ "        merged = merged.astype({col: \"int64\" for col in count_cols})\n",
+ "\n",
+ "    # month/year describe the period rather than the customer, so rows added by\n",
+ "    # the outer join take the single value their source frame carries.\n",
+ "    for frame, suffix in ((df1, \"_1\"), (df2, \"_2\")):\n",
+ "        for label in (\"month\", \"year\"):\n",
+ "            column = label + suffix\n",
+ "            if (\n",
+ "                label in frame.columns\n",
+ "                and column in merged.columns\n",
+ "                and frame[label].nunique() == 1\n",
+ "            ):\n",
+ "                merged[column] = frame[label].dropna().iloc[0]\n",
+ "\n",
+ "    merged[\"difference\"] = merged[\"rental_count_2\"] - merged[\"rental_count_1\"]\n",
+ "    return merged"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "47fccace",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " customer_id rental_count_1 month_1 year_1 rental_count_2 month_2 \\\n",
+ "0 1 2.0 5.0 2005.0 7.0 6.0 \n",
+ "1 2 1.0 5.0 2005.0 1.0 6.0 \n",
+ "2 3 2.0 5.0 2005.0 4.0 6.0 \n",
+ "3 4 0.0 0.0 0.0 6.0 6.0 \n",
+ "4 5 3.0 5.0 2005.0 5.0 6.0 \n",
+ "\n",
+ " year_2 difference \n",
+ "0 2005.0 5.0 \n",
+ "1 2005.0 0.0 \n",
+ "2 2005.0 2.0 \n",
+ "3 2005.0 6.0 \n",
+ "4 2005.0 2.0 \n"
+ ]
+ }
+ ],
+ "source": [
+ "df_may = rental_count_month(rentals_month(engine, 5, 2005), 5, 2005)\n",
+ "df_june = rental_count_month(rentals_month(engine, 6, 2005), 6, 2005)\n",
+ "\n",
+ "comparison = compare_rentals(df_may, df_june)\n",
+ "print(comparison.head())\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c2a4eef",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}