diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..81db2e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +db_connection.py \ No newline at end of file diff --git a/Solved_lab.ipynb b/Solved_lab.ipynb new file mode 100644 index 0000000..e44afcf --- /dev/null +++ b/Solved_lab.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0f9ec18a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (2.3.3)\n", + "Requirement already satisfied: sqlalchemy in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (2.0.45)\n", + "Collecting pymysql\n", + " Downloading pymysql-1.1.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2.3.5)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2025.3)\n", + "Requirement already satisfied: greenlet>=1 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sqlalchemy) (3.3.0)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sqlalchemy) (4.15.0)\n", + "Requirement already satisfied: six>=1.5 in 
# Notebook dependencies (install once from a shell, or with the ``%pip``
# magic inside Jupyter): pandas sqlalchemy pymysql python-dotenv
import os
from urllib.parse import quote_plus

import pandas as pd


def get_engine():
    """Create a SQLAlchemy engine for the MySQL database configured in ``.env``.

    The connection settings are read from the environment variables
    ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST`` and ``DB_NAME`` after loading
    the local ``.env`` file.

    Returns:
        A SQLAlchemy engine using the ``mysql+pymysql`` driver.

    Raises:
        RuntimeError: If any of the four settings is not defined. Without
            this check the literal text ``"None"`` would end up in the URL.
    """
    # Imported here so the pandas-only helpers below stay importable on
    # machines that do not have the database stack installed.
    from dotenv import load_dotenv
    from sqlalchemy import create_engine

    load_dotenv()

    names = ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_NAME")
    settings = {name: os.getenv(name) for name in names}
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError("Missing database settings: " + ", ".join(missing))

    # Percent-encode the credentials: a raw "@", ":" or "/" in the password
    # would otherwise be parsed as part of the URL structure.
    user = quote_plus(settings["DB_USER"])
    password = quote_plus(settings["DB_PASSWORD"])
    return create_engine(
        f"mysql+pymysql://{user}:{password}@"
        f"{settings['DB_HOST']}/{settings['DB_NAME']}"
    )


def rentals_month(engine, month, year):
    """Fetch the rentals made during one calendar month.

    Args:
        engine: SQLAlchemy engine (or connection) to run the query on.
        month: Month number, 1-12.
        year: Four-digit year.

    Returns:
        A DataFrame with the columns ``customer_id`` and ``rental_date``,
        one row per rental in the requested month.

    Raises:
        ValueError: If ``month`` or ``year`` is not an integer value, or if
            ``month`` is outside 1-12.
    """
    # Coerce and validate before touching the database, so bad input fails
    # fast and can never reach the SQL text.
    month = int(month)
    year = int(year)
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month}")

    from sqlalchemy import text

    # Bound parameters instead of string interpolation: the driver sends the
    # values separately from the statement.
    query = text(
        """
        SELECT
            customer_id,
            rental_date
        FROM rental
        WHERE MONTH(rental_date) = :month
          AND YEAR(rental_date) = :year
        """
    )
    return pd.read_sql(query, engine, params={"month": month, "year": year})


def rental_count_month(df, month, year):
    """Count rentals per customer.

    Args:
        df: DataFrame with a ``customer_id`` column, one row per rental.
        month: Month number used to label the count column (zero-padded).
        year: Year used to label the count column.

    Returns:
        A DataFrame with ``customer_id`` and a count column named
        ``rentals_<MM>_<YYYY>``.
    """
    column_name = f"rentals_{month:02d}_{year}"
    counts = df.groupby("customer_id").size()
    return counts.reset_index(name=column_name)


def compare_rentals(df1, df2):
    """Join two per-customer count tables and compute their difference.

    Only customers present in both tables are kept (inner join).

    Args:
        df1: Counts for the first period: ``customer_id`` plus one count
            column.
        df2: Counts for the second period, same layout.

    Returns:
        The merged DataFrame with an extra ``difference`` column holding
        the second period's count minus the first period's count.
    """
    merged = pd.merge(df1, df2, on="customer_id", how="inner")

    # The count columns are picked by position (right after the key), so
    # the function works whatever month/year labels the inputs carry.
    first_col = merged.columns[1]
    second_col = merged.columns[2]
    merged["difference"] = merged[second_col] - merged[first_col]

    return merged
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
01275
12110
23242
35352
46341
\n", + "
# Connect to the database. The notebook used ``engine`` without ever
# binding it, so a fresh "Restart & Run All" failed with a NameError.
engine = get_engine()

# Retrieve rentals
may_rentals = rentals_month(engine, 5, 2005)
june_rentals = rentals_month(engine, 6, 2005)

# Count rentals per customer
may_counts = rental_count_month(may_rentals, 5, 2005)
june_counts = rental_count_month(june_rentals, 6, 2005)

# Compare months (customers active in both May and June 2005)
comparison = compare_rentals(may_counts, june_counts)

# Last expression of the cell: Jupyter renders the first five rows.
comparison.head()