diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..81db2e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +db_connection.py \ No newline at end of file diff --git a/Solved_lab.ipynb b/Solved_lab.ipynb new file mode 100644 index 0000000..e44afcf --- /dev/null +++ b/Solved_lab.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0f9ec18a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (2.3.3)\n", + "Requirement already satisfied: sqlalchemy in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (2.0.45)\n", + "Collecting pymysql\n", + " Downloading pymysql-1.1.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2.3.5)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas) (2025.3)\n", + "Requirement already satisfied: greenlet>=1 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sqlalchemy) (3.3.0)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\rodri\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sqlalchemy) (4.15.0)\n", + "Requirement already satisfied: six>=1.5 in 
# Notebook setup, kept for reference (run once per environment):
#   pip install pandas sqlalchemy pymysql
#   pip install python-dotenv

import os
from urllib.parse import quote

import pandas as pd

# Environment variables that together describe the MySQL connection.
_DB_ENV_VARS = ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_NAME")


def get_engine():
    """Create a SQLAlchemy engine for the MySQL database configured in the environment.

    Calls ``load_dotenv()`` and then reads ``DB_USER``, ``DB_PASSWORD``,
    ``DB_HOST`` and ``DB_NAME`` with ``os.getenv``. The values are taken
    literally, so they must not be percent-encoded beforehand.

    Returns:
        The engine returned by ``sqlalchemy.create_engine`` for a
        ``mysql+pymysql://`` URL.

    Raises:
        RuntimeError: If any of the four variables is not set.
    """
    # Imported here rather than at module level so that importing this code
    # has no side effects and the pandas-only helpers work without DB drivers.
    from dotenv import load_dotenv
    from sqlalchemy import create_engine

    load_dotenv()

    settings = {name: os.getenv(name) for name in _DB_ENV_VARS}
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        # Without this check the literal text "None" would end up in the URL.
        raise RuntimeError(
            "Missing database settings: " + ", ".join(missing)
        )

    # Percent-encode the credentials: characters such as "@", ":" or "/" in a
    # raw password would otherwise be parsed as URL delimiters.
    user = quote(settings["DB_USER"], safe="")
    password = quote(settings["DB_PASSWORD"], safe="")

    return create_engine(
        f"mysql+pymysql://{user}:{password}@"
        f"{settings['DB_HOST']}/{settings['DB_NAME']}"
    )


def rentals_month(engine, month, year):
    """Fetch the rentals made during one calendar month.

    Args:
        engine: Database connectable passed straight to ``pandas.read_sql``.
        month: Month number; any value accepted by ``int()``.
        year: Year number; any value accepted by ``int()``.

    Returns:
        A DataFrame with the ``customer_id`` and ``rental_date`` columns of
        the ``rental`` table for the requested month and year.

    Raises:
        ValueError: If ``month`` or ``year`` is not a valid integer literal.
        TypeError: If ``month`` or ``year`` cannot be converted to ``int``.
    """
    # Coerce before touching the database: the values used to be interpolated
    # into the SQL text, which allowed arbitrary SQL to be injected.
    month = int(month)
    year = int(year)

    from sqlalchemy import text

    # Bound parameters keep the values out of the SQL text entirely.
    query = text(
        """
        SELECT
            customer_id,
            rental_date
        FROM rental
        WHERE MONTH(rental_date) = :month
          AND YEAR(rental_date) = :year
        """
    )
    return pd.read_sql(query, engine, params={"month": month, "year": year})


def rental_count_month(df, month, year):
    """Count the rows of ``df`` per customer.

    Args:
        df: DataFrame with a ``customer_id`` column, one row per rental.
        month: Month number used in the name of the count column; any value
            accepted by ``int()``.
        year: Year used, as given, in the name of the count column.

    Returns:
        A DataFrame with ``customer_id`` and a count column named
        ``rentals_<MM>_<year>`` (for example ``rentals_05_2005``).
    """
    # int() lets "5" and 5 produce the same zero-padded column name.
    column_name = f"rentals_{int(month):02d}_{year}"
    per_customer = df.groupby("customer_id").size()
    return per_customer.reset_index(name=column_name)


def compare_rentals(df1, df2):
    """Join two per-customer count tables and add their difference.

    Args:
        df1: Counts for the first period (``customer_id`` plus a count column).
        df2: Counts for the second period, same layout.

    Returns:
        The inner join of both tables on ``customer_id`` with an extra
        ``difference`` column holding the second count minus the first.
        Customers missing from either table are dropped by the inner join.

    Raises:
        IndexError: If the joined table has fewer than two non-key columns.
    """
    merged = pd.merge(df1, df2, on="customer_id", how="inner")

    # Select the count columns by name rather than by position so the result
    # does not depend on where ``customer_id`` sits in the inputs.
    count_columns = [name for name in merged.columns if name != "customer_id"]
    first, second = count_columns[0], count_columns[1]

    merged["difference"] = merged[second] - merged[first]
    return merged
| \n", + " | customer_id | \n", + "rentals_05_2005 | \n", + "rentals_06_2005 | \n", + "difference | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "2 | \n", + "7 | \n", + "5 | \n", + "
| 1 | \n", + "2 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "
| 2 | \n", + "3 | \n", + "2 | \n", + "4 | \n", + "2 | \n", + "
| 3 | \n", + "5 | \n", + "3 | \n", + "5 | \n", + "2 | \n", + "
| 4 | \n", + "6 | \n", + "3 | \n", + "4 | \n", + "1 | \n", + "