def create_mysql_connection(host, user, database):
    """Open a MySQL connection plus a SQLAlchemy engine for the same database.

    The password is requested interactively with ``getpass`` so it is never
    stored in the notebook.

    Parameters:
        host: MySQL server host name.
        user: MySQL user name.
        database: Schema to connect to (e.g. ``"sakila"``).

    Returns:
        tuple: ``(conn, engine)`` on success, or ``(None, None)`` when
        ``mysql.connector`` raises ``Error`` while connecting.
    """
    # Function-scope import: the surrounding cell's import list stays as is.
    from urllib.parse import quote

    password = getpass("Introduce tu contraseña MySQL: ")
    try:
        conn = mysql.connector.connect(
            host=host,
            user=user,
            password=password,
            database=database,
        )
        if conn.is_connected():
            print(f"✔ Conectado con éxito a {database}")
    except Error as e:
        print(f"Error de conexión: {e}")
        return None, None

    # Credentials are percent-encoded before being placed in the URL:
    # characters such as '@', '/', ':' or '%' in a password would otherwise
    # be read as URL delimiters and produce a wrong or unparsable address.
    safe_user = quote(user, safe="")
    safe_password = quote(password, safe="")
    engine = create_engine(
        f"mysql+mysqlconnector://{safe_user}:{safe_password}@{host}/{database}"
    )
    print("✔ Engine para Pandas creado con éxito")
    return conn, engine  # both objects are handed back to the caller
def rentals_month(engine, month, year):
    """Load the rows of the ``rental`` table for one calendar month.

    Parameters:
        engine: Database engine/connection object handed to ``pd.read_sql``.
        month: Month number compared against ``MONTH(rental_date)``.
        year: Year compared against ``YEAR(rental_date)``.

    Returns:
        DataFrame: Every column of ``rental`` for the matching rows.
    """
    sql = """
        SELECT *
        FROM rental
        WHERE MONTH(rental_date) = %s
        AND YEAR(rental_date) = %s;
    """
    # The values are bound as query parameters instead of being formatted
    # into the SQL text.
    bound_values = (month, year)
    return pd.read_sql(sql, engine, params=bound_values)
"datetime64[ns]", + "type": "datetime" + }, + { + "name": "inventory_id", + "rawType": "int64", + "type": "integer" + }, + { + "name": "customer_id", + "rawType": "int64", + "type": "integer" + }, + { + "name": "return_date", + "rawType": "datetime64[ns]", + "type": "datetime" + }, + { + "name": "staff_id", + "rawType": "int64", + "type": "integer" + }, + { + "name": "last_update", + "rawType": "datetime64[ns]", + "type": "datetime" + } + ], + "ref": "c2a55cdc-a2d8-4443-8784-1d1031d1ff6b", + "rows": [ + [ + "0", + "1158", + "2005-06-14 22:53:33", + "1632", + "416", + "2005-06-18 21:37:33", + "2", + "2006-02-15 21:30:53" + ], + [ + "1", + "1159", + "2005-06-14 22:55:13", + "4395", + "516", + "2005-06-17 02:11:13", + "1", + "2006-02-15 21:30:53" + ], + [ + "2", + "1160", + "2005-06-14 23:00:34", + "2795", + "239", + "2005-06-18 01:58:34", + "2", + "2006-02-15 21:30:53" + ], + [ + "3", + "1161", + "2005-06-14 23:07:08", + "1690", + "285", + "2005-06-21 17:12:08", + "1", + "2006-02-15 21:30:53" + ], + [ + "4", + "1162", + "2005-06-14 23:09:38", + "987", + "310", + "2005-06-23 22:00:38", + "1", + "2006-02-15 21:30:53" + ] + ], + "shape": { + "columns": 7, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
011582005-06-14 22:53:3316324162005-06-18 21:37:3322006-02-15 21:30:53
111592005-06-14 22:55:1343955162005-06-17 02:11:1312006-02-15 21:30:53
211602005-06-14 23:00:3427952392005-06-18 01:58:3422006-02-15 21:30:53
311612005-06-14 23:07:0816902852005-06-21 17:12:0812006-02-15 21:30:53
411622005-06-14 23:09:389873102005-06-23 22:00:3812006-02-15 21:30:53
\n", + "
def rental_count_month(df_rentals, month, year):
    """Count rentals per customer for the given month and year.

    Parameters:
        df_rentals (DataFrame): Rental rows; must contain the columns
            ``customer_id`` and ``rental_id``.
        month (int): Month number, rendered with two digits in the
            output column name.
        year (int): Year used in the output column name.

    Returns:
        DataFrame: One row per ``customer_id`` with a second column named
        ``rentals_MM_YYYY`` holding the number of non-null ``rental_id``
        values for that customer.
    """
    # e.g. month=5, year=2005 -> "rentals_05_2005"
    column_label = "rentals_{:02d}_{}".format(month, year)

    per_customer = df_rentals.groupby("customer_id")["rental_id"].count()
    return per_customer.reset_index(name=column_label)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_06_2005
017
121
234
346
455
\n", + "
def rental_count_month(df_rentals: pd.DataFrame, month: int, year: int) -> pd.DataFrame:
    """Count the rentals made by each customer in one month.

    Parameters:
        df_rentals: Rental rows containing a ``customer_id`` column.
            ``None`` or an empty frame is accepted.
        month: Month number, zero-padded to two digits in the column name.
        year: Year used in the column name.

    Returns:
        A frame with ``customer_id`` and ``rentals_MM_YYYY`` (row count per
        customer), ordered by ``customer_id``. For ``None``/empty input the
        frame has the same two columns and no rows.
    """
    colname = f"rentals_{month:02d}_{year}"

    if df_rentals is None or df_rentals.empty:
        # Build the empty result with integer columns so its dtypes match
        # the non-empty case instead of defaulting to object.
        return pd.DataFrame(
            {
                "customer_id": pd.Series(dtype="int64"),
                colname: pd.Series(dtype="int64"),
            }
        )

    # groupby(sort=True) already orders the groups by customer_id, so no
    # extra sort of the result is needed.
    return (
        df_rentals.groupby("customer_id", sort=True)
        .size()
        .reset_index(name=colname)
    )
def compare_rentals(df_month1, df_month2):
    """Merge two per-customer rental counts and add a ``difference`` column.

    Parameters:
        df_month1 (DataFrame): ``customer_id`` plus a rental-count column
            in second position (e.g. ``rentals_06_2005``).
        df_month2 (DataFrame): Same layout for the month to compare against.

    Returns:
        DataFrame: Outer merge on ``customer_id``; customers missing from one
        input get a count of 0 there. ``difference`` is the second count
        minus the first. All columns other than ``customer_id`` are int64.
        When both inputs use the same count column name, the merged columns
        carry the suffixes ``_x`` (first input) and ``_y`` (second input).
    """
    key = "customer_id"
    # The count column is taken by position, as laid out above.
    first_col = df_month1.columns[1]
    second_col = df_month2.columns[1]

    merged = pd.merge(
        df_month1, df_month2, on=key, how="outer", suffixes=("_x", "_y")
    ).fillna(0)

    if first_col == second_col:
        # Identical names are renamed by the merge; follow the suffixed
        # names instead of looking up a column that no longer exists.
        first_col, second_col = f"{first_col}_x", f"{second_col}_y"

    merged["difference"] = merged[second_col] - merged[first_col]

    # The outer merge introduces NaN (hence float) for unmatched customers;
    # cast back to a fixed-width integer type after filling with 0.
    value_cols = [col for col in merged.columns if col != key]
    return merged.astype({col: "int64" for col in value_cols})
"5", + "-1" + ], + [ + "4", + "5", + "5", + "16", + "11" + ] + ], + "shape": { + "columns": 4, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_06_2005rentals_07_2005difference
017125
1211413
234139
3465-1
4551611
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_06_2005 rentals_07_2005 difference\n", + "0 1 7 12 5\n", + "1 2 1 14 13\n", + "2 3 4 13 9\n", + "3 4 6 5 -1\n", + "4 5 5 16 11" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#EJEMPLO USO\n", + "df_06 = rental_count_month(rentals_month(engine, 6, 2005), 6, 2005)\n", + "df_07 = rental_count_month(rentals_month(engine, 7, 2005), 7, 2005)\n", + "df_compare = compare_rentals(df_06, df_07)\n", + "df_compare.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}