# 1. Establish a connection between Python and the Sakila database.

import os
from urllib.parse import quote

from sqlalchemy import create_engine
import pandas as pd

# Connection settings. Each value may be overridden through an environment
# variable so that real credentials never have to be written into the
# notebook; the fallbacks are the original placeholder values.
USER = os.getenv("SAKILA_USER", "root")
PASSWORD = os.getenv("SAKILA_PASSWORD", "xxxx")  # your MySQL password
HOST = os.getenv("SAKILA_HOST", "localhost")  # or the IP/host where MySQL runs
PORT = os.getenv("SAKILA_PORT", "3306")  # default MySQL port
DB = os.getenv("SAKILA_DB", "sakila")

# Connection URL using the pymysql driver. User and password are
# percent-encoded because characters such as "@", ":" or "/" would otherwise
# be read as URL delimiters and break the connection string.
connection_string = (
    f"mysql+pymysql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DB}"
)

engine = create_engine(connection_string)
# Write a Python function called rentals_month that retrieves rental data for a
# given month and year (passed as parameters) from the Sakila database as a Pandas DataFrame.
# The function should take in three parameters


def rentals_month(engine, month, year):
    """Return every row of the ``rental`` table for the given month and year.

    Args:
        engine: Connection object accepted by ``pandas.read_sql`` (the
            notebook passes its SQLAlchemy engine).
        month: Month number; any value ``int()`` accepts.
        year: Year number; any value ``int()`` accepts.

    Returns:
        A DataFrame holding all columns of the matching ``rental`` rows.

    Raises:
        ValueError, TypeError: If ``month`` or ``year`` is not integer-like.
    """
    # Both values are spliced into the SQL text, so force them to plain
    # integers first: anything that could alter the statement is rejected
    # here instead of being sent to the database.
    month = int(month)
    year = int(year)
    query = f"""
        SELECT *
        FROM rental
        WHERE MONTH(rental_date) = {month}
          AND YEAR(rental_date) = {year};
    """
    return pd.read_sql(query, engine)


# 3. Develop a Python function called rental_count_month that takes the DataFrame provided
# by rentals_month as input along with the month and year and returns a new DataFrame containing
# the number of rentals made by each customer_id during the selected month and year.


def rental_count_month(df, month, year):
    """Return the number of rentals per customer found in ``df``.

    ``df`` is not filtered here; ``month`` and ``year`` are only used to
    label the count column as ``alquileres_<MM>_<year>``.

    Args:
        df: DataFrame with a ``customer_id`` column, one row per rental.
        month: Month number used in the column label (zero-padded to two
            digits); any value ``int()`` accepts.
        year: Year used verbatim in the column label.

    Returns:
        A DataFrame with ``customer_id`` and the labelled count column.
    """
    # int() keeps this in step with rentals_month, which also accepts "5".
    col_name = f"alquileres_{int(month):02d}_{year}"
    per_customer = df.groupby("customer_id").size()
    return per_customer.reset_index(name=col_name)


# 4. Create a Python function called compare_rentals that takes two DataFrames as input containing
# the number of rentals made by each customer in different months and years.
# The function should return a combined DataFrame with a new 'difference' column, which is
# the difference between the number of rentals in the two months.


def compare_rentals(df1, df2):
    """Merge two per-customer rental counts and add their difference.

    The frames are outer-joined on ``customer_id``; a customer missing from
    one side gets a count of 0 there. The count column of each frame is
    taken to be its second column.

    Args:
        df1: Counts for the first period (``customer_id`` + count column).
        df2: Counts for the second period (``customer_id`` + count column).

    Returns:
        The merged DataFrame with an extra ``diferencia`` column equal to
        the second period's count minus the first period's count.
    """
    src1 = df1.columns[1]
    src2 = df2.columns[1]
    merged = pd.merge(
        df1, df2, on="customer_id", how="outer", suffixes=("_x", "_y")
    ).fillna(0)

    # Column names present in both inputs are renamed by merge with the
    # suffixes above, so look the count columns up under their merged names
    # (otherwise comparing two frames with the same label raises KeyError).
    shared = (set(df1.columns) & set(df2.columns)) - {"customer_id"}
    col1 = f"{src1}_x" if src1 in shared else src1
    col2 = f"{src2}_y" if src2 in shared else src2

    # The outer join introduces NaN, which turns integer counts into floats;
    # restore integer columns once the gaps have been filled with 0.
    for merged_col, original in ((col1, df1[src1]), (col2, df2[src2])):
        if pd.api.types.is_integer_dtype(original.dtype):
            merged[merged_col] = merged[merged_col].astype("int64")

    merged["diferencia"] = merged[col2] - merged[col1]
    return merged
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idalquileres_05_2005alquileres_06_2005diferencia
012.07.05.0
121.01.00.0
232.04.02.0
340.06.06.0
453.05.02.0
\n", + "
" + ], + "text/plain": [ + " customer_id alquileres_05_2005 alquileres_06_2005 diferencia\n", + "0 1 2.0 7.0 5.0\n", + "1 2 1.0 1.0 0.0\n", + "2 3 2.0 4.0 2.0\n", + "3 4 0.0 6.0 6.0\n", + "4 5 3.0 5.0 2.0" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check\n", + "\n", + "df_may = rentals_month(engine, 5, 2005)\n", + "df_jun = rentals_month(engine, 6, 2005)\n", + "count_may = rental_count_month(df_may, 5, 2005)\n", + "count_jun = rental_count_month(df_jun, 6, 2005)\n", + "comparison = compare_rentals(count_may, count_jun)\n", + "comparison.head()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}