# 1. Establish a connection between Python and the Sakila database.

import os
from getpass import getpass
from urllib.parse import quote

from sqlalchemy import create_engine, text
import pandas as pd

# Connection settings. Every value can be overridden through an environment
# variable so that real credentials never have to be stored in the notebook.
USER = os.environ.get("MYSQL_USER", "root")
# The password is taken from MYSQL_PASSWORD; when that variable is not set the
# user is prompted for it, so it is never written into the file or its outputs.
PASSWORD = os.environ.get("MYSQL_PASSWORD")
if PASSWORD is None:
    PASSWORD = getpass("MySQL password: ")
HOST = os.environ.get("MYSQL_HOST", "localhost")  # or the IP/host where MySQL is running
PORT = os.environ.get("MYSQL_PORT", "3306")  # default MySQL port
DB = os.environ.get("MYSQL_DB", "sakila")

# Connection string using the pymysql driver. User and password are
# percent-encoded so that characters such as '@', '/' or ':' inside them do
# not corrupt the URL.
connection_string = (
    f"mysql+pymysql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DB}"
)

engine = create_engine(connection_string)


# 2. Write a Python function called rentals_month that retrieves rental data for a
# given month and year (passed as parameters) from the Sakila database as a Pandas DataFrame.
# The function should take in three parameters

def rentals_month(engine, month, year):
    """
    Retrieve every row of the ``rental`` table for the given month and year.

    Parameters
    ----------
    engine : SQLAlchemy connectable
        Engine (or connection) handed to ``pandas.read_sql``.
    month : int or int-like
        Month number compared against ``MONTH(rental_date)``.
    year : int or int-like
        Year compared against ``YEAR(rental_date)``.

    Returns
    -------
    pandas.DataFrame
        All columns of the matching ``rental`` rows.

    Raises
    ------
    ValueError, TypeError
        If ``month`` or ``year`` cannot be converted with ``int()``.
    """
    # Bound parameters instead of string interpolation: the values are sent to
    # the driver separately from the SQL text, so they cannot alter the query.
    query = text(
        """
        SELECT *
        FROM rental
        WHERE MONTH(rental_date) = :month
          AND YEAR(rental_date) = :year
        """
    )
    # int() keeps numeric strings and NumPy integers working as they did with
    # the interpolated query, and rejects anything that is not a number.
    params = {"month": int(month), "year": int(year)}
    return pd.read_sql(query, engine, params=params)


# 3.
# Develop a Python function called rental_count_month that takes the DataFrame provided
# by rentals_month as input along with the month and year and returns a new DataFrame containing
# the number of rentals made by each customer_id during the selected month and year.


def rental_count_month(df, month, year):
    """
    Count the rentals of each customer in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rental rows; must contain a ``customer_id`` column.
    month : int or int-like
        Month number, used only to build the name of the count column.
    year : int or str
        Year, used only to build the name of the count column.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``customer_id`` and ``alquileres_<MM>_<year>`` holding the
        number of rows of ``df`` for that customer.
    """
    # int() lets numeric strings and NumPy integers go through the ``02d`` format.
    col_name = f"alquileres_{int(month):02d}_{year}"
    rentals_per_customer = df.groupby("customer_id").size()
    return rentals_per_customer.reset_index(name=col_name)


# 4. Create a Python function called compare_rentals that takes two DataFrames as input containing
# the number of rentals made by each customer in different months and years.
# The function should return a combined DataFrame with a new 'difference' column, which is
# the difference between the number of rentals in the two months.

def compare_rentals(df1, df2):
    """
    Merge two per-customer rental counts and compute their difference.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with a ``customer_id`` column and the rental count in their
        second column (the layout produced by ``rental_count_month``).

    Returns
    -------
    pandas.DataFrame
        Outer merge of both inputs on ``customer_id``, with missing values
        replaced by 0 and a ``diferencia`` column equal to the count of
        ``df2`` minus the count of ``df1``. When both inputs use the same
        count column name, the merged columns carry the ``_x`` (``df1``) and
        ``_y`` (``df2``) suffixes.
    """
    source_col1 = df1.columns[1]
    source_col2 = df2.columns[1]

    df_merged = pd.merge(
        df1, df2, on="customer_id", how="outer", suffixes=("_x", "_y")
    ).fillna(0)

    # Identical column names are renamed by merge() with the suffixes above;
    # looking them up under their original name would raise KeyError.
    if source_col1 == source_col2:
        col1, col2 = f"{source_col1}_x", f"{source_col2}_y"
    else:
        col1, col2 = source_col1, source_col2

    # The outer merge introduces NaN for customers present in only one frame,
    # which silently upcasts integer counts to float. Restore the source dtype
    # so counts read as 2 and 7 rather than 2.0 and 7.0.
    for merged_col, source in ((col1, df1[source_col1]), (col2, df2[source_col2])):
        if pd.api.types.is_integer_dtype(source.dtype):
            df_merged[merged_col] = df_merged[merged_col].astype(source.dtype)

    df_merged["diferencia"] = df_merged[col2] - df_merged[col1]
    return df_merged
| \n", + " | customer_id | \n", + "alquileres_05_2005 | \n", + "alquileres_06_2005 | \n", + "diferencia | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "2.0 | \n", + "7.0 | \n", + "5.0 | \n", + "
| 1 | \n", + "2 | \n", + "1.0 | \n", + "1.0 | \n", + "0.0 | \n", + "
| 2 | \n", + "3 | \n", + "2.0 | \n", + "4.0 | \n", + "2.0 | \n", + "
| 3 | \n", + "4 | \n", + "0.0 | \n", + "6.0 | \n", + "6.0 | \n", + "
| 4 | \n", + "5 | \n", + "3.0 | \n", + "5.0 | \n", + "2.0 | \n", + "