Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 224 additions & 0 deletions connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3ff965a3",
"metadata": {},
"outputs": [],
"source": [
"# 1. Establish a connection between Python and the Sakila database.\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"from urllib.parse import quote_plus\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"\n",
"# Connection settings are read from environment variables so that no real\n",
"# credential has to be stored in the notebook. The fallbacks are the values\n",
"# this notebook used before (local MySQL, Sakila schema).\n",
"USER = os.environ.get(\"MYSQL_USER\", \"root\")\n",
"# Prompt interactively when MYSQL_PASSWORD is not set (or is empty).\n",
"PASSWORD = os.environ.get(\"MYSQL_PASSWORD\") or getpass(\"MySQL password: \")\n",
"HOST = os.environ.get(\"MYSQL_HOST\", \"localhost\")  # host name or IP of the MySQL server\n",
"PORT = os.environ.get(\"MYSQL_PORT\", \"3306\")  # 3306 is the MySQL default port\n",
"DB = os.environ.get(\"MYSQL_DB\", \"sakila\")\n",
"\n",
"# Connection URL for the pymysql driver. User and password are URL-quoted so\n",
"# that characters such as '@', ':' or '/' in them cannot corrupt the URL.\n",
"connection_string = (\n",
"    f\"mysql+pymysql://{quote_plus(USER)}:{quote_plus(PASSWORD)}@{HOST}:{PORT}/{DB}\"\n",
")\n",
"\n",
"engine = create_engine(connection_string)\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "3d06cf2b",
"metadata": {},
"outputs": [],
"source": [
"# 2. Write a Python function called rentals_month that retrieves rental data for a \n",
"# given month and year (passed as parameters) from the Sakila database as a Pandas DataFrame.\n",
"# The function should take in three parameters\n",
"\n",
"def rentals_month(engine, month, year):\n",
"    \"\"\"\n",
"    Retrieve every row of the `rental` table for the given month and year.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    engine : SQLAlchemy engine (or connection) passed through to `pd.read_sql`.\n",
"    month : month number; converted with `int()` before use.\n",
"    year : year number; converted with `int()` before use.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame with all columns (`SELECT *`) of the rows whose\n",
"    `rental_date` falls in that month and year.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError / TypeError if `month` or `year` cannot be converted to int.\n",
"    \"\"\"\n",
"    # Local import: `text` is only needed here and the notebook's import cell\n",
"    # brings in `create_engine` only.\n",
"    from sqlalchemy import text\n",
"\n",
"    # Bound parameters instead of f-string interpolation: the values are sent\n",
"    # to the driver separately from the SQL text, so they cannot alter it.\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT *\n",
"        FROM rental\n",
"        WHERE MONTH(rental_date) = :month\n",
"          AND YEAR(rental_date) = :year\n",
"        \"\"\"\n",
"    )\n",
"    params = {\"month\": int(month), \"year\": int(year)}\n",
"    return pd.read_sql(query, engine, params=params)\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "501f11dd",
"metadata": {},
"outputs": [],
"source": [
"# 3. Develop a Python function called rental_count_month that takes the DataFrame provided \n",
"# by rentals_month as input along with the month and year and returns a new DataFrame containing \n",
"# the number of rentals made by each customer_id during the selected month and year.\n",
"\n",
"\n",
"def rental_count_month(df, month, year):\n",
"    \"\"\"\n",
"    Count the rentals of each customer.\n",
"\n",
"    Groups `df` by `customer_id` and returns a DataFrame with two columns:\n",
"    `customer_id` and the number of rows in each group. The count column is\n",
"    named `alquileres_<MM>_<year>`, with the month zero-padded to two digits.\n",
"    \"\"\"\n",
"    count_label = f\"alquileres_{month:02d}_{year}\"\n",
"    rentals_per_customer = df.groupby(\"customer_id\").size()\n",
"    # Naming the Series first makes reset_index() use it as the column name.\n",
"    return rentals_per_customer.rename(count_label).reset_index()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "60f23c52",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# 4. Create a Python function called compare_rentals that takes two DataFrames as input containing \n",
"# the number of rentals made by each customer in different months and years. \n",
"# The function should return a combined DataFrame with a new 'difference' column, which is \n",
"# the difference between the number of rentals in the two months.\n",
"\n",
"def compare_rentals(df1, df2):\n",
"    \"\"\"\n",
"    Merge two per-customer rental-count DataFrames and add their difference.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df1, df2 : DataFrames with a `customer_id` column and the rental count\n",
"        in their second column (position 1).\n",
"\n",
"    Returns\n",
"    -------\n",
"    DataFrame with one row per customer present in either input, both count\n",
"    columns (missing values filled with 0) and a `diferencia` column equal to\n",
"    the df2 count minus the df1 count.\n",
"    \"\"\"\n",
"    col1 = df1.columns[1]\n",
"    col2 = df2.columns[1]\n",
"    suffixes = (\"_x\", \"_y\")\n",
"    # Outer join keeps customers that appear in only one input; their missing\n",
"    # count becomes 0 (filling NaN is why the counts can come back as floats).\n",
"    df_merged = pd.merge(\n",
"        df1, df2, on=\"customer_id\", how=\"outer\", suffixes=suffixes\n",
"    ).fillna(0)\n",
"    # When both inputs use the same count column name (e.g. the same month\n",
"    # and year), merge renames the two columns with the suffixes. Look them up\n",
"    # under the suffixed names instead of failing with KeyError.\n",
"    if col1 == col2:\n",
"        col1, col2 = f\"{col1}{suffixes[0]}\", f\"{col2}{suffixes[1]}\"\n",
"    df_merged[\"diferencia\"] = df_merged[col2] - df_merged[col1]\n",
"    return df_merged\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "9cf34905",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>alquileres_05_2005</th>\n",
" <th>alquileres_06_2005</th>\n",
" <th>diferencia</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id alquileres_05_2005 alquileres_06_2005 diferencia\n",
"0 1 2.0 7.0 5.0\n",
"1 2 1.0 1.0 0.0\n",
"2 3 2.0 4.0 2.0\n",
"3 4 0.0 6.0 6.0\n",
"4 5 3.0 5.0 2.0"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check: per-customer rental counts for May 2005 versus June 2005.\n",
"\n",
"df_may, df_jun = (rentals_month(engine, month, 2005) for month in (5, 6))\n",
"count_may = rental_count_month(df_may, 5, 2005)\n",
"count_jun = rental_count_month(df_jun, 6, 2005)\n",
"comparison = compare_rentals(count_may, count_jun)\n",
"comparison.head()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}