Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 243 additions & 0 deletions LAB_Connect SQL_Python.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"id": "552553f7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: sqlalchemy in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (2.0.44)\n",
"Requirement already satisfied: greenlet>=1 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from sqlalchemy) (3.3.0)\n",
"Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from sqlalchemy) (4.15.0)\n",
"Requirement already satisfied: cryptography in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (46.0.3)\n",
"Requirement already satisfied: cffi>=2.0.0 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from cryptography) (2.0.0)\n",
"Requirement already satisfied: pycparser in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from cffi>=2.0.0->cryptography) (2.22)\n",
"Requirement already satisfied: pymysql in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (1.1.2)\n"
]
}
],
"source": [
"# Install the database dependencies into the environment of the running kernel.\n",
"# %pip (unlike !pip) always targets the kernel's own interpreter. The versions\n",
"# are pinned to the ones reported in this notebook's last recorded run.\n",
"%pip install sqlalchemy==2.0.44 cryptography==46.0.3 pymysql==1.1.2"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f872e6e8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import getpass # Para pedir la contraseña de forma segura"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "50a9e467",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ Conexión establecida con éxito.\n"
]
}
],
"source": [
"# --- Connection settings (replace these values with your own) ---\n",
"# For a database other than MySQL, the dialect, driver and port (3306) must change.\n",
"from sqlalchemy.engine import URL\n",
"\n",
"DB_TIPO = 'mysql'\n",
"DB_DRIVER = 'pymysql'  # or 'mysqlclient' if it is installed\n",
"DB_USER = 'root'  # database user name\n",
"DB_PASS = getpass.getpass(\"Ingresa tu contraseña de MySQL: \")  # prompts for the password\n",
"DB_HOST = '127.0.0.1'  # or the IP address of the database server\n",
"DB_PORT = '3306'  # default MySQL port\n",
"DB_NAME = 'sakila'  # name of the database\n",
"\n",
"# --- Engine creation ---\n",
"# URL.create() takes every component unescaped, so a password containing\n",
"# characters such as '@', ':' or '/' cannot corrupt the connection URL the way\n",
"# plain string interpolation would.\n",
"mysql_url = URL.create(\n",
"    drivername=f\"{DB_TIPO}+{DB_DRIVER}\",\n",
"    username=DB_USER,\n",
"    password=DB_PASS,\n",
"    host=DB_HOST,\n",
"    port=int(DB_PORT),\n",
"    database=DB_NAME,\n",
")\n",
"\n",
"# 'engine' is the object used for every query below. create_engine() is lazy and\n",
"# does not contact the server, so open (and immediately release) one connection\n",
"# to verify host and credentials before reporting success.\n",
"try:\n",
"    engine = create_engine(mysql_url)\n",
"    with engine.connect():\n",
"        pass\n",
"    print(\"✅ Conexión establecida con éxito.\")\n",
"except Exception as e:\n",
"    print(f\"❌ Error al conectar a la base de datos: {e}\")\n",
"    # The following cells cannot work without a connection, so stop here\n",
"    # instead of failing later with a less obvious error.\n",
"    raise"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "6de7a008",
"metadata": {},
"outputs": [],
"source": [
"# PASO 2\n",
"def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n",
"    \"\"\"\n",
"    Fetch every row of the Sakila 'rental' table whose rental_date falls in the\n",
"    given month and year.\n",
"\n",
"    The month and year are sent to the database as bound parameters instead of\n",
"    being formatted into the SQL text, so their values cannot alter the query.\n",
"\n",
"    Parameters:\n",
"    - engine: SQLAlchemy engine used to run the query.\n",
"    - month (int): Month number (e.g. 5 for May).\n",
"    - year (int): Year number (e.g. 2005).\n",
"\n",
"    Returns:\n",
"    - pd.DataFrame: Columns rental_id, customer_id and rental_date, one row per\n",
"      matching rental.\n",
"    \"\"\"\n",
"    # Imported locally so the function does not depend on which names the\n",
"    # notebook's import cell happened to bring in.\n",
"    from sqlalchemy import text\n",
"\n",
"    # :month and :year are placeholders whose values are bound at execution time.\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            customer_id,\n",
"            rental_date\n",
"        FROM\n",
"            rental\n",
"        WHERE\n",
"            MONTH(rental_date) = :month AND YEAR(rental_date) = :year;\n",
"        \"\"\"\n",
"    )\n",
"\n",
"    print(f\"Buscando alquileres para el mes {month}/{year}...\")\n",
"\n",
"    # pandas runs the statement on the engine and returns the result set\n",
"    # directly as a DataFrame.\n",
"    df = pd.read_sql(query, engine, params={\"month\": month, \"year\": year})\n",
"\n",
"    print(f\"Se encontraron {len(df)} alquileres.\")\n",
"\n",
"    return df\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4108cad8",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:\n",
"    \"\"\"\n",
"    Count the rows of ``df`` for each customer.\n",
"\n",
"    Parameters:\n",
"    - df: Frame with a 'customer_id' column, one row per rental.\n",
"    - month, year: Used only to name the count column,\n",
"      'rentals_<month>_<year>' with the month zero-padded to two digits.\n",
"\n",
"    Returns:\n",
"    - pd.DataFrame: Columns 'customer_id' and the count column, one row per\n",
"      distinct customer_id.\n",
"    \"\"\"\n",
"    label = f\"rentals_{month:02d}_{year}\"\n",
"    # Group sizes come back as a Series indexed by customer_id; naming it and\n",
"    # resetting the index turns it into a two-column frame.\n",
"    per_customer = df.groupby('customer_id').size()\n",
"    return per_customer.rename(label).reset_index()\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "29f05683",
"metadata": {},
"outputs": [],
"source": [
"## PASO 4: Función para comparar la actividad (merge)\n",
"\n",
"def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:\n",
"    \"\"\"\n",
"    Join two per-customer count frames and add the change between them.\n",
"\n",
"    Each input must have a 'customer_id' column; its first other column is\n",
"    taken as the count column. Only customers present in both frames are kept\n",
"    (inner join).\n",
"\n",
"    Returns a frame with the columns customer_id, <df1 count>, <df2 count> and\n",
"    'difference' (df2 count minus df1 count).\n",
"    \"\"\"\n",
"    key = 'customer_id'\n",
"\n",
"    def count_column(frame):\n",
"        # First column that is not the join key.\n",
"        return [name for name in frame.columns if name != key][0]\n",
"\n",
"    first_col = count_column(df1)\n",
"    second_col = count_column(df2)\n",
"\n",
"    merged = df1.merge(df2, on=key, how='inner')\n",
"    merged = merged.assign(difference=merged[second_col] - merged[first_col])\n",
"\n",
"    # Fix the column order of the result.\n",
"    return merged[[key, first_col, second_col, 'difference']]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "20aff7a9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Buscando alquileres para el mes 5/2005...\n",
"Se encontraron 1156 alquileres.\n",
"Buscando alquileres para el mes 6/2005...\n",
"Se encontraron 2311 alquileres.\n",
"\n",
"Resultados de la comparación (Mayo vs. Junio):\n",
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"386 454 1 10 9\n",
"178 213 1 9 8\n",
"248 295 1 9 8\n",
"389 457 1 9 8\n",
"322 380 1 8 7\n",
"194 234 1 8 7\n",
"218 260 1 8 7\n",
"481 561 2 9 7\n",
"23 27 1 8 7\n",
"224 267 3 9 6\n"
]
}
],
"source": [
"# RUN\n",
"\n",
"# 1. Fetch the raw rentals for each month\n",
"df_may = rentals_month(engine, month=5, year=2005)\n",
"df_jun = rentals_month(engine, month=6, year=2005)\n",
"\n",
"# 2. Count rentals per customer\n",
"df_may_counts = rental_count_month(df_may, month=5, year=2005)\n",
"df_jun_counts = rental_count_month(df_jun, month=6, year=2005)\n",
"\n",
"# 3. Compare the two months\n",
"final_comparison = compare_rentals(df_may_counts, df_jun_counts)\n",
"\n",
"print(\"\\nResultados de la comparación (Mayo vs. Junio):\")\n",
"# Leave the frame as the cell's last expression so Jupyter renders it as a rich\n",
"# table instead of plain printed text. Shows the 10 largest increases.\n",
"final_comparison.sort_values(by='difference', ascending=False).head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d5df194",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}