From 846e992e328408a04a0a2a0a6aa31ba453b573c1 Mon Sep 17 00:00:00 2001 From: CristinaOancea Date: Sat, 6 Dec 2025 15:05:26 +0100 Subject: [PATCH] LAB Connect Python and SQL --- LAB_Connect SQL_Python.ipynb | 243 +++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 LAB_Connect SQL_Python.ipynb diff --git a/LAB_Connect SQL_Python.ipynb b/LAB_Connect SQL_Python.ipynb new file mode 100644 index 0000000..77db4e0 --- /dev/null +++ b/LAB_Connect SQL_Python.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "552553f7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sqlalchemy in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (2.0.44)\n", + "Requirement already satisfied: greenlet>=1 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from sqlalchemy) (3.3.0)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from sqlalchemy) (4.15.0)\n", + "Requirement already satisfied: cryptography in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (46.0.3)\n", + "Requirement already satisfied: cffi>=2.0.0 in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from cryptography) (2.0.0)\n", + "Requirement already satisfied: pycparser in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (from cffi>=2.0.0->cryptography) (2.22)\n", + "Requirement already satisfied: pymysql in c:\\users\\cmoan\\miniforge3\\lib\\site-packages (1.1.2)\n" + ] + } + ], + "source": [ + "!pip install sqlalchemy \n", + "!pip install cryptography\n", + "!pip install pymysql" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f872e6e8", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine\n", + "import getpass # Para pedir la contraseña de forma segura" + ] + }, + { + "cell_type": "code", + 
"execution_count": 17, + "id": "50a9e467", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Conexión establecida con éxito.\n" + ] + } + ], + "source": [ + "# --- Variables de Conexión (Asegúrate de cambiar estos valores por los tuyos) ---\n", + "# Si tu base de datos no es MySQL, el \"mysql+...\" y el puerto (3306) deben cambiar.\n", + "DB_TIPO = 'mysql'\n", + "DB_DRIVER = 'pymysql' # O 'mysqlclient' si lo tienes instalado\n", + "DB_USER = 'root' # Tu nombre de usuario\n", + "DB_PASS = getpass.getpass(\"Ingresa tu contraseña de MySQL: \") # Te pedirá la contraseña\n", + "DB_HOST = '127.0.0.1' # O la IP donde esté tu DB\n", + "DB_PORT = '3306' # Puerto por defecto de MySQL\n", + "DB_NAME = 'sakila' # El nombre de la base de datos\n", + "\n", + "# --- Creación del Motor de Conexión (Engine) ---\n", + "# Esta línea construye la URL de conexión que usa SQLAlchemy\n", + "mysql_url = f\"{DB_TIPO}+{DB_DRIVER}://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}\"\n", + "\n", + "# Creamos el motor. Este objeto 'engine' es el que usaremos para todas las consultas.\n", + "try:\n", + " engine = create_engine(mysql_url)\n", + " print(\"✅ Conexión establecida con éxito.\")\n", + "except Exception as e:\n", + " print(f\"❌ Error al conectar a la base de datos: {e}\")\n", + " # Es crucial que la conexión funcione antes de seguir." 
def rentals_month(engine, month: int, year: int) -> pd.DataFrame:
    """Return the rentals made in a given month and year.

    Reads the ``rental`` table and keeps the rows whose ``rental_date`` falls
    in ``month`` of ``year``.

    Args:
        engine: SQLAlchemy engine (or connection) passed to ``pandas.read_sql``.
        month: Month number (e.g. 5 for May).
        year: Year number (e.g. 2005).

    Returns:
        A DataFrame with the columns ``rental_id``, ``customer_id`` and
        ``rental_date``.
    """
    # Local import: the notebook's import cell only brings in create_engine.
    from sqlalchemy import text

    # month/year travel as bound parameters rather than being formatted into
    # the SQL text, so their values can never alter the statement itself.
    query = text(
        """
        SELECT
            rental_id,
            customer_id,
            rental_date
        FROM
            rental
        WHERE
            MONTH(rental_date) = :month AND YEAR(rental_date) = :year
        """
    )

    print(f"Buscando alquileres para el mes {month}/{year}...")

    # pandas opens a connection on the engine, runs the statement and returns
    # the result set directly as a DataFrame.
    df = pd.read_sql(query, engine, params={"month": month, "year": year})

    print(f"Se encontraron {len(df)} alquileres.")

    return df


def rental_count_month(df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:
    """Count the rows of ``df`` per customer.

    Args:
        df: Rental rows containing a ``customer_id`` column.
        month: Month number, used only to name the count column.
        year: Year number, used only to name the count column.

    Returns:
        A DataFrame with ``customer_id`` and one count column named
        ``rentals_<MM>_<YYYY>`` (month zero-padded to two digits).
    """
    column_name = f"rentals_{month:02d}_{year}"
    return df.groupby('customer_id').size().reset_index(name=column_name)


def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Compare per-customer rental counts between two periods.

    Each input must contain ``customer_id`` plus at least one other column;
    the first such column of each frame is taken as its count column.

    Args:
        df1: Counts for the first period.
        df2: Counts for the second period.

    Returns:
        A DataFrame with ``customer_id``, both count columns and
        ``difference`` (second period minus first period). The merge is an
        inner join, so only customers present in both inputs appear.
    """
    # The count column names depend on the month/year, so look them up.
    col1 = [col for col in df1.columns if col != 'customer_id'][0]
    col2 = [col for col in df2.columns if col != 'customer_id'][0]

    combined_df = pd.merge(df1, df2, on='customer_id', how='inner')

    # Second period minus first period.
    combined_df['difference'] = combined_df[col2] - combined_df[col1]

    # Fix the column order of the result.
    return combined_df[['customer_id', col1, col2, 'difference']]
# RUN: compare customer activity between May 2005 and June 2005.

# Step 1 - fetch the raw rental rows for each month.
df_may = rentals_month(engine, 5, 2005)
df_jun = rentals_month(engine, 6, 2005)

# Step 2 - collapse each month into one rental count per customer.
df_may_counts = rental_count_month(df_may, 5, 2005)
df_jun_counts = rental_count_month(df_jun, 6, 2005)

# Step 3 - join both months and compute June minus May.
final_comparison = compare_rentals(df1=df_may_counts, df2=df_jun_counts)

# Show the ten customers with the largest increase.
print("\nResultados de la comparación (Mayo vs. Junio):")
print(
    final_comparison
    .sort_values('difference', ascending=False)
    .head(10)
)