Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
344 changes: 344 additions & 0 deletions lab-sql-python-connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "47ed2e29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pymysql in /opt/anaconda3/lib/python3.13/site-packages (1.1.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install pymysql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c5060bba",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pymysql\n",
"from sqlalchemy import create_engine, text\n",
"\n",
"# Prompt for the database password instead of hardcoding it in the notebook.\n",
"password = getpass.getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4582723a",
"metadata": {},
"outputs": [],
"source": [
"# Schema to query, plus an empty placeholder for the connection string.\n",
"bd = \"sakila\"\n",
"connection_string = \"\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "494b90ff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Engine(mysql+pymysql://root:***@localhost/sakila)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Imported here so this cell is self-contained.\n",
"from sqlalchemy.engine import URL\n",
"\n",
"bd = \"sakila\"\n",
"\n",
"# Build the URL from its parts instead of concatenating strings: URL.create\n",
"# escapes special characters (such as @, / or :) that the password may contain,\n",
"# which would otherwise produce a malformed connection URL.\n",
"connection_url = URL.create(\n",
"    \"mysql+pymysql\",\n",
"    username=\"root\",\n",
"    password=password,\n",
"    host=\"localhost\",\n",
"    database=bd,\n",
")\n",
"engine = create_engine(connection_url)\n",
"engine"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6b47e247",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"Load the rentals made in one calendar month.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    engine : sqlalchemy.engine.Engine\n",
"        Engine connected to the database that holds the ``rental`` table.\n",
"    month : int\n",
"        Month number matched against ``MONTH(rental_date)``.\n",
"    year : int\n",
"        Year matched against ``YEAR(rental_date)``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per matching rental with the columns rental_id, rental_date,\n",
"        inventory_id, customer_id, return_date and staff_id.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError, TypeError\n",
"        If ``month`` or ``year`` cannot be converted to ``int``.\n",
"    \"\"\"\n",
"    # Bind the values as parameters rather than formatting them into the SQL\n",
"    # text, so the arguments can never change the structure of the statement.\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            rental_date,\n",
"            inventory_id,\n",
"            customer_id,\n",
"            return_date,\n",
"            staff_id\n",
"        FROM rental\n",
"        WHERE MONTH(rental_date) = :month\n",
"          AND YEAR(rental_date) = :year\n",
"        \"\"\"\n",
"    )\n",
"    # Plain ints keep the DB driver from choking on numpy integer types.\n",
"    params = {\"month\": int(month), \"year\": int(year)}\n",
"    return pd.read_sql_query(query, engine, params=params)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4ad87897",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rental_id</th>\n",
" <th>rental_date</th>\n",
" <th>inventory_id</th>\n",
" <th>customer_id</th>\n",
" <th>return_date</th>\n",
" <th>staff_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2005-05-24 22:53:30</td>\n",
" <td>367</td>\n",
" <td>130</td>\n",
" <td>2005-05-26 22:04:30</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2005-05-24 22:54:33</td>\n",
" <td>1525</td>\n",
" <td>459</td>\n",
" <td>2005-05-28 19:40:33</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2005-05-24 23:03:39</td>\n",
" <td>1711</td>\n",
" <td>408</td>\n",
" <td>2005-06-01 22:12:39</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>2005-05-24 23:04:41</td>\n",
" <td>2452</td>\n",
" <td>333</td>\n",
" <td>2005-06-03 01:43:41</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>2005-05-24 23:05:21</td>\n",
" <td>2079</td>\n",
" <td>222</td>\n",
" <td>2005-06-02 04:33:21</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rental_id rental_date inventory_id customer_id \\\n",
"0 1 2005-05-24 22:53:30 367 130 \n",
"1 2 2005-05-24 22:54:33 1525 459 \n",
"2 3 2005-05-24 23:03:39 1711 408 \n",
"3 4 2005-05-24 23:04:41 2452 333 \n",
"4 5 2005-05-24 23:05:21 2079 222 \n",
"\n",
" return_date staff_id \n",
"0 2005-05-26 22:04:30 1 \n",
"1 2005-05-28 19:40:33 1 \n",
"2 2005-06-01 22:12:39 1 \n",
"3 2005-06-03 01:43:41 2 \n",
"4 2005-06-02 04:33:21 1 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch the May 2005 rentals and preview the first five rows.\n",
"df_rentals = rentals_month(engine, month=5, year=2005)\n",
"df_rentals.head(5)"
]
},
{
"cell_type": "markdown",
"id": "c5a4eeef",
"metadata": {},
"source": [
"Develop a Python function called `rental_count_month` that takes the DataFrame returned by `rentals_month`, together with the month and year, and returns a new DataFrame containing the number of rentals made by each `customer_id` during the selected month and year."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9d6101c4",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count the rentals each customer made in the given month and year.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Rentals with at least ``customer_id`` and ``rental_id`` columns. When a\n",
"        ``rental_date`` column is present, only rows dated in ``month``/``year``\n",
"        are counted.\n",
"    month : int\n",
"        Month number of the period being counted.\n",
"    year : int\n",
"        Year of the period being counted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Columns customer_id, rental_count, month and year, one row per customer\n",
"        with at least one rental in the period.\n",
"    \"\"\"\n",
"    rentals = df\n",
"    if \"rental_date\" in df.columns:\n",
"        # Count only rows that really fall in the requested period, so a frame\n",
"        # spanning several months is not mislabelled with ``month``/``year``.\n",
"        rental_dates = pd.to_datetime(df[\"rental_date\"])\n",
"        in_period = (rental_dates.dt.month == month) & (rental_dates.dt.year == year)\n",
"        rentals = df.loc[in_period]\n",
"\n",
"    return (\n",
"        rentals.groupby(\"customer_id\")\n",
"        .agg(rental_count=(\"rental_id\", \"count\"))\n",
"        .reset_index()\n",
"        .assign(month=month, year=year)\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3702dc53",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rental_count month year\n",
"0 1 2 5 2005\n",
"1 2 1 5 2005\n",
"2 3 2 5 2005\n",
"3 5 3 5 2005\n",
"4 6 3 5 2005\n"
]
}
],
"source": [
"# Count rentals per customer for May 2005 and preview the result.\n",
"df_rentals = rentals_month(engine, 5, 2005)\n",
"df_counts = rental_count_month(df_rentals, 5, 2005)\n",
"\n",
"# A bare last expression renders the frame as a rich table instead of plain text.\n",
"df_counts.head()"
]
},
{
"cell_type": "markdown",
"id": "ff3dd271",
"metadata": {},
"source": [
"Create a Python function called `compare_rentals` that takes two DataFrames as input, each containing the number of rentals made by each customer in a different month and year. The function should return a combined DataFrame with a new `difference` column, which is the difference between the number of rentals in the two months."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "a5a57a25",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"Combine two per-customer rental counts and compute their difference.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df1, df2 : pandas.DataFrame\n",
"        Frames with ``customer_id`` and ``rental_count`` columns, and optionally\n",
"        ``month`` and ``year`` columns describing the period of each frame.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Outer join on ``customer_id`` with ``_1``/``_2`` suffixed columns and a\n",
"        ``difference`` column equal to ``rental_count_2 - rental_count_1``.\n",
"        Customers absent from one frame get a count of 0 for that frame.\n",
"    \"\"\"\n",
"    merged = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\", suffixes=(\"_1\", \"_2\"))\n",
"\n",
"    for suffix, source in ((\"_1\", df1), (\"_2\", df2)):\n",
"        # A customer missing from one period made no rentals in it.\n",
"        count_col = f\"rental_count{suffix}\"\n",
"        merged[count_col] = merged[count_col].fillna(0).astype(\"int64\")\n",
"\n",
"        # month/year describe the period, not the customer: restore the period's\n",
"        # own value rather than letting a blanket fillna(0) turn it into 0.\n",
"        for label in (\"month\", \"year\"):\n",
"            period_col = f\"{label}{suffix}\"\n",
"            if period_col not in merged.columns:\n",
"                continue\n",
"            period_values = source[label].dropna().unique()\n",
"            # Only fill when the frame has a single, unambiguous period value.\n",
"            if len(period_values) == 1:\n",
"                merged[period_col] = (\n",
"                    merged[period_col].fillna(period_values[0]).astype(source[label].dtype)\n",
"                )\n",
"\n",
"    merged[\"difference\"] = merged[\"rental_count_2\"] - merged[\"rental_count_1\"]\n",
"    return merged"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "47fccace",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rental_count_1 month_1 year_1 rental_count_2 month_2 \\\n",
"0 1 2.0 5.0 2005.0 7.0 6.0 \n",
"1 2 1.0 5.0 2005.0 1.0 6.0 \n",
"2 3 2.0 5.0 2005.0 4.0 6.0 \n",
"3 4 0.0 0.0 0.0 6.0 6.0 \n",
"4 5 3.0 5.0 2005.0 5.0 6.0 \n",
"\n",
" year_2 difference \n",
"0 2005.0 5.0 \n",
"1 2005.0 0.0 \n",
"2 2005.0 2.0 \n",
"3 2005.0 6.0 \n",
"4 2005.0 2.0 \n"
]
}
],
"source": [
"# Compare each customer's rental counts between May and June 2005.\n",
"df_may = rental_count_month(rentals_month(engine, 5, 2005), 5, 2005)\n",
"df_june = rental_count_month(rentals_month(engine, 6, 2005), 6, 2005)\n",
"\n",
"comparison = compare_rentals(df_may, df_june)\n",
"\n",
"# A bare last expression renders the frame as a rich table instead of plain text.\n",
"comparison.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c2a4eef",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}