Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
311 changes: 311 additions & 0 deletions solution.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "70a3d4bd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from datetime import date\n",
"from getpass import getpass\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy.engine import URL\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "a06f6290",
"metadata": {},
"outputs": [],
"source": [
"# The password is read from the environment (or prompted for) so it is never\n",
"# committed with the notebook. The non-secret settings fall back to the same\n",
"# local values the notebook used before.\n",
"db_password = os.environ.get(\"SAKILA_DB_PASSWORD\") or getpass(\"MySQL password: \")\n",
"\n",
"# URL.create escapes special characters in the credentials, which a\n",
"# hand-built connection string would not.\n",
"engine = create_engine(\n",
"    URL.create(\n",
"        drivername=\"mysql+pymysql\",\n",
"        username=os.environ.get(\"SAKILA_DB_USER\", \"ironhack\"),\n",
"        password=db_password,\n",
"        host=os.environ.get(\"SAKILA_DB_HOST\", \"localhost\"),\n",
"        port=int(os.environ.get(\"SAKILA_DB_PORT\", \"3306\")),\n",
"        database=os.environ.get(\"SAKILA_DB_NAME\", \"sakila\"),\n",
"    )\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "642d6dee",
"metadata": {},
"outputs": [],
"source": [
"from sqlalchemy import text\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "dce446ce",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"Fetch the rentals made during one calendar month.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    engine\n",
"        SQLAlchemy engine; a connection is opened and closed per call.\n",
"    month : int\n",
"        Calendar month, 1-12.\n",
"    year : int\n",
"        Calendar year.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Columns ``rental_id``, ``rental_date`` and ``customer_id`` from the\n",
"        ``rental`` table, restricted to the requested month.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``month``/``year`` are outside what ``datetime.date`` accepts.\n",
"    \"\"\"\n",
"    # Half-open range [first day of month, first day of next month).\n",
"    # Filtering on the bare column instead of MONTH()/YEAR() applied to it\n",
"    # allows MySQL to use an index on rental_date when one exists, and\n",
"    # building the dates up front rejects an invalid month instead of\n",
"    # silently returning an empty frame.\n",
"    month_start = date(year, month, 1)\n",
"    if month == 12:\n",
"        next_month_start = date(year + 1, 1, 1)\n",
"    else:\n",
"        next_month_start = date(year, month + 1, 1)\n",
"\n",
"    query = text(\"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            rental_date,\n",
"            customer_id\n",
"        FROM rental\n",
"        WHERE rental_date >= :month_start\n",
"          AND rental_date < :next_month_start\n",
"    \"\"\")\n",
"\n",
"    bounds = {\"month_start\": month_start, \"next_month_start\": next_month_start}\n",
"    with engine.connect() as conn:\n",
"        return pd.read_sql(query, conn, params=bounds)\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "8d80bdb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rental_id</th>\n",
" <th>rental_date</th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2005-05-24 22:53:30</td>\n",
" <td>130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2005-05-24 22:54:33</td>\n",
" <td>459</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2005-05-24 23:03:39</td>\n",
" <td>408</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>2005-05-24 23:04:41</td>\n",
" <td>333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>2005-05-24 23:05:21</td>\n",
" <td>222</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rental_id rental_date customer_id\n",
"0 1 2005-05-24 22:53:30 130\n",
"1 2 2005-05-24 22:54:33 459\n",
"2 3 2005-05-24 23:03:39 408\n",
"3 4 2005-05-24 23:04:41 333\n",
"4 5 2005-05-24 23:05:21 222"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the May and June 2005 rentals, then preview the May frame.\n",
"rentals_may, rentals_june = (\n",
"    rentals_month(engine, month=month_number, year=2005)\n",
"    for month_number in (5, 6)\n",
")\n",
"\n",
"rentals_may.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "6ab01c83",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count the rows of ``df`` per ``customer_id``.\n",
"\n",
"    The count column is named ``rentals_<MM>_<year>``, with the month\n",
"    zero-padded to two digits (for example ``rentals_05_2005``).\n",
"\n",
"    Returns a DataFrame holding ``customer_id`` and that count column.\n",
"    \"\"\"\n",
"    count_label = \"rentals_{:02d}_{}\".format(month, year)\n",
"    per_customer = df.groupby(\"customer_id\").size().rename(count_label)\n",
"    return per_customer.reset_index()\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "0453b3dd",
"metadata": {},
"outputs": [],
"source": [
"# Per-customer rental counts for each of the two months.\n",
"rentals_may_count = rental_count_month(df=rentals_may, month=5, year=2005)\n",
"rentals_june_count = rental_count_month(df=rentals_june, month=6, year=2005)\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "37c3f199",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"Inner-join two per-customer count frames and add their difference.\n",
"\n",
"    The frames are merged on ``customer_id``, so only customers present in\n",
"    both inputs are kept. ``difference`` is the merged frame's third column\n",
"    minus its second column, selected by position.\n",
"    \"\"\"\n",
"    merged = pd.merge(df1, df2, how=\"inner\", on=\"customer_id\")\n",
"\n",
"    # Positions 1 and 2 of the merged frame are used as the two count columns.\n",
"    first_counts, second_counts = merged.columns[1], merged.columns[2]\n",
"    merged[\"difference\"] = merged[second_counts].sub(merged[first_counts])\n",
"\n",
"    return merged\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "2a294d7f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>rentals_05_2005</th>\n",
" <th>rentals_06_2005</th>\n",
" <th>difference</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"0 1 2 7 5\n",
"1 2 1 1 0\n",
"2 3 2 4 2\n",
"3 5 3 5 2\n",
"4 6 3 4 1"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Customers present in both months, with the June-minus-May change in rentals.\n",
"comparison = compare_rentals(df1=rentals_may_count, df2=rentals_june_count)\n",
"comparison.head()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}