{
"cells": [
{
"cell_type": "markdown",
"id": "59226aa1-f47d-4a55-b942-773019670fba",
"metadata": {},
"source": [
"# Data Analysis with Pandas\n",
"\n",
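"This notebook loads sales order data from `data/sales_data.csv` with pandas, previews the first rows, inspects column types and non-null counts, and computes summary statistics for the numeric columns."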
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7973c27-a77e-4549-8a77-e231a7d990f9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pandas version: 2.3.3\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"print('Pandas version:', pd.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2766c1a-3fa0-48a1-a4ba-f087964a937d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded 15 rows\n",
"Columns: ['order_id', 'customer_name', 'product', 'quantity', 'unit_price', 'total_amount', 'order_date', 'region']\n"
]
}
],
"source": [
"# Read CSV file\n",
"df = pd.read_csv('data/sales_data.csv')\n",
"\n",
"print(f'Loaded {len(df)} rows')\n",
"print(f'Columns: {list(df.columns)}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eba24ec6-cf14-4234-8a82-95bbfb016a7d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>order_id</th>\n",
" <th>customer_name</th>\n",
" <th>product</th>\n",
" <th>quantity</th>\n",
" <th>unit_price</th>\n",
" <th>total_amount</th>\n",
" <th>order_date</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1001</td>\n",
" <td>Alice Johnson</td>\n",
" <td>Laptop</td>\n",
" <td>1</td>\n",
" <td>1200.0</td>\n",
" <td>1200.0</td>\n",
" <td>2026-01-15</td>\n",
" <td>North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1002</td>\n",
" <td>Bob Smith</td>\n",
" <td>Mouse</td>\n",
" <td>2</td>\n",
" <td>25.0</td>\n",
" <td>50.0</td>\n",
" <td>2026-01-16</td>\n",
" <td>South</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1003</td>\n",
" <td>Carol White</td>\n",
" <td>Keyboard</td>\n",
" <td>1</td>\n",
" <td>75.0</td>\n",
" <td>75.0</td>\n",
" <td>2026-01-17</td>\n",
" <td>East</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1004</td>\n",
" <td>David Brown</td>\n",
" <td>Monitor</td>\n",
" <td>2</td>\n",
" <td>350.0</td>\n",
" <td>700.0</td>\n",
" <td>2026-01-18</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1005</td>\n",
" <td>Eve Davis</td>\n",
" <td>Laptop</td>\n",
" <td>1</td>\n",
" <td>1200.0</td>\n",
" <td>1200.0</td>\n",
" <td>2026-01-20</td>\n",
" <td>North</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" order_id customer_name product quantity unit_price total_amount \\\n",
"0 1001 Alice Johnson Laptop 1 1200.0 1200.0 \n",
"1 1002 Bob Smith Mouse 2 25.0 50.0 \n",
"2 1003 Carol White Keyboard 1 75.0 75.0 \n",
"3 1004 David Brown Monitor 2 350.0 700.0 \n",
"4 1005 Eve Davis Laptop 1 1200.0 1200.0 \n",
"\n",
" order_date region \n",
"0 2026-01-15 North \n",
"1 2026-01-16 South \n",
"2 2026-01-17 East \n",
"3 2026-01-18 West \n",
"4 2026-01-20 North "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display first few rows of the dataframe\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5412f31-b042-4188-a69d-ff04920d1a3a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 15 entries, 0 to 14\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 order_id 15 non-null int64 \n",
" 1 customer_name 15 non-null object \n",
" 2 product 15 non-null object \n",
" 3 quantity 15 non-null int64 \n",
" 4 unit_price 15 non-null float64\n",
" 5 total_amount 15 non-null float64\n",
" 6 order_date 15 non-null object \n",
" 7 region 15 non-null object \n",
"dtypes: float64(2), int64(2), object(4)\n",
"memory usage: 1.1+ KB\n"
]
}
],
"source": [
"# Show each column's dtype and non-null count, plus the DataFrame's memory usage\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4b12332e-2745-4b55-93cc-5fbe40405085",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>order_id</th>\n",
" <th>quantity</th>\n",
" <th>unit_price</th>\n",
" <th>total_amount</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>15.000000</td>\n",
" <td>15.000000</td>\n",
" <td>15.000000</td>\n",
" <td>15.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1008.000000</td>\n",
" <td>2.533333</td>\n",
" <td>328.000000</td>\n",
" <td>470.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.472136</td>\n",
" <td>2.386470</td>\n",
" <td>463.337273</td>\n",
" <td>661.352690</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1001.000000</td>\n",
" <td>1.000000</td>\n",
" <td>10.000000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1004.500000</td>\n",
" <td>1.000000</td>\n",
" <td>50.000000</td>\n",
" <td>90.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1008.000000</td>\n",
" <td>2.000000</td>\n",
" <td>80.000000</td>\n",
" <td>150.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1011.500000</td>\n",
" <td>2.500000</td>\n",
" <td>350.000000</td>\n",
" <td>525.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1015.000000</td>\n",
" <td>10.000000</td>\n",
" <td>1200.000000</td>\n",
" <td>2400.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" order_id quantity unit_price total_amount\n",
"count 15.000000 15.000000 15.000000 15.000000\n",
"mean 1008.000000 2.533333 328.000000 470.333333\n",
"std 4.472136 2.386470 463.337273 661.352690\n",
"min 1001.000000 1.000000 10.000000 50.000000\n",
"25% 1004.500000 1.000000 50.000000 90.000000\n",
"50% 1008.000000 2.000000 80.000000 150.000000\n",
"75% 1011.500000 2.500000 350.000000 525.000000\n",
"max 1015.000000 10.000000 1200.000000 2400.000000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for the numeric columns (order_id is an identifier, so its statistics are not meaningful)\n",
"df.describe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}