Newer
Older
{
"cells": [
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:26.744932200Z",
"start_time": "2023-12-11T17:07:26.740214500Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import sklearn as sk\n",
"import matplotlib.pyplot as plt\n",
"import json\n",
"import math"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:55.590798700Z",
"start_time": "2023-12-11T17:07:26.744932200Z"
}
},
"ename": "NameError",
"evalue": "name 'pd' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m cf_2018 \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflight_data/Combined_Flights_2019.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2\u001b[0m cf_2018\u001b[38;5;241m.\u001b[39mhead()\n",
"\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined"
]
}
],
"source": [
"cf_2018 = pd.read_csv('flight_data/Combined_Flights_2018.csv')\n",
"cf_2019 = pd.read_csv('flight_data/Combined_Flights_2019.csv')\n",
"cf_2018.head()"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:56.493778700Z",
"start_time": "2023-12-11T17:07:55.953264900Z"
}
},
"outputs": [],
"source": [
"# Filter the dataframe to include only the delays from JFK\n",
"import RegressionModel\n",
"\n",
"filtered_df = cf_2018[(cf_2018['Origin'] == 'JFK')].copy()\n",
"RegressionModel.destinations = list(cf_2018['DestCityName'].unique())\n",
"\n",
"filtered_df"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:56.527530200Z",
"start_time": "2023-12-11T17:07:56.498869700Z"
}
},
"relevant_columns = ['FlightDate', 'DOT_ID_Operating_Airline', 'DestAirportID', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
"jfk_flights_2018 = filtered_df[relevant_columns].copy()\n",
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:56.542402200Z",
"start_time": "2023-12-11T17:07:56.526530600Z"
}
},
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:56.666500700Z",
"start_time": "2023-12-11T17:07:56.546471Z"
}
},
"# Convert 'FlightDate' into numerical components\n",
"jfk_flights_2018['FlightDate'] = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
"jfk_flights_2018['Year'] = jfk_flights_2018['FlightDate'].dt.year\n",
"jfk_flights_2018['Month'] = jfk_flights_2018['FlightDate'].dt.month\n",
"jfk_flights_2018['Day'] = jfk_flights_2018['FlightDate'].dt.day\n",
"\n",
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:56.737639800Z",
"start_time": "2023-12-11T17:07:56.666500700Z"
}
},
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Choose your target variable, e.g., 'DepDelayMinutes'\n",
"X = jfk_flights_2018.drop('DepDelayMinutes', axis=1)\n",
"y = jfk_flights_2018['DepDelayMinutes']\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"# Splitting the dataset into training and testing sets\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"X_train = X_train.apply(pd.to_numeric, errors='coerce')\n",
"y_train = y_train.apply(pd.to_numeric, errors='coerce')\n",
"X_test = X_test.apply(pd.to_numeric, errors='coerce')\n",
"y_test = y_test.apply(pd.to_numeric, errors='coerce')\n"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:07:57.060977300Z",
"start_time": "2023-12-11T17:07:56.738640200Z"
}
},
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# Initialize the model\n",
"model = LinearRegression()\n",
"\n",
"# Train the model\n",
"model.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2023-12-11T17:08:11.323526900Z",
"start_time": "2023-12-11T17:08:10.881732300Z"
}
},
"source": [
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"# Predict on the test set\n",
"y_pred = model.predict(X_test)\n",
"\n",
"# Evaluate the model\n",
"mse = mean_squared_error(y_test, y_pred)\n",
"r2 = r2_score(y_test, y_pred)\n",
"print(\"Mean Squared Error:\", mse)\n",
"print(\"R^2 Score:\", r2)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sas2",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}