{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# JFK departure-delay regression\n",
    "\n",
    "Loads the 2018 and 2019 combined flight CSVs, keeps flights whose `Origin` is JFK, and fits a linear regression with `DepDelayMinutes` as the target.\n",
    "\n",
    "The notebook is meant to be run top to bottom with **Restart Kernel → Run All**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import math\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import RegressionModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "FLIGHTS_2018_CSV = 'flight_data/Combined_Flights_2018.csv'\n",
    "FLIGHTS_2019_CSV = 'flight_data/Combined_Flights_2019.csv'\n",
    "\n",
    "ORIGIN_AIRPORT = 'JFK'\n",
    "TARGET_COLUMN = 'DepDelayMinutes'\n",
    "\n",
    "# Columns that feed the model (FlightDate is later split into Year/Month/Day).\n",
    "RELEVANT_COLUMNS = ['FlightDate', 'DOT_ID_Operating_Airline', 'DestAirportID', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
    "\n",
    "# Every column this notebook reads from the CSVs; nothing else is loaded.\n",
    "LOAD_COLUMNS = ['Origin', 'DestCityName'] + RELEVANT_COLUMNS\n",
    "\n",
    "TEST_SIZE = 0.2\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# usecols restricts the load to the columns used below. A previous run of this\n",
    "# cell ended in a kernel crash; presumably the full-width CSVs exhausted memory\n",
    "# (NOTE(review): confirm against the Jupyter log).\n",
    "cf_2018 = pd.read_csv(FLIGHTS_2018_CSV, usecols=LOAD_COLUMNS)\n",
    "cf_2019 = pd.read_csv(FLIGHTS_2019_CSV, usecols=LOAD_COLUMNS)\n",
    "\n",
    "# pd.concat takes ONE iterable of frames; passing the frames as separate\n",
    "# positional arguments raises TypeError.\n",
    "combined_data = pd.concat([cf_2018, cf_2019], ignore_index=True)\n",
    "\n",
    "assert len(combined_data) == len(cf_2018) + len(cf_2019)\n",
    "combined_data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filter to JFK departures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the flights departing from the configured origin airport.\n",
    "filtered_df = combined_data[combined_data['Origin'] == ORIGIN_AIRPORT].copy()\n",
    "assert not filtered_df.empty, 'no flights found for the configured origin airport'\n",
    "\n",
    "# Publish the destination city names on the RegressionModel module.\n",
    "# The list is built from ALL loaded flights, not only the JFK subset.\n",
    "RegressionModel.destinations = list(combined_data['DestCityName'].unique())\n",
    "\n",
    "filtered_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Narrow to the modelling columns and drop rows with any missing value.\n",
    "jfk_flights = filtered_df[RELEVANT_COLUMNS].dropna()\n",
    "\n",
    "jfk_flights.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build features\n",
    "\n",
    "`FlightDate` is replaced by numeric `Year` / `Month` / `Day` columns. The raw date column is dropped afterwards: left in place it would be coerced to an epoch-nanosecond integer that duplicates the three components and is badly scaled for least squares.\n",
    "\n",
    "**TODO (review):**\n",
    "\n",
    "- `ArrDelayMinutes` is used as a feature. Arrival delay is only observed after a flight has departed, so confirm it is really meant to be an input when predicting departure delay.\n",
    "- `DOT_ID_Operating_Airline` and `DestAirportID` enter the model as plain numbers; confirm whether they should be encoded as categories instead."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flight_dates = pd.to_datetime(jfk_flights['FlightDate'])\n",
    "\n",
    "jfk_features = (\n",
    "    jfk_flights\n",
    "    .drop(columns='FlightDate')\n",
    "    .assign(\n",
    "        Year=flight_dates.dt.year,\n",
    "        Month=flight_dates.dt.month,\n",
    "        Day=flight_dates.dt.day,\n",
    "    )\n",
    "    # Coerce column-wise BEFORE splitting so rows that fail to parse are\n",
    "    # dropped here instead of reaching the model as NaN.\n",
    "    .apply(pd.to_numeric, errors='coerce')\n",
    "    .dropna()\n",
    ")\n",
    "\n",
    "assert not jfk_features.empty, 'no rows left after cleaning'\n",
    "assert not jfk_features.isna().any().any()\n",
    "jfk_features.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = jfk_features.drop(columns=TARGET_COLUMN)\n",
    "y = jfk_features[TARGET_COLUMN]\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)\n",
    "\n",
    "assert len(X_train) + len(X_test) == len(jfk_features)\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()\n",
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate on the held-out set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)\n",
    "\n",
    "mse = mean_squared_error(y_test, y_pred)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "print(\"Mean Squared Error:\", mse)\n",
    "print(\"R^2 Score:\", r2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sas2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}