{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:26.744932200Z",
     "start_time": "2023-12-11T17:07:26.740214500Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "import matplotlib.pyplot as plt\n",
    "import json\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:55.590798700Z",
     "start_time": "2023-12-11T17:07:26.744932200Z"
    }
   },
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "# Load both yearly extracts and stack them into a single frame.\n",
    "cf_2018 = pd.read_csv('flight_data/Combined_Flights_2018.csv')\n",
    "cf_2019 = pd.read_csv('flight_data/Combined_Flights_2019.csv')\n",
    "\n",
    "# pd.concat expects an iterable of frames as its first argument; passing the\n",
    "# frames as separate positional arguments raises a TypeError.\n",
    "# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of\n",
    "# repeating each file's own row labels.\n",
    "combined_data = pd.concat([cf_2018, cf_2019], ignore_index=True)\n",
    "\n",
    "# Later cells read the combined data through the name cf_2018.\n",
    "cf_2018 = combined_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.493778700Z",
     "start_time": "2023-12-11T17:07:55.953264900Z"
    }
   },
   "outputs": [],
   "source": [
    "import RegressionModel\n",
    "\n",
    "# Restrict the data to flights whose origin is JFK; .copy() makes the subset\n",
    "# independent of cf_2018.\n",
    "filtered_df = cf_2018.loc[cf_2018['Origin'].eq('JFK')].copy()\n",
    "\n",
    "# Hand the distinct destination city names to the RegressionModel module.\n",
    "# They are taken from every row of cf_2018, not only from the JFK subset.\n",
    "RegressionModel.destinations = list(cf_2018['DestCityName'].unique())\n",
    "\n",
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.527530200Z",
     "start_time": "2023-12-11T17:07:56.498869700Z"
    }
   },
   "outputs": [],
   "source": [
    "# Columns carried forward from the JFK subset.\n",
    "relevant_columns = [\n",
    "    'FlightDate',\n",
    "    'DOT_ID_Operating_Airline',\n",
    "    'DestAirportID',\n",
    "    'DepDelayMinutes',\n",
    "    'ArrDelayMinutes',\n",
    "]\n",
    "\n",
    "# Keep only those columns and discard every row with a missing value; the\n",
    "# trailing .copy() leaves an independent frame for later column assignments.\n",
    "jfk_flights_2018 = filtered_df.loc[:, relevant_columns].dropna().copy()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.542402200Z",
     "start_time": "2023-12-11T17:07:56.526530600Z"
    }
   },
   "outputs": [],
   "source": [
    "# Show the JFK flights frame as this cell's output.\n",
    "jfk_flights_2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.666500700Z",
     "start_time": "2023-12-11T17:07:56.546471Z"
    }
   },
   "outputs": [],
   "source": [
    "# Parse 'FlightDate' as datetimes and derive numeric Year / Month / Day\n",
    "# columns from it. assign() evaluates its keyword arguments in order, so the\n",
    "# date-part lambdas see the already-parsed 'FlightDate' column.\n",
    "jfk_flights_2018 = (\n",
    "    jfk_flights_2018\n",
    "    .assign(\n",
    "        FlightDate=lambda df: pd.to_datetime(df['FlightDate']),\n",
    "        Year=lambda df: df['FlightDate'].dt.year,\n",
    "        Month=lambda df: df['FlightDate'].dt.month,\n",
    "        Day=lambda df: df['FlightDate'].dt.day,\n",
    "    )\n",
    "    .dropna()  # discard any row that still has a missing value\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.737639800Z",
     "start_time": "2023-12-11T17:07:56.666500700Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# The regression target is 'DepDelayMinutes'; every other column of the frame\n",
    "# becomes a feature.\n",
    "# NOTE(review): X therefore still contains 'ArrDelayMinutes' and 'FlightDate';\n",
    "# arrival delay is presumably unknown before departure - confirm it should be\n",
    "# used as a predictor.\n",
    "y = jfk_flights_2018['DepDelayMinutes']\n",
    "X = jfk_flights_2018.drop(columns=['DepDelayMinutes'])\n",
    "\n",
    "jfk_flights_2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for testing; random_state fixes the shuffle so the\n",
    "# split is reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Coerce everything to numeric dtypes; values that cannot be parsed become NaN.\n",
    "# DataFrame.apply hands pd.to_numeric one whole column at a time.\n",
    "X_train = X_train.apply(pd.to_numeric, errors='coerce')\n",
    "X_test = X_test.apply(pd.to_numeric, errors='coerce')\n",
    "\n",
    "# Series.apply would call pd.to_numeric once per element, so convert each\n",
    "# target Series with a single vectorized call instead.\n",
    "y_train = pd.to_numeric(y_train, errors='coerce')\n",
    "y_test = pd.to_numeric(y_test, errors='coerce')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:57.060977300Z",
     "start_time": "2023-12-11T17:07:56.738640200Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Build a linear regressor and train it on the training split. fit() returns\n",
    "# the estimator itself, so construction and training can be chained.\n",
    "model = LinearRegression().fit(X_train, y_train)\n",
    "\n",
    "# Show the fitted estimator as the cell output.\n",
    "model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:08:11.323526900Z",
     "start_time": "2023-12-11T17:08:10.881732300Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "# Generate predictions for the held-out split.\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Compare the predictions with the true test targets.\n",
    "r2 = r2_score(y_true=y_test, y_pred=y_pred)\n",
    "mse = mean_squared_error(y_true=y_test, y_pred=y_pred)\n",
    "\n",
    "print(\"Mean Squared Error:\", mse)\n",
    "print(\"R^2 Score:\", r2)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sas2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}