Skip to content
Snippets Groups Projects
training.ipynb 51.1 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:26.744932200Z",
     "start_time": "2023-12-11T17:07:26.740214500Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "import matplotlib.pyplot as plt\n",
    "import json\n",
    "import math"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:55.590798700Z",
     "start_time": "2023-12-11T17:07:26.744932200Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FlightDate</th>\n",
       "      <th>Airline</th>\n",
       "      <th>Origin</th>\n",
       "      <th>Dest</th>\n",
       "      <th>Cancelled</th>\n",
       "      <th>Diverted</th>\n",
       "      <th>CRSDepTime</th>\n",
       "      <th>DepTime</th>\n",
       "      <th>DepDelayMinutes</th>\n",
       "      <th>DepDelay</th>\n",
       "      <th>...</th>\n",
       "      <th>WheelsOff</th>\n",
       "      <th>WheelsOn</th>\n",
       "      <th>TaxiIn</th>\n",
       "      <th>CRSArrTime</th>\n",
       "      <th>ArrDelay</th>\n",
       "      <th>ArrDel15</th>\n",
       "      <th>ArrivalDelayGroups</th>\n",
       "      <th>ArrTimeBlk</th>\n",
       "      <th>DistanceGroup</th>\n",
       "      <th>DivAirportLandings</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2018-01-23</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>ABY</td>\n",
       "      <td>ATL</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>1202</td>\n",
       "      <td>1157.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1211.0</td>\n",
       "      <td>1249.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1304</td>\n",
       "      <td>-8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1300-1359</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2018-01-24</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>ABY</td>\n",
       "      <td>ATL</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>1202</td>\n",
       "      <td>1157.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1210.0</td>\n",
       "      <td>1246.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1304</td>\n",
       "      <td>-6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1300-1359</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2018-01-25</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>ABY</td>\n",
       "      <td>ATL</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>1202</td>\n",
       "      <td>1153.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-9.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1211.0</td>\n",
       "      <td>1251.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1304</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1300-1359</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2018-01-26</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>ABY</td>\n",
       "      <td>ATL</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>1202</td>\n",
       "      <td>1150.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1207.0</td>\n",
       "      <td>1242.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1304</td>\n",
       "      <td>-11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1300-1359</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2018-01-27</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>ABY</td>\n",
       "      <td>ATL</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>1400</td>\n",
       "      <td>1355.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1412.0</td>\n",
       "      <td>1448.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1500</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1500-1559</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 61 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   FlightDate            Airline Origin Dest  Cancelled  Diverted  CRSDepTime  \\\n",
       "0  2018-01-23  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
       "1  2018-01-24  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
       "2  2018-01-25  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
       "3  2018-01-26  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
       "4  2018-01-27  Endeavor Air Inc.    ABY  ATL      False     False        1400   \n",
       "\n",
       "   DepTime  DepDelayMinutes  DepDelay  ...  WheelsOff  WheelsOn  TaxiIn  \\\n",
       "0   1157.0              0.0      -5.0  ...     1211.0    1249.0     7.0   \n",
       "1   1157.0              0.0      -5.0  ...     1210.0    1246.0    12.0   \n",
       "2   1153.0              0.0      -9.0  ...     1211.0    1251.0    11.0   \n",
       "3   1150.0              0.0     -12.0  ...     1207.0    1242.0    11.0   \n",
       "4   1355.0              0.0      -5.0  ...     1412.0    1448.0    11.0   \n",
       "\n",
       "   CRSArrTime  ArrDelay  ArrDel15  ArrivalDelayGroups  ArrTimeBlk  \\\n",
       "0        1304      -8.0       0.0                -1.0   1300-1359   \n",
       "1        1304      -6.0       0.0                -1.0   1300-1359   \n",
       "2        1304      -2.0       0.0                -1.0   1300-1359   \n",
       "3        1304     -11.0       0.0                -1.0   1300-1359   \n",
       "4        1500      -1.0       0.0                -1.0   1500-1559   \n",
       "\n",
       "   DistanceGroup  DivAirportLandings  \n",
       "0              1                 0.0  \n",
       "1              1                 0.0  \n",
       "2              1                 0.0  \n",
       "3              1                 0.0  \n",
       "4              1                 0.0  \n",
       "\n",
       "[5 rows x 61 columns]"
      ]
Fadi Gattoussi's avatar
Fadi Gattoussi committed
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the 2018 combined flights export and preview the first rows\n",
    "flights_csv_path = 'flight_data/Combined_Flights_2018.csv'\n",
    "cf_2018 = pd.read_csv(flights_csv_path)\n",
    "cf_2018.head()"
   ]
  },
  {
   "cell_type": "code",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.493778700Z",
     "start_time": "2023-12-11T17:07:55.953264900Z"
    }
   },
   "outputs": [],
   "source": [
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    "import RegressionModel\n",
    "\n",
    "# Keep only the flights that depart from JFK\n",
    "departs_from_jfk = cf_2018['Origin'].eq('JFK')\n",
    "filtered_df = cf_2018[departs_from_jfk]\n",
    "\n",
    "# Hand the distinct destination city names to the RegressionModel module\n",
    "# NOTE(review): these are taken from the full dataset, not only from JFK departures - confirm intended\n",
    "RegressionModel.destinations = cf_2018['DestCityName'].unique().tolist()"
   ]
  },
  {
   "cell_type": "code",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.527530200Z",
     "start_time": "2023-12-11T17:07:56.498869700Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "outputs": [],
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    "relevant_columns = ['FlightDate', 'Airline', 'Dest', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
    "# Keep only the modelling columns and discard rows with any missing value;\n",
    "# the trailing copy() detaches the result from filtered_df\n",
    "jfk_flights_2018 = filtered_df[relevant_columns].dropna().copy()"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.542402200Z",
     "start_time": "2023-12-11T17:07:56.526530600Z"
    }
   },
   "outputs": [
    {
     "data": {
Fadi Gattoussi's avatar
Fadi Gattoussi committed
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FlightDate</th>\n",
       "      <th>Airline</th>\n",
       "      <th>Dest</th>\n",
       "      <th>DepDelayMinutes</th>\n",
       "      <th>ArrDelayMinutes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5544</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>SJC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5547</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>MSY</td>\n",
       "      <td>86.0</td>\n",
       "      <td>77.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5548</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>JAX</td>\n",
       "      <td>29.0</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5554</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>ROC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5565</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>BUF</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5666920</th>\n",
       "      <td>2018-09-10</td>\n",
       "      <td>Delta Air Lines Inc.</td>\n",
       "      <td>SFO</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5666921</th>\n",
       "      <td>2018-09-10</td>\n",
       "      <td>Delta Air Lines Inc.</td>\n",
       "      <td>MSP</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5667057</th>\n",
       "      <td>2018-09-10</td>\n",
       "      <td>Delta Air Lines Inc.</td>\n",
       "      <td>LAX</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5667231</th>\n",
       "      <td>2018-09-10</td>\n",
       "      <td>Delta Air Lines Inc.</td>\n",
       "      <td>SEA</td>\n",
       "      <td>48.0</td>\n",
       "      <td>44.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5672362</th>\n",
       "      <td>2018-09-04</td>\n",
       "      <td>Endeavor Air Inc.</td>\n",
       "      <td>BNA</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>83754 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         FlightDate               Airline Dest  DepDelayMinutes  \\\n",
       "5544     2018-01-22       JetBlue Airways  SJC              0.0   \n",
       "5547     2018-01-22       JetBlue Airways  MSY             86.0   \n",
       "5548     2018-01-22       JetBlue Airways  JAX             29.0   \n",
       "5554     2018-01-22       JetBlue Airways  ROC              0.0   \n",
       "5565     2018-01-22       JetBlue Airways  BUF              0.0   \n",
       "...             ...                   ...  ...              ...   \n",
       "5666920  2018-09-10  Delta Air Lines Inc.  SFO              0.0   \n",
       "5666921  2018-09-10  Delta Air Lines Inc.  MSP              0.0   \n",
       "5667057  2018-09-10  Delta Air Lines Inc.  LAX             13.0   \n",
       "5667231  2018-09-10  Delta Air Lines Inc.  SEA             48.0   \n",
       "5672362  2018-09-04     Endeavor Air Inc.  BNA              0.0   \n",
       "\n",
       "         ArrDelayMinutes  \n",
       "5544                 0.0  \n",
       "5547                77.0  \n",
       "5548                11.0  \n",
       "5554                 0.0  \n",
       "5565                 0.0  \n",
       "...                  ...  \n",
       "5666920              3.0  \n",
       "5666921              0.0  \n",
       "5667057              0.0  \n",
       "5667231             44.0  \n",
       "5672362              0.0  \n",
       "\n",
       "[83754 rows x 5 columns]"
      ]
Fadi Gattoussi's avatar
Fadi Gattoussi committed
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.666500700Z",
     "start_time": "2023-12-11T17:07:56.546471Z"
    }
   },
   "outputs": [
    {
Fadi Gattoussi's avatar
Fadi Gattoussi committed
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FlightDate</th>\n",
       "      <th>Airline</th>\n",
       "      <th>DepDelayMinutes</th>\n",
       "      <th>ArrDelayMinutes</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Dest_ABQ</th>\n",
       "      <th>Dest_ACK</th>\n",
       "      <th>Dest_ATL</th>\n",
       "      <th>...</th>\n",
       "      <th>Dest_SFO</th>\n",
       "      <th>Dest_SJC</th>\n",
       "      <th>Dest_SJU</th>\n",
       "      <th>Dest_SLC</th>\n",
       "      <th>Dest_SMF</th>\n",
       "      <th>Dest_SNA</th>\n",
       "      <th>Dest_SRQ</th>\n",
       "      <th>Dest_STT</th>\n",
       "      <th>Dest_SYR</th>\n",
       "      <th>Dest_TPA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5544</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5547</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>86.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>2018</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5548</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>29.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2018</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5554</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5565</th>\n",
       "      <td>2018-01-22</td>\n",
       "      <td>JetBlue Airways</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 78 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     FlightDate          Airline  DepDelayMinutes  ArrDelayMinutes  Year  \\\n",
       "5544 2018-01-22  JetBlue Airways              0.0              0.0  2018   \n",
       "5547 2018-01-22  JetBlue Airways             86.0             77.0  2018   \n",
       "5548 2018-01-22  JetBlue Airways             29.0             11.0  2018   \n",
       "5554 2018-01-22  JetBlue Airways              0.0              0.0  2018   \n",
       "5565 2018-01-22  JetBlue Airways              0.0              0.0  2018   \n",
       "\n",
       "      Month  Day  Dest_ABQ  Dest_ACK  Dest_ATL  ...  Dest_SFO  Dest_SJC  \\\n",
       "5544      1   22     False     False     False  ...     False      True   \n",
       "5547      1   22     False     False     False  ...     False     False   \n",
       "5548      1   22     False     False     False  ...     False     False   \n",
       "5554      1   22     False     False     False  ...     False     False   \n",
       "5565      1   22     False     False     False  ...     False     False   \n",
       "\n",
       "      Dest_SJU  Dest_SLC  Dest_SMF  Dest_SNA  Dest_SRQ  Dest_STT  Dest_SYR  \\\n",
       "5544     False     False     False     False     False     False     False   \n",
       "5547     False     False     False     False     False     False     False   \n",
       "5548     False     False     False     False     False     False     False   \n",
       "5554     False     False     False     False     False     False     False   \n",
       "5565     False     False     False     False     False     False     False   \n",
       "\n",
       "      Dest_TPA  \n",
       "5544     False  \n",
       "5547     False  \n",
       "5548     False  \n",
       "5554     False  \n",
       "5565     False  \n",
       "\n",
       "[5 rows x 78 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    "# Parse 'FlightDate' once and append its calendar parts as numeric columns\n",
    "flight_dates = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
    "jfk_flights_2018 = jfk_flights_2018.assign(\n",
    "    FlightDate=flight_dates,\n",
    "    Year=flight_dates.dt.year,\n",
    "    Month=flight_dates.dt.month,\n",
    "    Day=flight_dates.dt.day,\n",
    ")\n",
    "\n",
    "# One-hot encoding for categorical variables\n",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    "# 'Airline' is a text column just like 'Dest'. If it is left unencoded, the later\n",
    "# pd.to_numeric(..., errors='coerce') step turns it into an all-NaN column and\n",
    "# LinearRegression.fit rejects the NaN values, so both columns are one-hot encoded here.\n",
    "df_jfk_encoded = pd.get_dummies(jfk_flights_2018, columns=['Airline', 'Dest'])\n",
    "df_jfk_encoded.head()\n"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.737639800Z",
     "start_time": "2023-12-11T17:07:56.666500700Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "outputs": [
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Target: departure delay in minutes; every remaining column is used as a feature\n",
    "# NOTE(review): X still contains 'ArrDelayMinutes' - presumably unknown at prediction time, confirm it belongs here\n",
    "target_column = 'DepDelayMinutes'\n",
    "y = df_jfk_encoded[target_column]\n",
    "X = df_jfk_encoded.drop(columns=[target_column])\n",
    "\n",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    "# Histogram of the target variable (100 bins) on an explicit figure/axes pair\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(y, bins=100)\n",
    "ax.set_xlabel('Departure Delay (minutes)')\n",
    "ax.set_ylabel('Frequency')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split reproducible\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42\n",
    ")\n",
    "\n",
    "# Cast every split to numeric dtypes; values that cannot be parsed become NaN (errors='coerce')\n",
    "X_train, X_test = (features.apply(pd.to_numeric, errors='coerce') for features in (X_train, X_test))\n",
    "y_train, y_test = (pd.to_numeric(target, errors='coerce') for target in (y_train, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# The previous cell already performs the seeded train/test split and the numeric\n",
    "# coercion; this cell used to be an exact copy of it. It now validates that result instead.\n",
    "assert len(X_train) + len(X_test) == len(X), 'train/test split lost or duplicated rows'\n",
    "assert X_train.index.equals(y_train.index), 'training features and target are misaligned'\n",
    "assert X_test.index.equals(y_test.index), 'test features and target are misaligned'\n",
    "\n",
    "# errors='coerce' silently replaces non-numeric values with NaN, which LinearRegression\n",
    "# rejects, so name the offending columns here instead of failing later inside fit()\n",
    "nan_columns = X_train.columns[X_train.isna().any()].tolist()\n",
    "assert not nan_columns, f'feature columns containing NaN after numeric coercion: {nan_columns}'"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:57.060977300Z",
     "start_time": "2023-12-11T17:07:56.738640200Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Build a default ordinary-least-squares regressor and fit it on the training split;\n",
    "# fit() returns the estimator itself, so the same object is kept and displayed\n",
    "model = LinearRegression().fit(X_train, y_train)\n",
    "model\n"
   ]
  },
  {
   "cell_type": "code",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:08:11.323526900Z",
     "start_time": "2023-12-11T17:08:10.881732300Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "# Score the fitted model on the held-out rows\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Compute both metrics in one pass (mean squared error first, then R^2) and report them\n",
    "mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))\n",
    "print(\"Mean Squared Error:\", mse)\n",
    "print(\"R^2 Score:\", r2)\n"
   ]
  },
  {
   "cell_type": "code",
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:15:24.634324900Z",
     "start_time": "2023-12-11T17:15:24.628684100Z"
    }
   },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "outputs": [],
   "source": [
    "# Show the feature column names the model was trained on\n",
    "print(X_test.columns.tolist())"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:28:49.141033600Z",
     "start_time": "2023-12-11T17:28:49.133184200Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "\n",
    "# Reuse one held-out row as a template and move it to 1 January 2022\n",
    "query_date = pd.Timestamp('2022-01-01')\n",
    "i = X_test.iloc[[3]].copy()\n",
    "# Timestamp.value is the date as integer nanoseconds since the Unix epoch (1640995200000000000 here)\n",
    "i['FlightDate'] = query_date.value\n",
    "i['Year'] = query_date.year\n",
    "i['Month'] = query_date.month\n",
    "i['Day'] = query_date.day\n",
    "print(i)\n"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   ]
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:17:36.066986Z",
     "start_time": "2023-12-11T17:17:36.055529500Z"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Single-row frame holding the held-out row at position 3\n",
    "row_position = 3\n",
    "X_mike = X_test.iloc[[row_position]]"
   ]
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:29:00.458780500Z",
     "start_time": "2023-12-11T17:29:00.448275500Z"
Fadi Gattoussi's avatar
Fadi Gattoussi committed
    },
Fadi Gattoussi's avatar
Fadi Gattoussi committed
   },
   "outputs": [],
   "source": [
    "# Predicted departure delay (minutes) for the single-row frame `i`\n",
    "model.predict(X=i)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sas2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}