Skip to content
Snippets Groups Projects
training.ipynb 23.9 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:26.744932200Z",
     "start_time": "2023-12-11T17:07:26.740214500Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "import matplotlib.pyplot as plt\n",
    "import json\n",
    "import math"
   "execution_count": 48,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:55.590798700Z",
     "start_time": "2023-12-11T17:07:26.744932200Z"
    }
   },
     "data": {
      "text/plain": "   FlightDate            Airline Origin Dest  Cancelled  Diverted  CRSDepTime  \\\n0  2018-01-23  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n1  2018-01-24  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n2  2018-01-25  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n3  2018-01-26  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n4  2018-01-27  Endeavor Air Inc.    ABY  ATL      False     False        1400   \n\n   DepTime  DepDelayMinutes  DepDelay  ...  WheelsOff  WheelsOn  TaxiIn  \\\n0   1157.0              0.0      -5.0  ...     1211.0    1249.0     7.0   \n1   1157.0              0.0      -5.0  ...     1210.0    1246.0    12.0   \n2   1153.0              0.0      -9.0  ...     1211.0    1251.0    11.0   \n3   1150.0              0.0     -12.0  ...     1207.0    1242.0    11.0   \n4   1355.0              0.0      -5.0  ...     1412.0    1448.0    11.0   \n\n   CRSArrTime  ArrDelay  ArrDel15  ArrivalDelayGroups  ArrTimeBlk  \\\n0        1304      -8.0       0.0                -1.0   1300-1359   \n1        1304      -6.0       0.0                -1.0   1300-1359   \n2        1304      -2.0       0.0                -1.0   1300-1359   \n3        1304     -11.0       0.0                -1.0   1300-1359   \n4        1500      -1.0       0.0                -1.0   1500-1559   \n\n   DistanceGroup  DivAirportLandings  \n0              1                 0.0  \n1              1                 0.0  \n2              1                 0.0  \n3              1                 0.0  \n4              1                 0.0  \n\n[5 rows x 61 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>FlightDate</th>\n      <th>Airline</th>\n      <th>Origin</th>\n      <th>Dest</th>\n      <th>Cancelled</th>\n      <th>Diverted</th>\n      <th>CRSDepTime</th>\n      <th>DepTime</th>\n      <th>DepDelayMinutes</th>\n      <th>DepDelay</th>\n      <th>...</th>\n      <th>WheelsOff</th>\n      <th>WheelsOn</th>\n      <th>TaxiIn</th>\n      <th>CRSArrTime</th>\n      <th>ArrDelay</th>\n      <th>ArrDel15</th>\n      <th>ArrivalDelayGroups</th>\n      <th>ArrTimeBlk</th>\n      <th>DistanceGroup</th>\n      <th>DivAirportLandings</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2018-01-23</td>\n      <td>Endeavor Air Inc.</td>\n      <td>ABY</td>\n      <td>ATL</td>\n      <td>False</td>\n      <td>False</td>\n      <td>1202</td>\n      <td>1157.0</td>\n      <td>0.0</td>\n      <td>-5.0</td>\n      <td>...</td>\n      <td>1211.0</td>\n      <td>1249.0</td>\n      <td>7.0</td>\n      <td>1304</td>\n      <td>-8.0</td>\n      <td>0.0</td>\n      <td>-1.0</td>\n      <td>1300-1359</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2018-01-24</td>\n      <td>Endeavor Air Inc.</td>\n      <td>ABY</td>\n      <td>ATL</td>\n      <td>False</td>\n      <td>False</td>\n      <td>1202</td>\n      <td>1157.0</td>\n      <td>0.0</td>\n      <td>-5.0</td>\n      <td>...</td>\n      <td>1210.0</td>\n      <td>1246.0</td>\n      <td>12.0</td>\n      <td>1304</td>\n      <td>-6.0</td>\n      <td>0.0</td>\n      <td>-1.0</td>\n      <td>1300-1359</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2018-01-25</td>\n      <td>Endeavor Air Inc.</td>\n      <td>ABY</td>\n      <td>ATL</td>\n      <td>False</td>\n      <td>False</td>\n      <td>1202</td>\n      <td>1153.0</td>\n      <td>0.0</td>\n      <td>-9.0</td>\n      <td>...</td>\n      <td>1211.0</td>\n      <td>1251.0</td>\n      <td>11.0</td>\n      <td>1304</td>\n      <td>-2.0</td>\n      <td>0.0</td>\n      <td>-1.0</td>\n      <td>1300-1359</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2018-01-26</td>\n      <td>Endeavor Air Inc.</td>\n      <td>ABY</td>\n      <td>ATL</td>\n      <td>False</td>\n      <td>False</td>\n      <td>1202</td>\n      <td>1150.0</td>\n      <td>0.0</td>\n      <td>-12.0</td>\n      <td>...</td>\n      <td>1207.0</td>\n      <td>1242.0</td>\n      <td>11.0</td>\n      <td>1304</td>\n      <td>-11.0</td>\n      <td>0.0</td>\n      <td>-1.0</td>\n      <td>1300-1359</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2018-01-27</td>\n      <td>Endeavor Air Inc.</td>\n      <td>ABY</td>\n      <td>ATL</td>\n      <td>False</td>\n      <td>False</td>\n      <td>1400</td>\n      <td>1355.0</td>\n      <td>0.0</td>\n      <td>-5.0</td>\n      <td>...</td>\n      <td>1412.0</td>\n      <td>1448.0</td>\n      <td>11.0</td>\n      <td>1500</td>\n      <td>-1.0</td>\n      <td>0.0</td>\n      <td>-1.0</td>\n      <td>1500-1559</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 61 columns</p>\n</div>"
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cf_2018 = pd.read_csv('flight_data/Combined_Flights_2018.csv')\n",
    "cf_2018.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.493778700Z",
     "start_time": "2023-12-11T17:07:55.953264900Z"
    }
   },
   "outputs": [],
   "source": [
    "# Filter the dataframe to include only the delays from ATL\n",
    "import RegressionModel\n",
    "\n",
    "filtered_df = cf_2018[(cf_2018['Origin'] == 'JFK')]\n",
    "\n",
    "RegressionModel.destinations = list(cf_2018['DestCityName'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.527530200Z",
     "start_time": "2023-12-11T17:07:56.498869700Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\s2080\\AppData\\Local\\Temp\\ipykernel_13776\\3101909741.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  jfk_flights_2018.dropna(inplace=True)\n"
     ]
    }
   ],
    "relevant_columns = ['FlightDate', 'Dest', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
    "jfk_flights_2018 = filtered_df[relevant_columns]\n",
    "jfk_flights_2018.dropna(inplace=True)"
   "execution_count": 51,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.542402200Z",
     "start_time": "2023-12-11T17:07:56.526530600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "         FlightDate Dest  DepDelayMinutes  ArrDelayMinutes\n5544     2018-01-22  SJC              0.0              0.0\n5547     2018-01-22  MSY             86.0             77.0\n5548     2018-01-22  JAX             29.0             11.0\n5554     2018-01-22  ROC              0.0              0.0\n5565     2018-01-22  BUF              0.0              0.0\n...             ...  ...              ...              ...\n5666920  2018-09-10  SFO              0.0              3.0\n5666921  2018-09-10  MSP              0.0              0.0\n5667057  2018-09-10  LAX             13.0              0.0\n5667231  2018-09-10  SEA             48.0             44.0\n5672362  2018-09-04  BNA              0.0              0.0\n\n[83754 rows x 4 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>FlightDate</th>\n      <th>Dest</th>\n      <th>DepDelayMinutes</th>\n      <th>ArrDelayMinutes</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>5544</th>\n      <td>2018-01-22</td>\n      <td>SJC</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>5547</th>\n      <td>2018-01-22</td>\n      <td>MSY</td>\n      <td>86.0</td>\n      <td>77.0</td>\n    </tr>\n    <tr>\n      <th>5548</th>\n      <td>2018-01-22</td>\n      <td>JAX</td>\n      <td>29.0</td>\n      <td>11.0</td>\n    </tr>\n    <tr>\n      <th>5554</th>\n      <td>2018-01-22</td>\n      <td>ROC</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>5565</th>\n      <td>2018-01-22</td>\n      <td>BUF</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>5666920</th>\n      <td>2018-09-10</td>\n      <td>SFO</td>\n      <td>0.0</td>\n      <td>3.0</td>\n    </tr>\n    <tr>\n      <th>5666921</th>\n      <td>2018-09-10</td>\n      <td>MSP</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>5667057</th>\n      <td>2018-09-10</td>\n      <td>LAX</td>\n      <td>13.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>5667231</th>\n      <td>2018-09-10</td>\n      <td>SEA</td>\n      <td>48.0</td>\n      <td>44.0</td>\n    </tr>\n    <tr>\n      <th>5672362</th>\n      <td>2018-09-04</td>\n      <td>BNA</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>83754 rows × 4 columns</p>\n</div>"
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 52,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.666500700Z",
     "start_time": "2023-12-11T17:07:56.546471Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\s2080\\AppData\\Local\\Temp\\ipykernel_13776\\149138236.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  jfk_flights_2018['FlightDate'] = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
      "C:\\Users\\s2080\\AppData\\Local\\Temp\\ipykernel_13776\\149138236.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  jfk_flights_2018['Year'] = jfk_flights_2018['FlightDate'].dt.year\n",
      "C:\\Users\\s2080\\AppData\\Local\\Temp\\ipykernel_13776\\149138236.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  jfk_flights_2018['Month'] = jfk_flights_2018['FlightDate'].dt.month\n",
      "C:\\Users\\s2080\\AppData\\Local\\Temp\\ipykernel_13776\\149138236.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  jfk_flights_2018['Day'] = jfk_flights_2018['FlightDate'].dt.day\n"
     ]
    }
   ],
    "# Convert 'FlightDate' into numerical components\n",
    "jfk_flights_2018['FlightDate'] = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
    "jfk_flights_2018['Year'] = jfk_flights_2018['FlightDate'].dt.year\n",
    "jfk_flights_2018['Month'] = jfk_flights_2018['FlightDate'].dt.month\n",
    "jfk_flights_2018['Day'] = jfk_flights_2018['FlightDate'].dt.day\n",
    "\n",
    "# One-hot encoding for categorical variables\n",
    "df_jfk_encoded = pd.get_dummies(jfk_flights_2018, columns=['Dest'])\n"
   "execution_count": 53,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:56.737639800Z",
     "start_time": "2023-12-11T17:07:56.666500700Z"
    }
   },
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Choose your target variable, e.g., 'DepDelayMinutes'\n",
    "X = df_jfk_encoded.drop('DepDelayMinutes', axis=1)\n",
    "y = df_jfk_encoded['DepDelayMinutes']\n",
    "\n",
    "# Splitting the dataset into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "X_train = X_train.apply(pd.to_numeric, errors='coerce')\n",
    "y_train = pd.to_numeric(y_train, errors='coerce')\n",
    "X_test = X_test.apply(pd.to_numeric, errors='coerce')\n",
    "y_test = pd.to_numeric(y_test, errors='coerce')"
   "execution_count": 54,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:07:57.060977300Z",
     "start_time": "2023-12-11T17:07:56.738640200Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "LinearRegression()",
      "text/html": "<style>#sk-container-id-5 {color: black;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" checked><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Initialize the model\n",
    "model = LinearRegression()\n",
    "\n",
    "# Train the model\n",
    "model.fit(X_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:08:11.323526900Z",
     "start_time": "2023-12-11T17:08:10.881732300Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Squared Error: 169.08504182894654\n",
      "R^2 Score: 0.9198498663312777\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "# Predict on the test set\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluate the model\n",
    "mse = mean_squared_error(y_test, y_pred)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "print(\"Mean Squared Error:\", mse)\n",
    "print(\"R^2 Score:\", r2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-12-11T17:15:24.634324900Z",
     "start_time": "2023-12-11T17:15:24.628684100Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['FlightDate', 'ArrDelayMinutes', 'Year', 'Month', 'Day', 'Dest_ABQ', 'Dest_ACK', 'Dest_ATL', 'Dest_AUS', 'Dest_BGR', 'Dest_BNA', 'Dest_BOS', 'Dest_BQN', 'Dest_BTV', 'Dest_BUF', 'Dest_BUR', 'Dest_BWI', 'Dest_CHS', 'Dest_CLE', 'Dest_CLT', 'Dest_CMH', 'Dest_CVG', 'Dest_DAB', 'Dest_DCA', 'Dest_DEN', 'Dest_DFW', 'Dest_DTW', 'Dest_EGE', 'Dest_FLL', 'Dest_HNL', 'Dest_HOU', 'Dest_HYA', 'Dest_IAD', 'Dest_IND', 'Dest_JAC', 'Dest_JAX', 'Dest_LAS', 'Dest_LAX', 'Dest_LGB', 'Dest_MCO', 'Dest_MIA', 'Dest_MSP', 'Dest_MSY', 'Dest_MVY', 'Dest_OAK', 'Dest_ONT', 'Dest_ORD', 'Dest_ORF', 'Dest_ORH', 'Dest_PBI', 'Dest_PDX', 'Dest_PHL', 'Dest_PHX', 'Dest_PIT', 'Dest_PSE', 'Dest_PSP', 'Dest_PWM', 'Dest_RDU', 'Dest_RIC', 'Dest_RNO', 'Dest_ROC', 'Dest_RSW', 'Dest_SAN', 'Dest_SAT', 'Dest_SAV', 'Dest_SEA', 'Dest_SFO', 'Dest_SJC', 'Dest_SJU', 'Dest_SLC', 'Dest_SMF', 'Dest_SNA', 'Dest_SRQ', 'Dest_STT', 'Dest_SYR', 'Dest_TPA']\n"
     ]
    }
   ],
   "source": [
    "print(list(X_test))"
  {
   "cell_type": "code",
   "execution_count": 83,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                 FlightDate  ArrDelayMinutes  Year  Month  Day  Dest_ABQ  \\\n",
      "436014  1640995200000000000              0.0  2022      1    1     False   \n",
      "\n",
      "        Dest_ACK  Dest_ATL  Dest_AUS  Dest_BGR  ...  Dest_SFO  Dest_SJC  \\\n",
      "436014     False      True     False     False  ...     False     False   \n",
      "\n",
      "        Dest_SJU  Dest_SLC  Dest_SMF  Dest_SNA  Dest_SRQ  Dest_STT  Dest_SYR  \\\n",
      "436014     False     False     False     False     False     False     False   \n",
      "\n",
      "        Dest_TPA  \n",
      "436014     False  \n",
      "\n",
      "[1 rows x 76 columns]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "i = X_test.iloc[[3]].copy()\n",
    "i['FlightDate'] = 1640995200000000000\n",
    "i['Year'] = 2022\n",
    "i['Month'] = 1\n",
    "i['Day'] = 1\n",
    "print(i)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-11T17:28:49.141033600Z",
     "start_time": "2023-12-11T17:28:49.133184200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "outputs": [],
   "source": [
    "X_mike = X_test.iloc[[3]]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-11T17:17:36.066986Z",
     "start_time": "2023-12-11T17:17:36.055529500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "outputs": [
    {
     "data": {
      "text/plain": "array([-26.49915968])"
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(i)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-11T17:29:00.458780500Z",
     "start_time": "2023-12-11T17:29:00.448275500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sas2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}