From 8ff99b911e0ee1e1a42dd0c33e84d0e85eb9707d Mon Sep 17 00:00:00 2001
From: Fadi <fadi.gattoussi@stud.th-deg.de>
Date: Mon, 11 Dec 2023 17:37:06 +0100
Subject: [PATCH] Added airlines and destination_cities json files

---
 DisplayWindow.py |   8 +-
 training.ipynb   | 731 +++++------------------------------------------
 2 files changed, 80 insertions(+), 659 deletions(-)

diff --git a/DisplayWindow.py b/DisplayWindow.py
index f9282c7..3d1ac33 100644
--- a/DisplayWindow.py
+++ b/DisplayWindow.py
@@ -5,7 +5,7 @@ import pyqtgraph as pg
 from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget,
                              QVBoxLayout, QHBoxLayout, QGridLayout,
                              QLabel, QPushButton, QSlider, QDateTimeEdit,
-                             QLineEdit, QComboBox)
+                             QLineEdit, QComboBox, QDateEdit)
 from PyQt6.QtGui import QPalette, QColor, QIcon
 
 
@@ -24,8 +24,8 @@ class MainWindow(QMainWindow):
         grid_layout = QGridLayout(central_widget)
 
         # Create labels and input fields
-        self.labels_text = ['Date and Time', 'Airline', 'Destination', 'Model Start Year']
-        self.input_fields = [QDateTimeEdit(self), QComboBox(self), QComboBox(self), QComboBox(self)]
+        self.labels_text = ['Date', 'Airline', 'Destination', 'Model Start Year']
+        self.input_fields = [QDateEdit(self), QComboBox(self), QComboBox(self), QComboBox(self)]
         self.input_fields[1].addItems(RegressionModel.airlines)
         self.input_fields[2].addItems(RegressionModel.destinations)
         self.input_fields[3].addItems(list(map(str, RegressionModel.model_years)))
@@ -39,7 +39,7 @@ class MainWindow(QMainWindow):
         # Connect signals
         for i in range(len(self.input_fields)):
             if i == 0:
-                self.input_fields[i].dateTimeChanged.connect(self.update_values)
+                self.input_fields[i].dateChanged.connect(self.update_values)
             else:
                 self.input_fields[i].currentTextChanged.connect(self.update_values)
 
diff --git a/training.ipynb b/training.ipynb
index ba450be..9769cd5 100644
--- a/training.ipynb
+++ b/training.ipynb
@@ -2,304 +2,29 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import sklearn as sk\n",
-    "import matplotlib.pyplot as plt"
+    "import matplotlib.pyplot as plt\n",
+    "import json\n",
+    "import math"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Code</th>\n",
-       "      <th>Description</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>02Q</td>\n",
-       "      <td>Titan Airways</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>04Q</td>\n",
-       "      <td>Tradewind Aviation</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>05Q</td>\n",
-       "      <td>Comlux Aviation, AG</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>06Q</td>\n",
-       "      <td>Master Top Linhas Aereas Ltd.</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>07Q</td>\n",
-       "      <td>Flair Airlines Ltd.</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  Code                    Description\n",
-       "0  02Q                  Titan Airways\n",
-       "1  04Q             Tradewind Aviation\n",
-       "2  05Q            Comlux Aviation, AG\n",
-       "3  06Q  Master Top Linhas Aereas Ltd.\n",
-       "4  07Q            Flair Airlines Ltd."
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df = pd.read_csv('flight_data/Airlines.csv')\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>FlightDate</th>\n",
-       "      <th>Airline</th>\n",
-       "      <th>Origin</th>\n",
-       "      <th>Dest</th>\n",
-       "      <th>Cancelled</th>\n",
-       "      <th>Diverted</th>\n",
-       "      <th>CRSDepTime</th>\n",
-       "      <th>DepTime</th>\n",
-       "      <th>DepDelayMinutes</th>\n",
-       "      <th>DepDelay</th>\n",
-       "      <th>...</th>\n",
-       "      <th>WheelsOff</th>\n",
-       "      <th>WheelsOn</th>\n",
-       "      <th>TaxiIn</th>\n",
-       "      <th>CRSArrTime</th>\n",
-       "      <th>ArrDelay</th>\n",
-       "      <th>ArrDel15</th>\n",
-       "      <th>ArrivalDelayGroups</th>\n",
-       "      <th>ArrTimeBlk</th>\n",
-       "      <th>DistanceGroup</th>\n",
-       "      <th>DivAirportLandings</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>2018-01-23</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>ABY</td>\n",
-       "      <td>ATL</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1202</td>\n",
-       "      <td>1157.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-5.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1211.0</td>\n",
-       "      <td>1249.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>1304</td>\n",
-       "      <td>-8.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1300-1359</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2018-01-24</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>ABY</td>\n",
-       "      <td>ATL</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1202</td>\n",
-       "      <td>1157.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-5.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1210.0</td>\n",
-       "      <td>1246.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>1304</td>\n",
-       "      <td>-6.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1300-1359</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2018-01-25</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>ABY</td>\n",
-       "      <td>ATL</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1202</td>\n",
-       "      <td>1153.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-9.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1211.0</td>\n",
-       "      <td>1251.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>1304</td>\n",
-       "      <td>-2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1300-1359</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>2018-01-26</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>ABY</td>\n",
-       "      <td>ATL</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1202</td>\n",
-       "      <td>1150.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-12.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1207.0</td>\n",
-       "      <td>1242.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>1304</td>\n",
-       "      <td>-11.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1300-1359</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>2018-01-27</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>ABY</td>\n",
-       "      <td>ATL</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1400</td>\n",
-       "      <td>1355.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-5.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1412.0</td>\n",
-       "      <td>1448.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>1500</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1500-1559</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 61 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   FlightDate            Airline Origin Dest  Cancelled  Diverted  CRSDepTime  \\\n",
-       "0  2018-01-23  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
-       "1  2018-01-24  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
-       "2  2018-01-25  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
-       "3  2018-01-26  Endeavor Air Inc.    ABY  ATL      False     False        1202   \n",
-       "4  2018-01-27  Endeavor Air Inc.    ABY  ATL      False     False        1400   \n",
-       "\n",
-       "   DepTime  DepDelayMinutes  DepDelay  ...  WheelsOff  WheelsOn  TaxiIn  \\\n",
-       "0   1157.0              0.0      -5.0  ...     1211.0    1249.0     7.0   \n",
-       "1   1157.0              0.0      -5.0  ...     1210.0    1246.0    12.0   \n",
-       "2   1153.0              0.0      -9.0  ...     1211.0    1251.0    11.0   \n",
-       "3   1150.0              0.0     -12.0  ...     1207.0    1242.0    11.0   \n",
-       "4   1355.0              0.0      -5.0  ...     1412.0    1448.0    11.0   \n",
-       "\n",
-       "   CRSArrTime  ArrDelay  ArrDel15  ArrivalDelayGroups  ArrTimeBlk  \\\n",
-       "0        1304      -8.0       0.0                -1.0   1300-1359   \n",
-       "1        1304      -6.0       0.0                -1.0   1300-1359   \n",
-       "2        1304      -2.0       0.0                -1.0   1300-1359   \n",
-       "3        1304     -11.0       0.0                -1.0   1300-1359   \n",
-       "4        1500      -1.0       0.0                -1.0   1500-1559   \n",
-       "\n",
-       "   DistanceGroup  DivAirportLandings  \n",
-       "0              1                 0.0  \n",
-       "1              1                 0.0  \n",
-       "2              1                 0.0  \n",
-       "3              1                 0.0  \n",
-       "4              1                 0.0  \n",
-       "\n",
-       "[5 rows x 61 columns]"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
     }
    ],
    "source": [
@@ -309,408 +34,104 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Filter the dataframe to include only the delays from ATL\n",
+    "# Filter the dataframe to include only the flights departing from JFK\n",
     "import RegressionModel\n",
     "\n",
-    "filtered_df = cf_2018[(cf_2018['Origin'] == 'ATL')]\n",
+    "filtered_df = cf_2018[(cf_2018['Origin'] == 'JFK')]\n",
     "\n",
     "RegressionModel.destinations = list(cf_2018['DestCityName'].unique())"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['FlightDate', 'Airline', 'Origin', 'Dest', 'Cancelled', 'Diverted',\n",
-       "       'CRSDepTime', 'DepTime', 'DepDelayMinutes', 'DepDelay', 'ArrTime',\n",
-       "       'ArrDelayMinutes', 'AirTime', 'CRSElapsedTime', 'ActualElapsedTime',\n",
-       "       'Distance', 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',\n",
-       "       'Marketing_Airline_Network', 'Operated_or_Branded_Code_Share_Partners',\n",
-       "       'DOT_ID_Marketing_Airline', 'IATA_Code_Marketing_Airline',\n",
-       "       'Flight_Number_Marketing_Airline', 'Operating_Airline',\n",
-       "       'DOT_ID_Operating_Airline', 'IATA_Code_Operating_Airline',\n",
-       "       'Tail_Number', 'Flight_Number_Operating_Airline', 'OriginAirportID',\n",
-       "       'OriginAirportSeqID', 'OriginCityMarketID', 'OriginCityName',\n",
-       "       'OriginState', 'OriginStateFips', 'OriginStateName', 'OriginWac',\n",
-       "       'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestCityName',\n",
-       "       'DestState', 'DestStateFips', 'DestStateName', 'DestWac', 'DepDel15',\n",
-       "       'DepartureDelayGroups', 'DepTimeBlk', 'TaxiOut', 'WheelsOff',\n",
-       "       'WheelsOn', 'TaxiIn', 'CRSArrTime', 'ArrDelay', 'ArrDel15',\n",
-       "       'ArrivalDelayGroups', 'ArrTimeBlk', 'DistanceGroup',\n",
-       "       'DivAirportLandings'],\n",
-       "      dtype='object')"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "# list of columns in cf_2018\n",
-    "cf_2018.columns"
+    "relevant_columns = ['FlightDate', 'Airline', 'Dest', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
+    "jfk_flights_2018 = filtered_df[relevant_columns].copy()  # own copy: a later cell adds columns to this frame"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "ny_flights_2018 = cf_2018[(cf_2018['Origin'] == 'JFK') | (cf_2018['Origin'] == 'LGA') | (cf_2018['Origin'] == 'EWR')]"
+    "jfk_flights_2018"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>FlightDate</th>\n",
-       "      <th>Airline</th>\n",
-       "      <th>Origin</th>\n",
-       "      <th>Dest</th>\n",
-       "      <th>Cancelled</th>\n",
-       "      <th>Diverted</th>\n",
-       "      <th>CRSDepTime</th>\n",
-       "      <th>DepTime</th>\n",
-       "      <th>DepDelayMinutes</th>\n",
-       "      <th>DepDelay</th>\n",
-       "      <th>...</th>\n",
-       "      <th>WheelsOff</th>\n",
-       "      <th>WheelsOn</th>\n",
-       "      <th>TaxiIn</th>\n",
-       "      <th>CRSArrTime</th>\n",
-       "      <th>ArrDelay</th>\n",
-       "      <th>ArrDel15</th>\n",
-       "      <th>ArrivalDelayGroups</th>\n",
-       "      <th>ArrTimeBlk</th>\n",
-       "      <th>DistanceGroup</th>\n",
-       "      <th>DivAirportLandings</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>95</th>\n",
-       "      <td>2018-01-06</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>LGA</td>\n",
-       "      <td>SYR</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1810</td>\n",
-       "      <td>1805.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-5.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1825.0</td>\n",
-       "      <td>1907.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>1928</td>\n",
-       "      <td>-15.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1900-1959</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>96</th>\n",
-       "      <td>2018-01-13</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>LGA</td>\n",
-       "      <td>SYR</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1810</td>\n",
-       "      <td>1800.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-10.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1820.0</td>\n",
-       "      <td>1901.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>1929</td>\n",
-       "      <td>-22.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-2.0</td>\n",
-       "      <td>1900-1959</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>97</th>\n",
-       "      <td>2018-01-14</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>LGA</td>\n",
-       "      <td>SYR</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1810</td>\n",
-       "      <td>1803.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-7.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1832.0</td>\n",
-       "      <td>1913.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>1931</td>\n",
-       "      <td>-14.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-1.0</td>\n",
-       "      <td>1900-1959</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>98</th>\n",
-       "      <td>2018-01-20</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>LGA</td>\n",
-       "      <td>SYR</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1810</td>\n",
-       "      <td>1758.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-12.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1822.0</td>\n",
-       "      <td>1857.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>1928</td>\n",
-       "      <td>-25.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-2.0</td>\n",
-       "      <td>1900-1959</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>99</th>\n",
-       "      <td>2018-01-27</td>\n",
-       "      <td>Endeavor Air Inc.</td>\n",
-       "      <td>LGA</td>\n",
-       "      <td>SYR</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1810</td>\n",
-       "      <td>1758.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-12.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1822.0</td>\n",
-       "      <td>1858.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>1929</td>\n",
-       "      <td>-26.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>-2.0</td>\n",
-       "      <td>1900-1959</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 61 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    FlightDate            Airline Origin Dest  Cancelled  Diverted  \\\n",
-       "95  2018-01-06  Endeavor Air Inc.    LGA  SYR      False     False   \n",
-       "96  2018-01-13  Endeavor Air Inc.    LGA  SYR      False     False   \n",
-       "97  2018-01-14  Endeavor Air Inc.    LGA  SYR      False     False   \n",
-       "98  2018-01-20  Endeavor Air Inc.    LGA  SYR      False     False   \n",
-       "99  2018-01-27  Endeavor Air Inc.    LGA  SYR      False     False   \n",
-       "\n",
-       "    CRSDepTime  DepTime  DepDelayMinutes  DepDelay  ...  WheelsOff  WheelsOn  \\\n",
-       "95        1810   1805.0              0.0      -5.0  ...     1825.0    1907.0   \n",
-       "96        1810   1800.0              0.0     -10.0  ...     1820.0    1901.0   \n",
-       "97        1810   1803.0              0.0      -7.0  ...     1832.0    1913.0   \n",
-       "98        1810   1758.0              0.0     -12.0  ...     1822.0    1857.0   \n",
-       "99        1810   1758.0              0.0     -12.0  ...     1822.0    1858.0   \n",
-       "\n",
-       "    TaxiIn  CRSArrTime  ArrDelay  ArrDel15  ArrivalDelayGroups  ArrTimeBlk  \\\n",
-       "95     6.0        1928     -15.0       0.0                -1.0   1900-1959   \n",
-       "96     6.0        1929     -22.0       0.0                -2.0   1900-1959   \n",
-       "97     4.0        1931     -14.0       0.0                -1.0   1900-1959   \n",
-       "98     6.0        1928     -25.0       0.0                -2.0   1900-1959   \n",
-       "99     5.0        1929     -26.0       0.0                -2.0   1900-1959   \n",
-       "\n",
-       "    DistanceGroup  DivAirportLandings  \n",
-       "95              1                 0.0  \n",
-       "96              1                 0.0  \n",
-       "97              1                 0.0  \n",
-       "98              1                 0.0  \n",
-       "99              1                 0.0  \n",
-       "\n",
-       "[5 rows x 61 columns]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "ny_flights_2018.head()"
+    "# Convert 'FlightDate' into numerical components\n",
+    "jfk_flights_2018['FlightDate'] = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
+    "jfk_flights_2018['Year'] = jfk_flights_2018['FlightDate'].dt.year\n",
+    "jfk_flights_2018['Month'] = jfk_flights_2018['FlightDate'].dt.month\n",
+    "jfk_flights_2018['Day'] = jfk_flights_2018['FlightDate'].dt.day\n",
+    "\n",
+    "# One-hot encoding for categorical variables\n",
+    "df_jfk_encoded = pd.get_dummies(jfk_flights_2018, columns=['Airline', 'Dest'])\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array(['Atlanta, GA', 'Albany, GA', 'Mobile, AL', 'Evansville, IN',\n",
-       "       'Detroit, MI', 'Jacksonville/Camp Lejeune, NC', 'Boston, MA',\n",
-       "       'Syracuse, NY', 'New Bern/Morehead/Beaufort, NC', 'Montgomery, AL',\n",
-       "       'New York, NY', 'Minneapolis, MN', 'Cleveland, OH',\n",
-       "       'Charlotte, NC', 'Fayetteville, NC', 'St. Louis, MO',\n",
-       "       'Columbus, OH', 'Indianapolis, IN', 'Knoxville, TN', 'Mosinee, WI',\n",
-       "       'Alexandria, LA', 'Killeen, TX', 'San Antonio, TX', 'Peoria, IL',\n",
-       "       'Houston, TX', 'Dallas/Fort Worth, TX', 'Richmond, VA',\n",
-       "       'Portland, ME', 'Kalamazoo, MI', 'Cincinnati, OH',\n",
-       "       'Bristol/Johnson City/Kingsport, TN', 'Washington, DC',\n",
-       "       'Asheville, NC', 'Baton Rouge, LA', 'Buffalo, NY',\n",
-       "       'Grand Rapids, MI', 'Bismarck/Mandan, ND', 'Chattanooga, TN',\n",
-       "       'Traverse City, MI', 'Fort Smith, AR', 'Burlington, VT',\n",
-       "       'Kansas City, MO', 'Green Bay, WI', 'Tulsa, OK', 'Valdosta, GA',\n",
-       "       'Charleston/Dunbar, WV', 'Fayetteville, AR', 'Rochester, NY',\n",
-       "       'Savannah, GA', 'Roanoke, VA', 'Harrisburg, PA', 'New Orleans, LA',\n",
-       "       'Nashville, TN', 'Raleigh/Durham, NC', 'Milwaukee, WI',\n",
-       "       'Gainesville, FL', 'Tampa, FL', 'Greensboro/High Point, NC',\n",
-       "       'Charlottesville, VA', 'Bangor, ME', 'Grand Forks, ND',\n",
-       "       'Brunswick, GA', 'Memphis, TN', 'Wilmington, NC',\n",
-       "       'Bloomington/Normal, IL', 'Norfolk, VA', 'Manchester, NH',\n",
-       "       'Dayton, OH', 'Greer, SC', 'Lexington, KY', 'Louisville, KY',\n",
-       "       'Hartford, CT', 'Saginaw/Bay City/Midland, MI', 'Minot, ND',\n",
-       "       'Columbia, SC', 'Columbus, GA', 'Des Moines, IA', 'Dothan, AL',\n",
-       "       'Elmira/Corning, NY', 'Pittsburgh, PA', 'Jacksonville, FL',\n",
-       "       'Chicago, IL', 'Springfield, MO', 'Shreveport, LA',\n",
-       "       'Huntsville, AL', 'Omaha, NE', 'Tallahassee, FL', 'Madison, WI',\n",
-       "       'Lansing, MI', 'Baltimore, MD', 'Charleston, SC',\n",
-       "       'Little Rock, AR', 'Fort Myers, FL', 'Orlando, FL', 'Fargo, ND',\n",
-       "       'Appleton, WI', 'Akron, OH', 'Harlingen/San Benito, TX',\n",
-       "       'White Plains, NY', 'Philadelphia, PA', 'Moline, IL',\n",
-       "       'Aguadilla, PR', 'San Francisco, CA', 'Newark, NJ',\n",
-       "       'Fort Lauderdale, FL', 'Salt Lake City, UT', 'Ponce, PR',\n",
-       "       'San Jose, CA', 'Long Beach, CA', 'Las Vegas, NV',\n",
-       "       'Los Angeles, CA', 'West Palm Beach/Palm Beach, FL',\n",
-       "       'San Juan, PR', 'Sarasota/Bradenton, FL', 'Austin, TX',\n",
-       "       'Newburgh/Poughkeepsie, NY', 'Denver, CO', 'Seattle, WA',\n",
-       "       'Providence, RI', 'Sacramento, CA', 'Charlotte Amalie, VI',\n",
-       "       'Portland, OR', 'Worcester, MA', 'San Diego, CA', 'Reno, NV',\n",
-       "       'Albany, NY', 'Phoenix, AZ', 'Oakland, CA', 'Palm Springs, CA',\n",
-       "       'Christiansted, VI', 'Burbank, CA', 'Daytona Beach, FL',\n",
-       "       'Albuquerque, NM', 'Brownsville, TX', 'El Paso, TX',\n",
-       "       'Oklahoma City, OK', 'Gulfport/Biloxi, MS', 'South Bend, IN',\n",
-       "       'Mission/McAllen/Edinburg, TX', 'Lake Charles, LA',\n",
-       "       'Lafayette, LA', 'Myrtle Beach, SC', 'Rochester, MN',\n",
-       "       'Lincoln, NE', 'Birmingham, AL', 'Augusta, GA',\n",
-       "       'Jackson/Vicksburg, MS', 'Cedar Rapids/Iowa City, IA',\n",
-       "       'Panama City, FL', 'Valparaiso, FL', 'Key West, FL',\n",
-       "       'Allentown/Bethlehem/Easton, PA', 'Sanford, FL', 'Billings, MT',\n",
-       "       'Flint, MI', 'Missoula, MT', 'Idaho Falls, ID', 'Stockton, CA',\n",
-       "       'Toledo, OH', 'Sioux Falls, SD', 'Ashland, WV', 'Fresno, CA',\n",
-       "       'Eugene, OR', 'Bellingham, WA', 'Punta Gorda, FL', 'St. Cloud, MN',\n",
-       "       'St. Petersburg, FL', 'Plattsburgh, NY', 'Ogdensburg, NY',\n",
-       "       'Concord, NC', 'Springfield, IL', 'Provo, UT',\n",
-       "       'Montrose/Delta, CO', 'Fort Wayne, IN', 'Portsmouth, NH',\n",
-       "       'Trenton, NJ', 'Niagara Falls, NY', 'Belleville, IL',\n",
-       "       'Rockford, IL', 'Ogden, UT', 'Boise, ID', 'Medford, OR',\n",
-       "       'Great Falls, MT', 'Pasco/Kennewick/Richland, WA', 'Wichita, KS',\n",
-       "       'Grand Island, NE', 'Monterey, CA', 'Grand Junction, CO',\n",
-       "       'Laredo, TX', 'Rapid City, SD', 'Kalispell, MT', 'Santa Maria, CA',\n",
-       "       'Bozeman, MT', 'Hagerstown, MD', 'Clarksburg/Fairmont, WV',\n",
-       "       'Colorado Springs, CO', 'Owensboro, KY', 'Honolulu, HI',\n",
-       "       'Kahului, HI', 'Kona, HI', 'Lihue, HI', 'Hilo, HI', 'Columbia, MO',\n",
-       "       'Casper, WY', 'Scranton/Wilkes-Barre, PA', 'Tucson, AZ',\n",
-       "       'Bend/Redmond, OR', 'Santa Barbara, CA', 'Aspen, CO',\n",
-       "       'Ontario, CA', 'Helena, MT', 'Sun Valley/Hailey/Ketchum, ID',\n",
-       "       'Durango, CO', 'Williston, ND', 'Arcata/Eureka, CA',\n",
-       "       'San Luis Obispo, CA', 'Mammoth Lakes, CA', 'Spokane, WA',\n",
-       "       'Jackson, WY', 'Santa Rosa, CA', 'Santa Ana, CA', 'Redding, CA',\n",
-       "       'Bakersfield, CA', 'Hayden, CO', 'Midland/Odessa, TX',\n",
-       "       'Ithaca/Cortland, NY', 'Lewiston, ID', 'Pocatello, ID',\n",
-       "       'Aberdeen, SD', 'Cody, WY', 'Alpena, MI', 'Escanaba, MI',\n",
-       "       'Pellston, MI', 'Bemidji, MN', 'Brainerd, MN', 'Butte, MT',\n",
-       "       'Cedar City, UT', 'St. George, UT', 'Sault Ste. Marie, MI',\n",
-       "       'Marquette, MI', 'Elko, NV', 'Hibbing, MN', 'Binghamton, NY',\n",
-       "       'Twin Falls, ID', 'Rhinelander, WI', 'International Falls, MN',\n",
-       "       'Iron Mountain/Kingsfd, MI', 'Dallas, TX', 'Columbus, MS',\n",
-       "       'Monroe, LA', 'State College, PA', 'Erie, PA', 'La Crosse, WI',\n",
-       "       'Duluth, MN', 'Muskegon, MI', 'Pueblo, CO', 'Hancock/Houghton, MI',\n",
-       "       'Paducah, KY', 'Quincy, IL', 'Rock Springs, WY', 'Jamestown, ND',\n",
-       "       'Devils Lake, ND', 'Laramie, WY', 'Gillette, WY', 'Eau Claire, WI',\n",
-       "       'Hays, KS', 'Eagle, CO', 'Cape Girardeau, MO',\n",
-       "       'North Bend/Coos Bay, OR', 'Amarillo, TX', 'Miami, FL',\n",
-       "       'Islip, NY', 'Pago Pago, TT', 'Hoolehua, HI', 'Lanai, HI',\n",
-       "       'Kapalua, HI', 'Atlantic City, NJ', 'Latrobe, PA', 'Lubbock, TX',\n",
-       "       'Pensacola, FL', 'Corpus Christi, TX', 'Melbourne, FL',\n",
-       "       'Fairbanks, AK', 'Anchorage, AK', 'Newport News/Williamsburg, VA',\n",
-       "       'Guam, TT', 'Gunnison, CO', 'Bethel, AK', 'Kodiak, AK',\n",
-       "       'Deadhorse, AK', 'Barrow, AK', 'Ketchikan, AK', 'Juneau, AK',\n",
-       "       'Sitka, AK', 'Petersburg, AK', 'Wrangell, AK', 'Nome, AK',\n",
-       "       'Kotzebue, AK', 'Yakutat, AK', 'Cordova, AK', 'Adak Island, AK',\n",
-       "       'Yakima, WA', 'Santa Fe, NM', 'Champaign/Urbana, IL',\n",
-       "       'Dickinson, ND', 'Saipan, TT', 'Rota, TT', 'Walla Walla, WA',\n",
-       "       'Wenatchee, WA', 'Pullman, WA', 'College Station/Bryan, TX',\n",
-       "       'Hobbs, NM', 'Youngstown/Warren, OH', 'Unalaska, AK',\n",
-       "       'Scottsbluff, NE', 'Nantucket, MA', \"Martha's Vineyard, MA\",\n",
-       "       'Branson, MO', 'Tyler, TX', 'San Angelo, TX', 'Wichita Falls, TX',\n",
-       "       'Beaumont/Port Arthur, TX', 'Lawton/Fort Sill, OK', 'Waco, TX',\n",
-       "       'Flagstaff, AZ', 'Yuma, AZ', 'Meridian, MS',\n",
-       "       'Hattiesburg/Laurel, MS', 'Roswell, NM', 'Kearney, NE', 'Moab, UT',\n",
-       "       'Vernal, UT', 'Lewisburg, WV', 'Staunton, VA', 'Salina, KS',\n",
-       "       'Liberal, KS', 'North Platte, NE', 'Prescott, AZ', 'Abilene, TX',\n",
-       "       'Manhattan/Ft. Riley, KS', 'Texarkana, AR', 'Lynchburg, VA',\n",
-       "       'Greenville, NC', 'New Haven, CT', 'Presque Isle/Houlton, ME',\n",
-       "       'Hilton Head, SC', 'Longview, TX', 'Waterloo, IA',\n",
-       "       'Sioux City, IA', 'Dubuque, IA', 'Garden City, KS',\n",
-       "       'Salisbury, MD', 'Williamsport, PA', 'Florence, SC',\n",
-       "       'Stillwater, OK', 'Joplin, MO', 'Watertown, NY', 'Cheyenne, WY',\n",
-       "       'Del Rio, TX', 'Hyannis, MA', 'Dillingham, AK', 'King Salmon, AK',\n",
-       "       'Gustavus, AK', 'West Yellowstone, MT'], dtype=object)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "cf_2018['DestCityName'].unique()"
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Choose your target variable, e.g., 'DepDelayMinutes'\n",
+    "X = df_jfk_encoded.drop('DepDelayMinutes', axis=1)\n",
+    "y = df_jfk_encoded['DepDelayMinutes']\n",
+    "\n",
+    "# Coerce to numeric BEFORE splitting so the train and test sets end up with identical dtypes\n",
+    "X = X.apply(pd.to_numeric, errors='coerce')\n",
+    "y = pd.to_numeric(y, errors='coerce')\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # 80/20 split\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "# Initialize the model\n",
+    "model = LinearRegression()\n",
+    "\n",
+    "# Train the model\n",
+    "model.fit(X_train, y_train)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import mean_squared_error, r2_score\n",
+    "\n",
+    "# Predict on the test set\n",
+    "y_pred = model.predict(X_test)\n",
+    "\n",
+    "# Evaluate the model\n",
+    "mse = mean_squared_error(y_test, y_pred)\n",
+    "r2 = r2_score(y_test, y_pred)\n",
+    "print(\"Mean Squared Error:\", mse)\n",
+    "print(\"R^2 Score:\", r2)\n",
+    "\n"
    ]
   },
   {
-- 
GitLab