diff --git a/DisplayWindow.py b/DisplayWindow.py index f9282c7115234b067f03b55e9a70c2b96f745fc0..3d1ac33e566a843835c112793d97380f23e7e307 100644 --- a/DisplayWindow.py +++ b/DisplayWindow.py @@ -5,7 +5,7 @@ import pyqtgraph as pg from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel, QPushButton, QSlider, QDateTimeEdit, - QLineEdit, QComboBox) + QLineEdit, QComboBox, QDateEdit) from PyQt6.QtGui import QPalette, QColor, QIcon @@ -24,8 +24,8 @@ class MainWindow(QMainWindow): grid_layout = QGridLayout(central_widget) # Create labels and input fields - self.labels_text = ['Date and Time', 'Airline', 'Destination', 'Model Start Year'] - self.input_fields = [QDateTimeEdit(self), QComboBox(self), QComboBox(self), QComboBox(self)] + self.labels_text = ['Date', 'Airline', 'Destination', 'Model Start Year'] + self.input_fields = [QDateEdit(self), QComboBox(self), QComboBox(self), QComboBox(self)] self.input_fields[1].addItems(RegressionModel.airlines) self.input_fields[2].addItems(RegressionModel.destinations) self.input_fields[3].addItems(list(map(str, RegressionModel.model_years))) @@ -39,7 +39,7 @@ class MainWindow(QMainWindow): # Connect signals for i in range(len(self.input_fields)): if i == 0: - self.input_fields[i].dateTimeChanged.connect(self.update_values) + self.input_fields[i].dateChanged.connect(self.update_values) else: self.input_fields[i].currentTextChanged.connect(self.update_values) diff --git a/training.ipynb b/training.ipynb index ba450be1d28e0cb4af0e604186eab39c35473390..9769cd5772c0033438fa4d5f5104c3303a18d133 100644 --- a/training.ipynb +++ b/training.ipynb @@ -2,304 +2,29 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import sklearn as sk\n", - "import matplotlib.pyplot as plt" + "import matplotlib.pyplot as plt\n", + "import json\n", + "import math" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 45, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Code</th>\n", - " <th>Description</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>02Q</td>\n", - " <td>Titan Airways</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>04Q</td>\n", - " <td>Tradewind Aviation</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>05Q</td>\n", - " <td>Comlux Aviation, AG</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>06Q</td>\n", - " <td>Master Top Linhas Aereas Ltd.</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>07Q</td>\n", - " <td>Flair Airlines Ltd.</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " Code Description\n", - "0 02Q Titan Airways\n", - "1 04Q Tradewind Aviation\n", - "2 05Q Comlux Aviation, AG\n", - "3 06Q Master Top Linhas Aereas Ltd.\n", - "4 07Q Flair Airlines Ltd." - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.read_csv('flight_data/Airlines.csv')\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>FlightDate</th>\n", - " <th>Airline</th>\n", - " <th>Origin</th>\n", - " <th>Dest</th>\n", - " <th>Cancelled</th>\n", - " <th>Diverted</th>\n", - " <th>CRSDepTime</th>\n", - " <th>DepTime</th>\n", - " <th>DepDelayMinutes</th>\n", - " <th>DepDelay</th>\n", - " <th>...</th>\n", - " <th>WheelsOff</th>\n", - " <th>WheelsOn</th>\n", - " <th>TaxiIn</th>\n", - " <th>CRSArrTime</th>\n", - " <th>ArrDelay</th>\n", - " <th>ArrDel15</th>\n", - " <th>ArrivalDelayGroups</th>\n", - " <th>ArrTimeBlk</th>\n", - " <th>DistanceGroup</th>\n", - " <th>DivAirportLandings</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>2018-01-23</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>ABY</td>\n", - " <td>ATL</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1202</td>\n", - " <td>1157.0</td>\n", - " <td>0.0</td>\n", - " <td>-5.0</td>\n", - " <td>...</td>\n", - " <td>1211.0</td>\n", - " <td>1249.0</td>\n", - " <td>7.0</td>\n", - " <td>1304</td>\n", - " <td>-8.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1300-1359</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>2018-01-24</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>ABY</td>\n", - " <td>ATL</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1202</td>\n", - " <td>1157.0</td>\n", - " <td>0.0</td>\n", - " <td>-5.0</td>\n", - " <td>...</td>\n", - " <td>1210.0</td>\n", - " <td>1246.0</td>\n", - " <td>12.0</td>\n", - " <td>1304</td>\n", - " <td>-6.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1300-1359</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>2018-01-25</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>ABY</td>\n", - " <td>ATL</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1202</td>\n", - " <td>1153.0</td>\n", - " <td>0.0</td>\n", - " <td>-9.0</td>\n", - " <td>...</td>\n", - " <td>1211.0</td>\n", - " <td>1251.0</td>\n", - " <td>11.0</td>\n", - " <td>1304</td>\n", - " <td>-2.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1300-1359</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>2018-01-26</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>ABY</td>\n", - " <td>ATL</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1202</td>\n", - " <td>1150.0</td>\n", - " <td>0.0</td>\n", - " <td>-12.0</td>\n", - " <td>...</td>\n", - " <td>1207.0</td>\n", - " <td>1242.0</td>\n", - " <td>11.0</td>\n", - " <td>1304</td>\n", - " <td>-11.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1300-1359</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>2018-01-27</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>ABY</td>\n", - " <td>ATL</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1400</td>\n", - " <td>1355.0</td>\n", - " <td>0.0</td>\n", - " <td>-5.0</td>\n", - " <td>...</td>\n", - " <td>1412.0</td>\n", - " <td>1448.0</td>\n", - " <td>11.0</td>\n", - " <td>1500</td>\n", - " <td>-1.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1500-1559</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 61 columns</p>\n", - "</div>" - ], - "text/plain": [ - " FlightDate Airline Origin Dest Cancelled Diverted CRSDepTime \\\n", - "0 2018-01-23 Endeavor Air Inc. ABY ATL False False 1202 \n", - "1 2018-01-24 Endeavor Air Inc. ABY ATL False False 1202 \n", - "2 2018-01-25 Endeavor Air Inc. ABY ATL False False 1202 \n", - "3 2018-01-26 Endeavor Air Inc. ABY ATL False False 1202 \n", - "4 2018-01-27 Endeavor Air Inc. ABY ATL False False 1400 \n", - "\n", - " DepTime DepDelayMinutes DepDelay ... WheelsOff WheelsOn TaxiIn \\\n", - "0 1157.0 0.0 -5.0 ... 1211.0 1249.0 7.0 \n", - "1 1157.0 0.0 -5.0 ... 1210.0 1246.0 12.0 \n", - "2 1153.0 0.0 -9.0 ... 1211.0 1251.0 11.0 \n", - "3 1150.0 0.0 -12.0 ... 1207.0 1242.0 11.0 \n", - "4 1355.0 0.0 -5.0 ... 1412.0 1448.0 11.0 \n", - "\n", - " CRSArrTime ArrDelay ArrDel15 ArrivalDelayGroups ArrTimeBlk \\\n", - "0 1304 -8.0 0.0 -1.0 1300-1359 \n", - "1 1304 -6.0 0.0 -1.0 1300-1359 \n", - "2 1304 -2.0 0.0 -1.0 1300-1359 \n", - "3 1304 -11.0 0.0 -1.0 1300-1359 \n", - "4 1500 -1.0 0.0 -1.0 1500-1559 \n", - "\n", - " DistanceGroup DivAirportLandings \n", - "0 1 0.0 \n", - "1 1 0.0 \n", - "2 1 0.0 \n", - "3 1 0.0 \n", - "4 1 0.0 \n", - "\n", - "[5 rows x 61 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details." + ] } ], "source": [ @@ -309,408 +34,104 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Filter the dataframe to include only the delays from ATL\n", "import RegressionModel\n", "\n", - "filtered_df = cf_2018[(cf_2018['Origin'] == 'ATL')]\n", + "filtered_df = cf_2018[(cf_2018['Origin'] == 'JFK')]\n", "\n", "RegressionModel.destinations = list(cf_2018['DestCityName'].unique())" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['FlightDate', 'Airline', 'Origin', 'Dest', 'Cancelled', 'Diverted',\n", - " 'CRSDepTime', 'DepTime', 'DepDelayMinutes', 'DepDelay', 'ArrTime',\n", - " 'ArrDelayMinutes', 'AirTime', 'CRSElapsedTime', 'ActualElapsedTime',\n", - " 'Distance', 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',\n", - " 'Marketing_Airline_Network', 'Operated_or_Branded_Code_Share_Partners',\n", - " 'DOT_ID_Marketing_Airline', 'IATA_Code_Marketing_Airline',\n", - " 'Flight_Number_Marketing_Airline', 'Operating_Airline',\n", - " 'DOT_ID_Operating_Airline', 'IATA_Code_Operating_Airline',\n", - " 'Tail_Number', 'Flight_Number_Operating_Airline', 'OriginAirportID',\n", - " 'OriginAirportSeqID', 'OriginCityMarketID', 'OriginCityName',\n", - " 'OriginState', 'OriginStateFips', 'OriginStateName', 'OriginWac',\n", - " 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestCityName',\n", - " 'DestState', 'DestStateFips', 'DestStateName', 'DestWac', 'DepDel15',\n", - " 'DepartureDelayGroups', 'DepTimeBlk', 'TaxiOut', 'WheelsOff',\n", - " 'WheelsOn', 'TaxiIn', 'CRSArrTime', 'ArrDelay', 'ArrDel15',\n", - " 'ArrivalDelayGroups', 'ArrTimeBlk', 'DistanceGroup',\n", - " 'DivAirportLandings'],\n", - " dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# list of columns in cf_2018\n", - "cf_2018.columns" + "relevant_columns = ['FlightDate', 'Airline', 'Dest', 'DepDelayMinutes', 'ArrDelayMinutes']\n", + "jfk_flights_2018 = filtered_df[relevant_columns]" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "ny_flights_2018 = cf_2018[(cf_2018['Origin'] == 'JFK') | (cf_2018['Origin'] == 'LGA') | (cf_2018['Origin'] == 'EWR')]" + "jfk_flights_2018" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>FlightDate</th>\n", - " <th>Airline</th>\n", - " <th>Origin</th>\n", - " <th>Dest</th>\n", - " <th>Cancelled</th>\n", - " <th>Diverted</th>\n", - " <th>CRSDepTime</th>\n", - " <th>DepTime</th>\n", - " <th>DepDelayMinutes</th>\n", - " <th>DepDelay</th>\n", - " <th>...</th>\n", - " <th>WheelsOff</th>\n", - " <th>WheelsOn</th>\n", - " <th>TaxiIn</th>\n", - " <th>CRSArrTime</th>\n", - " <th>ArrDelay</th>\n", - " <th>ArrDel15</th>\n", - " <th>ArrivalDelayGroups</th>\n", - " <th>ArrTimeBlk</th>\n", - " <th>DistanceGroup</th>\n", - " <th>DivAirportLandings</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>95</th>\n", - " <td>2018-01-06</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>LGA</td>\n", - " <td>SYR</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1810</td>\n", - " <td>1805.0</td>\n", - " <td>0.0</td>\n", - " <td>-5.0</td>\n", - " <td>...</td>\n", - " <td>1825.0</td>\n", - " <td>1907.0</td>\n", - " <td>6.0</td>\n", - " <td>1928</td>\n", - " <td>-15.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1900-1959</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>96</th>\n", - " <td>2018-01-13</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>LGA</td>\n", - " <td>SYR</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1810</td>\n", - " <td>1800.0</td>\n", - " <td>0.0</td>\n", - " <td>-10.0</td>\n", - " <td>...</td>\n", - " <td>1820.0</td>\n", - " <td>1901.0</td>\n", - " <td>6.0</td>\n", - " <td>1929</td>\n", - " <td>-22.0</td>\n", - " <td>0.0</td>\n", - " <td>-2.0</td>\n", - " <td>1900-1959</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>97</th>\n", - " <td>2018-01-14</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>LGA</td>\n", - " <td>SYR</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1810</td>\n", - " <td>1803.0</td>\n", - " <td>0.0</td>\n", - " <td>-7.0</td>\n", - " <td>...</td>\n", - " <td>1832.0</td>\n", - " <td>1913.0</td>\n", - " <td>4.0</td>\n", - " <td>1931</td>\n", - " <td>-14.0</td>\n", - " <td>0.0</td>\n", - " <td>-1.0</td>\n", - " <td>1900-1959</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>98</th>\n", - " <td>2018-01-20</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>LGA</td>\n", - " <td>SYR</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1810</td>\n", - " <td>1758.0</td>\n", - " <td>0.0</td>\n", - " <td>-12.0</td>\n", - " <td>...</td>\n", - " <td>1822.0</td>\n", - " <td>1857.0</td>\n", - " <td>6.0</td>\n", - " <td>1928</td>\n", - " <td>-25.0</td>\n", - " <td>0.0</td>\n", - " <td>-2.0</td>\n", - " <td>1900-1959</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>99</th>\n", - " <td>2018-01-27</td>\n", - " <td>Endeavor Air Inc.</td>\n", - " <td>LGA</td>\n", - " <td>SYR</td>\n", - " <td>False</td>\n", - " <td>False</td>\n", - " <td>1810</td>\n", - " <td>1758.0</td>\n", - " <td>0.0</td>\n", - " <td>-12.0</td>\n", - " <td>...</td>\n", - " <td>1822.0</td>\n", - " <td>1858.0</td>\n", - " <td>5.0</td>\n", - " <td>1929</td>\n", - " <td>-26.0</td>\n", - " <td>0.0</td>\n", - " <td>-2.0</td>\n", - " <td>1900-1959</td>\n", - " <td>1</td>\n", - " <td>0.0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 61 columns</p>\n", - "</div>" - ], - "text/plain": [ - " FlightDate Airline Origin Dest Cancelled Diverted \\\n", - "95 2018-01-06 Endeavor Air Inc. LGA SYR False False \n", - "96 2018-01-13 Endeavor Air Inc. LGA SYR False False \n", - "97 2018-01-14 Endeavor Air Inc. LGA SYR False False \n", - "98 2018-01-20 Endeavor Air Inc. LGA SYR False False \n", - "99 2018-01-27 Endeavor Air Inc. LGA SYR False False \n", - "\n", - " CRSDepTime DepTime DepDelayMinutes DepDelay ... WheelsOff WheelsOn \\\n", - "95 1810 1805.0 0.0 -5.0 ... 1825.0 1907.0 \n", - "96 1810 1800.0 0.0 -10.0 ... 1820.0 1901.0 \n", - "97 1810 1803.0 0.0 -7.0 ... 1832.0 1913.0 \n", - "98 1810 1758.0 0.0 -12.0 ... 1822.0 1857.0 \n", - "99 1810 1758.0 0.0 -12.0 ... 1822.0 1858.0 \n", - "\n", - " TaxiIn CRSArrTime ArrDelay ArrDel15 ArrivalDelayGroups ArrTimeBlk \\\n", - "95 6.0 1928 -15.0 0.0 -1.0 1900-1959 \n", - "96 6.0 1929 -22.0 0.0 -2.0 1900-1959 \n", - "97 4.0 1931 -14.0 0.0 -1.0 1900-1959 \n", - "98 6.0 1928 -25.0 0.0 -2.0 1900-1959 \n", - "99 5.0 1929 -26.0 0.0 -2.0 1900-1959 \n", - "\n", - " DistanceGroup DivAirportLandings \n", - "95 1 0.0 \n", - "96 1 0.0 \n", - "97 1 0.0 \n", - "98 1 0.0 \n", - "99 1 0.0 \n", - "\n", - "[5 rows x 61 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ny_flights_2018.head()" + "# Convert 'FlightDate' into numerical components\n", + "jfk_flights_2018['FlightDate'] = pd.to_datetime(jfk_flights_2018['FlightDate'])\n", + "jfk_flights_2018['Year'] = jfk_flights_2018['FlightDate'].dt.year\n", + "jfk_flights_2018['Month'] = jfk_flights_2018['FlightDate'].dt.month\n", + "jfk_flights_2018['Day'] = jfk_flights_2018['FlightDate'].dt.day\n", + "\n", + "# One-hot encoding for categorical variables\n", + "df_jfk_encoded = pd.get_dummies(jfk_flights_2018, columns=['Airline', 'Dest'])\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Atlanta, GA', 'Albany, GA', 'Mobile, AL', 'Evansville, IN',\n", - " 'Detroit, MI', 'Jacksonville/Camp Lejeune, NC', 'Boston, MA',\n", - " 'Syracuse, NY', 'New Bern/Morehead/Beaufort, NC', 'Montgomery, AL',\n", - " 'New York, NY', 'Minneapolis, MN', 'Cleveland, OH',\n", - " 'Charlotte, NC', 'Fayetteville, NC', 'St. Louis, MO',\n", - " 'Columbus, OH', 'Indianapolis, IN', 'Knoxville, TN', 'Mosinee, WI',\n", - " 'Alexandria, LA', 'Killeen, TX', 'San Antonio, TX', 'Peoria, IL',\n", - " 'Houston, TX', 'Dallas/Fort Worth, TX', 'Richmond, VA',\n", - " 'Portland, ME', 'Kalamazoo, MI', 'Cincinnati, OH',\n", - " 'Bristol/Johnson City/Kingsport, TN', 'Washington, DC',\n", - " 'Asheville, NC', 'Baton Rouge, LA', 'Buffalo, NY',\n", - " 'Grand Rapids, MI', 'Bismarck/Mandan, ND', 'Chattanooga, TN',\n", - " 'Traverse City, MI', 'Fort Smith, AR', 'Burlington, VT',\n", - " 'Kansas City, MO', 'Green Bay, WI', 'Tulsa, OK', 'Valdosta, GA',\n", - " 'Charleston/Dunbar, WV', 'Fayetteville, AR', 'Rochester, NY',\n", - " 'Savannah, GA', 'Roanoke, VA', 'Harrisburg, PA', 'New Orleans, LA',\n", - " 'Nashville, TN', 'Raleigh/Durham, NC', 'Milwaukee, WI',\n", - " 'Gainesville, FL', 'Tampa, FL', 'Greensboro/High Point, NC',\n", - " 'Charlottesville, VA', 'Bangor, ME', 'Grand Forks, ND',\n", - " 'Brunswick, GA', 'Memphis, TN', 'Wilmington, NC',\n", - " 'Bloomington/Normal, IL', 'Norfolk, VA', 'Manchester, NH',\n", - " 'Dayton, OH', 'Greer, SC', 'Lexington, KY', 'Louisville, KY',\n", - " 'Hartford, CT', 'Saginaw/Bay City/Midland, MI', 'Minot, ND',\n", - " 'Columbia, SC', 'Columbus, GA', 'Des Moines, IA', 'Dothan, AL',\n", - " 'Elmira/Corning, NY', 'Pittsburgh, PA', 'Jacksonville, FL',\n", - " 'Chicago, IL', 'Springfield, MO', 'Shreveport, LA',\n", - " 'Huntsville, AL', 'Omaha, NE', 'Tallahassee, FL', 'Madison, WI',\n", - " 'Lansing, MI', 'Baltimore, MD', 'Charleston, SC',\n", - " 'Little Rock, AR', 'Fort Myers, FL', 'Orlando, FL', 'Fargo, ND',\n", - " 'Appleton, WI', 'Akron, OH', 'Harlingen/San Benito, TX',\n", - " 'White Plains, NY', 'Philadelphia, PA', 'Moline, IL',\n", - " 'Aguadilla, PR', 'San Francisco, CA', 'Newark, NJ',\n", - " 'Fort Lauderdale, FL', 'Salt Lake City, UT', 'Ponce, PR',\n", - " 'San Jose, CA', 'Long Beach, CA', 'Las Vegas, NV',\n", - " 'Los Angeles, CA', 'West Palm Beach/Palm Beach, FL',\n", - " 'San Juan, PR', 'Sarasota/Bradenton, FL', 'Austin, TX',\n", - " 'Newburgh/Poughkeepsie, NY', 'Denver, CO', 'Seattle, WA',\n", - " 'Providence, RI', 'Sacramento, CA', 'Charlotte Amalie, VI',\n", - " 'Portland, OR', 'Worcester, MA', 'San Diego, CA', 'Reno, NV',\n", - " 'Albany, NY', 'Phoenix, AZ', 'Oakland, CA', 'Palm Springs, CA',\n", - " 'Christiansted, VI', 'Burbank, CA', 'Daytona Beach, FL',\n", - " 'Albuquerque, NM', 'Brownsville, TX', 'El Paso, TX',\n", - " 'Oklahoma City, OK', 'Gulfport/Biloxi, MS', 'South Bend, IN',\n", - " 'Mission/McAllen/Edinburg, TX', 'Lake Charles, LA',\n", - " 'Lafayette, LA', 'Myrtle Beach, SC', 'Rochester, MN',\n", - " 'Lincoln, NE', 'Birmingham, AL', 'Augusta, GA',\n", - " 'Jackson/Vicksburg, MS', 'Cedar Rapids/Iowa City, IA',\n", - " 'Panama City, FL', 'Valparaiso, FL', 'Key West, FL',\n", - " 'Allentown/Bethlehem/Easton, PA', 'Sanford, FL', 'Billings, MT',\n", - " 'Flint, MI', 'Missoula, MT', 'Idaho Falls, ID', 'Stockton, CA',\n", - " 'Toledo, OH', 'Sioux Falls, SD', 'Ashland, WV', 'Fresno, CA',\n", - " 'Eugene, OR', 'Bellingham, WA', 'Punta Gorda, FL', 'St. Cloud, MN',\n", - " 'St. Petersburg, FL', 'Plattsburgh, NY', 'Ogdensburg, NY',\n", - " 'Concord, NC', 'Springfield, IL', 'Provo, UT',\n", - " 'Montrose/Delta, CO', 'Fort Wayne, IN', 'Portsmouth, NH',\n", - " 'Trenton, NJ', 'Niagara Falls, NY', 'Belleville, IL',\n", - " 'Rockford, IL', 'Ogden, UT', 'Boise, ID', 'Medford, OR',\n", - " 'Great Falls, MT', 'Pasco/Kennewick/Richland, WA', 'Wichita, KS',\n", - " 'Grand Island, NE', 'Monterey, CA', 'Grand Junction, CO',\n", - " 'Laredo, TX', 'Rapid City, SD', 'Kalispell, MT', 'Santa Maria, CA',\n", - " 'Bozeman, MT', 'Hagerstown, MD', 'Clarksburg/Fairmont, WV',\n", - " 'Colorado Springs, CO', 'Owensboro, KY', 'Honolulu, HI',\n", - " 'Kahului, HI', 'Kona, HI', 'Lihue, HI', 'Hilo, HI', 'Columbia, MO',\n", - " 'Casper, WY', 'Scranton/Wilkes-Barre, PA', 'Tucson, AZ',\n", - " 'Bend/Redmond, OR', 'Santa Barbara, CA', 'Aspen, CO',\n", - " 'Ontario, CA', 'Helena, MT', 'Sun Valley/Hailey/Ketchum, ID',\n", - " 'Durango, CO', 'Williston, ND', 'Arcata/Eureka, CA',\n", - " 'San Luis Obispo, CA', 'Mammoth Lakes, CA', 'Spokane, WA',\n", - " 'Jackson, WY', 'Santa Rosa, CA', 'Santa Ana, CA', 'Redding, CA',\n", - " 'Bakersfield, CA', 'Hayden, CO', 'Midland/Odessa, TX',\n", - " 'Ithaca/Cortland, NY', 'Lewiston, ID', 'Pocatello, ID',\n", - " 'Aberdeen, SD', 'Cody, WY', 'Alpena, MI', 'Escanaba, MI',\n", - " 'Pellston, MI', 'Bemidji, MN', 'Brainerd, MN', 'Butte, MT',\n", - " 'Cedar City, UT', 'St. George, UT', 'Sault Ste. Marie, MI',\n", - " 'Marquette, MI', 'Elko, NV', 'Hibbing, MN', 'Binghamton, NY',\n", - " 'Twin Falls, ID', 'Rhinelander, WI', 'International Falls, MN',\n", - " 'Iron Mountain/Kingsfd, MI', 'Dallas, TX', 'Columbus, MS',\n", - " 'Monroe, LA', 'State College, PA', 'Erie, PA', 'La Crosse, WI',\n", - " 'Duluth, MN', 'Muskegon, MI', 'Pueblo, CO', 'Hancock/Houghton, MI',\n", - " 'Paducah, KY', 'Quincy, IL', 'Rock Springs, WY', 'Jamestown, ND',\n", - " 'Devils Lake, ND', 'Laramie, WY', 'Gillette, WY', 'Eau Claire, WI',\n", - " 'Hays, KS', 'Eagle, CO', 'Cape Girardeau, MO',\n", - " 'North Bend/Coos Bay, OR', 'Amarillo, TX', 'Miami, FL',\n", - " 'Islip, NY', 'Pago Pago, TT', 'Hoolehua, HI', 'Lanai, HI',\n", - " 'Kapalua, HI', 'Atlantic City, NJ', 'Latrobe, PA', 'Lubbock, TX',\n", - " 'Pensacola, FL', 'Corpus Christi, TX', 'Melbourne, FL',\n", - " 'Fairbanks, AK', 'Anchorage, AK', 'Newport News/Williamsburg, VA',\n", - " 'Guam, TT', 'Gunnison, CO', 'Bethel, AK', 'Kodiak, AK',\n", - " 'Deadhorse, AK', 'Barrow, AK', 'Ketchikan, AK', 'Juneau, AK',\n", - " 'Sitka, AK', 'Petersburg, AK', 'Wrangell, AK', 'Nome, AK',\n", - " 'Kotzebue, AK', 'Yakutat, AK', 'Cordova, AK', 'Adak Island, AK',\n", - " 'Yakima, WA', 'Santa Fe, NM', 'Champaign/Urbana, IL',\n", - " 'Dickinson, ND', 'Saipan, TT', 'Rota, TT', 'Walla Walla, WA',\n", - " 'Wenatchee, WA', 'Pullman, WA', 'College Station/Bryan, TX',\n", - " 'Hobbs, NM', 'Youngstown/Warren, OH', 'Unalaska, AK',\n", - " 'Scottsbluff, NE', 'Nantucket, MA', \"Martha's Vineyard, MA\",\n", - " 'Branson, MO', 'Tyler, TX', 'San Angelo, TX', 'Wichita Falls, TX',\n", - " 'Beaumont/Port Arthur, TX', 'Lawton/Fort Sill, OK', 'Waco, TX',\n", - " 'Flagstaff, AZ', 'Yuma, AZ', 'Meridian, MS',\n", - " 'Hattiesburg/Laurel, MS', 'Roswell, NM', 'Kearney, NE', 'Moab, UT',\n", - " 'Vernal, UT', 'Lewisburg, WV', 'Staunton, VA', 'Salina, KS',\n", - " 'Liberal, KS', 'North Platte, NE', 'Prescott, AZ', 'Abilene, TX',\n", - " 'Manhattan/Ft. Riley, KS', 'Texarkana, AR', 'Lynchburg, VA',\n", - " 'Greenville, NC', 'New Haven, CT', 'Presque Isle/Houlton, ME',\n", - " 'Hilton Head, SC', 'Longview, TX', 'Waterloo, IA',\n", - " 'Sioux City, IA', 'Dubuque, IA', 'Garden City, KS',\n", - " 'Salisbury, MD', 'Williamsport, PA', 'Florence, SC',\n", - " 'Stillwater, OK', 'Joplin, MO', 'Watertown, NY', 'Cheyenne, WY',\n", - " 'Del Rio, TX', 'Hyannis, MA', 'Dillingham, AK', 'King Salmon, AK',\n", - " 'Gustavus, AK', 'West Yellowstone, MT'], dtype=object)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "cf_2018['DestCityName'].unique()" + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Choose your target variable, e.g., 'DepDelayMinutes'\n", + "X = df_jfk_encoded.drop('DepDelayMinutes', axis=1)\n", + "y = df_jfk_encoded['DepDelayMinutes']\n", + "\n", + "# Splitting the dataset into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "X_train = X_train.apply(pd.to_numeric, errors='coerce')\n", + "y_train = pd.to_numeric(y_train, errors='coerce')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "# Initialize the model\n", + "model = LinearRegression()\n", + "\n", + "# Train the model\n", + "model.fit(X_train, y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + "# Predict on the test set\n", + "y_pred = model.predict(X_test)\n", + "\n", + "# Evaluate the model\n", + "mse = mean_squared_error(y_test, y_pred)\n", + "r2 = r2_score(y_test, y_pred)\n", + "print(\"Mean Squared Error:\", mse)\n", + "print(\"R^2 Score:\", r2)\n", + "\n" ] }, {