Skip to content
Snippets Groups Projects
Commit 8863e134 authored by Fadi Gattoussi, committed by Michael Mutote
Browse files

Add data for 2019

parent f2732caa
No related branches found
No related tags found
No related merge requests found
angie.pkl 0 → 100644
File added
File added
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import json
import math
```
%% Cell type:code id: tags:
``` python
# Load the combined 2018 flight records from the local flight_data directory.
cf_2018 = pd.read_csv('flight_data/Combined_Flights_2018.csv')
# Disabled: read the 2019 file and concatenate it with the 2018 data.
# NOTE(review): if re-enabled, the last line rebinds cf_2018 to the combined
# frame, so every later cell would see both years - confirm that is intended.
# cf_2019 = pd.read_csv('flight_data/Combined_Flights_2019.csv')
# combined_data = pd.concat([cf_2018, cf_2019])
# cf_2018 = combined_data
```
%% Cell type:code id: tags:
``` python
import RegressionModel

# Keep only the rows whose 'Origin' is JFK; the copy makes filtered_df
# independent of cf_2018.
filtered_df = cf_2018.loc[cf_2018['Origin'].eq('JFK')].copy()

# Publish the distinct destination city names on the RegressionModel module.
# They are collected from the full frame (all origins), not only from the
# JFK subset.
RegressionModel.destinations = [*cf_2018['DestCityName'].unique()]
# filtered_df
```
%% Cell type:code id: tags:
``` python
# Columns carried forward into the modelling frame.
relevant_columns = [
    'FlightDate',
    'AirTime',
    'DOT_ID_Operating_Airline',
    'DestAirportID',
    'DepDelayMinutes',
    'ArrDelayMinutes',
]
# Project onto those columns and discard every row with a missing value in
# any of them. The trailing copy gives an independent frame, so later column
# assignments neither touch filtered_df nor trigger chained-assignment
# warnings.
jfk_flights_2018 = (
    filtered_df.loc[:, relevant_columns]
    .dropna(axis='index', how='any')
    .copy()
)
```
%% Cell type:code id: tags:
``` python
# jfk_flights_2018
```
%% Cell type:code id: tags:
``` python
# Derive numeric calendar features from the flight date: parse 'FlightDate'
# into datetimes, then add the month and the day of the month as columns.
# (Extraction of the year is currently disabled.) Rows with any missing
# value are dropped at the end.
jfk_flights_2018 = (
    jfk_flights_2018
    .assign(FlightDate=lambda frame: pd.to_datetime(frame['FlightDate']))
    .assign(
        Month=lambda frame: frame['FlightDate'].dt.month,
        Day=lambda frame: frame['FlightDate'].dt.day,
    )
    .dropna()
)
```
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import train_test_split

# Feature matrix: everything except the two delay columns and the raw
# 'FlightDate' column.
X = jfk_flights_2018.drop(columns=['DepDelayMinutes', 'ArrDelayMinutes', 'FlightDate'])
# Target: the 'ArrDelayMinutes' column.
y = jfk_flights_2018['ArrDelayMinutes']
# Final expression of the cell, so the notebook renders the first feature rows.
X.head()
```
%% Output
AirTime DOT_ID_Operating_Airline DestAirportID Month
5544 336.0 20409 14831 1
5547 182.0 20409 13495 1
5548 124.0 20409 12451 1
5554 50.0 20409 14576 1
5565 54.0 20409 10792 1
%% Cell type:code id: tags:
``` python
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Force every feature column to a numeric dtype; entries that cannot be
# parsed become NaN (errors='coerce').
X_train = X_train.apply(pd.to_numeric, errors='coerce')
X_test = X_test.apply(pd.to_numeric, errors='coerce')

# The targets are Series, so convert each in a single vectorised call rather
# than routing every element through Series.apply.
y_train = pd.to_numeric(y_train, errors='coerce')
y_test = pd.to_numeric(y_test, errors='coerce')
```
%% Cell type:code id: tags:
``` python
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
# Initialize the two candidate regressors: a linear regression and a
# 100-tree random forest with a fixed seed
model = LinearRegression()
model2 = RandomForestRegressor(n_estimators=100,random_state=42)
# Train both models on the same training split
model.fit(X_train, y_train)
# Last expression of the cell, so the notebook displays the fitted forest
model2.fit(X_train, y_train)
```
%% Output
RandomForestRegressor(random_state=42)
%% Cell type:code id: tags:
``` python
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out test set with both fitted models
# (model: LinearRegression, model2: RandomForestRegressor).
y_pred = model.predict(X_test)
y_pred2 = model2.predict(X_test)

# Score each model against the true test targets.
mse = mean_squared_error(y_test, y_pred)
mse2 = mean_squared_error(y_test, y_pred2)
r2 = r2_score(y_test, y_pred)
r22 = r2_score(y_test, y_pred2)

# Name the model on every line so the two sets of metrics cannot be confused.
print("Linear Regression - Mean Squared Error:", mse)
print("Random Forest - Mean Squared Error:", mse2)
print("Linear Regression - R^2 Score:", r2)
print("Random Forest - R^2 Score:", r22)
```
%% Output
Mean Squared Error: 2100.674655688025
MSE 2 2540.4206764113023
R^2 Score: 0.0025435901788870563
R^2 Score: -0.20625956069270712
%% Cell type:code id: tags:
``` python
from joblib import dump

# Persist the fitted `model` with joblib. The path is relative to the working
# directory so the notebook runs on any machine, and it is the same
# 'fadi.joblib' the model is later loaded from.
dump(model, 'fadi.joblib')
```
%% Output
['C:/Users/s2080/PycharmProjects/ws-23-sas-02/fadi.joblib']
%% Cell type:code id: tags:
``` python
from joblib import load
# Restore the estimator stored in 'fadi.joblib' (resolved against the current
# working directory) and rebind `model` to it.
# NOTE: joblib.load unpickles its input - only load files from a trusted source.
model = load('fadi.joblib')
```
%% Cell type:code id: tags:
``` python
# Snapshot the prepared JFK frame as a pickle. The relative path avoids a
# machine-specific location and is the same 'angie.pkl' that is read back
# later in the notebook.
jfk_flights_2018.to_pickle('angie.pkl')
```
%% Cell type:code id: tags:
``` python
# Reload the frame from 'angie.pkl' in the current working directory and
# rebind jfk_flights_2018 to it.
# NOTE: read_pickle deserializes pickle data - only use it on trusted files.
jfk_flights_2018 = pd.read_pickle("angie.pkl")
```
%% Cell type:code id: tags:
``` python
# jfk_unpickle.head()
```
%% Cell type:code id: tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment