{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# JFK departure-delay regression (2018)\n",
    "\n",
    "Fits a linear regression that predicts `DepDelayMinutes` for flights departing JFK, using `flight_data/Combined_Flights_2018.csv`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports and configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import math\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import RegressionModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives in this cell.\n",
    "DATA_PATH = 'flight_data/Combined_Flights_2018.csv'\n",
    "ORIGIN_AIRPORT = 'JFK'\n",
    "TARGET = 'DepDelayMinutes'\n",
    "TEST_SIZE = 0.2\n",
    "SEED = 42\n",
    "\n",
    "# Columns carried into the modelling frame.\n",
    "relevant_columns = ['FlightDate', 'Airline', 'Dest', 'DepDelayMinutes', 'ArrDelayMinutes']\n",
    "\n",
    "# Columns read from disk: the modelling columns plus the two that are only\n",
    "# needed for filtering by origin and for RegressionModel.destinations.\n",
    "LOAD_COLUMNS = relevant_columns + ['Origin', 'DestCityName']\n",
    "\n",
    "# ArrDelayMinutes is observed only after the flight has departed, so using it\n",
    "# to predict DepDelayMinutes leaks the outcome into the features.\n",
    "# Empty this list to include it as a feature again.\n",
    "LEAKY_COLUMNS = ['ArrDelayMinutes']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading every column of the full-year CSV previously crashed the kernel\n",
    "# (presumably out of memory - TODO confirm), so only the columns used below\n",
    "# are read.\n",
    "cf_2018 = pd.read_csv(DATA_PATH, usecols=LOAD_COLUMNS)\n",
    "cf_2018.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select flights departing from the origin airport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the flights whose origin is ORIGIN_AIRPORT (JFK).\n",
    "# .copy() yields an independent frame, so later column assignments cannot\n",
    "# raise SettingWithCopyWarning or write into a view of cf_2018.\n",
    "filtered_df = cf_2018[cf_2018['Origin'] == ORIGIN_AIRPORT].copy()\n",
    "\n",
    "# Publish every destination city found in the full dataset (all origins)\n",
    "# on the RegressionModel module.\n",
    "RegressionModel.destinations = list(cf_2018['DestCityName'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "jfk_flights_2018 = filtered_df[relevant_columns].copy()\n",
    "jfk_flights_2018.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert 'FlightDate' into numerical components. A new frame is built with\n",
    "# .assign instead of mutating jfk_flights_2018, so this cell can be re-run safely.\n",
    "flight_dates = pd.to_datetime(jfk_flights_2018['FlightDate'])\n",
    "jfk_dated = jfk_flights_2018.assign(\n",
    "    FlightDate=flight_dates,\n",
    "    Year=flight_dates.dt.year,\n",
    "    Month=flight_dates.dt.month,\n",
    "    Day=flight_dates.dt.day,\n",
    ")\n",
    "\n",
    "# One-hot encoding for categorical variables\n",
    "df_jfk_encoded = pd.get_dummies(jfk_dated, columns=['Airline', 'Dest'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the feature matrix. The raw FlightDate timestamp is dropped because\n",
    "# Year/Month/Day already encode it and LinearRegression cannot consume\n",
    "# datetime values; LEAKY_COLUMNS are dropped to avoid target leakage.\n",
    "excluded_columns = [TARGET, 'FlightDate'] + LEAKY_COLUMNS\n",
    "features = df_jfk_encoded.drop(columns=excluded_columns).apply(pd.to_numeric, errors='coerce')\n",
    "target = pd.to_numeric(df_jfk_encoded[TARGET], errors='coerce')\n",
    "\n",
    "# Coercion turns unparseable or missing values into NaN, which both\n",
    "# LinearRegression and the sklearn metrics reject. Rows are filtered BEFORE\n",
    "# the split so the train and test sets receive identical preprocessing.\n",
    "complete_rows = features.notna().all(axis=1) & target.notna()\n",
    "X = features[complete_rows]\n",
    "y = target[complete_rows]\n",
    "\n",
    "assert len(X) == len(y)\n",
    "assert not X.empty, 'no complete rows left for the selected origin airport'\n",
    "\n",
    "# Splitting the dataset into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the model\n",
    "model = LinearRegression()\n",
    "\n",
    "# Train the model\n",
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on the held-out test set\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluate the model\n",
    "mse = mean_squared_error(y_test, y_pred)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "print(\"Mean Squared Error:\", mse)\n",
    "print(\"R^2 Score:\", r2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sas2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}