"""Laptop price prediction desktop application.

A PyQt6 window lets the user import a CSV of laptops, enter a target
specification, and get a price predicted by a random-forest regressor together
with evaluation metrics and a few exploratory plots.

The imported CSV must contain the columns ``Inches``, ``cpu_speed`` and
``Weight_kg``.  Training additionally reads ``Price``, ``ipspanel``,
``retinadisplay``, ``hdd`` and ``ssd``, plus either the raw categorical
columns ``gpu_brand`` / ``OpSys`` / ``Ram`` or their one-hot encoded
counterparts listed in ``FEATURE_COLUMNS``.
"""

import io
import sys
import numpy as np

# NOTE: PyQt6 is imported before the matplotlib Qt backend on purpose so that
# matplotlib picks up the Qt binding that is already loaded.
from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QLabel,
    QVBoxLayout,
    QPushButton,
    QFileDialog,
    QWidget,
    QFormLayout,
    QLineEdit,
    QComboBox,
    QTextBrowser,
    QSlider,
    QCheckBox,
)
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QAction, QKeySequence
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import timeit

start_time = timeit.default_timer()

# Columns standardized with the StandardScaler fitted in ``import_data``.
# The order matters: it must be the same at fit time and at transform time.
NUMERICAL_COLUMNS = ["Inches", "cpu_speed", "Weight_kg"]

# Raw categorical columns that are one-hot encoded when present in the CSV.
CATEGORICAL_COLUMNS = ["gpu_brand", "OpSys", "Ram"]

# Model inputs, in the exact order the regressor is trained on.
FEATURE_COLUMNS = [
    "Inches",
    "cpu_speed",
    "gpu_brand_AMD",
    "gpu_brand_Nvidia",
    "gpu_brand_Intel",
    "OpSys_windows",
    "OpSys_macos",
    "OpSys_linux",
    "ipspanel",
    "retinadisplay",
    "hdd",
    "ssd",
    "Weight_kg",
    "Ram_4",
    "Ram_8",
    "Ram_16",
    "Ram_32",
    "Ram_64",
]

# The subset of FEATURE_COLUMNS produced by one-hot encoding.
ONE_HOT_COLUMNS = [
    column
    for column in FEATURE_COLUMNS
    if column.startswith(tuple(f"{prefix}_" for prefix in CATEGORICAL_COLUMNS))
]

TARGET_COLUMN = "Price"


class LaptopPricePredictionApp(QMainWindow):
    """Main window: specification form, prediction button, metrics and plots."""

    def __init__(self):
        """Build the menu, the specification form and the output widgets."""
        super().__init__()

        self.setWindowTitle("Laptop Recommendation App")
        self.setGeometry(100, 100, 800, 600)

        self.central_widget = QWidget(self)
        self.setCentralWidget(self.central_widget)

        # NOTE: this attribute shadows QWidget.layout(); the name is kept
        # because other code may already refer to ``self.layout``.
        self.layout = QVBoxLayout(self.central_widget)

        # Add a toolbar
        self.toolbar = self.addToolBar("Toolbar")

        # Create a File menu and add actions
        file_menu = self.menuBar().addMenu("File")

        import_action = QAction("Import Data", self)
        import_action.triggered.connect(self.import_data)
        import_action.setShortcut(QKeySequence.StandardKey.Open)
        file_menu.addAction(import_action)

        show_info_action = QAction("Show Data Info", self)
        show_info_action.triggered.connect(self.show_data_info)
        file_menu.addAction(show_info_action)

        # Feature input layout
        self.feature_layout = QFormLayout()

        self.Company_input = QLineEdit(self)
        self.feature_layout.addRow("Company:", self.Company_input)

        # Screen size: the slider stores tenths of an inch (130..180 -> 13.0..18.0).
        self.screen_inches_slider = self._make_slider(
            130, 180, 150, self.update_screen_inches_label
        )
        self.screen_inches_label = QLabel(
            f"Screen Inches: {self.screen_inches_slider.value() / 10}", self
        )
        self.feature_layout.addRow(self.screen_inches_label, self.screen_inches_slider)

        # Display checkboxes
        self.ipspanel_checkbox = QCheckBox("IPS Panel", self)
        self.feature_layout.addRow(self.ipspanel_checkbox)

        self.retinadisplay_checkbox = QCheckBox("Retina Display", self)
        self.feature_layout.addRow(self.retinadisplay_checkbox)

        # CPU brand
        self.cpu_brand_combobox = QComboBox(self)
        self.cpu_brand_combobox.addItems(["", "Intel", "AMD"])
        self.feature_layout.addRow("CPU Brand:", self.cpu_brand_combobox)

        # CPU speed: the slider stores tenths (10..30 -> 1.0..3.0).
        self.cpu_speed_slider = self._make_slider(
            10, 30, 15, self.update_cpu_speed_label
        )
        self.cpu_speed_label = QLabel(
            f"CPU Speed: {self.cpu_speed_slider.value() / 10}", self
        )
        self.feature_layout.addRow(self.cpu_speed_label, self.cpu_speed_slider)

        # RAM size
        self.ram_size_combobox = QComboBox(self)
        self.ram_size_combobox.addItems(["4GB", "8GB", "16GB", "32GB", "64GB"])
        self.feature_layout.addRow("Ram Size:", self.ram_size_combobox)

        # Storage checkboxes
        self.hdd_checkbox = QCheckBox("HDD", self)
        self.hdd_checkbox.stateChanged.connect(self.hdd_checkbox_state_changed)
        self.feature_layout.addRow(self.hdd_checkbox)

        self.ssd_checkbox = QCheckBox("SSD", self)
        self.ssd_checkbox.stateChanged.connect(self.ssd_checkbox_state_changed)
        self.feature_layout.addRow(self.ssd_checkbox)

        # Storage size (shown in the form; not one of the model's features)
        self.ssd_size_combobox = QComboBox(self)
        self.ssd_size_combobox.addItems(["128GB", "256GB", "512GB", "1TB", "2TB"])
        self.ssd_size_combobox.setEnabled(True)
        self.feature_layout.addRow("Storage Size:", self.ssd_size_combobox)

        # GPU brand
        self.gpu_brand_combobox = QComboBox(self)
        self.gpu_brand_combobox.addItems(["", "Nvidia", "AMD", "Intel"])
        self.feature_layout.addRow("GPU Brand:", self.gpu_brand_combobox)

        # Operating system
        self.OpSys_combobox = QComboBox(self)
        self.OpSys_combobox.addItems(["windows", "macos", "linux"])
        self.feature_layout.addRow("Operating System:", self.OpSys_combobox)

        # Weight: the slider stores tenths of a kilogram (1..50 -> 0.1..5.0 kg).
        self.weight_slider = self._make_slider(1, 50, 25, self.update_weight_label)
        self.weight_label = QLabel(
            f"Laptop Weight: {self.weight_slider.value() / 10} kg", self
        )
        self.feature_layout.addRow(self.weight_label, self.weight_slider)

        self.layout.addLayout(self.feature_layout)

        # Get Recommendation button
        self.predict_button = QPushButton("Get Recommendation", self)
        self.predict_button.clicked.connect(self.predict_recommendation)
        self.layout.addWidget(self.predict_button)

        # Metrics label
        self.metric_label = QLabel("Metrics:", self)
        self.layout.addWidget(self.metric_label)

        # Text browser used for data info and import errors
        self.text_browser = QTextBrowser(self)
        self.layout.addWidget(self.text_browser)

        # Embedded scatter plot
        self.canvas = RecommendationCanvas(self)
        self.layout.addWidget(self.canvas)

        # Data / model state, populated by import_data and train_model
        self.df = None
        self.model = None
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.scaler = None
        self.X_train_columns = None
        # True once self.df has been one-hot encoded and standardized, so the
        # transformation is never applied twice to the same data.
        self._is_preprocessed = False

    def _make_slider(self, minimum, maximum, value, on_change):
        """Return a horizontal slider whose valueChanged is wired to *on_change*.

        The initial value is set before connecting the signal so *on_change*
        is not invoked while the matching label does not exist yet.
        """
        slider = QSlider()
        slider.setOrientation(Qt.Orientation.Horizontal)
        slider.setRange(minimum, maximum)
        slider.setValue(value)
        slider.valueChanged.connect(on_change)
        return slider

    def update_cpu_speed_label(self, value):
        """Show the CPU speed slider position (tenths) in its label."""
        self.cpu_speed_label.setText(f"CPU Speed: {value / 10}")

    def update_screen_inches_label(self, value):
        """Show the screen size slider position (tenths of an inch) in its label."""
        self.screen_inches_label.setText(f"Screen Inches: {value / 10}")

    def update_weight_label(self, value):
        """Show the weight slider position (tenths of a kilogram) in its label."""
        self.weight_label.setText(f"Laptop Weight: {value / 10} kg")

    def import_data(self):
        """Ask for a CSV file, load it and fit the scaler on the numerical columns.

        State is only replaced when loading and fitting both succeed; on
        failure the reason is written to the text browser and any previously
        imported data stays in place.  A successful import discards the
        previously trained model.
        """
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Open CSV File", "", "CSV Files (*.csv)"
        )
        if not file_path:
            return

        try:
            data = pd.read_csv(file_path)
        except (OSError, ValueError) as exc:
            # pandas parser / empty-data / decoding errors derive from ValueError.
            self.text_browser.setPlainText(f"Could not read '{file_path}': {exc}")
            return

        missing = [col for col in NUMERICAL_COLUMNS if col not in data.columns]
        if missing:
            self.text_browser.setPlainText(
                "The selected file is missing required columns: " + ", ".join(missing)
            )
            return

        scaler = StandardScaler()
        try:
            scaler.fit(data[NUMERICAL_COLUMNS])
        except (TypeError, ValueError) as exc:
            self.text_browser.setPlainText(
                f"Could not scale the numerical columns: {exc}"
            )
            return

        self.df = data
        self.scaler = scaler
        # The old model belongs to the old data set.
        self.model = None
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.X_train_columns = None
        self._is_preprocessed = False

    def preprocess_data(self):
        """One-hot encode and standardize ``self.df`` in place, at most once.

        Does nothing when no data/scaler is loaded or when the current data
        has already been preprocessed; repeating the transformation would
        standardize already standardized columns.
        """
        if self.scaler is None or self.df is None:
            return
        if getattr(self, "_is_preprocessed", False):
            return

        present = [col for col in CATEGORICAL_COLUMNS if col in self.df.columns]
        if present:
            # dtype=int keeps the indicator columns numeric on every pandas
            # version (the default became bool in pandas 2).
            data = pd.get_dummies(
                self.df, columns=present, prefix=present, dtype=int
            )
        else:
            data = self.df.copy()

        # A category absent from this data set yields no indicator column;
        # add it as all zeros so the feature layout is always the same.
        for column in ONE_HOT_COLUMNS:
            if column not in data.columns:
                data[column] = 0

        data[NUMERICAL_COLUMNS] = self.scaler.transform(data[NUMERICAL_COLUMNS])

        self.df = data
        self._is_preprocessed = True

    def train_model(self):
        """Fit a random-forest regressor on 80% of the preprocessed data.

        The remaining 20% is kept in ``X_test`` / ``y_test`` for the metric
        methods.  Attributes are only updated after a successful fit.

        Raises:
            KeyError: a feature or the target column is missing from the data.
            ValueError: the data cannot be converted to floats, split or fitted.
        """
        if self.df is None:
            return

        X = self.df[FEATURE_COLUMNS].to_numpy(dtype=float)
        y = self.df[TARGET_COLUMN].to_numpy()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        model = RandomForestRegressor(
            n_estimators=100,
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            random_state=42,
        )
        # Fit with named columns so later predictions on DataFrames are
        # validated against the same feature names.
        model.fit(pd.DataFrame(X_train, columns=FEATURE_COLUMNS), y_train)

        self.X_train, self.y_train = X_train, y_train
        self.X_test, self.y_test = X_test, y_test
        self.X_train_columns = list(FEATURE_COLUMNS)
        self.model = model

    def predict_recommendation(self):
        """Predict a price for the specification entered in the form.

        Trains the model on first use, scales the user's input with the same
        scaler as the training data, shows the prediction and metrics in the
        metric label and refreshes the plots.
        """
        if self.df is None or self.scaler is None:
            self.metric_label.setText(
                "Please import a dataset first (File > Import Data)."
            )
            return

        started = timeit.default_timer()

        try:
            self.preprocess_data()
            if self.model is None:
                self.train_model()
        except (KeyError, ValueError) as exc:
            self.metric_label.setText(f"Could not train the model: {exc}")
            return

        # Sliders store tenths; divide by 10 to get the real values.
        screen_inches = self.screen_inches_slider.value() / 10
        cpu_speed = self.cpu_speed_slider.value() / 10
        weight = self.weight_slider.value() / 10
        gpu_brand = self.gpu_brand_combobox.currentText()
        ram_size = int(self.ram_size_combobox.currentText().replace("GB", ""))
        op_sys = self.OpSys_combobox.currentText()

        row = dict.fromkeys(self.X_train_columns, 0)
        row.update(
            {
                "Inches": screen_inches,
                "cpu_speed": cpu_speed,
                "Weight_kg": weight,
                "ipspanel": int(self.ipspanel_checkbox.isChecked()),
                "retinadisplay": int(self.retinadisplay_checkbox.isChecked()),
                "hdd": int(self.hdd_checkbox.isChecked()),
                "ssd": int(self.ssd_checkbox.isChecked()),
            }
        )
        # Switch on the indicator of each selected category; selections with
        # no matching feature (e.g. an empty GPU brand) are left at zero.
        for column in (
            f"gpu_brand_{gpu_brand}",
            f"OpSys_{op_sys.lower()}",
            f"Ram_{ram_size}",
        ):
            if column in row:
                row[column] = 1

        input_data = pd.DataFrame([row], columns=self.X_train_columns, dtype=float)
        # The model was trained on standardized values, so the query must be
        # standardized with the very same scaler.
        input_data[NUMERICAL_COLUMNS] = self.scaler.transform(
            input_data[NUMERICAL_COLUMNS]
        )

        prediction = self.model.predict(input_data)
        elapsed_time = timeit.default_timer() - started

        closest_match = self.find_closest_match(input_data, self.X_train_columns)

        if closest_match is not None:
            self.metric_label.setText(
                f"Predicted Price: ₹{prediction[0]:,.2f} | R-squared: {self.calculate_r_squared()} | MAE: {self.calculate_mae()} | MSE: {self.calculate_mse()} | RMSE: {self.calculate_rmse()} | Elapsed Time: {elapsed_time:.4f} seconds"
            )
            self.canvas.plot_scatter(self.df, closest_match, self.model)
            self.plot_histogram()
            self.plot_pairplot()
        else:
            self.metric_label.setText("No matching specifications found.")

    def find_closest_match(self, input_data, features):
        """Return the row of ``self.df`` nearest to *input_data*.

        Nearness is the squared Euclidean distance over *features*.

        Args:
            input_data: single-row data convertible to a DataFrame.
            features: column names used for the distance.

        Returns:
            A one-row DataFrame restricted to ``self.X_train_columns``, or
            ``None`` when there is no data to search.
        """
        if self.df is None or self.df.empty:
            return None

        columns = list(features)
        query = pd.DataFrame(input_data, columns=columns).to_numpy(dtype=float)
        candidates = self.df[columns].to_numpy(dtype=float)

        distances = np.sum((candidates - query) ** 2, axis=1)
        # argmin is a position, not an index label, hence iloc.
        position = int(np.argmin(distances))

        return self.df.iloc[[position]][self.X_train_columns]

    def _evaluation_predictions(self):
        """Return ``(y_true, y_pred)`` for the metric methods.

        Uses the held-out test split when there is one and falls back to the
        training split otherwise.
        """
        X_eval = getattr(self, "X_test", None)
        y_eval = getattr(self, "y_test", None)
        if X_eval is None or y_eval is None or len(y_eval) == 0:
            X_eval, y_eval = self.X_train, self.y_train

        frame = pd.DataFrame(X_eval, columns=self.X_train_columns)
        return np.asarray(y_eval), self.model.predict(frame)

    def calculate_mse(self):
        """Return the mean squared error, rounded to 2 decimals."""
        y_true, y_pred = self._evaluation_predictions()
        return round(float(mean_squared_error(y_true, y_pred)), 2)

    def calculate_rmse(self):
        """Return the root mean squared error, rounded to 2 decimals."""
        y_true, y_pred = self._evaluation_predictions()
        return round(float(np.sqrt(mean_squared_error(y_true, y_pred))), 2)

    def calculate_r_squared(self):
        """Return the R-squared score, rounded to 4 decimals."""
        y_true, y_pred = self._evaluation_predictions()
        return round(float(r2_score(y_true, y_pred)), 4)

    def calculate_mae(self):
        """Return the mean absolute error, rounded to 2 decimals."""
        y_true, y_pred = self._evaluation_predictions()
        return round(float(mean_absolute_error(y_true, y_pred)), 2)

    def show_data_info(self):
        """Write summary, description and correlations of the data to the text browser."""
        if self.df is None:
            return

        # DataFrame.info() prints and returns None, so capture it in a buffer.
        buffer = io.StringIO()
        self.df.info(buf=buffer)

        numeric_columns = self.df.select_dtypes(include=["number"]).columns
        corr_matrix = self.df[numeric_columns].corr()

        info_text = f"Data Info:\n{buffer.getvalue()}\n\nData Description:\n{self.df.describe()}\n\nCorrelation Matrix:\n{corr_matrix}"
        self.text_browser.setPlainText(info_text)

    def plot_histogram(self):
        """Open a separate window with the distribution of the ``Price`` column."""
        plt.figure(figsize=(8, 6))
        sns.histplot(self.df["Price"], kde=True)
        plt.title("Distribution of Laptop Prices")
        plt.xlabel("Price")
        plt.ylabel("Frequency")
        # The Qt event loop is already running; do not start a nested one.
        plt.show(block=False)

    def plot_pairplot(self):
        """Open a separate window with a pair plot of selected features."""
        ram_columns = [f"Ram_{size}" for size in ["4", "8", "16", "32", "64"]]
        features_to_plot = (
            ["Inches", "cpu_speed"] + ram_columns + ["Weight_kg", "Price"]
        )
        sns.pairplot(self.df[features_to_plot])
        plt.suptitle("Pair Plot of Selected Features")
        plt.show(block=False)

    def ssd_checkbox_state_changed(self, state):
        """Keep the storage size combo box enabled regardless of *state*."""
        self.ssd_size_combobox.setDisabled(False)

    def hdd_checkbox_state_changed(self, state):
        """Keep the storage size combo box enabled regardless of *state*."""
        self.ssd_size_combobox.setDisabled(False)


class RecommendationCanvas(FigureCanvas):
    """Matplotlib canvas with a single axes, embeddable in a Qt layout."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure and axes and attach the canvas to *parent*.

        Args:
            parent: Qt parent widget, or ``None``.
            width: figure width in inches.
            height: figure height in inches.
            dpi: figure resolution.
        """
        # A plain Figure (not plt.subplots) keeps the embedded figure out of
        # pyplot's registry, so plt.show() elsewhere does not open it as an
        # extra stand-alone window.
        self.fig = Figure(figsize=(width, height), dpi=dpi)
        self.ax = self.fig.add_subplot(111)
        super().__init__(self.fig)
        self.setParent(parent)

        self.scatter_training = None
        self.scatter_recommendation = None
        self.hist_plot = None
        self.pair_plot = None

    def plot_scatter(self, df, input_data, model):
        """Plot price against CPU speed for *df* and highlight *input_data*.

        Args:
            df: DataFrame with ``cpu_speed`` and ``Price`` columns.
            input_data: feature rows to highlight; must have a ``cpu_speed``
                column and be accepted by ``model.predict``.
            model: fitted estimator used to price *input_data*.
        """
        # Drop the artists of the previous call instead of stacking new ones.
        for artist in (self.scatter_training, self.scatter_recommendation):
            if artist is not None:
                artist.remove()

        # Both series use the values as stored in their frames so that they
        # share one x scale.
        self.scatter_training = self.ax.scatter(
            df["cpu_speed"], df["Price"], label="Training Data", color="blue"
        )
        self.scatter_recommendation = self.ax.scatter(
            input_data["cpu_speed"],
            model.predict(input_data),
            label="Recommended Laptop",
            color="red",
        )

        self.ax.set_xlabel("CPU Speed")
        self.ax.set_ylabel("Price")
        self.ax.legend()

        self.draw()

    def plot_histogram(self, data):
        """Replace the axes content with a histogram (and KDE) of *data*."""
        # histplot returns the Axes it drew on; removing that object would
        # detach the canvas' only axes, so clear the axes instead.
        self.ax.clear()
        # Clearing also discarded the scatter artists; forget them so
        # plot_scatter does not try to remove them again.
        self.scatter_training = None
        self.scatter_recommendation = None

        self.hist_plot = sns.histplot(data, kde=True, ax=self.ax)
        self.ax.set_title("Distribution of Laptop Prices")
        self.ax.set_xlabel("Price")
        self.ax.set_ylabel("Frequency")

        self.draw()

    def plot_pairplot(self, data):
        """Build a seaborn pair plot of *data*, closing the previous one.

        The pair grid lives in its own figure (kept in ``self.pair_plot``),
        not on this canvas' axes.
        """
        if self.pair_plot is not None:
            plt.close(self.pair_plot.fig)

        self.pair_plot = sns.pairplot(data)
        self.pair_plot.fig.suptitle("Pair Plot of Selected Features")

        self.draw()


# Time spent importing dependencies and defining the classes above.
elapsed_time = timeit.default_timer() - start_time
print(f"Elapsed Time: {elapsed_time} seconds")

if __name__ == "__main__":
    app = QApplication(sys.argv)
    main_window = LaptopPricePredictionApp()
    main_window.show()
    sys.exit(app.exec())