diff --git a/.DS_Store b/.DS_Store
index 7af4a461d14c25e88125e7b19c76e2734d9a2105..dfc5dbcfa01d087fb54b1a339b8f597409dba961 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/README.md b/README.md
index 96acca8f9ef2ad6e52220584ef2187be35b803c3..0ac28333c50b16b0317b95744484b7a30e98a72b 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@ Khan, Asif, 22300224
 
 Netflix Content Analysis
 
-MyGit Repositoty : https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/tree/main
+https://mygit.th-deg.de/assistance_systems
 
-MyGit Wiki : https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/wikis/home
+https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/wikis/home
 
 # Project Description
 
@@ -36,12 +36,7 @@ Cast and Directors
 
 After downloading the project files in a project folder, do the following steps:
 
-Prerequisite: 
-To install all the required libraries for this project, run the following command in your terminal:
-
-pip install -r requirements.txt
-
-The rasa model is trained! (with `rasa train`)
+Prerequisite: The rasa model is trained! (with `rasa train`)
 
 1. `rasa run actions`
 2. `rasa run`
@@ -67,18 +62,7 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 9. Data is loaded in the main.py
 
-10. see pages : 
-    about_me.py
-    add_and_apply_model.py
-    algorithm_selection.py
-    augmentation.py
-    chatbot.py
-    data_metrics.py
-    feature_engineering.py
-    model_application.py
-    model_training.py
-    preprocessing.py
-    visualization.py
+10. see pages/01_Data_*.py files
 
 11 - 13. See data chapter in the Wiki
 
@@ -86,7 +70,7 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 15. Input widgets are mainly in 05_Model_training.py
 
-16. Scikit-Learn Logistic regression and Random Forest are used
+16. Scikit-Learn Linear regression and Lasso are used
 
 17. See 'right-fit' chapter in Wiki
 
@@ -98,7 +82,9 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 21. rasa implementation with files:
 
-    domain.yml, data/nlu.yml, data/stories.yml, actions/actions.py
+domain.yml, data/nlu.yml, data/stories.yml, actions/actions.py
+
+22. tbd
 
 # Work done
 
diff --git a/imgs/.DS_Store b/imgs/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..adb52307ec01241120a1b48183eb548237113e94
Binary files /dev/null and b/imgs/.DS_Store differ
diff --git a/imgs/logo.png b/imgs/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e0641e22134e44c2bcdd1877e096ae928c290e2
Binary files /dev/null and b/imgs/logo.png differ
diff --git a/imgs/logo2.png b/imgs/logo2.png
new file mode 100644
index 0000000000000000000000000000000000000000..73eeab3ab1690ec4b0d05f19038ed0c6ac5d7abf
Binary files /dev/null and b/imgs/logo2.png differ
diff --git a/main.py b/main.py
index 35e765c2d4f132370e25157e2ee2f5c3ae3d969d..a2dc8566b5677ec8c23ab210fc2f30ca94c34aa9 100644
--- a/main.py
+++ b/main.py
@@ -2,89 +2,99 @@ from ast import main
 from flask import Flask, json, logging
 from matplotlib.pylab import f
 import streamlit as st
-
 import logging
-import streamlit as st
 
-# Reset logging configuration
+# Reset logging configuration to avoid conflicts from previous handlers
 for handler in logging.root.handlers[:]:
     logging.root.removeHandler(handler)
 
-# logging.basicConfig(
-#     level=logging.INFO
-# )
-
-
-# Define each page as an instance of st.Page
+# Flask app initialization (can be used for server-side integration)
 app = Flask(__name__)
 
+# ------------------------
+# Define Streamlit Pages
+# ------------------------
 
+# Define the "About Me" page
 about_page = st.Page(
-    page = "pages/about_me.py",
-    title = "About Me",
-    default = True,
+    page="pages/about_me.py",  # Path to the Python file for this page
+    title="About Me",  # Title displayed on the navigation
+    default=True,  # Set as the default landing page
 )
 
+# Define the "Preprocessing" page
 preprocessing_page = st.Page(
     page="pages/preprocessing.py",
-    title="Preprocessing",
-    icon = ":material/bar_chart:",
+    title="Preprocessing",  # Title for the preprocessing page
+    icon=":material/bar_chart:",  # Icon displayed next to the page title
 )
 
+# Define the "Data Augmentation" page
 augmentation_page = st.Page(
     page="pages/augmentation.py",
-    title="Data Augmentation",
-    icon = ":material/bar_chart:",
+    title="Data Augmentation",  # Title for the augmentation page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Feature Engineering" page
 feature_engineering_page = st.Page(
     page="pages/feature_engineering.py",
-    title="Feature Engineering",
-    icon = ":material/bar_chart:",
+    title="Feature Engineering",  # Title for the feature engineering page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Data Metrics" page
 data_metrics_page = st.Page(
     page="pages/data_metrics.py",
-    title="Data Metrics",
-    icon = ":material/bar_chart:",
+    title="Data Metrics",  # Title for the data metrics page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Algorithm Selection" page
 algorithm_selection_page = st.Page(
     page="pages/algorithm_selection.py",
-    title="Algorithm Selection",
-    icon = ":material/bar_chart:",
-
+    title="Algorithm Selection",  # Title for selecting ML algorithms
+    icon=":material/bar_chart:",
 )
 
+# Define the "Model Training" page
 model_training_page = st.Page(
     page="pages/model_training.py",
-    title="Model Training",
-    icon = ":material/bar_chart:",
+    title="Model Training",  # Title for the model training page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Add and Apply Model" page
 add_and_apply_model_page = st.Page(
     page="pages/add_and_apply_model.py",
-    title="Add Augmented Data and Apply Selected Model",
-    icon = ":material/bar_chart:",
+    title="Model Training with Augmented Data",  # Title for combining and applying models
+    icon=":material/bar_chart:",
 )
 
+# Define the "Model Application" page
 model_application_page = st.Page(
     page="pages/model_application.py",
-    title="Model Application",
-    icon = ":material/bar_chart:",
+    title="Model Application",  # Title for using the trained model
+    icon=":material/bar_chart:",
 )
 
+# Define the "Chat Bot" page
 chatbot_page = st.Page(
     page="pages/chatbot.py",
-    title="Chat Bot",
-    icon = ":material/bar_chart:",
+    title="Chat Bot",  # Title for interacting with the chatbot
+    icon=":material/bar_chart:",
 )
 
-# Navigation configuration
+# ------------------------
+# Navigation Configuration
+# ------------------------
+
+# Organize pages into navigation groups
 pg = st.navigation(
     {
-        "Info": [about_page],
-        "Projects": [preprocessing_page,
+        "Info": [about_page],  # "Info" category with the About Me page
+        "Projects": [          # "Projects" category with ML pipeline pages
+            preprocessing_page,
             augmentation_page,
             feature_engineering_page,
             data_metrics_page,
@@ -92,38 +102,38 @@ pg = st.navigation(
             model_training_page,
             add_and_apply_model_page,
             model_application_page,
-            chatbot_page],
+            chatbot_page,
+        ],
     }
 )
 
-st.sidebar.text("ALL ABOUT NETFLIX")
+# Add a simple title to the sidebar
+st.sidebar.text("ALL ABOUT ")
+st.sidebar.image("imgs/logo.png", width=400)  # Set desired width in pixels
+st.sidebar.text("source : www.vecteezy.com/")
 
+# Start running the navigation
 pg.run()
 
+# ------------------------
+# Logging Configuration
+# ------------------------
 
+logger = logging.getLogger(__name__)  # Create a logger instance
 
-
-logger = logging.getLogger(__name__)
-
-# Rasa endpoint
+# Define Rasa server endpoint (used if chatbot functionality is required)
 URL = "http://localhost:5005/webhooks/rest/webhook"
 
+# ------------------------
+# Main Function
+# ------------------------
+
 def main():
-    st.html(
-        """
-    <style>
-    [data-testid="stSidebarContent"] {
-        color: white;
-        background-color: #dadada; // THD light grey
-    }
-    </style>
     """
-    )
-
-
-    st.sidebar.success("Select the menu points from top to bottom in order to use the ML pipeline.")
-    # st.sidebar.success("Go through the pages one by one to make a prediction or use the chatbot.")
+    Placeholder entry point: the sidebar styling and usage hint were removed,
+    so this function currently performs no work.
+    """
 
+# Entry point for the application
 if __name__ == "__main__":
     main()
-    
\ No newline at end of file
diff --git a/pages/about_me.py b/pages/about_me.py
index b619cd221b667ee61e9cbe19dc4805fa239fc020..9ff8c57e7f2ec0585b0d849091eb90aa665cd055 100644
--- a/pages/about_me.py
+++ b/pages/about_me.py
@@ -1,52 +1,80 @@
 import streamlit as st
 
 # Display the title and subtitle
-st.title("🎥 Welcome to Netflix Dataset Analysis & Modeling Workflow")
-st.subheader("📊 Unlock insights and build predictive models using Netflix's extensive dataset")
+st.title("🎥 Welcome to Netflix Content Analysis")
+st.subheader("Unlock insights, explore trends, and dive deep into the world of Netflix content!")
 
-# Add a banner or header section
+# Add a stylish banner or header section
 st.markdown(
     """
     <style>
     .banner {
-        background-color: #FF6347;
+        background: linear-gradient(to right, #ff7e5f, #feb47b);
         color: white;
         padding: 20px;
-        border-radius: 5px;
+        border-radius: 10px;
         text-align: center;
-        font-size: 18px;
+        font-size: 20px;
+        font-weight: bold;
+        box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.2);
     }
     </style>
     <div class="banner">
-        An interactive Streamlit app to preprocess, analyze, augment, and model Netflix dataset seamlessly!
+        📊 Your one-stop solution to analyze, preprocess, and model the Netflix dataset seamlessly!
     </div>
     """,
     unsafe_allow_html=True,
 )
 
-# Add a short introduction
-st.markdown("## 📜 What does this app do?")
+# Add an introduction with emojis for better engagement
+st.markdown("## 📜 What this Offers?")
 st.markdown(
     """
-    - **Clean & Preprocess Data**: Handle missing values and standardize the dataset.
-    - **Augment Data**: Add realistic fake data (20-25%) to enhance your analysis.
-    - **Feature Engineering**: Extract meaningful features to improve predictions.
-    - **Metrics & Visualizations**: Dive deep into statistics and correlations.
-    - **Model Training**: Train machine learning models to predict insights.
-    - **Custom Predictions**: Explore what-if scenarios and make predictions in real-time.
+    - 🛠️ **Data Preprocessing**: Clean and handle missing values for a refined dataset.
+    - 📈 **Augment Data**: Add realistic synthetic data to expand analysis.
+    - 🔍 **Feature Engineering**: Extract impactful features for improved analysis.
+    - 📊 **Visualizations & Metrics**: Generate insightful graphs and statistics.
+    - 🤖 **Model Training**: Train ML models for meaningful predictions.
+    - 🧮 **Custom Predictions**: Experiment with real-time scenarios and insights.
     """
 )
 
-# Add a section to navigate to other pages
-st.markdown("## 🚀 Get Started")
-col1, col2, col3 = st.columns(3)
+# Add a centered "Get Started" heading (static HTML; no buttons or links yet)
+st.markdown("---")
+
+st.markdown(
+    """
+    <style>
+    .start {
+        text-align: center;
+        font-size: 25px;
+
+    }
+    </style>
+    <div class="start">
+        🚀 Get Started
+    </div>
+    """,
+    unsafe_allow_html=True,
+)
 
+st.logo("imgs/logo2.png")
 
-# Footer
+# Footer with styling
 st.markdown("---")
 st.markdown(
     """
-    ### 👩‍💻 About the Developer
-    Developed with ❤️ by Asif Khan(https://www.linkedin.com).
-    """
-)
\ No newline at end of file
+    <style>
+    .footer {
+        text-align: center;
+        font-size: 16px;
+        color: gray;
+        margin-top: 20px;
+    }
+    </style>
+    <div class="footer">
+        👨‍💻 Developed with 🧠 and 💻 by <b>Asif Khan</b> | 🌟 Empowering Netflix Data Analysis! 🌟
+    </div>
+    """,
+    unsafe_allow_html=True,
+)
diff --git a/pages/add_and_apply_model.py b/pages/add_and_apply_model.py
index 0253776306c548ebf8d8ec80c2fe1915a93dcd8d..3cd45bc5e5544d5c1625d91ac783345983d598b2 100644
--- a/pages/add_and_apply_model.py
+++ b/pages/add_and_apply_model.py
@@ -1,20 +1,23 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
-from sklearn.linear_model import LinearRegression
-from sklearn.model_selection import train_test_split
 import seaborn as sns
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
 
-
+# Preprocess the dataset
 def preprocess_data(df):
-    """Preprocess the combined DataFrame."""
-    # Handle missing values
+    """Preprocess the combined DataFrame for analysis and modeling."""
+    # Handle missing values in key columns
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
     df["rating"] = df["rating"].fillna("Not Rated")
     df["date_added"] = pd.to_datetime(df["date_added"], errors="coerce").fillna(pd.Timestamp("2020-01-01"))
     df["duration"] = df["duration"].fillna("0 min")
+    df["director"] = df["director"].fillna("Unknown")
 
-    # Preprocess duration column
+    # Clean the 'duration' column to extract numeric values
     def preprocess_duration(row):
         if "min" in row:
             return int(row.replace(" min", ""))
@@ -24,34 +27,54 @@ def preprocess_data(df):
 
     df["duration_numeric"] = df["duration"].apply(preprocess_duration)
 
-    # Drop rows with invalid duration
+    # Drop rows where duration could not be parsed
     df = df[df["duration_numeric"].notnull()]
-    return df
 
+    # Extract the primary (first-listed) genre from 'listed_in'; missing values become "Other"
+    df["primary_genre"] = df["listed_in"].str.split(',').str[0].str.strip()
+    df["primary_genre"] = df["primary_genre"].fillna("Other")
+    return df
 
+# Main page for adding and applying the model
 def add_and_apply_model_page():
-    st.title("Netflix Dataset: Add and Apply Model")
+    st.title("🎥 Netflix Dataset: Predict Genre with Logistic Regression")
 
-    # Load Combined Dataset
-    st.header("1. Load Combined Dataset")
+    # Step 1: Load the Combined Dataset
+    st.header("📂 1. Load Combined Dataset")
     try:
         combined_df = pd.read_csv("netflix_combined_dataset.csv")  # Replace with your combined dataset path
-        st.success("Combined dataset loaded successfully!")
+        st.success("✅ Combined dataset loaded successfully!")
         st.dataframe(combined_df.head())
     except Exception as e:
-        st.error(f"Error loading combined dataset: {e}")
+        st.error(f"❌ Error loading combined dataset: {e}")
         return
 
-    # Preprocess Combined Data
-    st.header("2. Preprocess Combined Dataset")
+    # Step 2: Preprocess the Combined Dataset
+    st.header("🔄 2. Preprocess Combined Dataset")
     combined_df = preprocess_data(combined_df)
-    st.success("Preprocessing completed!")
+
+    # Use only the first 1000 rows for the prediction
+    combined_df = combined_df.head(1000)
+    st.success("✅ Preprocessing completed!")
     st.dataframe(combined_df.head())
 
-    # Features and Target Selection
-    st.header("3. Feature and Target Selection")
-    features = ["duration_numeric"]  # Use duration as the feature
-    target = "release_year"  # Target variable
+    # Step 3: Encode Director and Genre
+    st.header("🔢 3. Encode Features and Target")
+    director_encoder = LabelEncoder()
+    genre_encoder = LabelEncoder()
+
+    combined_df["director_encoded"] = director_encoder.fit_transform(combined_df["director"])
+    combined_df["genre_encoded"] = genre_encoder.fit_transform(combined_df["primary_genre"])
+
+    # Filter out genres with fewer than 2 samples
+    genre_counts = combined_df["genre_encoded"].value_counts()
+    valid_genres = genre_counts[genre_counts >= 2].index
+    combined_df = combined_df[combined_df["genre_encoded"].isin(valid_genres)]
+
+    # Step 4: Feature and Target Selection
+    st.header("🎯 4. Feature and Target Selection")
+    features = ["director_encoded", "duration_numeric", "release_year"]  # Features for prediction
+    target = "genre_encoded"  # Target variable
     X = combined_df[features]
     y = combined_df[target]
     st.write("### Features (X)")
@@ -59,67 +82,74 @@ def add_and_apply_model_page():
     st.write("### Target (y)")
     st.write(y.head())
 
-    # Train-Test Split
-    st.header("4. Train-Test Split")
+    # Step 5: Train-Test Split
+    st.header("🔀 5. Train-Test Split")
     test_size = st.slider("Select Test Size Percentage", min_value=10, max_value=50, value=20, step=5)
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42)
-    st.success(f"Data split: {100 - test_size}% training and {test_size}% testing.")
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42, stratify=y)
+    st.success(f"✅ Data split: {100 - test_size}% training and {test_size}% testing.")
 
-    # Train Linear Regression Model
-    st.header("5. Train Linear Regression Model")
-    model = LinearRegression()
+    # Step 6: Train Logistic Regression Model
+    st.header("🤖 6. Train Logistic Regression Model")
+    model = LogisticRegression(max_iter=1000, random_state=42)
     model.fit(X_train, y_train)
-    st.success("Linear Regression Model trained successfully!")
+    st.success("✅ Logistic Regression Model trained successfully!")
 
-    # Model Predictions
-    st.header("6. Model Predictions")
+    # Step 7: Model Predictions
+    st.header("🔍 7. Model Predictions")
     y_pred = model.predict(X_test)
-    comparison_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
+    comparison_df = pd.DataFrame({
+        "Actual": genre_encoder.inverse_transform(y_test),
+        "Predicted": genre_encoder.inverse_transform(y_pred)
+    })
     st.dataframe(comparison_df.head())
 
-    # Evaluation Metrics
-    st.header("7. Model Evaluation")
-    mse = ((y_test - y_pred) ** 2).mean()
-    st.write(f"Mean Squared Error (MSE): {mse:.2f}")
-
-    # Visualization: Actual vs Predicted
-    st.header("8. Visualization: Actual vs Predicted")
-    fig, ax = plt.subplots()
-    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6, ax=ax, label="Predictions")
-    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=2, label="Perfect Fit")
-    ax.set_xlim(1990, y_test.max())  # Start x-axis from 1960
-    ax.set_ylim(1990, y_test.max())  # Start y-axis from 1960
-    ax.set_xlabel("Actual Release Year")
-    ax.set_ylabel("Predicted Release Year")
-    ax.set_title("Actual vs. Predicted Release Years")
-    ax.legend()
+    # Step 8: Visualization of Predictions
+    st.header("📈 8. Visualization: Actual vs Predicted")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    sns.countplot(x="Actual", data=comparison_df, color="blue", alpha=0.6, label="Actual")
+    sns.countplot(x="Predicted", data=comparison_df, color="orange", alpha=0.6, label="Predicted")
+    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
+    plt.title("Actual vs Predicted Genre Counts")
+    plt.xlabel("Genre")
+    plt.ylabel("Count")
+    plt.legend()
     st.pyplot(fig)
 
-    # Custom Input for Prediction
-    st.header("9. Custom Prediction")
+    # Step 9: Custom Input for Prediction
+    st.header("🛠️ 9. Custom Prediction")
+
+    # Add a placeholder for director selection
+    directors = ["Select a Director"] + sorted(combined_df["director"].unique())
+    selected_director = st.selectbox("Select a Director", directors)
+
+    # Add sliders for duration and release year (no explicit value, so each starts at its min_value)
     duration_choice = st.slider(
         "Enter Duration (in minutes)",
         min_value=int(combined_df["duration_numeric"].min()),
         max_value=int(combined_df["duration_numeric"].max()),
-        value=90,
     )
-    custom_input = [[duration_choice]]
-    custom_prediction = model.predict(custom_input)[0]
-    st.write(f"Predicted Release Year: {custom_prediction:.2f}")
-
-    # Highlight Custom Input on Plot
-    st.markdown("### Custom Prediction on Plot")
-    fig, ax = plt.subplots()
-    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6, ax=ax, label="Predictions")
-    ax.scatter(duration_choice, custom_prediction, color="red", label="Custom Prediction", zorder=5)
-    ax.legend()
-    ax.set_xlabel("Actual")
-    ax.set_ylabel("Predicted")
-    st.pyplot(fig)
+    release_year_choice = st.slider(
+        "Enter Release Year",
+        min_value=int(combined_df["release_year"].min()),
+        max_value=int(combined_df["release_year"].max()),
+    )
+
+    # Default message for predicted genre
+    if selected_director == "Select a Director":
+        st.header("**Predicted Genre:** ❓ 🤷 ❓")
+    else:
+        # Encode the input
+        director_encoded = director_encoder.transform([selected_director])[0]
+        custom_input = [[director_encoded, duration_choice, release_year_choice]]
+
+        # Predict the genre
+        custom_prediction = genre_encoder.inverse_transform(model.predict(custom_input))[0]
+        st.header(f"**Predicted Genre:**  ✨ {custom_prediction} ✨")
+
 
-    # Save Model and Data to Session State
+    # Step 10: Save Model and Data to Session State
     st.session_state.model = model
     st.session_state.combined_data = combined_df
-    st.success("Model applied successfully!")
+    st.success("✅ Model applied successfully!")
 
 add_and_apply_model_page()
diff --git a/pages/algorithm_selection.py b/pages/algorithm_selection.py
index 35a05bd81e34982d7700c81a265d149a7ca755a8..fc56217a97139e6363a06fd96f979bf3e6afb21c 100644
--- a/pages/algorithm_selection.py
+++ b/pages/algorithm_selection.py
@@ -9,45 +9,41 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import classification_report, confusion_matrix
 
 def algorithm_selection_page():
-    st.title("Algorithm Selection for Netflix Genre Prediction (Classification)")
+    st.title("🧠 Algorithm Selection for Netflix Genre Prediction")
 
     # ---------------------------
     # 1. Load and Preprocess Data
     # ---------------------------
+    st.markdown("## 🔄 Data Loading and Preprocessing")
     file_path = "netflix_titles.csv"  # Update with the correct file path
     df = pd.read_csv(file_path)
 
-    st.markdown("## Preprocessing Dataset")
-    
-    # Handle missing values
-    df["listed_in"] = df["listed_in"].fillna("Unknown")  # Genre field
+    # Handle missing values in critical columns
+    df["listed_in"] = df["listed_in"].fillna("Unknown")  # Fill missing genres
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
-    df["duration"] = df["duration"].fillna("0 min")
+    df["duration"] = df["duration"].fillna("0 min")  # Default to "0 min" for missing durations
 
-    # Clean and preprocess 'duration'
+    # Clean the 'duration' column (convert "90 min" -> 90)
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        elif "Season" in str(value):
-            return 0  # For TV shows
-        return 0  # Unrecognized values
+        elif "Season" in str(value):  # Handle TV Shows
+            return 0
+        return 0  # Default for unrecognized values
 
     df["duration"] = df["duration"].apply(clean_duration)
 
-    # Extract primary genre from 'listed_in'
+    # Extract primary genre from 'listed_in' and group rare genres as "Other"
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
     df['primary_genre'] = df['primary_genre'].fillna("Other")
-
-    # Group rare genres into 'Other' (optional)
-    genre_counts = df['primary_genre'].value_counts()
-    common_genres = genre_counts[genre_counts >= 5].index
+    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >= 5].index
     df['primary_genre'] = df['primary_genre'].apply(lambda x: x if x in common_genres else 'Other')
 
-    # Encode the target (genre)
+    # Encode the target variable (genre)
     genre_encoder = LabelEncoder()
     df["genre_encoded"] = genre_encoder.fit_transform(df["primary_genre"])
 
-    # Scale numeric features
+    # Scale numeric features: duration and release_year
     scaler = StandardScaler()
     df["duration_scaled"] = scaler.fit_transform(df[["duration"]])
     df["release_year_scaled"] = scaler.fit_transform(df[["release_year"]])
@@ -55,24 +51,27 @@ def algorithm_selection_page():
     # ---------------------------
     # 2. Feature and Target Setup
     # ---------------------------
-    st.markdown("## Feature Selection")
-    
+    st.markdown("## 🛠️ Feature Selection")
+
+    # Select features for training and the target column
     selected_features = ["duration_scaled", "release_year_scaled"]
     target_col = "genre_encoded"
 
     X = df[selected_features]
     y = df[target_col]
 
-    st.write("### Features")
+    # Display selected features and target variable
+    st.markdown("### Selected Features (X)")
     st.write(X.head())
-    st.write("### Target (Genre Encoded)")
+    st.markdown("### Target Variable (Genre Encoded)")
     st.write(y.head())
 
     # ---------------------------
     # 3. Train-Test Split
     # ---------------------------
-    st.markdown("## Train/Test Split")
-
+    st.markdown("## 🔀 Train/Test Split")
+    
+    # Let the user set the test set size
     test_size = st.slider("Test Set Size (%)", min_value=10, max_value=50, value=20, step=5)
     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size/100, random_state=42)
 
@@ -82,77 +81,57 @@ def algorithm_selection_page():
     # ---------------------------
     # 4. Model Training and Scoring
     # ---------------------------
-    st.markdown("## Model Training and Comparison")
+    st.markdown("## 🤖 Model Training and Comparison")
 
     # ---- Model A: Logistic Regression ----
+    st.markdown("### Logistic Regression")
     logreg = LogisticRegression(max_iter=1000, random_state=42)
     logreg.fit(X_train, y_train)
     y_pred_logreg = logreg.predict(X_val)
-
-    # Classification metrics for Logistic Regression
     logreg_accuracy = logreg.score(X_val, y_val)
-    st.markdown("### Logistic Regression")
-    st.write(f"Accuracy on Validation Set: {logreg_accuracy:.4f}")
-    
-    # Cross-validation scores (optional)
+
+    st.write(f"Accuracy on Validation Set: **{logreg_accuracy:.4f}**")
     cv_scores_log = cross_val_score(logreg, X, y, cv=5, scoring='accuracy')
-    st.write("Cross-Validation Scores (Logistic Regression):")
+    st.write("Cross-Validation Scores:")
     st.write(cv_scores_log)
 
     # ---- Model B: Random Forest ----
+    st.markdown("### Random Forest")
     rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
     rf_model.fit(X_train, y_train)
     y_pred_rf = rf_model.predict(X_val)
-
-    # Classification metrics for Random Forest
     rf_accuracy = rf_model.score(X_val, y_val)
-    st.markdown("### Random Forest")
-    st.write(f"Accuracy on Validation Set: {rf_accuracy:.4f}")
 
-    # Cross-validation scores (optional)
+    st.write(f"Accuracy on Validation Set: **{rf_accuracy:.4f}**")
     cv_scores_rf = cross_val_score(rf_model, X, y, cv=5, scoring='accuracy')
-    st.write("Cross-Validation Scores (Random Forest):")
+    st.write("Cross-Validation Scores:")
     st.write(cv_scores_rf)
 
     # ---------------------------
     # 5. Detailed Model Comparison
     # ---------------------------
+    st.markdown("## 📊 Model Comparison")
 
-    # Compare in a small DataFrame
+    # Compare models in a small DataFrame
     comparison_df = pd.DataFrame({
         "Model": ["Logistic Regression", "Random Forest"],
         "Validation Accuracy": [logreg_accuracy, rf_accuracy],
         "CV Accuracy (mean)": [cv_scores_log.mean(), cv_scores_rf.mean()]
     })
-    st.write("### Comparison Table")
+    st.markdown("### Comparison Table")
     st.write(comparison_df)
 
-    # Determine best model by validation accuracy
+    # Select the best model by validation accuracy
     best_idx = comparison_df["Validation Accuracy"].idxmax()
     best_model_info = comparison_df.iloc[best_idx]
-    st.write("### Best Model")
+    st.markdown("### Best Model")
     st.write(best_model_info)
 
-    # Save to session state (optional)
+    # Save results to session state for future use
     st.session_state.comparison_df = comparison_df
     st.session_state.best_model_info = best_model_info
 
-    st.success("Algorithm selection and comparison completed!")
-
-
-def _plot_confusion_matrix(y_true, y_pred, encoder, title="Confusion Matrix"):
-    """Helper to plot confusion matrix as a figure."""
-    import matplotlib.pyplot as plt
-    import seaborn as sns
-    from sklearn.metrics import confusion_matrix
-
-    cm = confusion_matrix(y_true, y_pred, labels=range(len(encoder.classes_)))
-    fig, ax = plt.subplots(figsize=(5, 4))
-    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
-                xticklabels=encoder.classes_, yticklabels=encoder.classes_, ax=ax)
-    ax.set_xlabel("Predicted")
-    ax.set_ylabel("Actual")
-    ax.set_title(title)
-    return fig
+    st.success("✅ Algorithm selection and comparison completed!")
+    st.success("✅ Algorithm Selected is Logistic Regression!")
 
 algorithm_selection_page()
diff --git a/pages/augmentation.py b/pages/augmentation.py
index 9a354a12848449b234a834953ac9a949cbf7b9b2..6481cb3590f95e38e7aab0fdd2becdfbfce5855a 100644
--- a/pages/augmentation.py
+++ b/pages/augmentation.py
@@ -2,177 +2,174 @@ import streamlit as st
 import pandas as pd
 import random
 import numpy as np
-import joblib
-
 from sklearn.preprocessing import StandardScaler, LabelEncoder
 
 # Constants for random data generation
 YEAR_MIN = 1920
 YEAR_MAX = 2023
 DURATION_MIN = 1
-DURATION_MAX = 300  # Assuming max duration in minutes or episodes
+DURATION_MAX = 300  # Maximum duration in minutes or episodes
 
 def load_data(file_path: str) -> pd.DataFrame:
     """Load the Netflix dataset from a CSV file."""
     try:
         return pd.read_csv(file_path)
     except FileNotFoundError:
-        st.error(f"File not found at path: {file_path}. Please check the file path.")
-        return pd.DataFrame()  # Return empty DataFrame if file not found
+        # Show error message if the file isn't found
+        st.error(f"❌ File not found at path: {file_path}. Please check the file path.")
+        return pd.DataFrame()
 
-def preprocess_data(df: pd.DataFrame) -> tuple:
-    """Preprocess the dataset: drop missing values, encode genre, scale features, etc."""
-    # a. Remove rows with missing values in key columns
+def preprocess_data(df: pd.DataFrame):
+    """Preprocess the dataset for augmentation."""
+    # Drop rows with missing essential data
     df = df.dropna(subset=['duration', 'director', 'release_year', 'listed_in'])
-    
-    # b. Clean 'duration' column (convert "xxx min" -> int, or 0 if unrecognized)
+
+    # Extract numeric duration values (e.g., "120 min" → 120)
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        return 0  # For "Season" or other unrecognized patterns
-    
+        return 0  # Default to 0 if unrecognized
+
     df['duration_numeric'] = df['duration'].apply(clean_duration)
-    df = df[df['duration_numeric'] >= 0]
-    
-    # c. Extract primary genre; group rare genres into 'Other'
+    df = df[df['duration_numeric'] >= 0]  # Keep valid durations
+
+    # Simplify genres by focusing on the primary genre and grouping rare ones as 'Other'
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
-    genre_counts = df['primary_genre'].value_counts()
-    common_genres = genre_counts[genre_counts >=5].index.tolist()
+    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >= 5].index.tolist()
     df['primary_genre'] = df['primary_genre'].apply(lambda g: g if g in common_genres else 'Other')
-    
-    # d. Encode Genre (target variable)
+
+    # Encode genres and count directors for further analysis
     genre_encoder = LabelEncoder()
     df['genre_encoded'] = genre_encoder.fit_transform(df['primary_genre'])
-    
-    # e. Encode Director by counting the number of movies each director has
     director_counts = df['director'].value_counts()
     df['director_count'] = df['director'].map(director_counts)
-    
-    # f. Feature Scaling for 'duration_numeric' and 'release_year' with separate scalers
+
+    # Scale numeric features for consistency
     duration_scaler = StandardScaler()
     release_year_scaler = StandardScaler()
     df['duration_scaled'] = duration_scaler.fit_transform(df[['duration_numeric']])
     df['release_year_scaled'] = release_year_scaler.fit_transform(df[['release_year']])
-    
+
     return df, genre_encoder, duration_scaler, release_year_scaler, director_counts
 
-def generate_fake_row(genre_encoder: LabelEncoder, common_genres: list, director_counts: pd.Series) -> dict:
-    """Generate a single row of synthetic Netflix data."""
+def generate_fake_row(genre_encoder, common_genres, director_counts):
+    """Generate a single synthetic data row."""
+    # Randomly select a genre and encode it
     primary_genre = random.choice(common_genres + ['Other'])
-    
-    # Encode genre using the existing LabelEncoder
     genre_encoded = genre_encoder.transform([primary_genre])[0]
-    
-    # Randomly decide if the director is from existing ones or new
+
+    # 70% chance to use an existing director, 30% to create a new one
     if random.random() < 0.7 and not director_counts.empty:
-        # 70% chance to choose an existing director
-        existing_directors = director_counts.index.tolist()
-        director = random.choice(existing_directors)
-        director_count = director_counts.get(director, 1) + 1  # Increment count
+        director = random.choice(director_counts.index.tolist())
+        director_count = director_counts.get(director, 1) + 1
     else:
-        # 30% chance to create a new director
         director = f"Director {random.randint(1001, 2000)}"
-        director_count = 1  # First occurrence
-    
-    # Randomly decide the type to set duration accordingly
+        director_count = 1
+
+    # Generate type, duration, and other fields
     type_choice = random.choice(['Movie', 'TV Show'])
-    
-    # Generate duration as integer based on type
     duration_numeric = random.randint(DURATION_MIN, DURATION_MAX)
     duration = f"{duration_numeric} min" if type_choice == "Movie" else f"{duration_numeric} episodes"
-    
+
     return {
-        'show_id': f's{random.randint(10000, 99999)}',
         'type': type_choice,
         'title': f"Random Title {random.randint(1, 1000)}",
         'director': director,
-        'cast': f"Actor {random.randint(1, 500)}, Actor {random.randint(501, 1000)}",
-        'country': f"Country {random.randint(1, 100)}",
-        'date_added': f"{random.randint(1, 12)}/{random.randint(1, 28)}/{random.randint(YEAR_MIN, YEAR_MAX)}",
-        'release_year': random.randint(YEAR_MIN, YEAR_MAX),
-        'rating': random.choice(['G', 'PG', 'PG-13', 'R', 'NC-17', 'TV-Y', 'TV-Y7', 'TV-G', 'TV-PG', 'TV-14', 'TV-MA']),
         'duration': duration,
-        'listed_in': f"{random.choice(['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Thriller', 'Documentary', 'Family', 'Sci-Fi'])}, {random.choice(['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Thriller', 'Documentary', 'Family', 'Sci-Fi'])}",
-        'description': f"Description for Random Title {random.randint(1, 1000)}",
+        'release_year': random.randint(YEAR_MIN, YEAR_MAX),
+        'primary_genre': primary_genre,
         'genre_encoded': genre_encoded,
         'director_count': director_count,
-        'duration_numeric': duration_numeric
+        'duration_numeric': duration_numeric,
     }
 
 def augmentation_page():
-    st.title("Netflix Dataset Augmentation")
-    
+    st.title("🛠️ Netflix Dataset Augmentation")  # Main page title
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #4caf50;
+            color: white;
+            padding: 15px;
+            text-align: center;
+            border-radius: 10px;
+        }
+        </style>
+        <div class="header">
+            Enhance your Netflix dataset with synthetic data for improved analysis!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
     # Load Netflix dataset
-    file_path = "netflix_titles.csv"  # Replace with the correct file path
+    file_path = "netflix_titles.csv"
     df = load_data(file_path)
-    
+
+    # Exit if the dataset is empty
     if df.empty:
-        st.warning("Dataset is empty. Please check the file path and try again.")
+        st.warning("⚠️ Dataset is empty. Please check the file path and try again.")
         return
-    
-    # Preprocess original data
+
+    # Preprocess the dataset to clean and encode necessary features
     df, genre_encoder, duration_scaler, release_year_scaler, director_counts = preprocess_data(df)
-    
-    # Identify common genres
-    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >=5].index.tolist()
-    
-    # Display preprocessed original dataset
-    st.markdown("## Original Preprocessed Dataset")
+
+    # Display the cleaned and preprocessed dataset
+    st.markdown("## 🔍 Preprocessed Original Dataset")
     st.write(df.head())
-    
-    # Generate synthetic data
-    st.markdown("## Generating Synthetic Data")
-    num_rows = st.slider("Number of fake rows to generate", min_value=10, max_value=1000, step=10, value=100)
+
+    # User input for synthetic data generation
+    st.markdown("## 🔧 Generate Synthetic Data")
+    num_rows = st.slider("Number of rows to generate:", 10, 1000, step=10, value=100)
+
+    # Generate synthetic rows with a progress bar
     rows = []
-    progress_text = "Generating synthetic data. Please wait."
-    pbar = st.progress(0, text=progress_text)
-    
+    progress_bar = st.progress(0)
     for i in range(num_rows):
-        rows.append(generate_fake_row(genre_encoder, common_genres, director_counts))
-        pbar.progress((i + 1) / num_rows, text=progress_text)
-    
-    pbar.empty()
+        rows.append(generate_fake_row(genre_encoder, df['primary_genre'].unique().tolist(), director_counts))
+        progress_bar.progress((i + 1) / num_rows)
+    progress_bar.empty()  # Clear the progress bar
+
+    # Create a synthetic dataset
     fake_df = pd.DataFrame(rows)
-    
-    # Scale 'duration_numeric' and 'release_year'
+
+    # Scale synthetic data columns
     fake_df['duration_scaled'] = duration_scaler.transform(fake_df[['duration_numeric']])
     fake_df['release_year_scaled'] = release_year_scaler.transform(fake_df[['release_year']])
-    
-    # Display original and fake data
+
+    # Display comparison: original vs synthetic data
+    st.markdown("## 📊 Dataset Comparison")
     cols = st.columns(2)
     with cols[0]:
-        st.markdown("### Original Preprocessed Data")
+        st.markdown("### Original Data")
         st.dataframe(df.head())
-    
     with cols[1]:
-        st.markdown("### Generated Synthetic Data")
+        st.markdown("### Synthetic Data")
         st.dataframe(fake_df.head())
-    
-    # Display data metrics
-    st.markdown("## Data Metrics")
-    cols_metrics = st.columns(2)
-    with cols_metrics[0]:
+
+    # Display summary metrics for both datasets
+    st.markdown("## 📈 Dataset Metrics")
+    metrics_cols = st.columns(2)
+    with metrics_cols[0]:
         st.markdown("### Original Data Metrics")
         st.write(df.describe(include="all"))
-    
-    with cols_metrics[1]:
+    with metrics_cols[1]:
         st.markdown("### Synthetic Data Metrics")
         st.write(fake_df.describe(include="all"))
-    
-    # Option to combine original and synthetic data
-    st.markdown("## Combined Dataset")
-    if st.button("Combine Original and Synthetic Data"):
+
+    # Option to combine original and synthetic datasets
+    st.markdown("## 📦 Combine Datasets")
+    if st.button("Combine and Download"):
         combined_df = pd.concat([df, fake_df], ignore_index=True)
         st.write(combined_df.head())
-        st.write(f"Combined dataset size: {combined_df.shape}")
-        
-        # Optionally, allow downloading the combined dataset
+        st.write(f"**Combined Dataset Size:** {combined_df.shape[0]} rows")
         csv = combined_df.to_csv(index=False).encode('utf-8')
         st.download_button(
-            label="Download Combined Dataset as CSV",
-            data=csv,
-            file_name='netflix_combined_dataset.csv',
-            mime='text/csv',
+            "Download Combined Dataset as CSV",
+            csv,
+            "netflix_combined_dataset.csv",
+            "text/csv",
         )
 
 augmentation_page()
diff --git a/pages/chatbot.py b/pages/chatbot.py
index d59ec69a4ab05a0a2d922a11708f0ac71f2cc9e4..52fa968c13f69140cb73609734e7277779ebd310 100644
--- a/pages/chatbot.py
+++ b/pages/chatbot.py
@@ -26,7 +26,7 @@ def initialize_conversation():
     """Initialize the conversation with an assistant greeting."""
     assistant_message = "Hello! How can I help you today?"
     return [
-        {"role": "system", "content": "You are a helpful Rasa chatbot assistant."},
+        {"role": "system", "content": "Welcome to Rasa chatbot assistant."},
         {"role": "assistant", "content": assistant_message}
     ]
 
diff --git a/pages/data_metrics.py b/pages/data_metrics.py
index bb512ea8928f3feb35d94d60b46e98794d24b6ee..4bc2d95f8cc0596471cb9b89784d4b6f0aa5eb21 100644
--- a/pages/data_metrics.py
+++ b/pages/data_metrics.py
@@ -3,72 +3,67 @@ import matplotlib.pyplot as plt
 import pandas as pd
 
 def data_metrics_page():
-    st.title("Netflix Dataset Metrics and Visualization")
+    st.title("📊 Netflix Dataset Metrics and Visualization")  # Main title with emoji
 
-    # Load the Netflix dataset
+    # Step 1: Load the Netflix dataset
     file_path = "netflix_titles.csv"
     df = pd.read_csv(file_path)
-    st.session_state.df = df
+    st.session_state.df = df  # Save the dataset in session state for reuse
 
-    st.markdown("## Dataset Overview")
-    st.write("### Head of the Data")
+    # Step 2: Dataset Overview
+    st.markdown("## 🔍 Dataset Overview")
+
+    st.markdown("### Head of the Data")
     st.write(df.head())  # Display the first few rows of the dataset
 
-    st.write("### Summary Statistics")
-    st.write(df.describe(include='all'))  # Include all columns (even non-numeric)
+    st.markdown("### Summary Statistics")
+    st.write(df.describe(include='all'))  # Include all columns, even non-numeric
 
-    st.write("### Data Types")
-    st.write(df.dtypes)  # Display the data types of all columns
+    st.markdown("### Data Types")
+    st.write(df.dtypes)  # Show data types of all columns
 
-    st.write("### Null Values")
-    st.write(df.isnull().sum())  # Display the count of missing values per column
+    st.markdown("### Null Values")
+    st.write(df.isnull().sum())  # Count and display missing values for each column
 
-    # Correlation heatmap for numeric columns (if any)
-    st.markdown("## Correlation Heatmap (Numeric Features)")
-    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
-    if len(numeric_cols) > 0:
-        corr_matrix = df[numeric_cols].corr()
-        st.write(corr_matrix)
-    else:
-        st.info("No numeric columns to compute correlation.")
 
-    # Visualizations
-    st.markdown("## Visualizations")
+    # Step 3: Visualizations
+    st.markdown("## 📈 Visualizations")
 
-    # Distribution of content types (e.g., Movies vs. TV Shows)
+    # Visualization 1: Content Type Distribution
     if "type" in df.columns:
-        st.markdown("### Content Type Distribution")
-        type_counts = df["type"].value_counts()
+        st.markdown("### Distribution of Content Types")
+        type_counts = df["type"].value_counts()  # Count occurrences of each content type
         fig, ax = plt.subplots()
-        ax.bar(type_counts.index, type_counts.values)
+        ax.bar(type_counts.index, type_counts.values, color='skyblue')
         ax.set_xlabel("Type")
         ax.set_ylabel("Count")
         ax.set_title("Distribution of Content Types")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the bar chart
 
-    # Distribution of release years
+    # Visualization 2: Release Year Distribution
     if "release_year" in df.columns:
-        st.markdown("### Release Year Distribution")
-        year_counts = df["release_year"].value_counts().sort_index()
+        st.markdown("### Number of Titles Released Over the Years")
+        year_counts = df["release_year"].value_counts().sort_index()  # Count by release year
         fig, ax = plt.subplots()
-        ax.plot(year_counts.index, year_counts.values)
+        ax.plot(year_counts.index, year_counts.values, marker='o', color='green')
         ax.set_xlabel("Year")
         ax.set_ylabel("Count")
         ax.set_title("Number of Titles Released Over the Years")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the line plot
 
-    # Histogram of durations for movies
+    # Visualization 3: Duration Histogram
     if "duration" in df.columns:
-        st.markdown("### Duration of Movies/TV Shows")
-        # Filter duration if it's numeric (e.g., split '90 min' or '1 Season')
+        st.markdown("### Histogram of Durations (Movies/TV Shows)")
+        # Extract numeric duration values (e.g., '90 min' → 90)
         df["duration_numeric"] = pd.to_numeric(df["duration"].str.extract(r'(\d+)')[0], errors='coerce')
         fig, ax = plt.subplots()
-        ax.hist(df["duration_numeric"].dropna(), bins=20, edgecolor='black')
+        ax.hist(df["duration_numeric"].dropna(), bins=20, edgecolor='black', color='orange')
         ax.set_xlabel("Duration")
         ax.set_ylabel("Count")
         ax.set_title("Histogram of Durations")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the histogram
 
-    st.success("Data metrics and visualization complete!")
+    # Step 4: Success Message
+    st.success("✅ Data metrics and visualizations are complete!")
 
-data_metrics_page()
\ No newline at end of file
+data_metrics_page()
diff --git a/pages/feature_engineering.py b/pages/feature_engineering.py
index 4126dde2c0be2d9c2c2023f076c73c425bb59e19..9804a01ef2268720e868035bb843daa21eedd915 100644
--- a/pages/feature_engineering.py
+++ b/pages/feature_engineering.py
@@ -1,77 +1,73 @@
 import streamlit as st
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder
-import matplotlib.pyplot as plt
-import seaborn as sns
 
 def feature_engineering_page():
-    st.title("Netflix Dataset Feature Engineering")
-    
-    # 1. Access Preprocessed Data from Session State
+    st.title("📊 Netflix Dataset Feature Engineering")
+
+    # Step 1: Check for Preprocessed Data
     if 'df' not in st.session_state:
-        st.error("Preprocessed data not found in session state. Please run the Preprocessing step first.")
+        st.error("⚠️ Preprocessed data not found in session state. Please complete the Preprocessing step first.")
         return
-    
-    df = st.session_state.df.copy()  # Work on a copy to prevent altering the session state
-    
-    # 2. Display Preprocessed Dataset
-    st.markdown("## Preprocessed Dataset")
+
+    # Work on a copy of the preprocessed dataset
+    df = st.session_state.df.copy()
+
+    # Step 2: Display Preprocessed Dataset
+    st.markdown("## 🔍 Preprocessed Dataset")
     st.write(df.head())
-    
-    # 3. Verify Required Columns
+
+    # Step 3: Check for Required Columns
     required_columns = ['duration', 'director', 'release_year', 'listed_in']
     missing_columns = [col for col in required_columns if col not in df.columns]
-    
+
     if missing_columns:
-        st.error(f"The following required columns are missing from the dataset: {missing_columns}")
-        st.info("Ensure that the Preprocessing step has been completed successfully.")
+        st.error(f"❌ The following required columns are missing: {missing_columns}")
+        st.info("Ensure the Preprocessing step has been completed successfully.")
         return
-    
-    # 4. Feature Selection for Modeling
-    st.markdown("## Selecting Features for Logistic Regression")
-    
-    # Define feature columns and target
+
+    # Step 4: Feature Selection for Modeling
+    st.markdown("## 🛠️ Selecting Features for Modeling")
+
+    # Define feature columns and target variable
     feature_columns = ["duration", "director", "release_year"]
-    target_column = "listed_in"
-    
-    # Select features and target
+    df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()  # Extract primary genre
+    target_column = "primary_genre"
+
+    # Display selected features and target variable
     X = df[feature_columns]
     y = df[target_column]
-    
-    st.write("### Selected Features (X)")
+    st.markdown("### Features (X)")
     st.write(X.head())
-    
-    st.write("### Target Variable (y)")
+    st.markdown("### Target Variable (y - Primary Genre)")
     st.write(y.head())
-    
-    # 5. Encoding Additional Categorical Variables (if any)
-    # Assuming 'listed_in' was already handled during preprocessing
-    # If there are other categorical variables to encode, handle them here
-    
+
+    # Step 5: Encode Additional Categorical Variables
+    st.markdown("## 🔄 Encoding Categorical Variables")
+
+    # Identify categorical columns to encode (excluding already processed ones)
     categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
-    # Remove columns already encoded or not needed
-    columns_to_remove = ['show_id', 'title', 'cast', 'country', 'date_added', 'rating', 'duration', 'listed_in', 'description', 'primary_genre']
-    categorical_cols = [col for col in categorical_cols if col not in columns_to_remove]
-    
+    excluded_cols = [
+        'show_id', 'title', 'cast', 'country', 'date_added',
+        'rating', 'duration', 'listed_in', 'description', 'primary_genre'
+    ]
+    categorical_cols = [col for col in categorical_cols if col not in excluded_cols]
+
     if categorical_cols:
-        st.markdown("## Encoding Additional Categorical Variables")
         label_encoders = {}
         for col in categorical_cols:
             le = LabelEncoder()
-            df[col] = le.fit_transform(df[col])
+            df[col] = le.fit_transform(df[col])  # Apply Label Encoding
             label_encoders[col] = le
-            st.write(f"### Encoded '{col}'")
-            st.write(df[[col]].head())
-        st.session_state.label_encoders = label_encoders
+            st.markdown(f"### Encoded Column: `{col}`")
+            st.write(df[[col]].head())  # Display encoded column
+        st.session_state.label_encoders = label_encoders  # Save encoders to session state
     else:
-        st.write("No additional categorical columns to encode.")
+        st.info("No additional categorical variables found for encoding.")
 
-    
-    # 8. Save Features and Target to Session State for Modeling
+    # Step 6: Save Features and Target for Further Modeling
     st.session_state.features = X
     st.session_state.target = y
-    
-    st.success("Feature engineering complete. Features and target are ready for modeling!")
+    st.success("✅ Feature engineering is complete! Features and target are ready for modeling.")
 
-    
 feature_engineering_page()
diff --git a/pages/model_application.py b/pages/model_application.py
index 0322d9c36a211571f3e91fb4c8f0bb8b31a79092..a74224e6d31a10271011567eded714f89e1990d6 100644
--- a/pages/model_application.py
+++ b/pages/model_application.py
@@ -7,127 +7,122 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import accuracy_score
 
 def model_application_page():
-    st.title("Netflix Dataset Model Application")
+    # Main title with emojis
+    st.title("🎥 Netflix Dataset Model Application")
+    st.markdown(
+        """
+        <style>
+        .main-header {
+            background-color: #1DB954;
+            color: white;
+            padding: 15px;
+            text-align: center;
+            font-size: 24px;
+            border-radius: 10px;
+        }
+        </style>
+        <div class="main-header">
+            Unlock the power of Netflix data with personalized recommendations, insights, and trends!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
 
     # Load the Netflix dataset
     file_path = "netflix_titles.csv"
     df = pd.read_csv(file_path)
 
+    st.markdown("---")  # Section divider
 
-# 1. Personalized Playlist
-
-    st.title("1. Personalized Netflix Recommendations")
+    # 1. Personalized Playlist
+    st.header("📋 1. Personalized Netflix Recommendations")
 
     # Preprocessing
     valid_ratings = [
         "TV-MA", "PG-13", "R", "PG", "G", "NC-17", "TV-Y", "TV-Y7",
         "TV-G", "TV-PG", "TV-14", "NR", "UR"
     ]
-
-    # Replace invalid ratings with "Not Rated"
     df["rating"] = df["rating"].apply(lambda x: x if x in valid_ratings else "NR")
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
     df["rating"] = df["rating"].fillna("Not Rated")
     df["genres"] = df["listed_in"].str.split(", ")
 
     # User Inputs
-    st.markdown("### Input Your Preferences")
+    st.markdown("### 🎯 Input Your Preferences")
     available_genres = sorted(set(df["genres"].explode().dropna()))
-    preferred_genres = st.multiselect("What genres do you like?", available_genres, default=[])
+    preferred_genres = st.multiselect("🎥 Select Genres:", available_genres, default=[])
     available_ratings = sorted(df["rating"].unique())
-    preferred_ratings = st.multiselect("What ratings do you prefer?", available_ratings, default=[])
+    preferred_ratings = st.multiselect("⭐ Select Ratings:", available_ratings, default=[])
 
     # Filter Data by Genre
     filtered_df = df[df["genres"].apply(lambda genres: any(genre in genres for genre in preferred_genres))]
-
-    # Filter Data by Rating
     filtered_df = filtered_df[filtered_df["rating"].isin(preferred_ratings)]
-
-    # Sort by Relevance (e.g., release year)
     filtered_df = filtered_df.sort_values(by=["release_year"], ascending=False)
 
-    # Select Top 20 Recommendations
+    # Recommendations
     recommendations = filtered_df.head(20)
-
-    # Output Recommendations
-    st.markdown("### Top 20 Recommendations for You")
+    st.markdown("### 📃 Top 20 Recommendations for You")
     if recommendations.empty:
-        st.write("No recommendations found for the selected criteria. Please adjust your preferences.")
+        st.warning("No recommendations found for the selected criteria. Please adjust your preferences.")
     else:
         st.write(recommendations[["title", "type", "release_year", "rating", "listed_in"]])
 
-# 2. Country-Wise Popular Content
-    st.title("2. Country-Wise Popular Content Recommendations")
+    st.markdown("---")  # Section divider
 
-    df["country"] = df["country"].fillna("Unknown")
+    # 2. Country-Wise Popular Content
+    st.header("🌎 2. Country-Wise Popular Content Recommendations")
 
-    # Split multiple countries into individual entries
+    df["country"] = df["country"].fillna("Unknown")
     all_countries = df["country"].str.split(", ").explode().unique()
     all_countries = sorted([country.strip() for country in all_countries if country != "Unknown"])
 
-    # User Input: Select Country
-    selected_country = st.selectbox("Select a Country:", all_countries)
-
-    # Filter data by selected country
-    country_filtered_df = df[df["country"].str.contains(selected_country, case=False, na=False)]
-
-    # Filter only Movies and TV Shows
-    popular_content = country_filtered_df[country_filtered_df["rating"] != "Not Rated"]
-
-    # Sort by most popular ratings
-    rating_priority = {
-        "TV-MA": 1, "R": 2, "PG-13": 3, "TV-14": 4, "PG": 5, 
-        "G": 6, "NC-17": 7, "TV-Y7": 8, "TV-Y": 9, "TV-G": 10, 
-        "TV-PG": 11, "Not Rated": 12
-    }
-    popular_content["rating_priority"] = popular_content["rating"].map(rating_priority)
-    sorted_content = popular_content.sort_values(by=["rating_priority", "release_year"], ascending=[True, False])
-
-    # Select Top 20 Recommendations
-    recommendations = sorted_content.head(20)
-
-    # Display Recommendations
-    st.markdown(f"### Top 20 Popular Titles from {selected_country}")
-    if recommendations.empty:
-        st.write("No popular content found for the selected country. Please choose a different country.")
+    selected_country = st.selectbox("🌐 Select a Country:", all_countries)
+
+    if selected_country:
+        country_filtered_df = df[df["country"].str.contains(selected_country, case=False, na=False)]
+        popular_content = country_filtered_df[country_filtered_df["rating"] != "Not Rated"]
+        rating_priority = {
+            "TV-MA": 1, "R": 2, "PG-13": 3, "TV-14": 4, "PG": 5, 
+            "G": 6, "NC-17": 7, "TV-Y7": 8, "TV-Y": 9, "TV-G": 10, 
+            "TV-PG": 11, "Not Rated": 12
+        }
+        popular_content["rating_priority"] = popular_content["rating"].map(rating_priority)
+        sorted_content = popular_content.sort_values(by=["rating_priority", "release_year"], ascending=[True, False])
+        recommendations = sorted_content.head(20)
+        st.markdown(f"### 🎬 Top 20 Popular Titles from **{selected_country}**")
+        if recommendations.empty:
+            st.warning("No popular content found for the selected country. Please choose a different country.")
+        else:
+            st.write(recommendations[["title", "type", "release_year", "rating", "country"]])
     else:
-        st.write(recommendations[["title", "type", "release_year", "rating", "country"]])
+        st.info("Please select a country to view popular content recommendations.")
 
+    st.markdown("---")  # Section divider
 
+    # 3. Duration Graph
+    st.header("📊 3. Duration Trends Analysis")
 
-# 3.Duration Graph
-    # Preprocess the dataset
-    df["release_year"] = df["release_year"].fillna(df["release_year"].median())
-    df["listed_in"] = df["listed_in"].fillna("Unknown")
     df["duration"] = df["duration"].fillna("Unknown")
 
-    # Function to preprocess duration
     def preprocess_duration(row):
-        if "min" in row:  # For movies
+        if "min" in row:
             return int(row.replace(" min", ""))
-        elif "Season" in row:  # For TV Shows
+        elif "Season" in row:
             return int(row.replace(" Season", "").replace("s", ""))
-        else:
-            return None
+        return None
 
     df["duration_numeric"] = df["duration"].apply(preprocess_duration)
 
-    # Streamlit UI
-    st.title("3. Duration Trends Analysis")
-
-    # Inputs
-    content_type = st.radio("Select Content Type:", options=["Movie", "TV Show"])
+    content_type = st.radio("📽️ Select Content Type:", options=["Movie", "TV Show"])
     available_genres = sorted(set(df["listed_in"].str.split(", ").explode().dropna()))
-    selected_genres = st.multiselect("Select Genres:", available_genres, default=[])
+    selected_genres = st.multiselect("🎞️ Select Genres:", available_genres, default=[])
 
-    # Filter dataset
     filtered_df = df[
         (df["type"] == content_type) &
         (df["listed_in"].apply(lambda genres: any(genre in genres for genre in selected_genres))) &
         (df["duration_numeric"].notnull())
     ]
 
-    # Group and analyze
     duration_analysis = (
         filtered_df.groupby("release_year")["duration_numeric"]
         .mean()
@@ -135,8 +130,7 @@ def model_application_page():
         .rename(columns={"duration_numeric": "Average Duration"})
     )
 
-    # Plot the results
-    st.markdown("### Duration Trends Over Time")
+    st.markdown("### 📈 Duration Trends Over Time")
     if not duration_analysis.empty:
         fig, ax = plt.subplots(figsize=(10, 6))
         ax.plot(
@@ -144,12 +138,13 @@ def model_application_page():
             duration_analysis["Average Duration"],
             marker="o",
             linestyle="-",
+            color="#FF6347",
         )
-        ax.set_title(f"Average Duration of {content_type}s in Selected Genres Over Time")
+        ax.set_title(f"Average Duration of {content_type}s Over Time", fontsize=16)
         ax.set_xlabel("Release Year")
-        ax.set_ylabel("Average Duration (Minutes for Movies / Seasons for TV Shows)")
+        ax.set_ylabel("Average Duration (Minutes/Seasons)")
         st.pyplot(fig)
     else:
-        st.write("No data available for the selected criteria.")
+        st.warning("No data available for the selected criteria.")
 
-model_application_page()
\ No newline at end of file
+model_application_page()
diff --git a/pages/model_training.py b/pages/model_training.py
index d1b10b203a9d2543f06dc10e5217d5b4e14ab6fb..aa15e98fbe98ee70bfeea438101a1ed08caa8c1f 100644
--- a/pages/model_training.py
+++ b/pages/model_training.py
@@ -2,7 +2,6 @@ import streamlit as st
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
-import joblib
 import numpy as np
 
 from sklearn.linear_model import LogisticRegression
@@ -11,40 +10,41 @@ from sklearn.preprocessing import StandardScaler, LabelEncoder
 from sklearn.metrics import classification_report, confusion_matrix
 
 
-
+# Function to load data
 def load_data(file_path):
-    return pd.read_csv("netflix_titles.csv")
+    """Load the Netflix dataset from the provided file path."""
+    return pd.read_csv(file_path)
 
 
+# Function to preprocess data
 def preprocess_data(df: pd.DataFrame):
-    """Preprocess the dataset: drop missing values, encode genre, scale features, etc."""
-    # a. Remove rows with missing values in key columns
+    """Preprocess the dataset: handle missing values, clean columns, encode features, and scale numeric data."""
+    # Handle missing values in key columns
     df = df.dropna(subset=['duration', 'director', 'release_year', 'listed_in'])
 
-    # b. Clean 'duration' column (convert "xxx min" -> int, or 0 if unrecognized)
+    # Clean 'duration' column
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        return 0  # For "Season" or other unrecognized patterns
+        return 0  # Handle 'Season' or unrecognized patterns
 
     df['duration'] = df['duration'].apply(clean_duration)
-    df = df[df['duration'] >= 0]
+    df = df[df['duration'] >= 0]  # Remove invalid durations
 
-    # c. Extract primary genre; group rare genres into 'Other'
+    # Extract and encode primary genre
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
     genre_counts = df['primary_genre'].value_counts()
     common_genres = genre_counts[genre_counts >= 5].index
     df['primary_genre'] = df['primary_genre'].apply(lambda g: g if g in common_genres else 'Other')
 
-    # d. Encode genre with LabelEncoder
+    # Encode genres and directors
     genre_encoder = LabelEncoder()
     df['genre_encoded'] = genre_encoder.fit_transform(df['primary_genre'])
 
-    # e. Encode director by counting occurrences
     director_counts = df['director'].value_counts()
     df['director_count'] = df['director'].map(director_counts)
 
-    # f. Scale 'duration' and 'release_year' separately
+    # Scale numeric features
     duration_scaler = StandardScaler()
     release_year_scaler = StandardScaler()
     df['duration_scaled'] = duration_scaler.fit_transform(df[['duration']])
@@ -53,23 +53,26 @@ def preprocess_data(df: pd.DataFrame):
     return df, genre_encoder, duration_scaler, release_year_scaler, director_counts
 
 
+# Function to train Logistic Regression
 def train_logistic_regression(X_train, y_train):
-    """Train a Logistic Regression classifier."""
-    # Increase max_iter to ensure convergence on multi-class data
+    """Train a Logistic Regression model on the training dataset."""
     model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state=42)
     model.fit(X_train, y_train)
     return model
 
 
+# Evaluate the model
 def evaluate_model(y_test, y_pred, encoder):
-    """Display classification metrics and confusion matrix."""
-    # Classification Report
+    """Display classification metrics and a confusion matrix."""
     report = classification_report(y_test, y_pred, target_names=encoder.classes_)
+    st.markdown("### Classification Report")
+    st.text(report)
 
 
+# Plot correlation heatmap
 def plot_correlation(df: pd.DataFrame):
-    """Optional: Plot a correlation heatmap for numeric features + encoded genre."""
-    st.markdown("## Correlation Heatmap")
+    """Visualize correlation between numeric features and the target."""
+    st.markdown("### Correlation Heatmap")
     corr_cols = ['duration', 'release_year', 'director_count', 'genre_encoded']
     corr = df[corr_cols].corr()
     fig, ax = plt.subplots(figsize=(8, 6))
@@ -77,115 +80,160 @@ def plot_correlation(df: pd.DataFrame):
     st.pyplot(fig)
 
 
+# Visualize feature coefficients
 def plot_feature_coefficients(model, features):
-    """Plot feature coefficients from the Logistic Regression model."""
-    # For multinomial logistic regression, model.coef_ shape is (n_classes, n_features).
-    # Below, we show absolute coefficients for class 0 as an example, or an average.
-    st.markdown("## Feature Coefficients")
-    # Taking the average of absolute coefficients across all classes for interpretability
-    # (Alternatively, you can visualize them per-class.)
+    """Visualize feature coefficients from the Logistic Regression model."""
+    st.markdown("### Feature Coefficients")
     mean_abs_coef = np.mean(np.abs(model.coef_), axis=0)
     coef_series = pd.Series(mean_abs_coef, index=features).sort_values()
 
     fig, ax = plt.subplots(figsize=(8, 6))
     coef_series.plot(kind='barh', color='skyblue', ax=ax)
-    ax.set_title("Average Absolute Feature Coefficients (Multinomial Logistic Regression)")
+    ax.set_title("Average Absolute Feature Coefficients")
     st.pyplot(fig)
 
 
 def visualize_custom_prediction(df: pd.DataFrame, encoder, custom_genre: str):
-    """Highlight the custom predicted genre in the distribution (red vs. blue)."""
+    """Highlight the custom predicted genre in a bar chart with a legend."""
+    # Generate color palette: red for custom genre, blue for others
     palette = ['red' if genre == custom_genre else 'blue' for genre in encoder.classes_]
+    
+    # Create the plot
     fig, ax = plt.subplots(figsize=(8, 4))
-    sns.countplot(y='predicted_genre', data=df, order=encoder.classes_, palette=palette, ax=ax)
-    ax.set_title("Predicted Genre Distribution (Custom Prediction in Red)")
+    sns.countplot(
+        y='predicted_genre', 
+        data=df, 
+        order=encoder.classes_, 
+        palette=palette, 
+        ax=ax
+    )
+    ax.set_title("Predicted Genre Distribution (Custom Prediction Highlighted)")
+    ax.set_xlabel("Count")
+    ax.set_ylabel("Genre")
+    
+    # Add legend
+    handles = [
+        plt.Line2D([0], [0], marker='o', color='red', label="Custom Prediction", markersize=10, linestyle=''),
+        plt.Line2D([0], [0], marker='o', color='blue', label="Data", markersize=10, linestyle='')
+    ]
+    ax.legend(handles=handles, loc='upper right', title="Legend")
+    
+    # Display the plot in Streamlit
     st.pyplot(fig)
 
 
+
+# Main application
 def main():
-    st.title("Netflix Genre Prediction using Logistic Regression")
+    st.title("🎬 Netflix Genre Prediction using Logistic Regression")
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #34495e;
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            text-align: center;
+            font-size: 20px;
+        }
+        </style>
+        <div class="header">
+            Analyze, predict, and visualize genres in the Netflix dataset using Logistic Regression!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
 
-    # 1. Load Data
+    # Load data
     file_path = "netflix_titles.csv"
     df = load_data(file_path)
     if st.checkbox("Show Raw Data"):
-        st.write(df.head())
+        st.dataframe(df.head())
 
-    # 2. Preprocess Data
+    # Preprocess data
     df, genre_encoder, duration_scaler, release_year_scaler, director_counts = preprocess_data(df)
 
-
-    # 4. Feature & Target
-    st.markdown("## Feature and Target Selection")
+    # Feature and Target Selection
+    st.markdown("## 📊 Feature and Target Selection")
     features = ["duration_scaled", "director_count", "release_year_scaled"]
     target_col = "genre_encoded"
     X, y = df[features], df[target_col]
 
-    st.write("### Features (Processed):")
-    st.write(X.head())
-    st.write("### Target (Genre Encoded):")
-    st.write(y.head())
+    st.write("### Features:")
+    st.dataframe(X.head())
+    st.write("### Target (Encoded Genres):")
+    st.dataframe(y.head())
 
-    # 5. Train/Test Split
-    st.markdown("## Splitting Data")
+    # Train-test split
+    st.markdown("## ✂️ Splitting Data")
     test_size = st.slider("Test Set Size (%)", min_value=10, max_value=50, value=20, step=5)
-    from sklearn.model_selection import train_test_split
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=test_size/100, random_state=42
-    )
-    st.write(f"Training set size: {len(X_train)}")
-    st.write(f"Testing set size: {len(X_test)}")
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42)
+
+    st.write(f"Training Set Size: {len(X_train)}")
+    st.write(f"Testing Set Size: {len(X_test)}")
 
-    # 6. Train Logistic Regression
-    st.markdown("## Training the Model with Logistic Regression")
+    # Train Logistic Regression
+    st.markdown("## 🤖 Training Logistic Regression")
     model = train_logistic_regression(X_train, y_train)
     st.success("Logistic Regression Model Trained Successfully!")
 
-    # 7. Evaluate Model
-    st.markdown("## Model Evaluation")
-    y_pred = model.predict(X_test)
-    evaluate_model(y_test, y_pred, genre_encoder)
 
-  # 3. (Optional) Correlation Plot
+    # Correlation heatmap
     plot_correlation(df)
 
-    # 8. Predict on Entire Dataset
+    # Predict on entire dataset
     df["predicted_genre"] = genre_encoder.inverse_transform(model.predict(X))
-    st.markdown("## Sample Predictions on Entire Dataset")
-    st.write(df[[*features, target_col, "predicted_genre"]].head())
+    st.markdown("### Sample Predictions:")
+    st.dataframe(df[["duration", "release_year", "director_count", "primary_genre", "predicted_genre"]].head())
+
+    # Custom prediction
+    st.markdown("## 🎥 Make a Custom Prediction")
+
+    # Add a default option for directors
+    unique_directors = ["Select a Director"] + sorted(df["director"].unique())
+    selected_director = st.selectbox("Select Director", unique_directors)
+
+    # Initialize placeholders for inputs
+    custom_genre = None
+
+    if selected_director != "Select a Director":
+        # Proceed if a valid director is selected
+        director_count = director_counts.get(selected_director, 0)
+        
+        # Collect other inputs
+        duration_choice = st.slider(
+            "Duration (minutes)", 
+            min_value=int(df["duration"].min()), 
+            max_value=int(df["duration"].max())
+        )
+        release_year_choice = st.slider(
+            "Release Year", 
+            min_value=int(df["release_year"].min()), 
+            max_value=int(df["release_year"].max())
+        )
+
+        # Scale inputs and make a prediction
+        try:
+            duration_scaled_input = duration_scaler.transform([[duration_choice]])[0][0]
+            release_year_scaled_input = release_year_scaler.transform([[release_year_choice]])[0][0]
+            custom_input = [[duration_scaled_input, director_count, release_year_scaled_input]]
+            custom_pred_label = model.predict(custom_input)[0]
+            custom_genre = genre_encoder.inverse_transform([custom_pred_label])[0]
+        except Exception as e:
+            st.error(f"Error during prediction: {e}")
+    else:
+        # Prompt the user to select a valid director
+        st.warning("Please select a Director to make a Prediction.")
+
+    # Display the predicted genre if available
+    if custom_genre:
+        st.markdown(f"### Predicted Genre: 🎬 **{custom_genre}**")
+    else:
+        st.markdown("### Predicted Genre: Please provide all inputs to get a prediction.")
 
-    # 9. Custom Prediction
-    st.markdown("## Make a Custom Prediction")
-    st.markdown("### Input Movie Details")
-    unique_directors = df["director"].unique()
-    selected_director = st.selectbox("Select a Director", sorted(unique_directors))
-    director_count = director_counts.get(selected_director, 0)
-
-    dur_min, dur_max = int(df["duration"].min()), int(df["duration"].max())
-    duration_choice = st.slider("Duration (minutes)", dur_min, dur_max, 90)
-
-    year_min, year_max = int(df["release_year"].min()), int(df["release_year"].max())
-    release_year_choice = st.slider("Release Year", year_min, year_max, 2020)
-
-    # Scale inputs
-    duration_scaled_input = duration_scaler.transform([[duration_choice]])[0][0]
-    release_year_scaled_input = release_year_scaler.transform([[release_year_choice]])[0][0]
-
-    custom_input = [[duration_scaled_input, director_count, release_year_scaled_input]]
-    custom_pred_label = model.predict(custom_input)[0]
-    custom_genre = genre_encoder.inverse_transform([custom_pred_label])[0]
-    st.write(f"### Predicted Genre: {custom_genre}")
-
-    # 10. Visualization
     visualize_custom_prediction(df, genre_encoder, custom_genre)
 
-    # 13. Session State
-    st.session_state["trained_model"] = model
-    st.session_state["genre_encoder"] = genre_encoder
-    st.session_state["duration_scaler"] = duration_scaler
-    st.session_state["release_year_scaler"] = release_year_scaler
-    st.session_state["director_counts"] = director_counts
-
-    st.success("Logistic Regression training and prediction completed successfully!")
 
+# Run the application
 main()
diff --git a/pages/preprocessing.py b/pages/preprocessing.py
index 1362d20f4ca0c57110d4a047c8f8c82c63964832..fe9bad0f5d15cd9747881397bfc610308eb34795 100644
--- a/pages/preprocessing.py
+++ b/pages/preprocessing.py
@@ -4,75 +4,104 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler
 import numpy as np
 
 def preprocessing_page():
-    st.title("Netflix Dataset Preprocessing")
-
-    # 1. Load Netflix dataset
+    # Title with a styled header
+    st.title("📊 Netflix Dataset Preprocessing")
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #f39c12;
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            text-align: center;
+            font-size: 20px;
+            margin-bottom: 20px;
+        }
+        </style>
+        <div class="header">
+            Clean, preprocess, and prepare your Netflix dataset for advanced analytics!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    # Load Netflix dataset
     file_path = "netflix_titles.csv"  # Ensure this path is correct
     try:
+        # Load the dataset and store it in session state
         df = pd.read_csv(file_path)
-        st.session_state.df = df  # Store original dataframe in session state
+        st.session_state.df = df
+        st.success("🎉 Dataset loaded successfully!")
     except FileNotFoundError:
-        st.error(f"File not found at path: {file_path}. Please check the file path and try again.")
+        # Error handling if the file is not found
+        st.error(f"❌ File not found at path: {file_path}. Please check the file path and try again.")
         return
 
-    # 2. Display original dataset
-    st.markdown("## Original Dataset")
-    st.write(df.head())
+    st.markdown("---")  # Divider
 
-    # 3. Summary Statistics Before Cleaning
-    st.markdown("## Summary Statistics (Before Cleaning)")
-    st.write(df.describe(include='all'))
+    # 1. Display original dataset
+    st.header("🔍 Original Dataset")
+    st.write(df.head())  # Display the first few rows
 
-    # 4. Missing Values Overview
-    st.markdown("## Missing Values Overview")
-    missing_values = df.isnull().sum()
-    st.write("### Missing Values per Column")
+    # 2. Summary Statistics Before Cleaning
+    st.header("📈 Summary Statistics (Before Cleaning)")
+    st.write(df.describe(include='all'))  # Display all columns' statistics
+
+    # 3. Missing Values Overview
+    st.header("🛠️ Missing Values Overview")
+    missing_values = df.isnull().sum()  # Calculate missing values per column
+    st.write("### Missing Values per Column:")
     st.write(missing_values)
 
-    # 5. Removing All Rows with Any Missing Values
-    st.markdown("## Removing All Missing Values")
+    # 4. Removing All Rows with Any Missing Values
+    st.header("🧹 Removing All Missing Values")
     num_rows_before = df.shape[0]
     num_missing = missing_values.sum()
     st.write(f"**Total Rows Before Cleaning:** {num_rows_before}")
     st.write(f"**Total Missing Values:** {num_missing}")
 
     if num_missing > 0:
+        # Drop rows with missing values
         df = df.dropna()
         num_rows_after = df.shape[0]
         rows_removed = num_rows_before - num_rows_after
-        st.write(f"**Total Rows After Cleaning:** {num_rows_after}")
-        st.write(f"**Total Rows Removed:** {rows_removed}")
+        st.success(f"✅ Missing values removed! Total rows after cleaning: {num_rows_after}")
+        st.info(f"🧾 Rows Removed: {rows_removed}")
     else:
-        st.write("No missing values found. No rows removed.")
+        # Notify the user if no missing values are found
+        st.success("✅ No missing values found. Dataset is already clean.")
 
-    # Update session state with cleaned dataframe
-    st.session_state.df = df
+    st.markdown("---")  # Divider
 
-    # 6. Summary Statistics After Cleaning
-    st.markdown("## Summary Statistics (After Cleaning)")
+    # 5. Summary Statistics After Cleaning
+    st.header("📊 Summary Statistics (After Cleaning)")
     st.write(df.describe(include='all'))
 
-    # 7. Scaling: Min-Max and Standardization
-    st.markdown("## Scaling: Min-Max Scaling and Standardization")
-    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
+    # 6. Scaling: Min-Max and Standardization
+    st.header("⚖️ Scaling: Min-Max Scaling and Standardization")
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()  # Identify numeric columns
 
     if numeric_cols:
-        # a. Min-Max Scaling
-        st.markdown("### Min-Max Scaled Data")
+        # Min-Max Scaling
+        st.subheader("🔹 Min-Max Scaled Data")
         minmax_scaler = MinMaxScaler()
         df_minmax_scaled = pd.DataFrame(minmax_scaler.fit_transform(df[numeric_cols]), columns=numeric_cols)
         st.write(df_minmax_scaled.head())
 
-        # b. Standardization
-        st.markdown("### Standardized Data")
+        # Standardization
+        st.subheader("🔹 Standardized Data")
         standard_scaler = StandardScaler()
         df_standardized = pd.DataFrame(standard_scaler.fit_transform(df[numeric_cols]), columns=numeric_cols)
         st.write(df_standardized.head())
     else:
-        st.warning("No numeric columns available for scaling in the dataset.")
+        # Notify the user if no numeric columns are available for scaling
+        st.warning("⚠️ No numeric columns available for scaling in the dataset.")
 
-    # 8. Outlier Detection and Removal
-    st.markdown("## Outlier Detection and Removal")
+    st.markdown("---")  # Divider
+
+    # 7. Outlier Detection and Removal
+    st.header("🚨 Outlier Detection and Removal")
     st.write("Identifying outliers using the Interquartile Range (IQR) method.")
 
     # Function to remove outliers using IQR
@@ -92,14 +121,17 @@ def preprocessing_page():
         rows_removed = initial_count - final_count
         st.write(f"**Outliers Removed from '{col}':** {rows_removed} rows")
 
-    # 9. Summary Statistics After Outlier Removal
-    st.markdown("## Dataset After Removing Outliers")
-    st.write(df.describe(include='all'))
+    st.markdown("---")  # Divider
+
+    # 8. Dataset After Removing Outliers
+    st.header("📜 Dataset After Removing Outliers")
+    st.write(df.describe(include='all'))  # Display updated statistics
 
     # Update session state with the final cleaned dataframe
     st.session_state.df = df
 
-    # 10. Mark Preprocessing as Complete
-    st.success("Preprocessing Complete. Your dataset is now clean and ready for the next steps!")
+    # 9. Mark Preprocessing as Complete
+    st.success("🎉 Preprocessing Complete! Your dataset is clean and ready for further analysis.")
 
+# Run the preprocessing page function
 preprocessing_page()
diff --git a/rasa/actions/actions.py b/rasa/actions/actions.py
index 5267e20b6e1f180a6f5bdc3b1ddbee956c4c8696..a5e987e012c8e4f844890b00d6a9942c4e6aeddc 100644
--- a/rasa/actions/actions.py
+++ b/rasa/actions/actions.py
@@ -1,73 +1,20 @@
+# actions.py
+
 from typing import Any, Text, Dict, List
 from rasa_sdk import Action, Tracker
 from rasa_sdk.executor import CollectingDispatcher
 
-class ActionAskMovieGenre(Action):
-    def name(self) -> Text:
-        return "action_ask_movie_genre"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="To predict a movie's genre, I need the director's name, movie duration, and launch date. Let's get started!")
-        return []
-
-class ActionProvideDirectorName(Action):
-    def name(self) -> Text:
-        return "action_provide_director_name"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(text="Thank you for providing the director's name. Please provide the movie's duration next.")
-        return []
-
-class ActionProvideMovieDuration(Action):
-    def name(self) -> Text:
-        return "action_provide_movie_duration"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(text="Got the duration! Now, can you share the launch date of the movie?")
-        return []
-
-class ActionProvideLaunchDate(Action):
-    def name(self) -> Text:
-        return "action_provide_launch_date"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="Great! I now have all the details. Let me predict the genre for you.")
-        return []
-
-class ActionPredictMovieGenre(Action):
-    def name(self) -> Text:
-        return "action_predict_movie_genre"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        # Replace this with your actual prediction logic
-        dispatcher.utter_message(
-            text="Based on the details provided, the predicted genre of the movie is Drama. Let me know if there's anything else you need!")
-        return []
-
-class ActionExplainPredictionProcess(Action):
-    def name(self) -> Text:
-        return "action_explain_prediction_process"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="I predict movie genres by analyzing the director's name, duration, and launch date. Each parameter gives valuable insights into the genre.")
-        return []
+# Currently, no custom actions are needed because all bot replies are 'utter_' responses.
+# You can add custom actions here if your use case expands.
 
-class ActionHelp(Action):
+class ActionDummy(Action):
+    """A placeholder action, not used in stories."""
+    
     def name(self) -> Text:
-        return "action_help"
+        return "action_dummy"
 
     def run(self, dispatcher: CollectingDispatcher,
             tracker: Tracker,
             domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="I'm here to assist you with predicting movie genres. You can ask me how it works or start by sharing the movie details.")
+        dispatcher.utter_message(text="This is a dummy action for demonstration.")
         return []
diff --git a/rasa/data/nlu.yml b/rasa/data/nlu.yml
index e83deb307d7af0194421f3d07501a192b4de90e1..a11441b267797891fd0f092a80ec4b10ede12f00 100644
--- a/rasa/data/nlu.yml
+++ b/rasa/data/nlu.yml
@@ -3,173 +3,105 @@ version: "3.1"
 nlu:
 - intent: greet
   examples: |
-    - hello how are you?
-    - hey
-    - hello
     - hi
+    - hello
+    - hey
     - hello there
-    - good morning
-    - good evening
-    - moin
-    - hey there
-    - let's go
-    - hey dude
-    - good morning
-    - good evening
-    - good afternoon
-    - hallo
+
+- intent: thank_you
+  examples: |
+    - thanks
+    - thank you
+    - cool, thanks
+    - great, thanks
+    - Nice! I think I’ll start with To All the Boys I’ve Loved Before. Thank you!
 
 - intent: goodbye
   examples: |
-    - cu
-    - ciao ciao
+    - bye
     - goodbye
     - see you later
-    - good night
-    - bye
     - have a nice day
-    - see you around
-    - bye bye
-    - see you later
-    - thank you good
 
-- intent: affirm
+###############################################################################
+# Movie Enthusiast Dialogs
+###############################################################################
+- intent: ask_romance_recommendation
   examples: |
-    - yes
-    - y
-    - indeed
-    - of course
-    - that sounds good
-    - correct
-    - ok
-    - yep
-    - yeah
-    - hmm
-    - okay
+    - I’m in the mood for a romantic movie. Can you suggest some good ones?
+    - Recommend some romantic movies
+    - Show me some romance movies
+    - I want a romantic film
 
-- intent: deny
+- intent: ask_romance_summary
   examples: |
-    - no
-    - n
-    - never
-    - I don't think so
-    - don't like that
-    - no way
-    - not really
-    - I don't want to do this.
-    - nope
+    - Could you give me a brief summary of each?
+    - Tell me what these movies are about
+    - Can you explain what happens in each?
 
-- intent: mood_great
+- intent: ask_family_friendly_action
   examples: |
-    - perfect
-    - great
-    - amazing
-    - feeling like a king
-    - wonderful
-    - I am feeling very good
-    - I am great
-    - I am amazing
-    - I am going to save the world
-    - super stoked
-    - extremely good
-    - so so perfect
-    - so good
-    - so perfect
-    - happy
-    - I am good
-    - feels great
-    - fine
-    - fine, thank you
-    - not bad
+    - I’m looking for a family-friendly action movie for a weekend watch party. Any suggestions?
+    - Suggest some action movies suitable for kids
+    - Could you recommend a family-friendly action film?
 
-- intent: mood_unhappy
+- intent: ask_most_popular_action
   examples: |
-    - my day was horrible
-    - I am sad
-    - I don't feel very well
-    - I am disappointed
-    - super sad
-    - I'm so sad
-    - sad
-    - very sad
-    - unhappy
-    - not good
-    - not very good
-    - extremely sad
-    - so saad
-    - so sad
-    - not so good
+    - Which one is the most popular?
+    - Which is the best among them?
+    - Which one do you recommend the most?
 
-- intent: bot_challenge
+###############################################################################
+# Content Strategist Dialogs
+###############################################################################
+- intent: ask_comedy_viewer_demographics
   examples: |
-    - are you a bot?
-    - are you a human?
-    - am I talking to a bot?
-    - am I talking to a human?
-    - What are you?
-    - What can you do?
-    - who are you?
+    - I’d like to know about viewer demographics for comedy movies
+    - Any insights on comedy audience demographics?
+    - Who watches comedy films the most?
 
-- intent: ask_action_movies
-  example: |
-   - Can you show me some action movies?
-   - Show me action movies?
-   - Action movies?
+- intent: ask_drama_viewer_demographics
+  examples: |
+    - How about drama? Do they appeal to the same demographic?
+    - What about drama viewers?
+    - Is drama watched by the same age group?
+    - How about drama? Do they also appeal to the same demographic?
 
-- intent: ask_movie_genre
+- intent: ask_genre_trends
   examples: |
-    - Can you predict the genre of a movie?
-    - What is the genre of this movie?
-    - I want to know the genre of a film.
-    - Tell me the genre of this movie.
-    - Could you find the genre of a movie for me?
-    - Predict the movie genre.
-    - Identify the genre of the movie.
-    - Find out the genre for me.
-    - Can you help me know the movie genre?
+    - I want to compare viewership trends for different genres across seasons
+    - Can you help with seasonal genre trends?
+    - How do genres perform in different seasons?
 
-- intent: provide_director_name
+- intent: ask_specific_month_trends
   examples: |
-    - The director is Christopher Nolan.
-    - It's directed by Steven Spielberg.
-    - Directed by Quentin Tarantino.
-    - The director of the movie is James Cameron.
-    - Ridley Scott is the director.
-    - The film was directed by Martin Scorsese.
-    - This movie's director is Stanley Kubrick.
+    - Any specific months that stand out?
+    - Which months have higher viewership for certain genres?
+    - Tell me about monthly spikes
 
-- intent: provide_movie_duration
+###############################################################################
+# Data Science Student Dialogs
+###############################################################################
+- intent: ask_model_evaluation_metrics
   examples: |
-    - The movie is 120 minutes long.
-    - It's a two-hour film.
-    - Duration of the movie is 90 minutes.
-    - The length of the movie is 150 minutes.
-    - It's about 3 hours long.
-    - The runtime is 2 hours 15 minutes.
+    - Can you tell me about how you evaluate your genre prediction model?
+    - How is the model evaluated?
+    - Which metrics do you use for evaluation?
 
-- intent: provide_launch_date
+- intent: ask_model_accuracy
   examples: |
-    - The movie was released on 2020-12-25.
-    - It came out on 1999-07-16.
-    - Release date is 2021-05-07.
-    - It premiered in 2010.
-    - It was released in August 2012.
-    - The release year is 1984.
+    - Can you share the latest accuracy figure?
+    - What's the accuracy of your model?
+    - How accurate is the model right now?
 
-- intent: ask_for_help
+- intent: ask_feature_engineering
   examples: |
-    - I need some help.
-    - Can you assist me?
-    - Help me out.
-    - I need your assistance.
-    - Please guide me.
-    - Can you explain this?
+    - I’m curious about how you engineer new features from the raw data
+    - How do you create features for the dataset?
+    - What feature engineering steps do you take?
 
-- intent: ask_about_prediction_process
+- intent: ask_one_hot_encoding
   examples: |
-    - How do you predict the genre of a movie?
-    - Can you explain the prediction process?
-    - What details do you need to predict a movie's genre?
-    - How does this work?
-    - What is the method you use to find the genre?
-    - Tell me how the genre prediction works.
+    - Do you also use one-hot encoding for any categorical data?
+    - Are categorical features one-hot encoded?
+    - Do you apply one-hot encoding?
diff --git a/rasa/data/rules.yml b/rasa/data/rules.yml
index 60faa6d4463489042f3a77f75837e3eb0f66f6f9..70ba7c195eede59d1adeba9bc8c9e36c3e80e0d4 100644
--- a/rasa/data/rules.yml
+++ b/rasa/data/rules.yml
@@ -2,12 +2,7 @@ version: "3.1"
 
 rules:
 
-- rule: Say goodbye anytime the user says goodbye
+- rule: Respond to bot challenge
   steps:
-  - intent: goodbye
-  - action: utter_goodbye
-
-- rule: Say 'I am a bot' anytime the user challenges
-  steps:
-  - intent: bot_challenge
-  - action: utter_iamabot
\ No newline at end of file
+    - intent: bot_challenge
+    - action: utter_iamabot
diff --git a/rasa/data/stories.yml b/rasa/data/stories.yml
index 8be846b4532901db54c450a1513e5bb4605f51cc..29a118444c2772c3de89e2dd7e250ff707117d5b 100644
--- a/rasa/data/stories.yml
+++ b/rasa/data/stories.yml
@@ -1,132 +1,86 @@
 version: "3.1"
 
 stories:
-
-- story: happy path
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_great
-  - action: utter_happy
-
-- story: sad path 1
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: affirm
-  - action: utter_happy
-
-- story: sad path 2
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: deny
-  - action: utter_goodbye
-
-- story: asking for movie genre
-  steps:
-    - intent: greet
-    - action: utter_greet
-    - intent: ask_movie_genre
-    - action: action_ask_movie_genre
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    - intent: provide_launch_date
-    - action: action_provide_launch_date
-    - action: action_predict_movie_genre
-    - action: utter_did_that_help
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
-
-- story: Exploring Action Movies by Genre
-  steps:
-    - intent: greet
-    - action: utter_greet
-    - intent: ask_action_movies
-    - action: utter_action_movies
-    
-
-- story: asking about prediction process
+###############################################################################
+# 1. Movie Enthusiast - Dialog 1: Seeking Romance Recommendations
+###############################################################################
+- story: Movie Enthusiast Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: ask_about_prediction_process
-    - action: action_explain_prediction_process
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_romance_recommendation
+    - action: utter_romance_recommendations
+    - intent: ask_romance_summary
+    - action: utter_romance_summaries
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing director name only
+###############################################################################
+# 2. Movie Enthusiast - Dialog 2: Family-Friendly Action Flicks
+###############################################################################
+- story: Movie Enthusiast Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_family_friendly_action
+    - action: utter_family_friendly_action
+    - intent: ask_most_popular_action
+    - action: utter_most_popular_action
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing movie duration only
+###############################################################################
+# 3. Content Strategist - Dialog 1: Delving Into Viewer Demographics
+###############################################################################
+- story: Content Strategist Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    # - action: utter_asking_everything_ok
-    # - intent: deny
-    # - action: utter_deny
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_comedy_viewer_demographics
+    - action: utter_comedy_demographics
+    - intent: ask_drama_viewer_demographics
+    - action: utter_drama_demographics
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing launch date only
+###############################################################################
+# 4. Content Strategist - Dialog 2: Comparing Seasonal Trends
+###############################################################################
+- story: Content Strategist Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_launch_date
-    - action: action_provide_launch_date
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_genre_trends
+    - action: utter_genre_trends
+    - intent: ask_specific_month_trends
+    - action: utter_monthly_trends
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: incomplete movie details
+###############################################################################
+# 5. Data Science Student - Dialog 1: Model Evaluation Metrics
+###############################################################################
+- story: Data Science Student Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    # - action: utter_asking_everything_ok
-    # - intent: deny
-    # - action: utter_deny
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_model_evaluation_metrics
+    - action: utter_model_evaluation
+    - intent: ask_model_accuracy
+    - action: utter_model_accuracy
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: requesting help
+###############################################################################
+# 6. Data Science Student - Dialog 2: Feature Engineering Approaches
+###############################################################################
+- story: Data Science Student Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: ask_for_help
-    - action: action_help
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
-
+    - intent: ask_feature_engineering
+    - action: utter_feature_engineering
+    - intent: ask_one_hot_encoding
+    - action: utter_one_hot_encoding
+    - intent: thank_you
+    - action: utter_thank_you
diff --git a/rasa/domain.yml b/rasa/domain.yml
index b252d148a08285e69e6b432d2a7fbf8ad70dede0..21c0466f916e5d97e62b994be55f05947514a1d2 100644
--- a/rasa/domain.yml
+++ b/rasa/domain.yml
@@ -2,71 +2,162 @@ version: "3.1"
 
 intents:
   - greet
+  - bot_challenge
+  - thank_you
   - goodbye
-  - affirm
-  - deny
-  - mood_great
-  - mood_unhappy
-  - bot_challenge
-  - ask_movie_genre
-  - provide_director_name
-  - provide_movie_duration
-  - provide_launch_date
-  - ask_for_help
-  - ask_about_prediction_process
+  - ask_romance_recommendation
+  - ask_romance_summary
+  - ask_family_friendly_action
+  - ask_most_popular_action
+  - ask_comedy_viewer_demographics
+  - ask_drama_viewer_demographics
+  - ask_genre_trends
+  - ask_specific_month_trends
+  - ask_model_evaluation_metrics
+  - ask_model_accuracy
+  - ask_feature_engineering
+  - ask_one_hot_encoding
 
 responses:
+  ###########################################################################
+  # Generic
+  ###########################################################################
+  utter_iamabot:
+    - text: "I am a bot, powered by Rasa."
 
   utter_greet:
-  - text: "Hello and welcome! I am here to assist you with predicting the genre of any movie you have in mind. By providing some details like the director's name, movie duration, and launch date, I can make an educated guess about its genre. How can I help you get started today?"
+    - text: "Hello and welcome! How can I help you today?"
 
   utter_goodbye:
-  - text: "Goodbye! Thank you for using this service. If you ever have more movies to discuss, I'll be here to help. Have an amazing day!"
-
-  utter_cheer_up:
-  - text: "It’s okay if you’re feeling a bit down. Let’s talk about movies and dive into the wonderful world of cinema! It’s always a great escape."
-
-  utter_did_that_help:
-  - text: "I hope the information I provided was helpful. Is there anything else you'd like to ask or clarify?"
-
-  utter_happy:
-  - text: "That’s wonderful to hear! If you need any more assistance or have other questions, just let me know."
-
-  utter_ask_movie_genre:
-  - text: "I specialize in predicting the genre of movies! To get started, I need some details from you. Could you share the director's name, the movie's duration, and the release date? Once I have this information, I’ll provide a prediction."
-
-  utter_provide_director_name:
-  - text: "To predict the genre, I first need the name of the director. This helps me analyze the movie better. Could you please share the director's name?"
-
-  utter_provide_movie_duration:
-  - text: "The length of the movie plays a key role in identifying its genre. Could you let me know the movie's duration in minutes?"
-
-  utter_provide_launch_date:
-  - text: "The release date of a movie often hints at its genre trends. Could you please provide the launch date in the format YYYY-MM-DD?"
-
-  utter_ask_about_prediction_process:
-  - text: "Wondering how I predict genres? I use three key details: the director’s name, the movie’s duration, and its release date. These parameters help me make an informed prediction. Let me know if you’d like to give it a try!"
-
-  utter_help:
-  - text: "I’m here to make the process simple and fun! Share the director's name, movie duration, and launch date with me, and I’ll predict the genre for you. If you have questions about how this works, feel free to ask!"
-
-  utter_iamabot:
-  - text: "I am a smart assistant created to predict movie genres based on the information you provide. Let’s dive into the exciting world of cinema together!"
-
-  utter_action_movies:
-  - text: "Sure! Here are some top action movies:
-      Extraction
-      The Old Guard
-      6 Underground"
+    - text: "Goodbye! Have a great day."
+  
+  utter_default:
+    - text: "I'm sorry, I didn't get that. Can you please rephrase?"
+  
+  utter_thank_you:
+    - text: "You're welcome!"
+
+  ###########################################################################
+  # Movie Enthusiast - Dialog 1: Seeking Romance Recommendations
+  ###########################################################################
+  utter_romance_recommendations:
+    - text: |
+        Here are a few popular romantic movies currently trending on Netflix:
+        1. To All the Boys I’ve Loved Before
+        2. Set It Up
+        3. The Kissing Booth
+
+  utter_romance_summaries:
+    - text: >
+        1. To All the Boys I’ve Loved Before: A high school romance sparked by 
+        mailed love letters.
+
+        2. Set It Up: Two overworked assistants conspiring to matchmake their demanding bosses.
+
+        3. The Kissing Booth: A teen's first love complicated by friendship dynamics.
+
+  utter_romance_ack:
+    - text: "You’re welcome! Enjoy the movie. Let me know if you need more recommendations."
+
+  ###########################################################################
+  # Movie Enthusiast - Dialog 2: Family-Friendly Action Flicks
+  ###########################################################################
+  utter_family_friendly_action:
+    - text: |
+        Here are some action movies suitable for most ages:
+        1. Spy Kids
+        2. The Adventures of Sharkboy and Lavagirl
+        3. We Can Be Heroes
+
+  utter_most_popular_action:
+    - text: >
+        Spy Kids is especially popular among families due to its fun spy gadget themes.
+
+  utter_family_action_ack:
+    - text: "Glad I could help! Feel free to ask for more suggestions anytime."
+
+  ###########################################################################
+  # Content Strategist - Dialog 1: Delving Into Viewer Demographics
+  ###########################################################################
+  utter_comedy_demographics:
+    - text: >
+        Comedy films have the highest viewership among the 18-34 age group, 
+        with peak streaming times on weekends between 6 PM and 9 PM.
+
+  utter_drama_demographics:
+    - text: >
+        Dramas typically appeal to a broader 25-45 age range, with spikes on weekday evenings.
+
+  utter_demographics_ack:
+    - text: "Glad I could help! Let me know if you need more strategic insights."
+
+  ###########################################################################
+  # Content Strategist - Dialog 2: Comparing Seasonal Trends
+  ###########################################################################
+  utter_genre_trends:
+    - text: >
+        During winter, drama and romance do well (holiday seasons).
+        In summer, action and comedy spike due to family gatherings and free time.
+
+  utter_monthly_trends:
+    - text: >
+        December and January show high drama viewership, while June and July see a surge 
+        in action/comedy streaming.
+
+  utter_trends_ack:
+    - text: "That’s very useful. Let me know if you need more data insights."
+
+  ###########################################################################
+  # Data Science Student - Dialog 1: Model Evaluation Metrics
+  ###########################################################################
+  utter_model_evaluation:
+    - text: >
+        We use several metrics: Accuracy to see correct predictions, F1-Score for 
+        precision/recall balance, and ROC-AUC to measure classification performance.
+
+  utter_model_accuracy:
+    - text: "Our current genre prediction model is about 85% accurate."
+
+  utter_model_ack:
+    - text: "You’re welcome! Let me know if you need any more model insights."
+
+  ###########################################################################
+  # Data Science Student - Dialog 2: Feature Engineering Approaches
+  ###########################################################################
+  utter_feature_engineering:
+    - text: >
+        We create features like Director Frequency, Seasonal Release (grouping 
+        launch dates by season), and Title Keyword Extraction using NLP.
+
+  utter_one_hot_encoding:
+    - text: >
+        Yes, we apply one-hot encoding for categorical features such as country or language.
+
+  utter_feature_ack:
+    - text: "Perfect. Let me know if you need anything else regarding data preprocessing."
 
 actions:
-  - action_ask_movie_genre
-  - action_provide_director_name
-  - action_provide_movie_duration
-  - action_provide_launch_date
-  - action_help
-  - action_predict_movie_genre
-  - action_explain_prediction_process
+  # We will not use any custom actions for these sample dialogs,
+  # all responses are handled via 'utter_' responses
+  - utter_greet
+  - utter_romance_recommendations
+  - utter_romance_summaries
+  - utter_romance_ack
+  - utter_family_friendly_action
+  - utter_most_popular_action
+  - utter_family_action_ack
+  - utter_comedy_demographics
+  - utter_drama_demographics
+  - utter_demographics_ack
+  - utter_genre_trends
+  - utter_monthly_trends
+  - utter_trends_ack
+  - utter_model_evaluation
+  - utter_model_accuracy
+  - utter_model_ack
+  - utter_feature_engineering
+  - utter_one_hot_encoding
+  - utter_feature_ack
 
 session_config:
   session_expiration_time: 60
diff --git a/requirements.txt b/requirements.txt
index a108ab2fc5e85f3153d5474870edf2c7b4015a5c..411f5c3c584a6bef8c324311028f75b59f26f2b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,4 @@
 streamlit
 streamlit-chat
 scikit-learn
-seaborn
-flask
-virtualenv
 rasa
\ No newline at end of file