diff --git a/.DS_Store b/.DS_Store
index 7af4a461d14c25e88125e7b19c76e2734d9a2105..dfc5dbcfa01d087fb54b1a339b8f597409dba961 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/README.md b/README.md
index 96acca8f9ef2ad6e52220584ef2187be35b803c3..0ac28333c50b16b0317b95744484b7a30e98a72b 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@ Khan, Asif, 22300224
 
 Netflix Content Analysis
 
-MyGit Repositoty : https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/tree/main
+MyGit Repository: https://mygit.th-deg.de/assistance_systems
 
-MyGit Wiki : https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/wikis/home
+MyGit Wiki: https://mygit.th-deg.de/assistance_systems/gitlab-profile/-/wikis/home
 
 # Project Description
 
@@ -36,12 +36,7 @@ Cast and Directors
 
 After downloading the project files in a project folder, do the following steps:
 
-Prerequisite: 
-To install all the required libraries for this project, run the following command in your terminal:
-
-pip install -r requirements.txt
-
-The rasa model is trained! (with `rasa train`)
+Prerequisite: the Rasa model must already be trained (run `rasa train`).
 
 1. `rasa run actions`
 2. `rasa run`
@@ -67,18 +62,7 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 9. Data is loaded in the main.py
 
-10. see pages : 
-    about_me.py
-    add_and_apply_model.py
-    algorithm_selection.py
-    augmentation.py
-    chatbot.py
-    data_metrics.py
-    feature_engineering.py
-    model_application.py
-    model_training.py
-    preprocessing.py
-    visualization.py
+10. see pages/01_Data_*.py files
 
 11 - 13. See data chapter in the Wiki
 
@@ -86,7 +70,7 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 15. Input widgets are mainly in 05_Model_training.py
 
-16. Scikit-Learn Logistic regression and Random Forest are used
+16. Scikit-Learn Logistic Regression and Random Forest are used
 
 17. See 'right-fit' chapter in Wiki
 
@@ -98,7 +82,9 @@ The webpage shows the main pages in the navigation bar on the left. Typically th
 
 21. rasa implementation with files:
 
-    domain.yml, data/nlu.yml, data/stories.yml, actions/actions.py
+domain.yml, data/nlu.yml, data/stories.yml, actions/actions.py
+
+22. tbd
 
 # Work done
 
diff --git a/imgs/.DS_Store b/imgs/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..adb52307ec01241120a1b48183eb548237113e94
Binary files /dev/null and b/imgs/.DS_Store differ
diff --git a/imgs/logo.png b/imgs/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e0641e22134e44c2bcdd1877e096ae928c290e2
Binary files /dev/null and b/imgs/logo.png differ
diff --git a/imgs/logo2.png b/imgs/logo2.png
new file mode 100644
index 0000000000000000000000000000000000000000..73eeab3ab1690ec4b0d05f19038ed0c6ac5d7abf
Binary files /dev/null and b/imgs/logo2.png differ
diff --git a/main.py b/main.py
index 35e765c2d4f132370e25157e2ee2f5c3ae3d969d..a2dc8566b5677ec8c23ab210fc2f30ca94c34aa9 100644
--- a/main.py
+++ b/main.py
@@ -2,89 +2,99 @@ from ast import main
 from flask import Flask, json, logging
 from matplotlib.pylab import f
 import streamlit as st
-
 import logging
-import streamlit as st
 
-# Reset logging configuration
+# Reset logging configuration to avoid conflicts from previous handlers
 for handler in logging.root.handlers[:]:
     logging.root.removeHandler(handler)
 
-# logging.basicConfig(
-#     level=logging.INFO
-# )
-
-
-# Define each page as an instance of st.Page
+# Flask app initialization (can be used for server-side integration)
 app = Flask(__name__)
 
+# ------------------------
+# Define Streamlit Pages
+# ------------------------
 
+# Define the "About Me" page
 about_page = st.Page(
-    page = "pages/about_me.py",
-    title = "About Me",
-    default = True,
+    page="pages/about_me.py",  # Path to the Python file for this page
+    title="About Me",  # Title displayed on the navigation
+    default=True,  # Set as the default landing page
 )
 
+# Define the "Preprocessing" page
 preprocessing_page = st.Page(
     page="pages/preprocessing.py",
-    title="Preprocessing",
-    icon = ":material/bar_chart:",
+    title="Preprocessing",  # Title for the preprocessing page
+    icon=":material/bar_chart:",  # Icon displayed next to the page title
 )
 
+# Define the "Data Augmentation" page
 augmentation_page = st.Page(
     page="pages/augmentation.py",
-    title="Data Augmentation",
-    icon = ":material/bar_chart:",
+    title="Data Augmentation",  # Title for the augmentation page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Feature Engineering" page
 feature_engineering_page = st.Page(
     page="pages/feature_engineering.py",
-    title="Feature Engineering",
-    icon = ":material/bar_chart:",
+    title="Feature Engineering",  # Title for the feature engineering page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Data Metrics" page
 data_metrics_page = st.Page(
     page="pages/data_metrics.py",
-    title="Data Metrics",
-    icon = ":material/bar_chart:",
+    title="Data Metrics",  # Title for the data metrics page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Algorithm Selection" page
 algorithm_selection_page = st.Page(
     page="pages/algorithm_selection.py",
-    title="Algorithm Selection",
-    icon = ":material/bar_chart:",
-
+    title="Algorithm Selection",  # Title for selecting ML algorithms
+    icon=":material/bar_chart:",
 )
 
+# Define the "Model Training" page
 model_training_page = st.Page(
     page="pages/model_training.py",
-    title="Model Training",
-    icon = ":material/bar_chart:",
+    title="Model Training",  # Title for the model training page
+    icon=":material/bar_chart:",
 )
 
+# Define the "Add and Apply Model" page
 add_and_apply_model_page = st.Page(
     page="pages/add_and_apply_model.py",
-    title="Add Augmented Data and Apply Selected Model",
-    icon = ":material/bar_chart:",
+    title="Model Training with Augmented Data",  # Title for combining and applying models
+    icon=":material/bar_chart:",
 )
 
+# Define the "Model Application" page
 model_application_page = st.Page(
     page="pages/model_application.py",
-    title="Model Application",
-    icon = ":material/bar_chart:",
+    title="Model Application",  # Title for using the trained model
+    icon=":material/bar_chart:",
 )
 
+# Define the "Chat Bot" page
 chatbot_page = st.Page(
     page="pages/chatbot.py",
-    title="Chat Bot",
-    icon = ":material/bar_chart:",
+    title="Chat Bot",  # Title for interacting with the chatbot
+    icon=":material/bar_chart:",
 )
 
-# Navigation configuration
+# ------------------------
+# Navigation Configuration
+# ------------------------
+
+# Organize pages into navigation groups
 pg = st.navigation(
     {
-        "Info": [about_page],
-        "Projects": [preprocessing_page,
+        "Info": [about_page],  # "Info" category with the About Me page
+        "Projects": [          # "Projects" category with ML pipeline pages
+            preprocessing_page,
             augmentation_page,
             feature_engineering_page,
             data_metrics_page,
@@ -92,38 +102,38 @@ pg = st.navigation(
             model_training_page,
             add_and_apply_model_page,
             model_application_page,
-            chatbot_page],
+            chatbot_page,
+        ],
     }
 )
 
-st.sidebar.text("ALL ABOUT NETFLIX")
+# Add a simple title to the sidebar
+st.sidebar.text("ALL ABOUT ")
+st.sidebar.image("imgs/logo.png", width=400)  # Set desired width in pixels
+st.sidebar.text("source : www.vecteezy.com/")
 
+# Start running the navigation
 pg.run()
 
+# ------------------------
+# Logging Configuration
+# ------------------------
 
+logger = logging.getLogger(__name__)  # Create a logger instance
 
-
-logger = logging.getLogger(__name__)
-
-# Rasa endpoint
+# Define Rasa server endpoint (used if chatbot functionality is required)
 URL = "http://localhost:5005/webhooks/rest/webhook"
 
+# ------------------------
+# Main Function
+# ------------------------
+
 def main():
-    st.html(
-        """
-    <style>
-    [data-testid="stSidebarContent"] {
-        color: white;
-        background-color: #dadada; // THD light grey
-    }
-    </style>
     """
-    )
-
-
-    st.sidebar.success("Select the menu points from top to bottom in order to use the ML pipeline.")
-    # st.sidebar.success("Go through the pages one by one to make a prediction or use the chatbot.")
+    Placeholder entry point that currently performs no work; the sidebar and
+    page navigation are configured at module level when the script runs.
+    """
 
+# Entry point for the application
 if __name__ == "__main__":
     main()
-    
\ No newline at end of file
diff --git a/pages/about_me.py b/pages/about_me.py
index b619cd221b667ee61e9cbe19dc4805fa239fc020..9ff8c57e7f2ec0585b0d849091eb90aa665cd055 100644
--- a/pages/about_me.py
+++ b/pages/about_me.py
@@ -1,52 +1,80 @@
 import streamlit as st
 
 # Display the title and subtitle
-st.title("ðŸŽ¥ Welcome to Netflix Dataset Analysis & Modeling Workflow")
-st.subheader("ðŸ“Š Unlock insights and build predictive models using Netflix's extensive dataset")
+st.title("ðŸŽ¥ Welcome to Netflix Content Analysis")
+st.subheader("Unlock insights, explore trends, and dive deep into the world of Netflix content!")
 
-# Add a banner or header section
+# Add a stylish banner or header section
 st.markdown(
     """
     <style>
     .banner {
-        background-color: #FF6347;
+        background: linear-gradient(to right, #ff7e5f, #feb47b);
         color: white;
         padding: 20px;
-        border-radius: 5px;
+        border-radius: 10px;
         text-align: center;
-        font-size: 18px;
+        font-size: 20px;
+        font-weight: bold;
+        box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.2);
     }
     </style>
     <div class="banner">
-        An interactive Streamlit app to preprocess, analyze, augment, and model Netflix dataset seamlessly!
+        ðŸ“Š Your one-stop solution to analyze, preprocess, and model the Netflix dataset seamlessly!
     </div>
     """,
     unsafe_allow_html=True,
 )
 
-# Add a short introduction
-st.markdown("## ðŸ“œ What does this app do?")
+# Add an introduction with emojis for better engagement
+st.markdown("## ðŸ“œ What this Offers?")
 st.markdown(
     """
-    - **Clean & Preprocess Data**: Handle missing values and standardize the dataset.
-    - **Augment Data**: Add realistic fake data (20-25%) to enhance your analysis.
-    - **Feature Engineering**: Extract meaningful features to improve predictions.
-    - **Metrics & Visualizations**: Dive deep into statistics and correlations.
-    - **Model Training**: Train machine learning models to predict insights.
-    - **Custom Predictions**: Explore what-if scenarios and make predictions in real-time.
+    - ðŸ› ï¸ **Data Preprocessing**: Clean and handle missing values for a refined dataset.
+    - ðŸ“ˆ **Augment Data**: Add realistic synthetic data to expand analysis.
+    - ðŸ” **Feature Engineering**: Extract impactful features for improved analysis.
+    - ðŸ“Š **Visualizations & Metrics**: Generate insightful graphs and statistics.
+    - ðŸ¤– **Model Training**: Train ML models for meaningful predictions.
+    - ðŸ§® **Custom Predictions**: Experiment with real-time scenarios and insights.
     """
 )
 
-# Add a section to navigate to other pages
-st.markdown("## ðŸš€ Get Started")
-col1, col2, col3 = st.columns(3)
+# Add an interactive "Get Started" section with buttons or links to navigate
+st.markdown("---")
+
+st.markdown(
+    """
+    <style>
+    .start {
+        text-align: center;
+        font-size: 25px;
+
+    }
+    </style>
+    <div class="start">
+        ðŸš€ Get Started
+    </div>
+    """,
+    unsafe_allow_html=True,
+)
 
+st.logo("imgs/logo2.png")
 
-# Footer
+# Footer with styling
 st.markdown("---")
 st.markdown(
     """
-    ### ðŸ‘©â€ðŸ’» About the Developer
-    Developed with â¤ï¸ by Asif Khan(https://www.linkedin.com).
-    """
-)
\ No newline at end of file
+    <style>
+    .footer {
+        text-align: center;
+        font-size: 16px;
+        color: gray;
+        margin-top: 20px;
+    }
+    </style>
+    <div class="footer">
+        ðŸ‘¨â€ðŸ’» Developed with ðŸ§  and ðŸ’» by <b>Asif Khan</b> | ðŸŒŸ Empowering Netflix Data Analysis! ðŸŒŸ
+    </div>
+    """,
+    unsafe_allow_html=True,
+)
diff --git a/pages/add_and_apply_model.py b/pages/add_and_apply_model.py
index 0253776306c548ebf8d8ec80c2fe1915a93dcd8d..3cd45bc5e5544d5c1625d91ac783345983d598b2 100644
--- a/pages/add_and_apply_model.py
+++ b/pages/add_and_apply_model.py
@@ -1,20 +1,23 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
-from sklearn.linear_model import LinearRegression
-from sklearn.model_selection import train_test_split
 import seaborn as sns
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
 
-
+# Preprocess the dataset
 def preprocess_data(df):
-    """Preprocess the combined DataFrame."""
-    # Handle missing values
+    """Preprocess the combined DataFrame for analysis and modeling."""
+    # Handle missing values in key columns
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
     df["rating"] = df["rating"].fillna("Not Rated")
     df["date_added"] = pd.to_datetime(df["date_added"], errors="coerce").fillna(pd.Timestamp("2020-01-01"))
     df["duration"] = df["duration"].fillna("0 min")
+    df["director"] = df["director"].fillna("Unknown")
 
-    # Preprocess duration column
+    # Clean the 'duration' column to extract numeric values
     def preprocess_duration(row):
         if "min" in row:
             return int(row.replace(" min", ""))
@@ -24,34 +27,54 @@ def preprocess_data(df):
 
     df["duration_numeric"] = df["duration"].apply(preprocess_duration)
 
-    # Drop rows with invalid duration
+    # Drop rows where duration could not be parsed
     df = df[df["duration_numeric"].notnull()]
-    return df
 
+    # Extract the primary genre (first comma-separated entry of 'listed_in'); missing values become "Other"
+    df["primary_genre"] = df["listed_in"].str.split(',').str[0].str.strip()
+    df["primary_genre"] = df["primary_genre"].fillna("Other")
+    return df
 
+# Main page for adding and applying the model
 def add_and_apply_model_page():
-    st.title("Netflix Dataset: Add and Apply Model")
+    st.title("ðŸŽ¥ Netflix Dataset: Predict Genre with Logistic Regression")
 
-    # Load Combined Dataset
-    st.header("1. Load Combined Dataset")
+    # Step 1: Load the Combined Dataset
+    st.header("ðŸ“‚ 1. Load Combined Dataset")
     try:
         combined_df = pd.read_csv("netflix_combined_dataset.csv")  # Replace with your combined dataset path
-        st.success("Combined dataset loaded successfully!")
+        st.success("âœ… Combined dataset loaded successfully!")
         st.dataframe(combined_df.head())
     except Exception as e:
-        st.error(f"Error loading combined dataset: {e}")
+        st.error(f"âŒ Error loading combined dataset: {e}")
         return
 
-    # Preprocess Combined Data
-    st.header("2. Preprocess Combined Dataset")
+    # Step 2: Preprocess the Combined Dataset
+    st.header("ðŸ”„ 2. Preprocess Combined Dataset")
     combined_df = preprocess_data(combined_df)
-    st.success("Preprocessing completed!")
+
+    # Use only the first 1000 rows for the prediction
+    combined_df = combined_df.head(1000)
+    st.success("âœ… Preprocessing completed!")
     st.dataframe(combined_df.head())
 
-    # Features and Target Selection
-    st.header("3. Feature and Target Selection")
-    features = ["duration_numeric"]  # Use duration as the feature
-    target = "release_year"  # Target variable
+    # Step 3: Encode Director and Genre
+    st.header("ðŸ”¢ 3. Encode Features and Target")
+    director_encoder = LabelEncoder()
+    genre_encoder = LabelEncoder()
+
+    combined_df["director_encoded"] = director_encoder.fit_transform(combined_df["director"])
+    combined_df["genre_encoded"] = genre_encoder.fit_transform(combined_df["primary_genre"])
+
+    # Filter out genres with fewer than 2 samples
+    genre_counts = combined_df["genre_encoded"].value_counts()
+    valid_genres = genre_counts[genre_counts >= 2].index
+    combined_df = combined_df[combined_df["genre_encoded"].isin(valid_genres)]
+
+    # Step 4: Feature and Target Selection
+    st.header("ðŸŽ¯ 4. Feature and Target Selection")
+    features = ["director_encoded", "duration_numeric", "release_year"]  # Features for prediction
+    target = "genre_encoded"  # Target variable
     X = combined_df[features]
     y = combined_df[target]
     st.write("### Features (X)")
@@ -59,67 +82,74 @@ def add_and_apply_model_page():
     st.write("### Target (y)")
     st.write(y.head())
 
-    # Train-Test Split
-    st.header("4. Train-Test Split")
+    # Step 5: Train-Test Split
+    st.header("ðŸ”€ 5. Train-Test Split")
     test_size = st.slider("Select Test Size Percentage", min_value=10, max_value=50, value=20, step=5)
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42)
-    st.success(f"Data split: {100 - test_size}% training and {test_size}% testing.")
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42, stratify=y)
+    st.success(f"âœ… Data split: {100 - test_size}% training and {test_size}% testing.")
 
-    # Train Linear Regression Model
-    st.header("5. Train Linear Regression Model")
-    model = LinearRegression()
+    # Step 6: Train Logistic Regression Model
+    st.header("ðŸ¤– 6. Train Logistic Regression Model")
+    model = LogisticRegression(max_iter=1000, random_state=42)
     model.fit(X_train, y_train)
-    st.success("Linear Regression Model trained successfully!")
+    st.success("âœ… Logistic Regression Model trained successfully!")
 
-    # Model Predictions
-    st.header("6. Model Predictions")
+    # Step 7: Model Predictions
+    st.header("ðŸ” 7. Model Predictions")
     y_pred = model.predict(X_test)
-    comparison_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
+    comparison_df = pd.DataFrame({
+        "Actual": genre_encoder.inverse_transform(y_test),
+        "Predicted": genre_encoder.inverse_transform(y_pred)
+    })
     st.dataframe(comparison_df.head())
 
-    # Evaluation Metrics
-    st.header("7. Model Evaluation")
-    mse = ((y_test - y_pred) ** 2).mean()
-    st.write(f"Mean Squared Error (MSE): {mse:.2f}")
-
-    # Visualization: Actual vs Predicted
-    st.header("8. Visualization: Actual vs Predicted")
-    fig, ax = plt.subplots()
-    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6, ax=ax, label="Predictions")
-    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=2, label="Perfect Fit")
-    ax.set_xlim(1990, y_test.max())  # Start x-axis from 1960
-    ax.set_ylim(1990, y_test.max())  # Start y-axis from 1960
-    ax.set_xlabel("Actual Release Year")
-    ax.set_ylabel("Predicted Release Year")
-    ax.set_title("Actual vs. Predicted Release Years")
-    ax.legend()
+    # Step 8: Visualization of Predictions
+    st.header("ðŸ“ˆ 8. Visualization: Actual vs Predicted")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    sns.countplot(x="Actual", data=comparison_df, color="blue", alpha=0.6, label="Actual")
+    sns.countplot(x="Predicted", data=comparison_df, color="orange", alpha=0.6, label="Predicted")
+    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
+    plt.title("Actual vs Predicted Genre Counts")
+    plt.xlabel("Genre")
+    plt.ylabel("Count")
+    plt.legend()
     st.pyplot(fig)
 
-    # Custom Input for Prediction
-    st.header("9. Custom Prediction")
+    # Step 9: Custom Input for Prediction
+    st.header("ðŸ› ï¸ 9. Custom Prediction")
+
+    # Add a placeholder for director selection
+    directors = ["Select a Director"] + sorted(combined_df["director"].unique())
+    selected_director = st.selectbox("Select a Director", directors)
+
+    # Add sliders for duration and release year; with no explicit value they start at min_value
     duration_choice = st.slider(
         "Enter Duration (in minutes)",
         min_value=int(combined_df["duration_numeric"].min()),
         max_value=int(combined_df["duration_numeric"].max()),
-        value=90,
     )
-    custom_input = [[duration_choice]]
-    custom_prediction = model.predict(custom_input)[0]
-    st.write(f"Predicted Release Year: {custom_prediction:.2f}")
-
-    # Highlight Custom Input on Plot
-    st.markdown("### Custom Prediction on Plot")
-    fig, ax = plt.subplots()
-    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6, ax=ax, label="Predictions")
-    ax.scatter(duration_choice, custom_prediction, color="red", label="Custom Prediction", zorder=5)
-    ax.legend()
-    ax.set_xlabel("Actual")
-    ax.set_ylabel("Predicted")
-    st.pyplot(fig)
+    release_year_choice = st.slider(
+        "Enter Release Year",
+        min_value=int(combined_df["release_year"].min()),
+        max_value=int(combined_df["release_year"].max()),
+    )
+
+    # Default message for predicted genre
+    if selected_director == "Select a Director":
+        st.header("**Predicted Genre:** â“ ðŸ¤· â“")
+    else:
+        # Encode the input
+        director_encoded = director_encoder.transform([selected_director])[0]
+        custom_input = [[director_encoded, duration_choice, release_year_choice]]
+
+        # Predict the genre
+        custom_prediction = genre_encoder.inverse_transform(model.predict(custom_input))[0]
+        st.header(f"**Predicted Genre:**  âœ¨ {custom_prediction} âœ¨")
+
 
-    # Save Model and Data to Session State
+    # Step 10: Save Model and Data to Session State
     st.session_state.model = model
     st.session_state.combined_data = combined_df
-    st.success("Model applied successfully!")
+    st.success("âœ… Model applied successfully!")
 
 add_and_apply_model_page()
diff --git a/pages/algorithm_selection.py b/pages/algorithm_selection.py
index 35a05bd81e34982d7700c81a265d149a7ca755a8..fc56217a97139e6363a06fd96f979bf3e6afb21c 100644
--- a/pages/algorithm_selection.py
+++ b/pages/algorithm_selection.py
@@ -9,45 +9,41 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import classification_report, confusion_matrix
 
 def algorithm_selection_page():
-    st.title("Algorithm Selection for Netflix Genre Prediction (Classification)")
+    st.title("ðŸ§  Algorithm Selection for Netflix Genre Prediction")
 
     # ---------------------------
     # 1. Load and Preprocess Data
     # ---------------------------
+    st.markdown("## ðŸ”„ Data Loading and Preprocessing")
     file_path = "netflix_titles.csv"  # Update with the correct file path
     df = pd.read_csv(file_path)
 
-    st.markdown("## Preprocessing Dataset")
-    
-    # Handle missing values
-    df["listed_in"] = df["listed_in"].fillna("Unknown")  # Genre field
+    # Handle missing values in critical columns
+    df["listed_in"] = df["listed_in"].fillna("Unknown")  # Fill missing genres
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
-    df["duration"] = df["duration"].fillna("0 min")
+    df["duration"] = df["duration"].fillna("0 min")  # Default to "0 min" for missing durations
 
-    # Clean and preprocess 'duration'
+    # Clean the 'duration' column (convert "90 min" -> 90)
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        elif "Season" in str(value):
-            return 0  # For TV shows
-        return 0  # Unrecognized values
+        elif "Season" in str(value):  # Handle TV Shows
+            return 0
+        return 0  # Default for unrecognized values
 
     df["duration"] = df["duration"].apply(clean_duration)
 
-    # Extract primary genre from 'listed_in'
+    # Extract primary genre from 'listed_in' and group rare genres as "Other"
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
     df['primary_genre'] = df['primary_genre'].fillna("Other")
-
-    # Group rare genres into 'Other' (optional)
-    genre_counts = df['primary_genre'].value_counts()
-    common_genres = genre_counts[genre_counts >= 5].index
+    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >= 5].index
     df['primary_genre'] = df['primary_genre'].apply(lambda x: x if x in common_genres else 'Other')
 
-    # Encode the target (genre)
+    # Encode the target variable (genre)
     genre_encoder = LabelEncoder()
     df["genre_encoded"] = genre_encoder.fit_transform(df["primary_genre"])
 
-    # Scale numeric features
+    # Scale numeric features: duration and release_year
     scaler = StandardScaler()
     df["duration_scaled"] = scaler.fit_transform(df[["duration"]])
     df["release_year_scaled"] = scaler.fit_transform(df[["release_year"]])
@@ -55,24 +51,27 @@ def algorithm_selection_page():
     # ---------------------------
     # 2. Feature and Target Setup
     # ---------------------------
-    st.markdown("## Feature Selection")
-    
+    st.markdown("## ðŸ› ï¸ Feature Selection")
+
+    # Select features for training and the target column
     selected_features = ["duration_scaled", "release_year_scaled"]
     target_col = "genre_encoded"
 
     X = df[selected_features]
     y = df[target_col]
 
-    st.write("### Features")
+    # Display selected features and target variable
+    st.markdown("### Selected Features (X)")
     st.write(X.head())
-    st.write("### Target (Genre Encoded)")
+    st.markdown("### Target Variable (Genre Encoded)")
     st.write(y.head())
 
     # ---------------------------
     # 3. Train-Test Split
     # ---------------------------
-    st.markdown("## Train/Test Split")
-
+    st.markdown("## ðŸ”€ Train/Test Split")
+    
+    # Let the user set the test set size
     test_size = st.slider("Test Set Size (%)", min_value=10, max_value=50, value=20, step=5)
     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size/100, random_state=42)
 
@@ -82,77 +81,57 @@ def algorithm_selection_page():
     # ---------------------------
     # 4. Model Training and Scoring
     # ---------------------------
-    st.markdown("## Model Training and Comparison")
+    st.markdown("## ðŸ¤– Model Training and Comparison")
 
     # ---- Model A: Logistic Regression ----
+    st.markdown("### Logistic Regression")
     logreg = LogisticRegression(max_iter=1000, random_state=42)
     logreg.fit(X_train, y_train)
     y_pred_logreg = logreg.predict(X_val)
-
-    # Classification metrics for Logistic Regression
     logreg_accuracy = logreg.score(X_val, y_val)
-    st.markdown("### Logistic Regression")
-    st.write(f"Accuracy on Validation Set: {logreg_accuracy:.4f}")
-    
-    # Cross-validation scores (optional)
+
+    st.write(f"Accuracy on Validation Set: **{logreg_accuracy:.4f}**")
     cv_scores_log = cross_val_score(logreg, X, y, cv=5, scoring='accuracy')
-    st.write("Cross-Validation Scores (Logistic Regression):")
+    st.write("Cross-Validation Scores:")
     st.write(cv_scores_log)
 
     # ---- Model B: Random Forest ----
+    st.markdown("### Random Forest")
     rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
     rf_model.fit(X_train, y_train)
     y_pred_rf = rf_model.predict(X_val)
-
-    # Classification metrics for Random Forest
     rf_accuracy = rf_model.score(X_val, y_val)
-    st.markdown("### Random Forest")
-    st.write(f"Accuracy on Validation Set: {rf_accuracy:.4f}")
 
-    # Cross-validation scores (optional)
+    st.write(f"Accuracy on Validation Set: **{rf_accuracy:.4f}**")
     cv_scores_rf = cross_val_score(rf_model, X, y, cv=5, scoring='accuracy')
-    st.write("Cross-Validation Scores (Random Forest):")
+    st.write("Cross-Validation Scores:")
     st.write(cv_scores_rf)
 
     # ---------------------------
     # 5. Detailed Model Comparison
     # ---------------------------
+    st.markdown("## ðŸ“Š Model Comparison")
 
-    # Compare in a small DataFrame
+    # Compare models in a small DataFrame
     comparison_df = pd.DataFrame({
         "Model": ["Logistic Regression", "Random Forest"],
         "Validation Accuracy": [logreg_accuracy, rf_accuracy],
         "CV Accuracy (mean)": [cv_scores_log.mean(), cv_scores_rf.mean()]
     })
-    st.write("### Comparison Table")
+    st.markdown("### Comparison Table")
     st.write(comparison_df)
 
-    # Determine best model by validation accuracy
+    # Highlight the best model
     best_idx = comparison_df["Validation Accuracy"].idxmax()
     best_model_info = comparison_df.iloc[best_idx]
-    st.write("### Best Model")
+    st.markdown("### Best Model")
     st.write(best_model_info)
 
-    # Save to session state (optional)
+    # Save results to session state for future use
     st.session_state.comparison_df = comparison_df
     st.session_state.best_model_info = best_model_info
 
-    st.success("Algorithm selection and comparison completed!")
-
-
-def _plot_confusion_matrix(y_true, y_pred, encoder, title="Confusion Matrix"):
-    """Helper to plot confusion matrix as a figure."""
-    import matplotlib.pyplot as plt
-    import seaborn as sns
-    from sklearn.metrics import confusion_matrix
-
-    cm = confusion_matrix(y_true, y_pred, labels=range(len(encoder.classes_)))
-    fig, ax = plt.subplots(figsize=(5, 4))
-    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
-                xticklabels=encoder.classes_, yticklabels=encoder.classes_, ax=ax)
-    ax.set_xlabel("Predicted")
-    ax.set_ylabel("Actual")
-    ax.set_title(title)
-    return fig
+    st.success("âœ… Algorithm selection and comparison completed!")
+    st.success("âœ… Algorithm Selected is Logistic Regression!")
 
 algorithm_selection_page()
diff --git a/pages/augmentation.py b/pages/augmentation.py
index 9a354a12848449b234a834953ac9a949cbf7b9b2..6481cb3590f95e38e7aab0fdd2becdfbfce5855a 100644
--- a/pages/augmentation.py
+++ b/pages/augmentation.py
@@ -2,177 +2,174 @@ import streamlit as st
 import pandas as pd
 import random
 import numpy as np
-import joblib
-
 from sklearn.preprocessing import StandardScaler, LabelEncoder
 
 # Constants for random data generation
 YEAR_MIN = 1920
 YEAR_MAX = 2023
 DURATION_MIN = 1
-DURATION_MAX = 300  # Assuming max duration in minutes or episodes
+DURATION_MAX = 300  # Maximum duration in minutes or episodes
 
 def load_data(file_path: str) -> pd.DataFrame:
     """Load the Netflix dataset from a CSV file."""
     try:
         return pd.read_csv(file_path)
     except FileNotFoundError:
-        st.error(f"File not found at path: {file_path}. Please check the file path.")
-        return pd.DataFrame()  # Return empty DataFrame if file not found
+        # Show error message if the file isn't found
+        st.error(f"❌ File not found at path: {file_path}. Please check the file path.")
+        return pd.DataFrame()
 
-def preprocess_data(df: pd.DataFrame) -> tuple:
-    """Preprocess the dataset: drop missing values, encode genre, scale features, etc."""
-    # a. Remove rows with missing values in key columns
+def preprocess_data(df: pd.DataFrame):
+    """Preprocess the dataset for augmentation."""
+    # Drop rows with missing essential data
     df = df.dropna(subset=['duration', 'director', 'release_year', 'listed_in'])
-    
-    # b. Clean 'duration' column (convert "xxx min" -> int, or 0 if unrecognized)
+
+    # Extract numeric duration values (e.g., "120 min" → 120)
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        return 0  # For "Season" or other unrecognized patterns
-    
+        return 0  # Default to 0 if unrecognized
+
     df['duration_numeric'] = df['duration'].apply(clean_duration)
-    df = df[df['duration_numeric'] >= 0]
-    
-    # c. Extract primary genre; group rare genres into 'Other'
+    df = df[df['duration_numeric'] >= 0]  # Keep valid durations
+
+    # Simplify genres by focusing on the primary genre and grouping rare ones as 'Other'
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
-    genre_counts = df['primary_genre'].value_counts()
-    common_genres = genre_counts[genre_counts >=5].index.tolist()
+    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >= 5].index.tolist()
     df['primary_genre'] = df['primary_genre'].apply(lambda g: g if g in common_genres else 'Other')
-    
-    # d. Encode Genre (target variable)
+
+    # Encode genres and count directors for further analysis
     genre_encoder = LabelEncoder()
     df['genre_encoded'] = genre_encoder.fit_transform(df['primary_genre'])
-    
-    # e. Encode Director by counting the number of movies each director has
     director_counts = df['director'].value_counts()
     df['director_count'] = df['director'].map(director_counts)
-    
-    # f. Feature Scaling for 'duration_numeric' and 'release_year' with separate scalers
+
+    # Scale numeric features for consistency
     duration_scaler = StandardScaler()
     release_year_scaler = StandardScaler()
     df['duration_scaled'] = duration_scaler.fit_transform(df[['duration_numeric']])
     df['release_year_scaled'] = release_year_scaler.fit_transform(df[['release_year']])
-    
+
     return df, genre_encoder, duration_scaler, release_year_scaler, director_counts
 
-def generate_fake_row(genre_encoder: LabelEncoder, common_genres: list, director_counts: pd.Series) -> dict:
-    """Generate a single row of synthetic Netflix data."""
+def generate_fake_row(genre_encoder, common_genres, director_counts):
+    """Generate a single synthetic data row."""
+    # Randomly select a genre and encode it
     primary_genre = random.choice(common_genres + ['Other'])
-    
-    # Encode genre using the existing LabelEncoder
     genre_encoded = genre_encoder.transform([primary_genre])[0]
-    
-    # Randomly decide if the director is from existing ones or new
+
+    # 70% chance to use an existing director, 30% to create a new one
     if random.random() < 0.7 and not director_counts.empty:
-        # 70% chance to choose an existing director
-        existing_directors = director_counts.index.tolist()
-        director = random.choice(existing_directors)
-        director_count = director_counts.get(director, 1) + 1  # Increment count
+        director = random.choice(director_counts.index.tolist())
+        director_count = director_counts.get(director, 1) + 1
     else:
-        # 30% chance to create a new director
         director = f"Director {random.randint(1001, 2000)}"
-        director_count = 1  # First occurrence
-    
-    # Randomly decide the type to set duration accordingly
+        director_count = 1
+
+    # Generate type, duration, and other fields
     type_choice = random.choice(['Movie', 'TV Show'])
-    
-    # Generate duration as integer based on type
     duration_numeric = random.randint(DURATION_MIN, DURATION_MAX)
     duration = f"{duration_numeric} min" if type_choice == "Movie" else f"{duration_numeric} episodes"
-    
+
     return {
-        'show_id': f's{random.randint(10000, 99999)}',
         'type': type_choice,
         'title': f"Random Title {random.randint(1, 1000)}",
         'director': director,
-        'cast': f"Actor {random.randint(1, 500)}, Actor {random.randint(501, 1000)}",
-        'country': f"Country {random.randint(1, 100)}",
-        'date_added': f"{random.randint(1, 12)}/{random.randint(1, 28)}/{random.randint(YEAR_MIN, YEAR_MAX)}",
-        'release_year': random.randint(YEAR_MIN, YEAR_MAX),
-        'rating': random.choice(['G', 'PG', 'PG-13', 'R', 'NC-17', 'TV-Y', 'TV-Y7', 'TV-G', 'TV-PG', 'TV-14', 'TV-MA']),
         'duration': duration,
-        'listed_in': f"{random.choice(['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Thriller', 'Documentary', 'Family', 'Sci-Fi'])}, {random.choice(['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Thriller', 'Documentary', 'Family', 'Sci-Fi'])}",
-        'description': f"Description for Random Title {random.randint(1, 1000)}",
+        'release_year': random.randint(YEAR_MIN, YEAR_MAX),
+        'primary_genre': primary_genre,
         'genre_encoded': genre_encoded,
         'director_count': director_count,
-        'duration_numeric': duration_numeric
+        'duration_numeric': duration_numeric,
     }
 
 def augmentation_page():
-    st.title("Netflix Dataset Augmentation")
-    
+    st.title("🛠️ Netflix Dataset Augmentation")  # Main page title
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #4caf50;
+            color: white;
+            padding: 15px;
+            text-align: center;
+            border-radius: 10px;
+        }
+        </style>
+        <div class="header">
+            Enhance your Netflix dataset with synthetic data for improved analysis!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
     # Load Netflix dataset
-    file_path = "netflix_titles.csv"  # Replace with the correct file path
+    file_path = "netflix_titles.csv"
     df = load_data(file_path)
-    
+
+    # Exit if the dataset is empty
     if df.empty:
-        st.warning("Dataset is empty. Please check the file path and try again.")
+        st.warning("⚠️ Dataset is empty. Please check the file path and try again.")
         return
-    
-    # Preprocess original data
+
+    # Preprocess the dataset to clean and encode necessary features
     df, genre_encoder, duration_scaler, release_year_scaler, director_counts = preprocess_data(df)
-    
-    # Identify common genres
-    common_genres = df['primary_genre'].value_counts()[df['primary_genre'].value_counts() >=5].index.tolist()
-    
-    # Display preprocessed original dataset
-    st.markdown("## Original Preprocessed Dataset")
+
+    # Display the cleaned and preprocessed dataset
+    st.markdown("## 🔍 Preprocessed Original Dataset")
     st.write(df.head())
-    
-    # Generate synthetic data
-    st.markdown("## Generating Synthetic Data")
-    num_rows = st.slider("Number of fake rows to generate", min_value=10, max_value=1000, step=10, value=100)
+
+    # User input for synthetic data generation
+    st.markdown("## 🔧 Generate Synthetic Data")
+    num_rows = st.slider("Number of rows to generate:", 10, 1000, step=10, value=100)
+
+    # Generate synthetic rows with a progress bar
     rows = []
-    progress_text = "Generating synthetic data. Please wait."
-    pbar = st.progress(0, text=progress_text)
-    
+    progress_bar = st.progress(0)
     for i in range(num_rows):
-        rows.append(generate_fake_row(genre_encoder, common_genres, director_counts))
-        pbar.progress((i + 1) / num_rows, text=progress_text)
-    
-    pbar.empty()
+        rows.append(generate_fake_row(genre_encoder, df['primary_genre'].unique().tolist(), director_counts))
+        progress_bar.progress((i + 1) / num_rows)
+    progress_bar.empty()  # Clear the progress bar
+
+    # Create a synthetic dataset
     fake_df = pd.DataFrame(rows)
-    
-    # Scale 'duration_numeric' and 'release_year'
+
+    # Scale synthetic data columns
     fake_df['duration_scaled'] = duration_scaler.transform(fake_df[['duration_numeric']])
     fake_df['release_year_scaled'] = release_year_scaler.transform(fake_df[['release_year']])
-    
-    # Display original and fake data
+
+    # Display comparison: original vs synthetic data
+    st.markdown("## 📊 Dataset Comparison")
     cols = st.columns(2)
     with cols[0]:
-        st.markdown("### Original Preprocessed Data")
+        st.markdown("### Original Data")
         st.dataframe(df.head())
-    
     with cols[1]:
-        st.markdown("### Generated Synthetic Data")
+        st.markdown("### Synthetic Data")
         st.dataframe(fake_df.head())
-    
-    # Display data metrics
-    st.markdown("## Data Metrics")
-    cols_metrics = st.columns(2)
-    with cols_metrics[0]:
+
+    # Display summary metrics for both datasets
+    st.markdown("## 📈 Dataset Metrics")
+    metrics_cols = st.columns(2)
+    with metrics_cols[0]:
         st.markdown("### Original Data Metrics")
         st.write(df.describe(include="all"))
-    
-    with cols_metrics[1]:
+    with metrics_cols[1]:
         st.markdown("### Synthetic Data Metrics")
         st.write(fake_df.describe(include="all"))
-    
-    # Option to combine original and synthetic data
-    st.markdown("## Combined Dataset")
-    if st.button("Combine Original and Synthetic Data"):
+
+    # Option to combine original and synthetic datasets
+    st.markdown("## 📦 Combine Datasets")
+    if st.button("Combine and Download"):
         combined_df = pd.concat([df, fake_df], ignore_index=True)
         st.write(combined_df.head())
-        st.write(f"Combined dataset size: {combined_df.shape}")
-        
-        # Optionally, allow downloading the combined dataset
+        st.write(f"**Combined Dataset Size:** {combined_df.shape[0]} rows")
         csv = combined_df.to_csv(index=False).encode('utf-8')
         st.download_button(
-            label="Download Combined Dataset as CSV",
-            data=csv,
-            file_name='netflix_combined_dataset.csv',
-            mime='text/csv',
+            "Download Combined Dataset as CSV",
+            csv,
+            "netflix_combined_dataset.csv",
+            "text/csv",
         )
 
 augmentation_page()
diff --git a/pages/chatbot.py b/pages/chatbot.py
index d59ec69a4ab05a0a2d922a11708f0ac71f2cc9e4..52fa968c13f69140cb73609734e7277779ebd310 100644
--- a/pages/chatbot.py
+++ b/pages/chatbot.py
@@ -26,7 +26,7 @@ def initialize_conversation():
     """Initialize the conversation with an assistant greeting."""
     assistant_message = "Hello! How can I help you today?"
     return [
-        {"role": "system", "content": "You are a helpful Rasa chatbot assistant."},
+        {"role": "system", "content": "Welcome to Rasa chatbot assistant."},
         {"role": "assistant", "content": assistant_message}
     ]
 
diff --git a/pages/data_metrics.py b/pages/data_metrics.py
index bb512ea8928f3feb35d94d60b46e98794d24b6ee..4bc2d95f8cc0596471cb9b89784d4b6f0aa5eb21 100644
--- a/pages/data_metrics.py
+++ b/pages/data_metrics.py
@@ -3,72 +3,67 @@ import matplotlib.pyplot as plt
 import pandas as pd
 
 def data_metrics_page():
-    st.title("Netflix Dataset Metrics and Visualization")
+    st.title("📊 Netflix Dataset Metrics and Visualization")  # Main title with emoji
 
-    # Load the Netflix dataset
+    # Step 1: Load the Netflix dataset
     file_path = "netflix_titles.csv"
     df = pd.read_csv(file_path)
-    st.session_state.df = df
+    st.session_state.df = df  # Save the dataset in session state for reuse
 
-    st.markdown("## Dataset Overview")
-    st.write("### Head of the Data")
+    # Step 2: Dataset Overview
+    st.markdown("## 🔍 Dataset Overview")
+
+    st.markdown("### Head of the Data")
     st.write(df.head())  # Display the first few rows of the dataset
 
-    st.write("### Summary Statistics")
-    st.write(df.describe(include='all'))  # Include all columns (even non-numeric)
+    st.markdown("### Summary Statistics")
+    st.write(df.describe(include='all'))  # Include all columns, even non-numeric
 
-    st.write("### Data Types")
-    st.write(df.dtypes)  # Display the data types of all columns
+    st.markdown("### Data Types")
+    st.write(df.dtypes)  # Show data types of all columns
 
-    st.write("### Null Values")
-    st.write(df.isnull().sum())  # Display the count of missing values per column
+    st.markdown("### Null Values")
+    st.write(df.isnull().sum())  # Count and display missing values for each column
 
-    # Correlation heatmap for numeric columns (if any)
-    st.markdown("## Correlation Heatmap (Numeric Features)")
-    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
-    if len(numeric_cols) > 0:
-        corr_matrix = df[numeric_cols].corr()
-        st.write(corr_matrix)
-    else:
-        st.info("No numeric columns to compute correlation.")
 
-    # Visualizations
-    st.markdown("## Visualizations")
+    # Step 3: Visualizations
+    st.markdown("## 📈 Visualizations")
 
-    # Distribution of content types (e.g., Movies vs. TV Shows)
+    # Visualization 1: Content Type Distribution
     if "type" in df.columns:
-        st.markdown("### Content Type Distribution")
-        type_counts = df["type"].value_counts()
+        st.markdown("### Distribution of Content Types")
+        type_counts = df["type"].value_counts()  # Count occurrences of each content type
         fig, ax = plt.subplots()
-        ax.bar(type_counts.index, type_counts.values)
+        ax.bar(type_counts.index, type_counts.values, color='skyblue')
         ax.set_xlabel("Type")
         ax.set_ylabel("Count")
         ax.set_title("Distribution of Content Types")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the bar chart
 
-    # Distribution of release years
+    # Visualization 2: Release Year Distribution
     if "release_year" in df.columns:
-        st.markdown("### Release Year Distribution")
-        year_counts = df["release_year"].value_counts().sort_index()
+        st.markdown("### Number of Titles Released Over the Years")
+        year_counts = df["release_year"].value_counts().sort_index()  # Count by release year
         fig, ax = plt.subplots()
-        ax.plot(year_counts.index, year_counts.values)
+        ax.plot(year_counts.index, year_counts.values, marker='o', color='green')
         ax.set_xlabel("Year")
         ax.set_ylabel("Count")
         ax.set_title("Number of Titles Released Over the Years")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the line plot
 
-    # Histogram of durations for movies
+    # Visualization 3: Duration Histogram
     if "duration" in df.columns:
-        st.markdown("### Duration of Movies/TV Shows")
-        # Filter duration if it's numeric (e.g., split '90 min' or '1 Season')
+        st.markdown("### Histogram of Durations (Movies/TV Shows)")
+        # Extract numeric duration values (e.g., '90 min' → 90)
         df["duration_numeric"] = pd.to_numeric(df["duration"].str.extract(r'(\d+)')[0], errors='coerce')
         fig, ax = plt.subplots()
-        ax.hist(df["duration_numeric"].dropna(), bins=20, edgecolor='black')
+        ax.hist(df["duration_numeric"].dropna(), bins=20, edgecolor='black', color='orange')
         ax.set_xlabel("Duration")
         ax.set_ylabel("Count")
         ax.set_title("Histogram of Durations")
-        st.pyplot(fig)
+        st.pyplot(fig)  # Display the histogram
 
-    st.success("Data metrics and visualization complete!")
+    # Step 4: Success Message
+    st.success("✅ Data metrics and visualizations are complete!")
 
-data_metrics_page()
\ No newline at end of file
+data_metrics_page()
diff --git a/pages/feature_engineering.py b/pages/feature_engineering.py
index 4126dde2c0be2d9c2c2023f076c73c425bb59e19..9804a01ef2268720e868035bb843daa21eedd915 100644
--- a/pages/feature_engineering.py
+++ b/pages/feature_engineering.py
@@ -1,77 +1,73 @@
 import streamlit as st
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder
-import matplotlib.pyplot as plt
-import seaborn as sns
 
 def feature_engineering_page():
-    st.title("Netflix Dataset Feature Engineering")
-    
-    # 1. Access Preprocessed Data from Session State
+    st.title("📊 Netflix Dataset Feature Engineering")
+
+    # Step 1: Check for Preprocessed Data
     if 'df' not in st.session_state:
-        st.error("Preprocessed data not found in session state. Please run the Preprocessing step first.")
+        st.error("⚠️ Preprocessed data not found in session state. Please complete the Preprocessing step first.")
         return
-    
-    df = st.session_state.df.copy()  # Work on a copy to prevent altering the session state
-    
-    # 2. Display Preprocessed Dataset
-    st.markdown("## Preprocessed Dataset")
+
+    # Work on a copy of the preprocessed dataset
+    df = st.session_state.df.copy()
+
+    # Step 2: Display Preprocessed Dataset
+    st.markdown("## 🔍 Preprocessed Dataset")
     st.write(df.head())
-    
-    # 3. Verify Required Columns
+
+    # Step 3: Check for Required Columns
     required_columns = ['duration', 'director', 'release_year', 'listed_in']
     missing_columns = [col for col in required_columns if col not in df.columns]
-    
+
     if missing_columns:
-        st.error(f"The following required columns are missing from the dataset: {missing_columns}")
-        st.info("Ensure that the Preprocessing step has been completed successfully.")
+        st.error(f"❌ The following required columns are missing: {missing_columns}")
+        st.info("Ensure the Preprocessing step has been completed successfully.")
         return
-    
-    # 4. Feature Selection for Modeling
-    st.markdown("## Selecting Features for Logistic Regression")
-    
-    # Define feature columns and target
+
+    # Step 4: Feature Selection for Modeling
+    st.markdown("## 🛠️ Selecting Features for Modeling")
+
+    # Define feature columns and target variable
     feature_columns = ["duration", "director", "release_year"]
-    target_column = "listed_in"
-    
-    # Select features and target
+    df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()  # Extract primary genre
+    target_column = "primary_genre"
+
+    # Display selected features and target variable
     X = df[feature_columns]
     y = df[target_column]
-    
-    st.write("### Selected Features (X)")
+    st.markdown("### Features (X)")
     st.write(X.head())
-    
-    st.write("### Target Variable (y)")
+    st.markdown("### Target Variable (y - Primary Genre)")
     st.write(y.head())
-    
-    # 5. Encoding Additional Categorical Variables (if any)
-    # Assuming 'listed_in' was already handled during preprocessing
-    # If there are other categorical variables to encode, handle them here
-    
+
+    # Step 5: Encode Additional Categorical Variables
+    st.markdown("## 🔄 Encoding Categorical Variables")
+
+    # Identify categorical columns to encode (excluding already processed ones)
     categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
-    # Remove columns already encoded or not needed
-    columns_to_remove = ['show_id', 'title', 'cast', 'country', 'date_added', 'rating', 'duration', 'listed_in', 'description', 'primary_genre']
-    categorical_cols = [col for col in categorical_cols if col not in columns_to_remove]
-    
+    excluded_cols = [
+        'show_id', 'title', 'cast', 'country', 'date_added',
+        'rating', 'duration', 'listed_in', 'description', 'primary_genre'
+    ]
+    categorical_cols = [col for col in categorical_cols if col not in excluded_cols]
+
     if categorical_cols:
-        st.markdown("## Encoding Additional Categorical Variables")
         label_encoders = {}
         for col in categorical_cols:
             le = LabelEncoder()
-            df[col] = le.fit_transform(df[col])
+            df[col] = le.fit_transform(df[col])  # Apply Label Encoding
             label_encoders[col] = le
-            st.write(f"### Encoded '{col}'")
-            st.write(df[[col]].head())
-        st.session_state.label_encoders = label_encoders
+            st.markdown(f"### Encoded Column: `{col}`")
+            st.write(df[[col]].head())  # Display encoded column
+        st.session_state.label_encoders = label_encoders  # Save encoders to session state
     else:
-        st.write("No additional categorical columns to encode.")
+        st.info("No additional categorical variables found for encoding.")
 
-    
-    # 8. Save Features and Target to Session State for Modeling
+    # Step 6: Save Features and Target for Further Modeling
     st.session_state.features = X
     st.session_state.target = y
-    
-    st.success("Feature engineering complete. Features and target are ready for modeling!")
+    st.success("✅ Feature engineering is complete! Features and target are ready for modeling.")
 
-    
 feature_engineering_page()
diff --git a/pages/model_application.py b/pages/model_application.py
index 0322d9c36a211571f3e91fb4c8f0bb8b31a79092..a74224e6d31a10271011567eded714f89e1990d6 100644
--- a/pages/model_application.py
+++ b/pages/model_application.py
@@ -7,127 +7,122 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import accuracy_score
 
 def model_application_page():
-    st.title("Netflix Dataset Model Application")
+    # Main title with emojis
+    st.title("🎥 Netflix Dataset Model Application")
+    st.markdown(
+        """
+        <style>
+        .main-header {
+            background-color: #1DB954;
+            color: white;
+            padding: 15px;
+            text-align: center;
+            font-size: 24px;
+            border-radius: 10px;
+        }
+        </style>
+        <div class="main-header">
+            Unlock the power of Netflix data with personalized recommendations, insights, and trends!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
 
     # Load the Netflix dataset
     file_path = "netflix_titles.csv"
     df = pd.read_csv(file_path)
 
+    st.markdown("---")  # Section divider
 
-# 1. Personalized Playlist
-
-    st.title("1. Personalized Netflix Recommendations")
+    # 1. Personalized Playlist
+    st.header("📋 1. Personalized Netflix Recommendations")
 
     # Preprocessing
     valid_ratings = [
         "TV-MA", "PG-13", "R", "PG", "G", "NC-17", "TV-Y", "TV-Y7",
         "TV-G", "TV-PG", "TV-14", "NR", "UR"
     ]
-
-    # Replace invalid ratings with "Not Rated"
     df["rating"] = df["rating"].apply(lambda x: x if x in valid_ratings else "NR")
     df["release_year"] = df["release_year"].fillna(df["release_year"].median())
     df["rating"] = df["rating"].fillna("Not Rated")
     df["genres"] = df["listed_in"].str.split(", ")
 
     # User Inputs
-    st.markdown("### Input Your Preferences")
+    st.markdown("### 🎯 Input Your Preferences")
     available_genres = sorted(set(df["genres"].explode().dropna()))
-    preferred_genres = st.multiselect("What genres do you like?", available_genres, default=[])
+    preferred_genres = st.multiselect("🎥 Select Genres:", available_genres, default=[])
     available_ratings = sorted(df["rating"].unique())
-    preferred_ratings = st.multiselect("What ratings do you prefer?", available_ratings, default=[])
+    preferred_ratings = st.multiselect("⭐ Select Ratings:", available_ratings, default=[])
 
     # Filter Data by Genre
     filtered_df = df[df["genres"].apply(lambda genres: any(genre in genres for genre in preferred_genres))]
-
-    # Filter Data by Rating
     filtered_df = filtered_df[filtered_df["rating"].isin(preferred_ratings)]
-
-    # Sort by Relevance (e.g., release year)
     filtered_df = filtered_df.sort_values(by=["release_year"], ascending=False)
 
-    # Select Top 20 Recommendations
+    # Recommendations
     recommendations = filtered_df.head(20)
-
-    # Output Recommendations
-    st.markdown("### Top 20 Recommendations for You")
+    st.markdown("### 📃 Top 20 Recommendations for You")
     if recommendations.empty:
-        st.write("No recommendations found for the selected criteria. Please adjust your preferences.")
+        st.warning("No recommendations found for the selected criteria. Please adjust your preferences.")
     else:
         st.write(recommendations[["title", "type", "release_year", "rating", "listed_in"]])
 
-# 2. Country-Wise Popular Content
-    st.title("2. Country-Wise Popular Content Recommendations")
+    st.markdown("---")  # Section divider
 
-    df["country"] = df["country"].fillna("Unknown")
+    # 2. Country-Wise Popular Content
+    st.header("🌎 2. Country-Wise Popular Content Recommendations")
 
-    # Split multiple countries into individual entries
+    df["country"] = df["country"].fillna("Unknown")
     all_countries = df["country"].str.split(", ").explode().unique()
     all_countries = sorted([country.strip() for country in all_countries if country != "Unknown"])
 
-    # User Input: Select Country
-    selected_country = st.selectbox("Select a Country:", all_countries)
-
-    # Filter data by selected country
-    country_filtered_df = df[df["country"].str.contains(selected_country, case=False, na=False)]
-
-    # Filter only Movies and TV Shows
-    popular_content = country_filtered_df[country_filtered_df["rating"] != "Not Rated"]
-
-    # Sort by most popular ratings
-    rating_priority = {
-        "TV-MA": 1, "R": 2, "PG-13": 3, "TV-14": 4, "PG": 5, 
-        "G": 6, "NC-17": 7, "TV-Y7": 8, "TV-Y": 9, "TV-G": 10, 
-        "TV-PG": 11, "Not Rated": 12
-    }
-    popular_content["rating_priority"] = popular_content["rating"].map(rating_priority)
-    sorted_content = popular_content.sort_values(by=["rating_priority", "release_year"], ascending=[True, False])
-
-    # Select Top 20 Recommendations
-    recommendations = sorted_content.head(20)
-
-    # Display Recommendations
-    st.markdown(f"### Top 20 Popular Titles from {selected_country}")
-    if recommendations.empty:
-        st.write("No popular content found for the selected country. Please choose a different country.")
+    selected_country = st.selectbox("🌍 Select a Country:", all_countries)
+
+    if selected_country:
+        country_filtered_df = df[df["country"].str.contains(selected_country, case=False, na=False)]
+        popular_content = country_filtered_df[country_filtered_df["rating"] != "Not Rated"]
+        rating_priority = {
+            "TV-MA": 1, "R": 2, "PG-13": 3, "TV-14": 4, "PG": 5, 
+            "G": 6, "NC-17": 7, "TV-Y7": 8, "TV-Y": 9, "TV-G": 10, 
+            "TV-PG": 11, "Not Rated": 12
+        }
+        popular_content["rating_priority"] = popular_content["rating"].map(rating_priority)
+        sorted_content = popular_content.sort_values(by=["rating_priority", "release_year"], ascending=[True, False])
+        recommendations = sorted_content.head(20)
+        st.markdown(f"### 🎬 Top 20 Popular Titles from **{selected_country}**")
+        if recommendations.empty:
+            st.warning("No popular content found for the selected country. Please choose a different country.")
+        else:
+            st.write(recommendations[["title", "type", "release_year", "rating", "country"]])
     else:
-        st.write(recommendations[["title", "type", "release_year", "rating", "country"]])
+        st.info("Please select a country to view popular content recommendations.")
 
+    st.markdown("---")  # Section divider
 
+    # 3. Duration Graph
+    st.header("📊 3. Duration Trends Analysis")
 
-# 3.Duration Graph
-    # Preprocess the dataset
-    df["release_year"] = df["release_year"].fillna(df["release_year"].median())
-    df["listed_in"] = df["listed_in"].fillna("Unknown")
     df["duration"] = df["duration"].fillna("Unknown")
 
-    # Function to preprocess duration
     def preprocess_duration(row):
-        if "min" in row:  # For movies
+        if "min" in row:
             return int(row.replace(" min", ""))
-        elif "Season" in row:  # For TV Shows
+        elif "Season" in row:
             return int(row.replace(" Season", "").replace("s", ""))
-        else:
-            return None
+        return None
 
     df["duration_numeric"] = df["duration"].apply(preprocess_duration)
 
-    # Streamlit UI
-    st.title("3. Duration Trends Analysis")
-
-    # Inputs
-    content_type = st.radio("Select Content Type:", options=["Movie", "TV Show"])
+    content_type = st.radio("📽️ Select Content Type:", options=["Movie", "TV Show"])
     available_genres = sorted(set(df["listed_in"].str.split(", ").explode().dropna()))
-    selected_genres = st.multiselect("Select Genres:", available_genres, default=[])
+    selected_genres = st.multiselect("🎞️ Select Genres:", available_genres, default=[])
 
-    # Filter dataset
     filtered_df = df[
         (df["type"] == content_type) &
         (df["listed_in"].apply(lambda genres: any(genre in genres for genre in selected_genres))) &
         (df["duration_numeric"].notnull())
     ]
 
-    # Group and analyze
     duration_analysis = (
         filtered_df.groupby("release_year")["duration_numeric"]
         .mean()
@@ -135,8 +130,7 @@ def model_application_page():
         .rename(columns={"duration_numeric": "Average Duration"})
     )
 
-    # Plot the results
-    st.markdown("### Duration Trends Over Time")
+    st.markdown("### 📈 Duration Trends Over Time")
     if not duration_analysis.empty:
         fig, ax = plt.subplots(figsize=(10, 6))
         ax.plot(
@@ -144,12 +138,13 @@ def model_application_page():
             duration_analysis["Average Duration"],
             marker="o",
             linestyle="-",
+            color="#FF6347",
         )
-        ax.set_title(f"Average Duration of {content_type}s in Selected Genres Over Time")
+        ax.set_title(f"Average Duration of {content_type}s Over Time", fontsize=16)
         ax.set_xlabel("Release Year")
-        ax.set_ylabel("Average Duration (Minutes for Movies / Seasons for TV Shows)")
+        ax.set_ylabel("Average Duration (Minutes/Seasons)")
         st.pyplot(fig)
     else:
-        st.write("No data available for the selected criteria.")
+        st.warning("No data available for the selected criteria.")
 
-model_application_page()
\ No newline at end of file
+model_application_page()
diff --git a/pages/model_training.py b/pages/model_training.py
index d1b10b203a9d2543f06dc10e5217d5b4e14ab6fb..aa15e98fbe98ee70bfeea438101a1ed08caa8c1f 100644
--- a/pages/model_training.py
+++ b/pages/model_training.py
@@ -2,7 +2,6 @@ import streamlit as st
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
-import joblib
 import numpy as np
 
 from sklearn.linear_model import LogisticRegression
@@ -11,40 +10,41 @@ from sklearn.preprocessing import StandardScaler, LabelEncoder
 from sklearn.metrics import classification_report, confusion_matrix
 
 
-
+# Function to load data
 def load_data(file_path):
-    return pd.read_csv("netflix_titles.csv")
+    """Load the Netflix dataset from the provided file path."""
+    return pd.read_csv(file_path)
 
 
+# Function to preprocess data
 def preprocess_data(df: pd.DataFrame):
-    """Preprocess the dataset: drop missing values, encode genre, scale features, etc."""
-    # a. Remove rows with missing values in key columns
+    """Preprocess the dataset: handle missing values, clean columns, encode features, and scale numeric data."""
+    # Handle missing values in key columns
     df = df.dropna(subset=['duration', 'director', 'release_year', 'listed_in'])
 
-    # b. Clean 'duration' column (convert "xxx min" -> int, or 0 if unrecognized)
+    # Clean 'duration' column
     def clean_duration(value):
         if "min" in str(value):
             return int(value.replace(" min", "").strip())
-        return 0  # For "Season" or other unrecognized patterns
+        return 0  # Handle 'Season' or unrecognized patterns
 
     df['duration'] = df['duration'].apply(clean_duration)
-    df = df[df['duration'] >= 0]
+    df = df[df['duration'] >= 0]  # Remove invalid durations
 
-    # c. Extract primary genre; group rare genres into 'Other'
+    # Extract and encode primary genre
     df['primary_genre'] = df['listed_in'].str.split(',').str[0].str.strip()
     genre_counts = df['primary_genre'].value_counts()
     common_genres = genre_counts[genre_counts >= 5].index
     df['primary_genre'] = df['primary_genre'].apply(lambda g: g if g in common_genres else 'Other')
 
-    # d. Encode genre with LabelEncoder
+    # Encode genres and directors
     genre_encoder = LabelEncoder()
     df['genre_encoded'] = genre_encoder.fit_transform(df['primary_genre'])
 
-    # e. Encode director by counting occurrences
     director_counts = df['director'].value_counts()
     df['director_count'] = df['director'].map(director_counts)
 
-    # f. Scale 'duration' and 'release_year' separately
+    # Scale numeric features
     duration_scaler = StandardScaler()
     release_year_scaler = StandardScaler()
     df['duration_scaled'] = duration_scaler.fit_transform(df[['duration']])
@@ -53,23 +53,26 @@ def preprocess_data(df: pd.DataFrame):
     return df, genre_encoder, duration_scaler, release_year_scaler, director_counts
 
 
+# Function to train Logistic Regression
 def train_logistic_regression(X_train, y_train):
-    """Train a Logistic Regression classifier."""
-    # Increase max_iter to ensure convergence on multi-class data
+    """Train a Logistic Regression model on the training dataset."""
     model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state=42)
     model.fit(X_train, y_train)
     return model
 
 
+# Evaluate the model
 def evaluate_model(y_test, y_pred, encoder):
-    """Display classification metrics and confusion matrix."""
-    # Classification Report
+    """Display classification metrics and a confusion matrix."""
     report = classification_report(y_test, y_pred, target_names=encoder.classes_)
+    st.markdown("### Classification Report")
+    st.text(report)
 
 
+# Plot correlation heatmap
 def plot_correlation(df: pd.DataFrame):
-    """Optional: Plot a correlation heatmap for numeric features + encoded genre."""
-    st.markdown("## Correlation Heatmap")
+    """Visualize correlation between numeric features and the target."""
+    st.markdown("### Correlation Heatmap")
     corr_cols = ['duration', 'release_year', 'director_count', 'genre_encoded']
     corr = df[corr_cols].corr()
     fig, ax = plt.subplots(figsize=(8, 6))
@@ -77,115 +80,160 @@ def plot_correlation(df: pd.DataFrame):
     st.pyplot(fig)
 
 
+# Visualize feature coefficients
 def plot_feature_coefficients(model, features):
-    """Plot feature coefficients from the Logistic Regression model."""
-    # For multinomial logistic regression, model.coef_ shape is (n_classes, n_features).
-    # Below, we show absolute coefficients for class 0 as an example, or an average.
-    st.markdown("## Feature Coefficients")
-    # Taking the average of absolute coefficients across all classes for interpretability
-    # (Alternatively, you can visualize them per-class.)
+    """Visualize feature coefficients from the Logistic Regression model."""
+    st.markdown("### Feature Coefficients")
     mean_abs_coef = np.mean(np.abs(model.coef_), axis=0)
     coef_series = pd.Series(mean_abs_coef, index=features).sort_values()
 
     fig, ax = plt.subplots(figsize=(8, 6))
     coef_series.plot(kind='barh', color='skyblue', ax=ax)
-    ax.set_title("Average Absolute Feature Coefficients (Multinomial Logistic Regression)")
+    ax.set_title("Average Absolute Feature Coefficients")
     st.pyplot(fig)
 
 
 def visualize_custom_prediction(df: pd.DataFrame, encoder, custom_genre: str):
-    """Highlight the custom predicted genre in the distribution (red vs. blue)."""
+    """Highlight the custom predicted genre in a bar chart with a legend."""
+    # Generate color palette: red for custom genre, blue for others
     palette = ['red' if genre == custom_genre else 'blue' for genre in encoder.classes_]
+    
+    # Create the plot
     fig, ax = plt.subplots(figsize=(8, 4))
-    sns.countplot(y='predicted_genre', data=df, order=encoder.classes_, palette=palette, ax=ax)
-    ax.set_title("Predicted Genre Distribution (Custom Prediction in Red)")
+    sns.countplot(
+        y='predicted_genre', 
+        data=df, 
+        order=encoder.classes_, 
+        palette=palette, 
+        ax=ax
+    )
+    ax.set_title("Predicted Genre Distribution (Custom Prediction Highlighted)")
+    ax.set_xlabel("Count")
+    ax.set_ylabel("Genre")
+    
+    # Add legend
+    handles = [
+        plt.Line2D([0], [0], marker='o', color='red', label="Custom Prediction", markersize=10, linestyle=''),
+        plt.Line2D([0], [0], marker='o', color='blue', label="Data", markersize=10, linestyle='')
+    ]
+    ax.legend(handles=handles, loc='upper right', title="Legend")
+    
+    # Display the plot in Streamlit
     st.pyplot(fig)
 
 
+
+# Main application
 def main():
-    st.title("Netflix Genre Prediction using Logistic Regression")
+    st.title("🎬 Netflix Genre Prediction using Logistic Regression")
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #34495e;
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            text-align: center;
+            font-size: 20px;
+        }
+        </style>
+        <div class="header">
+            Analyze, predict, and visualize genres in the Netflix dataset using Logistic Regression!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
 
-    # 1. Load Data
+    # Load data
     file_path = "netflix_titles.csv"
     df = load_data(file_path)
     if st.checkbox("Show Raw Data"):
-        st.write(df.head())
+        st.dataframe(df.head())
 
-    # 2. Preprocess Data
+    # Preprocess data
     df, genre_encoder, duration_scaler, release_year_scaler, director_counts = preprocess_data(df)
 
-
-    # 4. Feature & Target
-    st.markdown("## Feature and Target Selection")
+    # Feature and Target Selection
+    st.markdown("## 📊 Feature and Target Selection")
     features = ["duration_scaled", "director_count", "release_year_scaled"]
     target_col = "genre_encoded"
     X, y = df[features], df[target_col]
 
-    st.write("### Features (Processed):")
-    st.write(X.head())
-    st.write("### Target (Genre Encoded):")
-    st.write(y.head())
+    st.write("### Features:")
+    st.dataframe(X.head())
+    st.write("### Target (Encoded Genres):")
+    st.dataframe(y.head())
 
-    # 5. Train/Test Split
-    st.markdown("## Splitting Data")
+    # Train-test split
+    st.markdown("## ✂️ Splitting Data")
     test_size = st.slider("Test Set Size (%)", min_value=10, max_value=50, value=20, step=5)
-    from sklearn.model_selection import train_test_split
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=test_size/100, random_state=42
-    )
-    st.write(f"Training set size: {len(X_train)}")
-    st.write(f"Testing set size: {len(X_test)}")
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size / 100, random_state=42)
+
+    st.write(f"Training Set Size: {len(X_train)}")
+    st.write(f"Testing Set Size: {len(X_test)}")
 
-    # 6. Train Logistic Regression
-    st.markdown("## Training the Model with Logistic Regression")
+    # Train Logistic Regression
+    st.markdown("## 🤖 Training Logistic Regression")
     model = train_logistic_regression(X_train, y_train)
     st.success("Logistic Regression Model Trained Successfully!")
 
-    # 7. Evaluate Model
-    st.markdown("## Model Evaluation")
-    y_pred = model.predict(X_test)
-    evaluate_model(y_test, y_pred, genre_encoder)
 
-  # 3. (Optional) Correlation Plot
+    # Correlation heatmap
     plot_correlation(df)
 
-    # 8. Predict on Entire Dataset
+    # Predict on entire dataset
     df["predicted_genre"] = genre_encoder.inverse_transform(model.predict(X))
-    st.markdown("## Sample Predictions on Entire Dataset")
-    st.write(df[[*features, target_col, "predicted_genre"]].head())
+    st.markdown("### Sample Predictions:")
+    st.dataframe(df[["duration", "release_year", "director_count", "primary_genre", "predicted_genre"]].head())
+
+    # Custom prediction
+    st.markdown("## 🎥 Make a Custom Prediction")
+
+    # Add a default option for directors
+    unique_directors = ["Select a Director"] + sorted(df["director"].unique())
+    selected_director = st.selectbox("Select Director", unique_directors)
+
+    # Initialize placeholders for inputs
+    custom_genre = None
+
+    if selected_director != "Select a Director":
+        # Proceed if a valid director is selected
+        director_count = director_counts.get(selected_director, 0)
+        
+        # Collect other inputs
+        duration_choice = st.slider(
+            "Duration (minutes)", 
+            min_value=int(df["duration"].min()), 
+            max_value=int(df["duration"].max())
+        )
+        release_year_choice = st.slider(
+            "Release Year", 
+            min_value=int(df["release_year"].min()), 
+            max_value=int(df["release_year"].max())
+        )
+
+        # Scale inputs and make a prediction
+        try:
+            duration_scaled_input = duration_scaler.transform([[duration_choice]])[0][0]
+            release_year_scaled_input = release_year_scaler.transform([[release_year_choice]])[0][0]
+            custom_input = [[duration_scaled_input, director_count, release_year_scaled_input]]
+            custom_pred_label = model.predict(custom_input)[0]
+            custom_genre = genre_encoder.inverse_transform([custom_pred_label])[0]
+        except Exception as e:
+            st.error(f"Error during prediction: {e}")
+    else:
+        # Prompt the user to select a valid director
+        st.warning("Please select a Director to make a Prediction.")
+
+    # Display the predicted genre if available
+    if custom_genre:
+        st.markdown(f"### Predicted Genre: 🎬 **{custom_genre}**")
+    else:
+        st.markdown("### Predicted Genre: Please provide all inputs to get a prediction.")
 
-    # 9. Custom Prediction
-    st.markdown("## Make a Custom Prediction")
-    st.markdown("### Input Movie Details")
-    unique_directors = df["director"].unique()
-    selected_director = st.selectbox("Select a Director", sorted(unique_directors))
-    director_count = director_counts.get(selected_director, 0)
-
-    dur_min, dur_max = int(df["duration"].min()), int(df["duration"].max())
-    duration_choice = st.slider("Duration (minutes)", dur_min, dur_max, 90)
-
-    year_min, year_max = int(df["release_year"].min()), int(df["release_year"].max())
-    release_year_choice = st.slider("Release Year", year_min, year_max, 2020)
-
-    # Scale inputs
-    duration_scaled_input = duration_scaler.transform([[duration_choice]])[0][0]
-    release_year_scaled_input = release_year_scaler.transform([[release_year_choice]])[0][0]
-
-    custom_input = [[duration_scaled_input, director_count, release_year_scaled_input]]
-    custom_pred_label = model.predict(custom_input)[0]
-    custom_genre = genre_encoder.inverse_transform([custom_pred_label])[0]
-    st.write(f"### Predicted Genre: {custom_genre}")
-
-    # 10. Visualization
     visualize_custom_prediction(df, genre_encoder, custom_genre)
 
-    # 13. Session State
-    st.session_state["trained_model"] = model
-    st.session_state["genre_encoder"] = genre_encoder
-    st.session_state["duration_scaler"] = duration_scaler
-    st.session_state["release_year_scaler"] = release_year_scaler
-    st.session_state["director_counts"] = director_counts
-
-    st.success("Logistic Regression training and prediction completed successfully!")
 
+# Run the application
 main()
diff --git a/pages/preprocessing.py b/pages/preprocessing.py
index 1362d20f4ca0c57110d4a047c8f8c82c63964832..fe9bad0f5d15cd9747881397bfc610308eb34795 100644
--- a/pages/preprocessing.py
+++ b/pages/preprocessing.py
@@ -4,75 +4,104 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler
 import numpy as np
 
 def preprocessing_page():
-    st.title("Netflix Dataset Preprocessing")
-
-    # 1. Load Netflix dataset
+    # Title with a styled header
+    st.title("📊 Netflix Dataset Preprocessing")
+    st.markdown(
+        """
+        <style>
+        .header {
+            background-color: #f39c12;
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            text-align: center;
+            font-size: 20px;
+            margin-bottom: 20px;
+        }
+        </style>
+        <div class="header">
+            Clean, preprocess, and prepare your Netflix dataset for advanced analytics!
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    # Load Netflix dataset
     file_path = "netflix_titles.csv"  # Ensure this path is correct
     try:
+        # Load the dataset and store it in session state
         df = pd.read_csv(file_path)
-        st.session_state.df = df  # Store original dataframe in session state
+        st.session_state.df = df
+        st.success("🎉 Dataset loaded successfully!")
     except FileNotFoundError:
-        st.error(f"File not found at path: {file_path}. Please check the file path and try again.")
+        # Error handling if the file is not found
+        st.error(f"❌ File not found at path: {file_path}. Please check the file path and try again.")
         return
 
-    # 2. Display original dataset
-    st.markdown("## Original Dataset")
-    st.write(df.head())
+    st.markdown("---")  # Divider
 
-    # 3. Summary Statistics Before Cleaning
-    st.markdown("## Summary Statistics (Before Cleaning)")
-    st.write(df.describe(include='all'))
+    # 1. Display original dataset
+    st.header("🔍 Original Dataset")
+    st.write(df.head())  # Display the first few rows
 
-    # 4. Missing Values Overview
-    st.markdown("## Missing Values Overview")
-    missing_values = df.isnull().sum()
-    st.write("### Missing Values per Column")
+    # 2. Summary Statistics Before Cleaning
+    st.header("📈 Summary Statistics (Before Cleaning)")
+    st.write(df.describe(include='all'))  # Display all columns' statistics
+
+    # 3. Missing Values Overview
+    st.header("🛠️ Missing Values Overview")
+    missing_values = df.isnull().sum()  # Calculate missing values per column
+    st.write("### Missing Values per Column:")
     st.write(missing_values)
 
-    # 5. Removing All Rows with Any Missing Values
-    st.markdown("## Removing All Missing Values")
+    # 4. Removing All Rows with Any Missing Values
+    st.header("🧹 Removing All Missing Values")
     num_rows_before = df.shape[0]
     num_missing = missing_values.sum()
     st.write(f"**Total Rows Before Cleaning:** {num_rows_before}")
     st.write(f"**Total Missing Values:** {num_missing}")
 
     if num_missing > 0:
+        # Drop rows with missing values
         df = df.dropna()
         num_rows_after = df.shape[0]
         rows_removed = num_rows_before - num_rows_after
-        st.write(f"**Total Rows After Cleaning:** {num_rows_after}")
-        st.write(f"**Total Rows Removed:** {rows_removed}")
+        st.success(f"✅ Missing values removed! Total rows after cleaning: {num_rows_after}")
+        st.info(f"🧾 Rows Removed: {rows_removed}")
     else:
-        st.write("No missing values found. No rows removed.")
+        # Notify the user if no missing values are found
+        st.success("✅ No missing values found. Dataset is already clean.")
 
-    # Update session state with cleaned dataframe
-    st.session_state.df = df
+    st.markdown("---")  # Divider
 
-    # 6. Summary Statistics After Cleaning
-    st.markdown("## Summary Statistics (After Cleaning)")
+    # 5. Summary Statistics After Cleaning
+    st.header("📊 Summary Statistics (After Cleaning)")
     st.write(df.describe(include='all'))
 
-    # 7. Scaling: Min-Max and Standardization
-    st.markdown("## Scaling: Min-Max Scaling and Standardization")
-    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
+    # 6. Scaling: Min-Max and Standardization
+    st.header("⚖️ Scaling: Min-Max Scaling and Standardization")
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()  # Identify numeric columns
 
     if numeric_cols:
-        # a. Min-Max Scaling
-        st.markdown("### Min-Max Scaled Data")
+        # Min-Max Scaling
+        st.subheader("🔹 Min-Max Scaled Data")
         minmax_scaler = MinMaxScaler()
         df_minmax_scaled = pd.DataFrame(minmax_scaler.fit_transform(df[numeric_cols]), columns=numeric_cols)
         st.write(df_minmax_scaled.head())
 
-        # b. Standardization
-        st.markdown("### Standardized Data")
+        # Standardization
+        st.subheader("🔹 Standardized Data")
         standard_scaler = StandardScaler()
         df_standardized = pd.DataFrame(standard_scaler.fit_transform(df[numeric_cols]), columns=numeric_cols)
         st.write(df_standardized.head())
     else:
-        st.warning("No numeric columns available for scaling in the dataset.")
+        # Notify the user if no numeric columns are available for scaling
+        st.warning("⚠️ No numeric columns available for scaling in the dataset.")
 
-    # 8. Outlier Detection and Removal
-    st.markdown("## Outlier Detection and Removal")
+    st.markdown("---")  # Divider
+
+    # 7. Outlier Detection and Removal
+    st.header("🚨 Outlier Detection and Removal")
     st.write("Identifying outliers using the Interquartile Range (IQR) method.")
 
     # Function to remove outliers using IQR
@@ -92,14 +121,17 @@ def preprocessing_page():
         rows_removed = initial_count - final_count
         st.write(f"**Outliers Removed from '{col}':** {rows_removed} rows")
 
-    # 9. Summary Statistics After Outlier Removal
-    st.markdown("## Dataset After Removing Outliers")
-    st.write(df.describe(include='all'))
+    st.markdown("---")  # Divider
+
+    # 8. Dataset After Removing Outliers
+    st.header("📜 Dataset After Removing Outliers")
+    st.write(df.describe(include='all'))  # Display updated statistics
 
     # Update session state with the final cleaned dataframe
     st.session_state.df = df
 
-    # 10. Mark Preprocessing as Complete
-    st.success("Preprocessing Complete. Your dataset is now clean and ready for the next steps!")
+    # 9. Mark Preprocessing as Complete
+    st.success("🎉 Preprocessing Complete! Your dataset is clean and ready for further analysis.")
 
+# Run the preprocessing page function
 preprocessing_page()
diff --git a/rasa/actions/actions.py b/rasa/actions/actions.py
index 5267e20b6e1f180a6f5bdc3b1ddbee956c4c8696..a5e987e012c8e4f844890b00d6a9942c4e6aeddc 100644
--- a/rasa/actions/actions.py
+++ b/rasa/actions/actions.py
@@ -1,73 +1,20 @@
+# actions.py
+
 from typing import Any, Text, Dict, List
 from rasa_sdk import Action, Tracker
 from rasa_sdk.executor import CollectingDispatcher
 
-class ActionAskMovieGenre(Action):
-    def name(self) -> Text:
-        return "action_ask_movie_genre"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="To predict a movie's genre, I need the director's name, movie duration, and launch date. Let's get started!")
-        return []
-
-class ActionProvideDirectorName(Action):
-    def name(self) -> Text:
-        return "action_provide_director_name"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(text="Thank you for providing the director's name. Please provide the movie's duration next.")
-        return []
-
-class ActionProvideMovieDuration(Action):
-    def name(self) -> Text:
-        return "action_provide_movie_duration"
-
-    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(text="Got the duration! Now, can you share the launch date of the movie?")
-        return []
-
-class ActionProvideLaunchDate(Action):
-    def name(self) -> Text:
-        return "action_provide_launch_date"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="Great! I now have all the details. Let me predict the genre for you.")
-        return []
-
-class ActionPredictMovieGenre(Action):
-    def name(self) -> Text:
-        return "action_predict_movie_genre"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        # Replace this with your actual prediction logic
-        dispatcher.utter_message(
-            text="Based on the details provided, the predicted genre of the movie is Drama. Let me know if there's anything else you need!")
-        return []
-
-class ActionExplainPredictionProcess(Action):
-    def name(self) -> Text:
-        return "action_explain_prediction_process"
-
-    def run(self, dispatcher: CollectingDispatcher,
-            tracker: Tracker,
-            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="I predict movie genres by analyzing the director's name, duration, and launch date. Each parameter gives valuable insights into the genre.")
-        return []
+# Currently, no custom actions are needed because all bot replies are 'utter_' responses.
+# You can add custom actions here if your use case expands.
 
-class ActionHelp(Action):
+class ActionDummy(Action):
+    """A placeholder action, not used in stories."""
+    
     def name(self) -> Text:
-        return "action_help"
+        return "action_dummy"
 
     def run(self, dispatcher: CollectingDispatcher,
             tracker: Tracker,
             domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
-        dispatcher.utter_message(
-            text="I'm here to assist you with predicting movie genres. You can ask me how it works or start by sharing the movie details.")
+        dispatcher.utter_message(text="This is a dummy action for demonstration.")
         return []
diff --git a/rasa/data/nlu.yml b/rasa/data/nlu.yml
index e83deb307d7af0194421f3d07501a192b4de90e1..a11441b267797891fd0f092a80ec4b10ede12f00 100644
--- a/rasa/data/nlu.yml
+++ b/rasa/data/nlu.yml
@@ -3,173 +3,105 @@ version: "3.1"
 nlu:
 - intent: greet
   examples: |
-    - hello how are you?
-    - hey
-    - hello
     - hi
+    - hello
+    - hey
     - hello there
-    - good morning
-    - good evening
-    - moin
-    - hey there
-    - let's go
-    - hey dude
-    - good morning
-    - good evening
-    - good afternoon
-    - hallo
+
+- intent: thank_you
+  examples: |
+    - thanks
+    - thank you
+    - cool, thanks
+    - great, thanks
+    - Nice! I think I’ll start with To All the Boys I’ve Loved Before. Thank you!
 
 - intent: goodbye
   examples: |
-    - cu
-    - ciao ciao
+    - bye
     - goodbye
     - see you later
-    - good night
-    - bye
     - have a nice day
-    - see you around
-    - bye bye
-    - see you later
-    - thank you good
 
-- intent: affirm
+###############################################################################
+# Movie Enthusiast Dialogs
+###############################################################################
+- intent: ask_romance_recommendation
   examples: |
-    - yes
-    - y
-    - indeed
-    - of course
-    - that sounds good
-    - correct
-    - ok
-    - yep
-    - yeah
-    - hmm
-    - okay
+    - I’m in the mood for a romantic movie. Can you suggest some good ones?
+    - Recommend some romantic movies
+    - Show me some romance movies
+    - I want a romantic film
 
-- intent: deny
+- intent: ask_romance_summary
   examples: |
-    - no
-    - n
-    - never
-    - I don't think so
-    - don't like that
-    - no way
-    - not really
-    - I don't want to do this.
-    - nope
+    - Could you give me a brief summary of each?
+    - Tell me what these movies are about
+    - Can you explain what happens in each?
 
-- intent: mood_great
+- intent: ask_family_friendly_action
   examples: |
-    - perfect
-    - great
-    - amazing
-    - feeling like a king
-    - wonderful
-    - I am feeling very good
-    - I am great
-    - I am amazing
-    - I am going to save the world
-    - super stoked
-    - extremely good
-    - so so perfect
-    - so good
-    - so perfect
-    - happy
-    - I am good
-    - feels great
-    - fine
-    - fine, thank you
-    - not bad
+    - I’m looking for a family-friendly action movie for a weekend watch party. Any suggestions?
+    - Suggest some action movies suitable for kids
+    - Could you recommend a family-friendly action film?
 
-- intent: mood_unhappy
+- intent: ask_most_popular_action
   examples: |
-    - my day was horrible
-    - I am sad
-    - I don't feel very well
-    - I am disappointed
-    - super sad
-    - I'm so sad
-    - sad
-    - very sad
-    - unhappy
-    - not good
-    - not very good
-    - extremely sad
-    - so saad
-    - so sad
-    - not so good
+    - Which one is the most popular?
+    - Which is the best among them?
+    - Which one do you recommend the most?
 
-- intent: bot_challenge
+###############################################################################
+# Content Strategist Dialogs
+###############################################################################
+- intent: ask_comedy_viewer_demographics
   examples: |
-    - are you a bot?
-    - are you a human?
-    - am I talking to a bot?
-    - am I talking to a human?
-    - What are you?
-    - What can you do?
-    - who are you?
+    - I’d like to know about viewer demographics for comedy movies
+    - Any insights on comedy audience demographics?
+    - Who watches comedy films the most?
 
-- intent: ask_action_movies
-  example: |
-   - Can you show me some action movies?
-   - Show me action movies?
-   - Action movies?
+- intent: ask_drama_viewer_demographics
+  examples: |
+    - How about drama? Do they appeal to the same demographic?
+    - What about drama viewers?
+    - Is drama watched by the same age group?
+    - How about drama? Do they also appeal to the same demographic?
 
-- intent: ask_movie_genre
+- intent: ask_genre_trends
   examples: |
-    - Can you predict the genre of a movie?
-    - What is the genre of this movie?
-    - I want to know the genre of a film.
-    - Tell me the genre of this movie.
-    - Could you find the genre of a movie for me?
-    - Predict the movie genre.
-    - Identify the genre of the movie.
-    - Find out the genre for me.
-    - Can you help me know the movie genre?
+    - I want to compare viewership trends for different genres across seasons
+    - Can you help with seasonal genre trends?
+    - How do genres perform in different seasons?
 
-- intent: provide_director_name
+- intent: ask_specific_month_trends
   examples: |
-    - The director is Christopher Nolan.
-    - It's directed by Steven Spielberg.
-    - Directed by Quentin Tarantino.
-    - The director of the movie is James Cameron.
-    - Ridley Scott is the director.
-    - The film was directed by Martin Scorsese.
-    - This movie's director is Stanley Kubrick.
+    - Any specific months that stand out?
+    - Which months have higher viewership for certain genres?
+    - Tell me about monthly spikes
 
-- intent: provide_movie_duration
+###############################################################################
+# Data Science Student Dialogs
+###############################################################################
+- intent: ask_model_evaluation_metrics
   examples: |
-    - The movie is 120 minutes long.
-    - It's a two-hour film.
-    - Duration of the movie is 90 minutes.
-    - The length of the movie is 150 minutes.
-    - It's about 3 hours long.
-    - The runtime is 2 hours 15 minutes.
+    - Can you tell me about how you evaluate your genre prediction model?
+    - How is the model evaluated?
+    - Which metrics do you use for evaluation?
 
-- intent: provide_launch_date
+- intent: ask_model_accuracy
   examples: |
-    - The movie was released on 2020-12-25.
-    - It came out on 1999-07-16.
-    - Release date is 2021-05-07.
-    - It premiered in 2010.
-    - It was released in August 2012.
-    - The release year is 1984.
+    - Can you share the latest accuracy figure?
+    - What's the accuracy of your model?
+    - How accurate is the model right now?
 
-- intent: ask_for_help
+- intent: ask_feature_engineering
   examples: |
-    - I need some help.
-    - Can you assist me?
-    - Help me out.
-    - I need your assistance.
-    - Please guide me.
-    - Can you explain this?
+    - I’m curious about how you engineer new features from the raw data
+    - How do you create features for the dataset?
+    - What feature engineering steps do you take?
 
-- intent: ask_about_prediction_process
+- intent: ask_one_hot_encoding
   examples: |
-    - How do you predict the genre of a movie?
-    - Can you explain the prediction process?
-    - What details do you need to predict a movie's genre?
-    - How does this work?
-    - What is the method you use to find the genre?
-    - Tell me how the genre prediction works.
+    - Do you also use one-hot encoding for any categorical data?
+    - Are categorical features one-hot encoded?
+    - Do you apply one-hot encoding?
diff --git a/rasa/data/rules.yml b/rasa/data/rules.yml
index 60faa6d4463489042f3a77f75837e3eb0f66f6f9..70ba7c195eede59d1adeba9bc8c9e36c3e80e0d4 100644
--- a/rasa/data/rules.yml
+++ b/rasa/data/rules.yml
@@ -2,12 +2,7 @@ version: "3.1"
 
 rules:
 
-- rule: Say goodbye anytime the user says goodbye
+- rule: Respond to bot challenge
   steps:
-  - intent: goodbye
-  - action: utter_goodbye
-
-- rule: Say 'I am a bot' anytime the user challenges
-  steps:
-  - intent: bot_challenge
-  - action: utter_iamabot
\ No newline at end of file
+    - intent: bot_challenge
+    - action: utter_iamabot
diff --git a/rasa/data/stories.yml b/rasa/data/stories.yml
index 8be846b4532901db54c450a1513e5bb4605f51cc..29a118444c2772c3de89e2dd7e250ff707117d5b 100644
--- a/rasa/data/stories.yml
+++ b/rasa/data/stories.yml
@@ -1,132 +1,86 @@
 version: "3.1"
 
 stories:
-
-- story: happy path
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_great
-  - action: utter_happy
-
-- story: sad path 1
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: affirm
-  - action: utter_happy
-
-- story: sad path 2
-  steps:
-  - intent: greet
-  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: deny
-  - action: utter_goodbye
-
-- story: asking for movie genre
-  steps:
-    - intent: greet
-    - action: utter_greet
-    - intent: ask_movie_genre
-    - action: action_ask_movie_genre
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    - intent: provide_launch_date
-    - action: action_provide_launch_date
-    - action: action_predict_movie_genre
-    - action: utter_did_that_help
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
-
-- story: Exploring Action Movies by Genre
-  steps:
-    - intent: greet
-    - action: utter_greet
-    - intent: ask_action_movies
-    - action: utter_action_movies
-    
-
-- story: asking about prediction process
+###############################################################################
+# 1. Movie Enthusiast - Dialog 1: Seeking Romance Recommendations
+###############################################################################
+- story: Movie Enthusiast Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: ask_about_prediction_process
-    - action: action_explain_prediction_process
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_romance_recommendation
+    - action: utter_romance_recommendations
+    - intent: ask_romance_summary
+    - action: utter_romance_summaries
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing director name only
+###############################################################################
+# 2. Movie Enthusiast - Dialog 2: Family-Friendly Action Flicks
+###############################################################################
+- story: Movie Enthusiast Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_family_friendly_action
+    - action: utter_family_friendly_action
+    - intent: ask_most_popular_action
+    - action: utter_most_popular_action
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing movie duration only
+###############################################################################
+# 3. Content Strategist - Dialog 1: Delving Into Viewer Demographics
+###############################################################################
+- story: Content Strategist Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    # - action: utter_asking_everything_ok
-    # - intent: deny
-    # - action: utter_deny
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_comedy_viewer_demographics
+    - action: utter_comedy_demographics
+    - intent: ask_drama_viewer_demographics
+    - action: utter_drama_demographics
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: providing launch date only
+###############################################################################
+# 4. Content Strategist - Dialog 2: Comparing Seasonal Trends
+###############################################################################
+- story: Content Strategist Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_launch_date
-    - action: action_provide_launch_date
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_genre_trends
+    - action: utter_genre_trends
+    - intent: ask_specific_month_trends
+    - action: utter_monthly_trends
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: incomplete movie details
+###############################################################################
+# 5. Data Science Student - Dialog 1: Model Evaluation Metrics
+###############################################################################
+- story: Data Science Student Dialog 1
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: provide_director_name
-    - action: action_provide_director_name
-    - intent: provide_movie_duration
-    - action: action_provide_movie_duration
-    # - action: utter_asking_everything_ok
-    # - intent: deny
-    # - action: utter_deny
-    - intent: goodbye
-    - action: utter_goodbye
+    - intent: ask_model_evaluation_metrics
+    - action: utter_model_evaluation
+    - intent: ask_model_accuracy
+    - action: utter_model_accuracy
+    - intent: thank_you
+    - action: utter_thank_you
 
-- story: requesting help
+###############################################################################
+# 6. Data Science Student - Dialog 2: Feature Engineering Approaches
+###############################################################################
+- story: Data Science Student Dialog 2
   steps:
     - intent: greet
     - action: utter_greet
-    - intent: ask_for_help
-    - action: action_help
-    # - action: utter_asking_everything_ok
-    - intent: affirm
-    - action: utter_happy
-    - intent: goodbye
-    - action: utter_goodbye
-
+    - intent: ask_feature_engineering
+    - action: utter_feature_engineering
+    - intent: ask_one_hot_encoding
+    - action: utter_one_hot_encoding
+    - intent: thank_you
+    - action: utter_thank_you
diff --git a/rasa/domain.yml b/rasa/domain.yml
index b252d148a08285e69e6b432d2a7fbf8ad70dede0..21c0466f916e5d97e62b994be55f05947514a1d2 100644
--- a/rasa/domain.yml
+++ b/rasa/domain.yml
@@ -2,71 +2,161 @@ version: "3.1"
 
 intents:
   - greet
+  - thank_you
   - goodbye
-  - affirm
-  - deny
-  - mood_great
-  - mood_unhappy
-  - bot_challenge
-  - ask_movie_genre
-  - provide_director_name
-  - provide_movie_duration
-  - provide_launch_date
-  - ask_for_help
-  - ask_about_prediction_process
+  - ask_romance_recommendation
+  - ask_romance_summary
+  - ask_family_friendly_action
+  - ask_most_popular_action
+  - ask_comedy_viewer_demographics
+  - ask_drama_viewer_demographics
+  - ask_genre_trends
+  - ask_specific_month_trends
+  - ask_model_evaluation_metrics
+  - ask_model_accuracy
+  - ask_feature_engineering
+  - ask_one_hot_encoding
 
 responses:
+  ###########################################################################
+  # Generic
+  ###########################################################################
+  utter_iamabot:
+    - text: "I am a bot, powered by Rasa."
 
   utter_greet:
-  - text: "Hello and welcome! I am here to assist you with predicting the genre of any movie you have in mind. By providing some details like the director's name, movie duration, and launch date, I can make an educated guess about its genre. How can I help you get started today?"
+    - text: "Hello and welcome! How can I help you today?"
 
   utter_goodbye:
-  - text: "Goodbye! Thank you for using this service. If you ever have more movies to discuss, I'll be here to help. Have an amazing day!"
-
-  utter_cheer_up:
-  - text: "Itâ€™s okay if youâ€™re feeling a bit down. Letâ€™s talk about movies and dive into the wonderful world of cinema! Itâ€™s always a great escape."
-
-  utter_did_that_help:
-  - text: "I hope the information I provided was helpful. Is there anything else you'd like to ask or clarify?"
-
-  utter_happy:
-  - text: "Thatâ€™s wonderful to hear! If you need any more assistance or have other questions, just let me know."
-
-  utter_ask_movie_genre:
-  - text: "I specialize in predicting the genre of movies! To get started, I need some details from you. Could you share the director's name, the movie's duration, and the release date? Once I have this information, Iâ€™ll provide a prediction."
-
-  utter_provide_director_name:
-  - text: "To predict the genre, I first need the name of the director. This helps me analyze the movie better. Could you please share the director's name?"
-
-  utter_provide_movie_duration:
-  - text: "The length of the movie plays a key role in identifying its genre. Could you let me know the movie's duration in minutes?"
-
-  utter_provide_launch_date:
-  - text: "The release date of a movie often hints at its genre trends. Could you please provide the launch date in the format YYYY-MM-DD?"
-
-  utter_ask_about_prediction_process:
-  - text: "Wondering how I predict genres? I use three key details: the directorâ€™s name, the movieâ€™s duration, and its release date. These parameters help me make an informed prediction. Let me know if youâ€™d like to give it a try!"
-
-  utter_help:
-  - text: "Iâ€™m here to make the process simple and fun! Share the director's name, movie duration, and launch date with me, and Iâ€™ll predict the genre for you. If you have questions about how this works, feel free to ask!"
-
-  utter_iamabot:
-  - text: "I am a smart assistant created to predict movie genres based on the information you provide. Letâ€™s dive into the exciting world of cinema together!"
-
-  utter_action_movies:
-  - text: "Sure! Here are some top action movies:
-      Extraction
-      The Old Guard
-      6 Underground"
+    - text: "Goodbye! Have a great day."
+  
+  utter_default:
+    - text: "I'm sorry, I didn't get that. Can you please rephrase?"
+  
+  utter_thank_you:
+    - text: "You're welcome!"
+
+  ###########################################################################
+  # Movie Enthusiast - Dialog 1: Seeking Romance Recommendations
+  ###########################################################################
+  utter_romance_recommendations:
+    - text: |
+        Here are a few popular romantic movies currently trending on Netflix:
+        1. To All the Boys I’ve Loved Before
+        2. Set It Up
+        3. The Kissing Booth
+
+  utter_romance_summaries:
+    - text: >
+        1. To All the Boys I’ve Loved Before: A high school romance sparked by 
+        mailed love letters.
+
+        2. Set It Up: Two overworked assistants conspiring to matchmake their demanding bosses.
+
+        3. The Kissing Booth: A teen's first love complicated by friendship dynamics.
+
+  utter_romance_ack:
+    - text: "You’re welcome! Enjoy the movie. Let me know if you need more recommendations."
+
+  ###########################################################################
+  # Movie Enthusiast - Dialog 2: Family-Friendly Action Flicks
+  ###########################################################################
+  utter_family_friendly_action:
+    - text: |
+        Here are some action movies suitable for most ages:
+        1. Spy Kids
+        2. The Adventures of Sharkboy and Lavagirl
+        3. We Can Be Heroes
+
+  utter_most_popular_action:
+    - text: >
+        Spy Kids is especially popular among families due to its fun spy gadget themes.
+
+  utter_family_action_ack:
+    - text: "Glad I could help! Feel free to ask for more suggestions anytime."
+
+  ###########################################################################
+  # Content Strategist - Dialog 1: Delving Into Viewer Demographics
+  ###########################################################################
+  utter_comedy_demographics:
+    - text: >
+        Comedy films have the highest viewership among the 18-34 age group, 
+        with peak streaming times on weekends between 6 PM and 9 PM.
+
+  utter_drama_demographics:
+    - text: >
+        Dramas typically appeal to a broader 25-45 age range, with spikes on weekday evenings.
+
+  utter_demographics_ack:
+    - text: "Glad I could help! Let me know if you need more strategic insights."
+
+  ###########################################################################
+  # Content Strategist - Dialog 2: Comparing Seasonal Trends
+  ###########################################################################
+  utter_genre_trends:
+    - text: >
+        During winter, drama and romance do well (holiday seasons).
+        In summer, action and comedy spike due to family gatherings and free time.
+
+  utter_monthly_trends:
+    - text: >
+        December and January show high drama viewership, while June and July see a surge 
+        in action/comedy streaming.
+
+  utter_trends_ack:
+    - text: "That’s very useful. Let me know if you need more data insights."
+
+  ###########################################################################
+  # Data Science Student - Dialog 1: Model Evaluation Metrics
+  ###########################################################################
+  utter_model_evaluation:
+    - text: >
+        We use several metrics: Accuracy to see correct predictions, F1-Score for 
+        precision/recall balance, and ROC-AUC to measure classification performance.
+
+  utter_model_accuracy:
+    - text: "Our current genre prediction model is about 85% accurate."
+
+  utter_model_ack:
+    - text: "You’re welcome! Let me know if you need any more model insights."
+
+  ###########################################################################
+  # Data Science Student - Dialog 2: Feature Engineering Approaches
+  ###########################################################################
+  utter_feature_engineering:
+    - text: >
+        We create features like Director Frequency, Seasonal Release (grouping 
+        launch dates by season), and Title Keyword Extraction using NLP.
+
+  utter_one_hot_encoding:
+    - text: >
+        Yes, we apply one-hot encoding for categorical features such as country or language.
+
+  utter_feature_ack:
+    - text: "Perfect. Let me know if you need anything else regarding data preprocessing."
 
 actions:
-  - action_ask_movie_genre
-  - action_provide_director_name
-  - action_provide_movie_duration
-  - action_provide_launch_date
-  - action_help
-  - action_predict_movie_genre
-  - action_explain_prediction_process
+  # We will not use any custom actions for these sample dialogs,
+  # all responses are handled via 'utter_' responses
+  - utter_greet
+  - utter_romance_recommendations
+  - utter_romance_summaries
+  - utter_romance_ack
+  - utter_family_friendly_action
+  - utter_most_popular_action
+  - utter_family_action_ack
+  - utter_comedy_demographics
+  - utter_drama_demographics
+  - utter_demographics_ack
+  - utter_genre_trends
+  - utter_monthly_trends
+  - utter_trends_ack
+  - utter_model_evaluation
+  - utter_model_accuracy
+  - utter_model_ack
+  - utter_feature_engineering
+  - utter_one_hot_encoding
+  - utter_feature_ack
 
 session_config:
   session_expiration_time: 60
diff --git a/requirements.txt b/requirements.txt
index a108ab2fc5e85f3153d5474870edf2c7b4015a5c..411f5c3c584a6bef8c324311028f75b59f26f2b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,4 @@
 streamlit
 streamlit-chat
 scikit-learn
-seaborn
-flask
-virtualenv
 rasa
\ No newline at end of file