Modifications to the data analysis code

91879c97 · Edward Mawuko Samlafo-Adams · 3375f5dc · 91879c97 · 91879c97
Commit 91879c97 authored 2 months ago by Edward Mawuko Samlafo-Adams
--- a/app_pages/__pycache__/data_analysis.cpython-310.pyc
+++ b/app_pages/__pycache__/data_analysis.cpython-310.pyc
--- a/app_pages/data_analysis.py
+++ b/app_pages/data_analysis.py
@@ -15,78 +15,40 @@ def app():
    # Load the dataset
    df = load_data()

+    # Filter dataset to relevant columns
+    columns_to_focus = ["Location", "Category"]
+    df = df[columns_to_focus]
+
    # Section 1: Basic Information
    st.header("Dataset Overview")
    st.write(f"**Total Job Postings:** {len(df)}")
    st.write(f"**Columns:** {len(df.columns)}")
-    st.write(df.head(10))  # Display first 10 rows for reference
-
-    # Section 3: Summary Statistics
-    st.header("Summary Statistics")
-    numerical_columns = df.select_dtypes(include='float64').columns.tolist()
-    if numerical_columns:
-        st.write(df[numerical_columns].describe())
-    else:
-        st.warning("No numerical columns available for summary statistics.")
-
-    # Section 4: Job Status Breakdown
-    st.header("Job Status Distribution")
-    if "Job Status" in df.columns:
-        job_status_counts = df["Job Status"].value_counts()
-        st.bar_chart(job_status_counts)
-    else:
-        st.warning("No 'Job Status' column found in the dataset.")
-
-    # Section 5: Top Job Locations
-    st.header("Top 10 Job Locations")
-    if "Location" in df.columns:
-        top_locations = df["Location"].value_counts().head(10)
-        st.dataframe(top_locations)
+    st.dataframe(df)  # Display the full dataset for reference
+
+    # Section 2: Parameter Selection for Analysis
+    st.header("Select Parameters for Analysis")
+    selected_locations = st.multiselect("Select Locations:", options=df["Location"].unique(), default=df["Location"].unique()[:3])
+    filtered_df = df[df["Location"].isin(selected_locations)]
+
+    # Section 3: Top Locations
+    st.header("Top Job Locations")
+    if not filtered_df.empty:
+        top_locations = filtered_df["Location"].value_counts()
+        st.bar_chart(top_locations)
    else:
-        st.warning("No 'Location' column found in the dataset.")
-
-    # Section 6: Salary Analysis
-    st.header("Salary Analysis")
-    if "Salary (Numeric)" in df.columns:
-        average_salary = df["Salary (Numeric)"].mean()
-        max_salary = df["Salary (Numeric)"].max()
-        min_salary = df["Salary (Numeric)"].min()
-        st.write(f"**Average Salary:** {average_salary:,.2f} EUR")
-        st.write(f"**Highest Salary:** {max_salary:,.2f} EUR")
-        st.write(f"**Lowest Salary:** {min_salary:,.2f} EUR")
-    else:
-        st.warning("No numerical salary data available.")
-
-    # **New Section: Modify Feature Variables (Request 14)**
+        st.warning("No data available for the selected locations.")

+    # Section 4: Modify Feature Variables
    st.header("Modify Feature Variables")
-    st.write("Interactively update feature variables in the dataset.")
-
-    # **1. Modify Salary for a Specific Job**
-    st.write("### Modify Salary")
-    job_index = st.number_input("Enter the index of the job to update salary:", min_value=0, max_value=len(df) - 1, value=0)
-    new_salary = st.number_input("Enter the new salary (EUR):", min_value=0, max_value=200000, value=60000, step=1000)
-    if st.button("Update Salary"):
-        df.at[job_index, "Salary (Numeric)"] = new_salary
-        df.at[job_index, "Salary"] = f"{new_salary} EUR"
-        st.success(f"Updated salary for '{df.at[job_index, 'Job Opening Title']}' to {new_salary} EUR")
-
-    # **2. Change Job Status**
-    st.write("### Change Job Status")
-    selected_job_index = st.selectbox("Select a job to update status:", df.index)
-    new_status = st.selectbox("New Job Status:", ["Open", "Closed"])
-    if st.button("Update Job Status"):
-        df.at[selected_job_index, "Job Status"] = new_status
-        st.success(f"Updated job status for '{df.at[selected_job_index, 'Job Opening Title']}' to '{new_status}'")

-    # **3. Change Job Category**
+    # **1. Change Job Category**
    st.write("### Change Job Category")
-    job_for_category_change = st.selectbox("Select a job to change category:", df.index)
+    job_for_category_change = st.selectbox("Select a job to change category:", filtered_df.index)
    new_category = st.text_input("Enter the new job category:", "Engineering")
    if st.button("Update Job Category"):
-        df.at[job_for_category_change, "Category"] = new_category
-        st.success(f"Updated category for '{df.at[job_for_category_change, 'Job Opening Title']}' to '{new_category}'")
+        filtered_df.at[job_for_category_change, "Category"] = new_category
+        st.success(f"Updated category for job at index {job_for_category_change} to '{new_category}'")

    # Display the modified dataset
    st.write("### Modified Job Listings")
-    st.dataframe(df.head(10))
+    st.dataframe(filtered_df)