| """ |
| Learning Paths Page |
| Analyzes ordered sequences of lessons within Musora app brands. |
| Shows how engagement and sentiment evolve as students progress through a path. |
| |
| Architecture: |
| - Filter panel → Fetch button → session state → charts + cards |
| - Per-Path view: one funnel per learning path, denominator resets per path |
| - Method-Wide view: continuous funnel across all paths with one shared denominator |
| - All data is `lp_`-prefixed in session state to avoid collision with other pages |
| """ |
| import sys |
| from pathlib import Path |
| from typing import Optional |
|
|
| import pandas as pd |
| import streamlit as st |
|
|
# Append the parent of this file's directory to sys.path before the project
# imports below run. NOTE(review): presumably this is the project root that
# holds the `data`, `utils`, `visualizations` and `agents` packages - confirm
# against the repository layout.
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
|
|
| from data.learning_paths_data_loader import LearningPathsDataLoader |
| from utils.learning_paths_utils import ( |
| merge_lesson_metrics, merge_method_wide, find_top_dropoffs, |
| get_overview_kpis, filter_by_paths, label_for_path, short_title, |
| load_lp_config, |
| ) |
| from visualizations.learning_paths_charts import LearningPathsCharts |
| from visualizations.distribution_charts import DistributionCharts |
| from visualizations.demographic_charts import DemographicCharts |
| from agents.learning_paths_summary_agent import LearningPathsSummaryAgent |
|
|
# Options for the "View Mode" radio in render_learning_paths; the radio uses
# index=0, so the first entry is the default selection.
_VIEWS = ["Per-Path", "Method-Wide"]
|
|
|
|
def render_learning_paths(data_loader: LearningPathsDataLoader):
    """Render the Learning Paths page: filters, fetch control, charts, cards, export.

    The page reads its brand list from ``load_lp_config()``, draws the filter
    row, and loads data into ``lp_``-prefixed session-state keys when the
    **Fetch Data** button is clicked. Everything below the fetch row is only
    rendered once data for the current ``(brand, view_mode)`` pair is present
    in session state.

    Args:
        data_loader: Loader handed to ``_fetch_all`` and to the per-lesson
            cards (which use it for on-demand comment queries).
    """
    st.title("π Learning Paths")
    st.markdown(
        "Analyze ordered lesson sequences β see how student engagement and sentiment "
        "evolve as they progress through each learning path."
    )
    st.markdown("---")

    cfg = load_lp_config()
    charts = LearningPathsCharts()
    brands = cfg.get("brands", [])

    if not brands:
        st.error("No brands configured for Learning Paths. Check `config/viz_config.json`.")
        return

    # ── Filters ──────────────────────────────────────────────────────────
    st.markdown("### π― Filters")

    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 2])

    with filter_col1:
        brand = st.selectbox(
            "Brand", options=brands,
            index=0, key="lp_brand",
        )

    with filter_col2:
        view_mode = st.radio(
            "View Mode",
            options=_VIEWS, index=0, horizontal=True,
            key="lp_view_mode",
            help=(
                "**Per-Path**: each path's funnel resets to its own first-lesson count.\n\n"
                "**Method-Wide**: one continuous funnel using a single denominator "
                "(students who started Learning Path 1) β shows true end-to-end attrition."
            ),
        )

    with filter_col3:
        # The path selector can only be populated from a lesson map that was
        # fetched for the brand currently selected.
        prev_brand_key = st.session_state.get("lp_fetch_key", (None,))[0]
        prev_lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
        if not prev_lesson_map.empty and prev_brand_key == brand:
            available_paths = sorted(prev_lesson_map["learning_path_id"].unique().tolist())
            path_labels_opts = {pid: label_for_path(pid, cfg) for pid in available_paths}
            selected_paths = st.multiselect(
                "Learning Paths (leave empty = all)",
                options=available_paths,
                default=[],
                format_func=lambda pid: path_labels_opts[pid],
                key="lp_selected_paths",
            )
        else:
            selected_paths = []
            st.info("Fetch data to populate path selector.")

    st.markdown("---")

    # ── Fetch control ────────────────────────────────────────────────────
    fetch_key = (brand, view_mode)
    has_data = (
        st.session_state.get("lp_fetch_key") == fetch_key
        and "lp_lesson_map" in st.session_state
        and not st.session_state.get("lp_lesson_map", pd.DataFrame()).empty
    )

    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("π Fetch Data", type="primary",
                                  use_container_width=True, key="lp_fetch_btn")
    with info_col:
        if has_data:
            n_lessons = len(st.session_state.get("lp_lesson_map", pd.DataFrame()))
            st.success(f"✅ Loaded **{n_lessons:,}** lessons for **{brand}**")
        elif not fetch_clicked:
            st.info("π Select a brand and click **Fetch Data** to load learning path metrics.")

    if fetch_clicked:
        _fetch_all(data_loader, brand, fetch_key)
        st.rerun()

    if not has_data and not fetch_clicked:
        return

    # ── Build the merged per-lesson frame ────────────────────────────────
    lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
    per_path_df = st.session_state.get("lp_per_path", pd.DataFrame())
    method_df = st.session_state.get("lp_method_wide", pd.DataFrame())
    video_df = st.session_state.get("lp_video", pd.DataFrame())
    sentiment_df = st.session_state.get("lp_sentiment", pd.DataFrame())

    if view_mode == "Per-Path":
        merged = merge_lesson_metrics(lesson_map, per_path_df, video_df, sentiment_df)
    else:
        merged = merge_method_wide(method_df, video_df, sentiment_df, cfg)

    if merged.empty:
        st.warning("No data returned. Check your Snowflake connection.")
        return

    if view_mode == "Per-Path" and selected_paths:
        merged = filter_by_paths(merged, selected_paths)
        # The filter can remove every row; the sections below index into the
        # sorted path ids and would raise IndexError on an empty frame.
        if merged.empty:
            st.warning("No lessons found for the selected learning paths.")
            return

    if "learning_path_id" in merged.columns:
        # Copy first so the new column is never written onto a slice/view of
        # a frame that is still held in session state.
        merged = merged.copy()
        merged["path_label"] = merged["learning_path_id"].apply(
            lambda pid: label_for_path(pid, cfg)
        )

    # Values shared by several sections below.
    order_col = _lp_order_column(merged, view_mode)
    focus_pid = _lp_first_path_id(merged)
    metadata_df = _lp_selected_metadata(view_mode, selected_paths)

    # ── Overview KPIs ────────────────────────────────────────────────────
    st.markdown("### π Overview")
    kpis = get_overview_kpis(merged)

    k1, k2, k3, k4, k5, k6 = st.columns(6)
    k1.metric("Method Starters", f"{kpis.get('total_students', 0):,}")
    k2.metric("Avg Completion", f"{kpis.get('avg_completion_pct', 0):.1f}%")
    k3.metric("Avg Sentiment", f"{kpis.get('avg_sentiment_score', 0):.2f}",
              help="Scale: β2 (very negative) to +2 (very positive)")
    k4.metric("Total Comments", f"{kpis.get('total_comments', 0):,}")
    k5.metric("Learning Paths", f"{kpis.get('n_paths', 0)}")
    k6.metric("Total Lessons", f"{kpis.get('n_lessons', 0)}")

    st.markdown("---")

    # ── Engagement Journey ───────────────────────────────────────────────
    st.markdown("### π― Engagement Journey")

    if view_mode == "Per-Path":
        if "learning_path_id" in merged.columns:
            path_ids = sorted(merged["learning_path_id"].unique())
        else:
            path_ids = []

        if len(path_ids) > 1:
            # One tab per learning path.
            tabs = st.tabs([label_for_path(pid, cfg) for pid in path_ids])
            for tab, pid in zip(tabs, path_ids):
                with tab:
                    st.plotly_chart(
                        charts.create_dual_axis_engagement(
                            merged, path_id=pid,
                            title=f"Completion vs Sentiment β {label_for_path(pid, cfg)}",
                        ),
                        use_container_width=True, key=f"lp_dual_{pid}",
                    )
        elif path_ids:
            st.plotly_chart(
                charts.create_dual_axis_engagement(merged, path_id=path_ids[0]),
                use_container_width=True, key="lp_dual_single",
            )
        else:
            st.plotly_chart(
                charts.create_dual_axis_engagement(merged),
                use_container_width=True, key="lp_dual_all",
            )
    else:
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(charts.create_method_funnel(merged),
                            use_container_width=True, key="lp_method_funnel")
        with col2:
            st.plotly_chart(charts.create_method_sentiment_journey(merged),
                            use_container_width=True, key="lp_method_sent")

    st.markdown("---")

    # ── Completion Funnel ────────────────────────────────────────────────
    st.markdown("### π Completion Funnel")
    if view_mode == "Per-Path":
        funnel_x = "lesson_number" if "lesson_number" in merged.columns else "lesson_order"
        st.plotly_chart(charts.create_completion_funnel(merged, x_col=funnel_x),
                        use_container_width=True, key="lp_completion_funnel")
    else:
        st.info("Method-Wide funnel shown in the Engagement Journey section above.")

    st.markdown("---")

    # ── Video Completion Rate ────────────────────────────────────────────
    st.markdown("### π¬ Video Completion Rate")
    st.caption(
        "Of students who *started* a lesson video, what percentage finished it? "
        "This isolates whether the content itself holds attention."
    )
    st.plotly_chart(charts.create_video_engagement(merged, x_col=order_col),
                    use_container_width=True, key="lp_video_chart")

    st.markdown("---")

    # ── Comment volume ───────────────────────────────────────────────────
    st.markdown("### π Volume Analysis")
    st.caption("Total comments per lesson β shows where students are most engaged.")
    st.plotly_chart(
        charts.create_comment_volume_chart(merged, x_col=order_col),
        use_container_width=True, key="lp_volume_chart",
    )

    st.markdown("---")

    # ── Sentiment Journey ────────────────────────────────────────────────
    st.markdown("### π¬ Sentiment Journey")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_sentiment_journey(merged, x_col=order_col),
                        use_container_width=True, key="lp_sent_journey")
    with col2:
        # `is not None` rather than truthiness so a path id of 0 still gets
        # its own label.
        if focus_pid is not None:
            stacked_title = f"Sentiment Breakdown β {label_for_path(focus_pid, cfg)}"
        else:
            stacked_title = "Sentiment Breakdown"
        st.plotly_chart(
            charts.create_sentiment_stacked_bar(
                merged, x_col=order_col, path_id=focus_pid, title=stacked_title),
            use_container_width=True, key="lp_sent_stacked",
        )

    with st.expander("π Sentiment Heatmap", expanded=False):
        st.plotly_chart(
            charts.create_lesson_sentiment_heatmap(merged, path_id=focus_pid),
            use_container_width=True, key="lp_heatmap",
        )

    st.markdown("---")

    # ── Intent Analysis ──────────────────────────────────────────────────
    st.markdown("### π Intent Analysis")
    if metadata_df.empty or "intent" not in metadata_df.columns:
        st.info("No intent data available. Load data first.")
    else:
        _render_intent_emotion_tabs(metadata_df, "intent", cfg, "lp_intent")

    st.markdown("---")

    # ── Emotion Analysis ─────────────────────────────────────────────────
    st.markdown("### π Emotion Analysis")
    has_emotions = (
        not metadata_df.empty
        and "emotions" in metadata_df.columns
        and metadata_df["emotions"].notna().any()
    )
    if not has_emotions:
        st.info("No emotion data available. Emotions are extracted for newly processed comments.")
    else:
        _render_intent_emotion_tabs(metadata_df, "emotion", cfg, "lp_emotion")

    st.markdown("---")

    # ── Top drop-off points ──────────────────────────────────────────────
    st.markdown("### β οΈ Top Drop-off Points")
    dropoffs = find_top_dropoffs(merged, n=7, rate_col="completion_rate", order_col=order_col)
    if not dropoffs.empty:
        st.plotly_chart(charts.create_dropoff_chart(dropoffs),
                        use_container_width=True, key="lp_dropoffs")
    else:
        st.info("No significant lesson-to-lesson drop-offs detected.")

    st.markdown("---")

    # ── Demographics ─────────────────────────────────────────────────────
    st.markdown("### π₯ Demographics")

    commenter_demo = st.session_state.get("lp_commenter_demo", pd.DataFrame())
    student_demo = st.session_state.get("lp_student_demo", pd.DataFrame())
    # Deliberately the unfiltered metadata, as in the original implementation.
    meta_for_demo = st.session_state.get("lp_metadata", pd.DataFrame())

    demo_tab1, demo_tab2 = st.tabs(["π¬ Commenters", "π All Students"])

    with demo_tab1:
        if commenter_demo.empty:
            st.info("No commenter demographic data available.")
        else:
            _render_demographics(commenter_demo, meta_for_demo, "commenter")

    with demo_tab2:
        if student_demo.empty:
            st.info("No student demographic data available.")
        else:
            _render_demographics(student_demo, pd.DataFrame(), "student")

    st.markdown("---")

    # ── AI summary ───────────────────────────────────────────────────────
    st.markdown("### π€ AI Learning Journey Summary")
    st.markdown(
        "Generate an LLM-powered narrative describing the student experience "
        "through this learning path β sentiment arcs, retention patterns, and "
        "actionable recommendations for content designers."
    )

    # A cached summary is only shown for the exact filter combination it was
    # generated for.
    summary_key = (brand, view_mode, tuple(sorted(selected_paths)))
    summary_available = (
        "lp_summary" in st.session_state
        and st.session_state.get("lp_summary_key") == summary_key
        and st.session_state["lp_summary"] is not None
    )

    gen_col, _ = st.columns([1, 3])
    with gen_col:
        gen_clicked = st.button("π§  Generate AI Summary", type="primary",
                                use_container_width=True, key="lp_gen_summary")

    if gen_clicked:
        # NOTE(review): nothing visible in this module writes "lp_comments"
        # (per-lesson caches use "lp_comments_<content_id>"), so unless another
        # page populates it the agent receives an empty frame - confirm.
        comments_df = st.session_state.get("lp_comments", pd.DataFrame())
        with st.spinner("Analysing learning path data with AIβ¦ this may take 20β40 secondsβ¦"):
            agent = LearningPathsSummaryAgent()
            summary_pid = focus_pid if view_mode == "Per-Path" else None
            result = agent.process({
                "metrics": merged,
                "comments": comments_df,
                "brand": brand,
                "path_id": summary_pid,
                "path_label": (
                    label_for_path(summary_pid, cfg)
                    if summary_pid is not None else "Full Method"
                ),
            })
            st.session_state["lp_summary"] = result
            st.session_state["lp_summary_key"] = summary_key
        st.rerun()

    if summary_available:
        _render_summary(st.session_state["lp_summary"])

    st.markdown("---")

    # ── Per-lesson cards ─────────────────────────────────────────────────
    st.markdown("### π Per-Lesson Detail")
    st.caption("Expand any lesson to see the sentiment breakdown and sample comments.")
    _render_lesson_cards(merged, data_loader, brand, cfg)

    st.markdown("---")

    # ── CSV export ───────────────────────────────────────────────────────
    st.markdown("### πΎ Export Data")
    export_cols = [c for c in [
        "brand", "learning_path_id", "path_label", "lesson_order", "lesson_content_id",
        "content_title", "lesson_number", "students_completed", "denominator_students",
        "completion_rate", "total_starts", "total_completions", "video_completion_rate",
        "total_comments", "very_positive", "positive", "neutral",
        "negative", "very_negative", "avg_sentiment_score",
    ] if c in merged.columns]
    csv = merged[export_cols].to_csv(index=False)
    st.download_button(
        label="π₯ Download as CSV",
        data=csv,
        file_name=f"learning_paths_{brand}.csv",
        mime="text/csv",
        key="lp_csv_download",
    )


def _lp_order_column(merged: pd.DataFrame, view_mode: str) -> str:
    """Pick the x-axis column: the method-wide lesson number when available in Method-Wide view."""
    if view_mode == "Method-Wide" and "method_lesson_number" in merged.columns:
        return "method_lesson_number"
    return "lesson_order"


def _lp_first_path_id(merged: pd.DataFrame):
    """Return the smallest ``learning_path_id`` in *merged*, or ``None`` if there is none."""
    if "learning_path_id" not in merged.columns or merged.empty:
        return None
    return sorted(merged["learning_path_id"].unique())[0]


def _lp_selected_metadata(view_mode: str, selected_paths: list) -> pd.DataFrame:
    """Return the cached comment metadata, narrowed to the selected paths in Per-Path view."""
    metadata_df = st.session_state.get("lp_metadata", pd.DataFrame())
    # Guard on the column: an empty frame has no columns, and filtering on a
    # missing one raises KeyError before the "no data" message can be shown.
    if (
        view_mode == "Per-Path"
        and selected_paths
        and "learning_path_id" in metadata_df.columns
    ):
        metadata_df = metadata_df[metadata_df["learning_path_id"].isin(selected_paths)]
    return metadata_df
|
|
|
|
| |
| |
| |
|
|
def _fetch_all(loader: LearningPathsDataLoader, brand: str, fetch_key: tuple):
    """Run every learning-path query for *brand* and publish the results to session state.

    All queries are executed before anything is written, so an exception in
    any loader call leaves the previously loaded data and its ``lp_fetch_key``
    untouched instead of mixing frames from two different fetches.

    Args:
        loader: Data loader providing the ``load_*`` query methods.
        brand: Brand whose data is fetched.
        fetch_key: Value stored under ``lp_fetch_key`` to mark what the cached
            data belongs to.
    """
    with st.spinner(f"Fetching learning path data for {brand}β¦"):
        # Dict literals evaluate in order, so the queries run in the same
        # sequence as before.
        fetched = {
            "lp_lesson_map": loader.load_lesson_map(brand),
            "lp_per_path": loader.load_per_path_completion(brand),
            "lp_method_wide": loader.load_method_wide_completion(brand),
            "lp_video": loader.load_video_engagement(brand),
            "lp_sentiment": loader.load_lesson_sentiment(brand),
            "lp_metadata": loader.load_lesson_metadata(brand),
            "lp_commenter_demo": loader.load_lp_commenter_demographics(brand),
            "lp_student_demo": loader.load_lp_student_demographics(brand),
        }

        for state_key, frame in fetched.items():
            st.session_state[state_key] = frame
        st.session_state["lp_fetch_key"] = fetch_key

        # Anything derived from the previous data set is now stale.
        st.session_state.pop("lp_summary", None)
        st.session_state.pop("lp_summary_key", None)
        st.session_state["lp_drill_page"] = 1
|
|
|
|
def _render_lesson_cards(merged: pd.DataFrame, loader: LearningPathsDataLoader,
                         brand: str, cfg: dict):
    """Render the lesson cards ten per page, with a pager above and below.

    The current page lives in ``st.session_state["lp_drill_page"]``. It is
    clamped to the valid range on every run because the number of rows can
    shrink between runs, which would otherwise leave the user on an empty page
    with the "Next" button disabled.

    Args:
        merged: One row per lesson; each row is passed to
            ``_render_single_lesson_card``.
        loader: Forwarded to the cards for on-demand comment loading.
        brand: Brand the lessons belong to.
        cfg: Learning-paths config, forwarded to the cards.
    """
    if merged.empty:
        st.info("No lesson data available.")
        return

    per_page = 10
    total = len(merged)
    total_pages = max(1, (total + per_page - 1) // per_page)

    current = st.session_state.get("lp_drill_page", 1)
    st.session_state["lp_drill_page"] = min(max(int(current), 1), total_pages)

    paginated = total > per_page
    if paginated:
        _render_lesson_pager("top", total_pages, f" β {total:,} lessons")

    start = (st.session_state["lp_drill_page"] - 1) * per_page
    page_df = merged.iloc[start: start + per_page]

    for _, row in page_df.iterrows():
        _render_single_lesson_card(row, loader, brand, cfg)

    if paginated:
        _render_lesson_pager("bot", total_pages)


def _render_lesson_pager(position: str, total_pages: int, caption_suffix: str = "") -> None:
    """Draw one Previous / "Page x / y" / Next row; *position* keeps the widget keys unique."""
    prev_col, info_col, next_col = st.columns([1, 2, 1])
    page = st.session_state["lp_drill_page"]

    with prev_col:
        if st.button("⬅️ Previous", key=f"lp_prev_{position}", disabled=page <= 1):
            st.session_state["lp_drill_page"] = page - 1
            st.rerun()
    with info_col:
        st.markdown(
            f"<div style='text-align:center;padding-top:8px;'>"
            f"Page {page} / {total_pages}{caption_suffix}</div>",
            unsafe_allow_html=True,
        )
    with next_col:
        if st.button("Next ➡️", key=f"lp_next_{position}", disabled=page >= total_pages):
            st.session_state["lp_drill_page"] = page + 1
            st.rerun()
|
|
|
|
def _render_single_lesson_card(row: pd.Series, loader: LearningPathsDataLoader,
                               brand: str, cfg: dict):
    """Render one lesson as an expander with metrics and on-demand sample comments.

    Integer fields are read through ``_lp_int_or_zero`` so that a missing or
    NaN cell is shown as 0 instead of raising ``ValueError`` from ``int(nan)``.

    Args:
        row: One lesson row of the merged metrics frame.
        loader: Used to load sample comments when the button is clicked.
        brand: Brand passed to the comment query.
        cfg: Learning-paths config used to label the path.
    """
    path_label = label_for_path(row.get("learning_path_id"), cfg)
    order = _lp_int_or_zero(row.get("lesson_order", 0))
    title = short_title(row.get("content_title"), 60)
    comp = row.get("completion_rate")
    sent = row.get("avg_sentiment_score")
    vcr = row.get("video_completion_rate")
    comments_n = _lp_int_or_zero(row.get("total_comments", 0))
    content_id = _lp_int_or_zero(row.get("lesson_content_id", 0))

    header = f"{_lp_score_emoji(sent)} {path_label} βΊ L{order:02d}: {title}"
    if pd.notna(comp):
        header += f" | Completion: {comp*100:.1f}%"

    with st.expander(header, expanded=False):
        m1, m2, m3, m4 = st.columns(4)
        m1.metric("Completion", f"{comp*100:.1f}%" if pd.notna(comp) else "β")
        m2.metric("Sentiment Score", f"{sent:.2f}" if pd.notna(sent) else "β")
        m3.metric("Video Completion", f"{vcr*100:.1f}%" if pd.notna(vcr) else "β")
        m4.metric("Comments", f"{comments_n:,}")

        # One-line sentiment distribution, listing only non-zero buckets.
        sent_cols = ["very_positive", "positive", "neutral", "negative", "very_negative"]
        totals = {s: _lp_int_or_zero(row.get(s, 0)) for s in sent_cols}
        total_all = sum(totals.values())
        if total_all > 0:
            bar_parts = " | ".join(
                f"{s.replace('_', ' ').title()}: {totals[s]:,} "
                f"({totals[s]/total_all*100:.1f}%)"
                for s in sent_cols if totals[s] > 0
            )
            st.caption(f"Sentiment distribution: {bar_parts}")

        if comments_n > 0:
            cache_key = f"lp_comments_{content_id}"

            # Comments are queried once per lesson and then kept in session state.
            if st.button("π¬ Load Sample Comments", key=f"lp_load_comments_{content_id}"):
                with st.spinner("Loading commentsβ¦"):
                    if cache_key not in st.session_state:
                        st.session_state[cache_key] = loader.load_lesson_comments(
                            brand, [content_id],
                            max_per_lesson=20,
                        )

            if cache_key in st.session_state:
                cdf = st.session_state[cache_key]
                if not cdf.empty and "display_text" in cdf.columns:
                    polarity_emoji = {
                        "very_positive": "π’", "positive": "π©",
                        "neutral": "π‘", "negative": "π ",
                        "very_negative": "π΄",
                    }
                    for _, crow in cdf.iterrows():
                        emoji = polarity_emoji.get(
                            crow.get("sentiment_polarity", "neutral"), "βͺ")
                        txt = str(crow.get("display_text", "")).strip()
                        if txt:
                            st.markdown(f"{emoji} {txt}")


def _lp_int_or_zero(value) -> int:
    """Coerce a possibly-missing numeric cell to ``int``, treating ``None``/NaN as 0."""
    if value is None or pd.isna(value):
        return 0
    return int(value)


def _lp_score_emoji(score) -> str:
    """Map an average sentiment score to the header marker; missing scores get the neutral marker."""
    if pd.isna(score):
        return "βͺ"
    if score >= 1.0:
        return "π’"
    if score >= 0.0:
        return "π‘"
    if score >= -1.0:
        return "π "
    return "π΄"
|
|
|
|
def _render_intent_emotion_tabs(
    metadata_df: pd.DataFrame,
    analysis_type: str,
    cfg: dict,
    key_prefix: str,
):
    """Draw an "Overall" tab plus one tab per learning path with a bar and a pie chart.

    ``analysis_type == "intent"`` selects the intent charts; any other value
    selects the emotion charts. ``key_prefix`` namespaces the chart keys so the
    intent and emotion sections do not collide.
    """
    dist_charts = DistributionCharts()

    if "learning_path_id" in metadata_df.columns:
        path_ids = sorted(metadata_df["learning_path_id"].unique())
    else:
        path_ids = []

    # The tab captions double as the chart-title suffixes.
    titles = ["Overall"] + [label_for_path(pid, cfg) for pid in path_ids]
    tabs = st.tabs(list(titles))

    subsets = [metadata_df]
    subsets.extend(
        metadata_df[metadata_df["learning_path_id"] == pid] for pid in path_ids
    )

    # Resolve the chart builders once instead of branching inside the loop.
    if analysis_type == "intent":
        heading = "Intent"
        make_bar = dist_charts.create_intent_bar_chart
        make_pie = dist_charts.create_intent_pie_chart
    else:
        heading = "Emotion"
        make_bar = dist_charts.create_emotion_bar_chart
        make_pie = dist_charts.create_emotion_pie_chart

    for idx, (tab, subset, title) in enumerate(zip(tabs, subsets, titles)):
        with tab:
            left, right = st.columns(2)
            with left:
                st.plotly_chart(
                    make_bar(subset, f"{heading} β {title}"),
                    use_container_width=True, key=f"{key_prefix}_bar_{idx}",
                )
            with right:
                st.plotly_chart(
                    make_pie(subset, f"{heading} β {title}"),
                    use_container_width=True, key=f"{key_prefix}_pie_{idx}",
                )
|
|
|
|
def _render_demographics(
    demo_df: pd.DataFrame,
    metadata_df: pd.DataFrame,
    demo_type: str,
):
    """Render coverage metrics plus age and experience distributions.

    For commenters (``demo_type == "commenter"``) with non-empty metadata, the
    metadata is joined to the demographics on the user id so that sentiment
    can be broken down per demographic group next to each distribution.

    Values that are NaN or the literal ``"Unknown"`` are treated as missing in
    both the coverage metrics and the charts.

    Args:
        demo_df: One row per person; may carry ``user_id``, ``age_group``,
            ``experience_group`` and ``experience_level``.
        metadata_df: Comment metadata; used only for the commenter sentiment
            breakdown and may be empty.
        demo_type: ``"commenter"`` or anything else (treated as students).
    """
    demo_charts = DemographicCharts()
    has_sentiment = demo_type == "commenter" and not metadata_df.empty

    # Join comment metadata to demographics for the sentiment breakdowns.
    merged_for_sent = pd.DataFrame()
    if has_sentiment and "author_id" in metadata_df.columns and "user_id" in demo_df.columns:
        # Only carry over the demographic columns that actually exist;
        # selecting a missing one would raise KeyError.
        demo_cols = [c for c in ("age_group", "experience_group") if c in demo_df.columns]
        meta = metadata_df.assign(_uid=metadata_df["author_id"].astype(str))
        dem = demo_df.assign(_uid=demo_df["user_id"].astype(str))
        merged_for_sent = meta.merge(dem[["_uid", *demo_cols]], on="_uid", how="left")

    # The sentiment breakdown also needs the polarity column to be present.
    sentiment_ready = (
        has_sentiment
        and not merged_for_sent.empty
        and "sentiment_polarity" in merged_for_sent.columns
    )

    # ── Coverage metrics ─────────────────────────────────────────────────
    label = "Commenters" if demo_type == "commenter" else "Students"
    total = len(demo_df)
    with_age = (
        int(_lp_known_mask(demo_df["age_group"]).sum())
        if "age_group" in demo_df.columns else 0
    )
    with_exp = (
        int(_lp_known_mask(demo_df["experience_group"]).sum())
        if "experience_group" in demo_df.columns else 0
    )

    m1, m2, m3 = st.columns(3)
    m1.metric(f"Total {label}", f"{total:,}")
    m2.metric("With Age Data", f"{with_age:,} ({with_age/total*100:.0f}%)" if total else "0")
    m3.metric("With Experience Data", f"{with_exp:,} ({with_exp/total*100:.0f}%)" if total else "0")

    st.markdown("---")

    # ── Age ──────────────────────────────────────────────────────────────
    st.markdown("#### π Age Distribution")
    if "age_group" in demo_df.columns:
        age_valid = demo_df[_lp_known_mask(demo_df["age_group"])]
        if not age_valid.empty:
            age_dist = _lp_value_distribution(age_valid["age_group"], "age_group")

            if sentiment_ready and "age_group" in merged_for_sent.columns:
                age_sent = _compute_demo_by_sentiment(merged_for_sent, "age_group")
                col1, col2 = st.columns(2)
                with col1:
                    st.plotly_chart(
                        demo_charts.create_age_distribution_chart(age_dist, f"Age Distribution β {label}"),
                        use_container_width=True, key=f"lp_{demo_type}_age_dist",
                    )
                with col2:
                    if not age_sent.empty:
                        st.plotly_chart(
                            demo_charts.create_age_sentiment_chart(age_sent, f"Sentiment by Age β {label}"),
                            use_container_width=True, key=f"lp_{demo_type}_age_sent",
                        )
            else:
                st.plotly_chart(
                    demo_charts.create_age_distribution_chart(age_dist, f"Age Distribution β {label}"),
                    use_container_width=True, key=f"lp_{demo_type}_age_dist",
                )
        else:
            st.info("No age data available.")
    else:
        st.info("Age data not loaded.")

    st.markdown("---")

    # ── Experience ───────────────────────────────────────────────────────
    st.markdown("#### π― Experience Level Distribution")
    if "experience_group" in demo_df.columns:
        exp_valid = demo_df[_lp_known_mask(demo_df["experience_group"])]
        if not exp_valid.empty:
            exp_grouped = _lp_value_distribution(exp_valid["experience_group"], "experience_group")

            exp_detailed = pd.DataFrame()
            if "experience_level" in demo_df.columns:
                exp_det_valid = demo_df[demo_df["experience_level"].notna()]
                if not exp_det_valid.empty:
                    exp_detailed = _lp_value_distribution(
                        exp_det_valid["experience_level"], "experience_level"
                    )

            tab_det, tab_grp = st.tabs(["π Detailed (0β10)", "π Grouped"])

            with tab_det:
                if not exp_detailed.empty:
                    st.plotly_chart(
                        demo_charts.create_experience_distribution_chart(
                            exp_detailed, f"Experience (0β10) β {label}", use_groups=False
                        ),
                        use_container_width=True, key=f"lp_{demo_type}_exp_det",
                    )
                else:
                    st.info("No detailed experience data available.")

            with tab_grp:
                if sentiment_ready and "experience_group" in merged_for_sent.columns:
                    exp_sent = _compute_demo_by_sentiment(merged_for_sent, "experience_group")
                    col1, col2 = st.columns(2)
                    with col1:
                        st.plotly_chart(
                            demo_charts.create_experience_distribution_chart(
                                exp_grouped, f"Experience Groups β {label}", use_groups=True
                            ),
                            use_container_width=True, key=f"lp_{demo_type}_exp_grp",
                        )
                    with col2:
                        if not exp_sent.empty:
                            st.plotly_chart(
                                demo_charts.create_experience_sentiment_chart(
                                    exp_sent, f"Sentiment by Experience β {label}", use_groups=True
                                ),
                                use_container_width=True, key=f"lp_{demo_type}_exp_sent",
                            )
                else:
                    st.plotly_chart(
                        demo_charts.create_experience_distribution_chart(
                            exp_grouped, f"Experience Groups β {label}", use_groups=True
                        ),
                        use_container_width=True, key=f"lp_{demo_type}_exp_grp_only",
                    )
        else:
            st.info("No experience data available.")
    else:
        st.info("Experience data not loaded.")


def _lp_known_mask(series: pd.Series) -> pd.Series:
    """Boolean mask of entries that are neither NaN nor the ``"Unknown"`` placeholder."""
    return series.notna() & (series != "Unknown")


def _lp_value_distribution(series: pd.Series, name: str) -> pd.DataFrame:
    """Return a ``[name, count, percentage]`` frame with the value counts of *series*."""
    dist = series.value_counts().reset_index()
    dist.columns = [name, "count"]
    dist["percentage"] = (dist["count"] / dist["count"].sum() * 100).round(2)
    return dist
|
|
|
|
| def _compute_demo_by_sentiment(merged_df: pd.DataFrame, field: str) -> pd.DataFrame: |
| """Return sentiment distribution per demographic group for a merged metadata+demo frame.""" |
| valid = merged_df[ |
| merged_df[field].notna() & (merged_df[field] != "Unknown") |
| & merged_df["sentiment_polarity"].notna() |
| ] |
| if valid.empty: |
| return pd.DataFrame() |
| grp = valid.groupby([field, "sentiment_polarity"], as_index=False).size().rename(columns={"size": "count"}) |
| grp["percentage"] = grp.groupby(field)["count"].transform( |
| lambda x: (x / x.sum() * 100).round(2) |
| ) |
| return grp |
|
|
|
|
def _render_summary(result: dict):
    """Display the result dict produced by ``LearningPathsSummaryAgent.process``.

    A result without a truthy ``"success"`` flag is shown as an error.
    Otherwise the executive summary is rendered, followed by two columns of
    optional sections (each skipped when its list is empty) and a collapsed
    metadata panel.
    """
    if not result.get("success"):
        st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}")
        return

    summary = result.get("summary", {})
    metadata = result.get("metadata", {})

    def bullet_section(heading: str, field: str) -> None:
        # Render one optional bullet list taken from the summary dict.
        entries = summary.get(field, [])
        if not entries:
            return
        st.markdown(heading)
        for entry in entries:
            st.markdown(f"- {entry}")

    st.markdown("---")
    st.markdown("#### π Executive Summary")
    st.info(summary.get("executive_summary", ""))

    left_col, right_col = st.columns(2)

    with left_col:
        phases = summary.get("journey_arc", [])
        if phases:
            st.markdown("#### πΊοΈ Journey Arc")
            for phase in phases:
                st.markdown(
                    f"**{phase.get('phase', '')}** \n{phase.get('description', '')}"
                )
                st.markdown("")

        bullet_section("#### π¬ Sentiment Insights", "sentiment_insights")
        bullet_section("#### β¨ Content Highlights", "content_highlights")

    with right_col:
        bullet_section("#### π Retention Insights", "retention_insights")
        bullet_section("#### π― Recommendations", "recommendations")

    with st.expander("βΉοΈ Analysis Metadata"):
        lessons_col, model_col, tokens_col = st.columns(3)
        lessons_col.metric("Lessons Analysed", metadata.get("lessons_analyzed", 0))
        model_col.metric("Model Used", metadata.get("model_used", "N/A"))
        tokens_col.metric("Tokens Used", metadata.get("tokens_used", 0))