Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /components /learning_paths.py

Danialebrat

Adding Learning path page and improving HelpScout dashboard

599973c 9 days ago

raw

history blame contribute delete

37.4 kB

	"""
	Learning Paths Page
	Analyzes ordered sequences of lessons within Musora app brands.
	Shows how engagement and sentiment evolve as students progress through a path.

	Architecture:
	- Filter panel → Fetch button → session state → charts + cards
	- Per-Path view: one funnel per learning path, denominator resets per path
	- Method-Wide view: continuous funnel across all paths with one shared denominator
	- All data is `lp_`-prefixed in session state to avoid collision with other pages
	"""
	import sys
	from pathlib import Path
	from typing import Optional

	import pandas as pd
	import streamlit as st

	parent_dir = Path(__file__).resolve().parent.parent
	sys.path.append(str(parent_dir))

	from data.learning_paths_data_loader import LearningPathsDataLoader
	from utils.learning_paths_utils import (
	merge_lesson_metrics, merge_method_wide, find_top_dropoffs,
	get_overview_kpis, filter_by_paths, label_for_path, short_title,
	load_lp_config,
	)
	from visualizations.learning_paths_charts import LearningPathsCharts
	from visualizations.distribution_charts import DistributionCharts
	from visualizations.demographic_charts import DemographicCharts
	from agents.learning_paths_summary_agent import LearningPathsSummaryAgent

	# Options offered by the "View Mode" radio; the radio is created with index=0,
	# so the first entry is the default selection.
	_VIEWS = ["Per-Path", "Method-Wide"]


	def _lp_order_column(merged: pd.DataFrame, view_mode: str) -> str:
	    """Return the x-axis column used by lesson-ordered charts for *view_mode*."""
	    if view_mode == "Method-Wide" and "method_lesson_number" in merged.columns:
	        return "method_lesson_number"
	    return "lesson_order"


	def _lp_first_path_id(merged: pd.DataFrame):
	    """Return the smallest non-null ``learning_path_id`` in *merged*, or None."""
	    if "learning_path_id" not in merged.columns:
	        return None
	    path_ids = sorted(merged["learning_path_id"].dropna().unique())
	    return path_ids[0] if path_ids else None


	def _lp_metadata_for_paths(metadata_df: pd.DataFrame, view_mode: str,
	                           selected_paths: list) -> pd.DataFrame:
	    """Restrict comment metadata to the selected paths (Per-Path view only).

	    The frame is returned unchanged when no filter applies or when it has no
	    ``learning_path_id`` column (e.g. an empty, column-less frame), instead
	    of raising ``KeyError``.
	    """
	    if (
	        view_mode == "Per-Path"
	        and selected_paths
	        and "learning_path_id" in metadata_df.columns
	    ):
	        return metadata_df[metadata_df["learning_path_id"].isin(selected_paths)]
	    return metadata_df


	def render_learning_paths(data_loader: LearningPathsDataLoader):
	    """Main entry point for the Learning Paths page.

	    Renders, top to bottom: the filter panel, the fetch button, overview KPIs,
	    engagement / funnel / video / volume / sentiment charts, intent and emotion
	    tabs, drop-off points, demographics, the AI summary, per-lesson cards and a
	    CSV export. Data is read from ``lp_``-prefixed session-state keys that
	    ``_fetch_all`` populates.

	    Args:
	        data_loader: Loader passed to ``_fetch_all`` and to the lesson cards
	            (for on-demand comment loading).

	    Returns:
	        None. The function returns early when no brands are configured, when
	        no data has been fetched for the current (brand, view mode) pair, or
	        when the merged frame is empty.
	    """
	    st.title("📚 Learning Paths")
	    st.markdown(
	        "Analyze ordered lesson sequences — see how student engagement and sentiment "
	        "evolve as they progress through each learning path."
	    )
	    st.markdown("---")

	    cfg = load_lp_config()
	    charts = LearningPathsCharts()
	    brands = cfg.get("brands", [])

	    if not brands:
	        st.error("No brands configured for Learning Paths. Check `config/viz_config.json`.")
	        return

	    # ── Filter panel ─────────────────────────────────────────────────────────
	    st.markdown("### 🎯 Filters")

	    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 2])

	    with filter_col1:
	        brand = st.selectbox(
	            "Brand", options=brands,
	            index=0, key="lp_brand",
	        )

	    with filter_col2:
	        view_mode = st.radio(
	            "View Mode",
	            options=_VIEWS, index=0, horizontal=True,
	            key="lp_view_mode",
	            help=(
	                "Per-Path: each path's funnel resets to its own first-lesson count.\n\n"
	                "Method-Wide: one continuous funnel using a single denominator "
	                "(students who started Learning Path 1) — shows true end-to-end attrition."
	            ),
	        )

	    with filter_col3:
	        # Path selector — only populated once a lesson map for this brand
	        # has been fetched (first element of the stored fetch key is the brand).
	        prev_brand_key = st.session_state.get("lp_fetch_key", (None,))[0]
	        prev_lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
	        if not prev_lesson_map.empty and prev_brand_key == brand:
	            available_paths = sorted(prev_lesson_map["learning_path_id"].unique().tolist())
	            path_labels_opts = {pid: label_for_path(pid, cfg) for pid in available_paths}
	            selected_paths = st.multiselect(
	                "Learning Paths (leave empty = all)",
	                options=available_paths,
	                default=[],
	                format_func=lambda pid: path_labels_opts[pid],
	                key="lp_selected_paths",
	            )
	        else:
	            selected_paths = []
	            st.info("Fetch data to populate path selector.")

	    st.markdown("---")

	    # ── Fetch key & stale check ───────────────────────────────────────────────
	    # Data counts as loaded only if it was fetched for this exact
	    # (brand, view mode) pair and the lesson map is non-empty.
	    fetch_key = (brand, view_mode)
	    has_data = (
	        st.session_state.get("lp_fetch_key") == fetch_key
	        and "lp_lesson_map" in st.session_state
	        and not st.session_state.get("lp_lesson_map", pd.DataFrame()).empty
	    )

	    fetch_col, info_col = st.columns([1, 3])
	    with fetch_col:
	        fetch_clicked = st.button("🚀 Fetch Data", type="primary",
	                                  use_container_width=True, key="lp_fetch_btn")
	    with info_col:
	        if has_data:
	            n_lessons = len(st.session_state.get("lp_lesson_map", pd.DataFrame()))
	            st.success(f"✅ Loaded {n_lessons:,} lessons for {brand}")
	        elif not fetch_clicked:
	            st.info("👆 Select a brand and click Fetch Data to load learning path metrics.")

	    if fetch_clicked:
	        _fetch_all(data_loader, brand, fetch_key)
	        st.rerun()

	    if not has_data and not fetch_clicked:
	        return

	    # ── Load merged frames ───────────────────────────────────────────────────
	    lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
	    per_path_df = st.session_state.get("lp_per_path", pd.DataFrame())
	    method_df = st.session_state.get("lp_method_wide", pd.DataFrame())
	    video_df = st.session_state.get("lp_video", pd.DataFrame())
	    sentiment_df = st.session_state.get("lp_sentiment", pd.DataFrame())

	    if view_mode == "Per-Path":
	        merged = merge_lesson_metrics(lesson_map, per_path_df, video_df, sentiment_df)
	    else:
	        merged = merge_method_wide(method_df, video_df, sentiment_df, cfg)

	    if merged.empty:
	        st.warning("No data returned. Check your Snowflake connection.")
	        return

	    # Apply path filter (Per-Path only)
	    if view_mode == "Per-Path" and selected_paths:
	        merged = filter_by_paths(merged, selected_paths)
	        if merged.empty:
	            # Later sections index the first path id; stop here rather than
	            # failing on an empty frame.
	            st.warning("No lessons match the selected learning paths.")
	            return

	    # Add path labels. `assign` returns a new frame, so a filtered slice is
	    # never written to in place.
	    if "learning_path_id" in merged.columns:
	        merged = merged.assign(
	            path_label=merged["learning_path_id"].apply(
	                lambda pid: label_for_path(pid, cfg)
	            )
	        )

	    # Shared by every lesson-ordered chart below.
	    order_col = _lp_order_column(merged, view_mode)
	    focus_pid = _lp_first_path_id(merged)

	    # ── Overview KPIs ─────────────────────────────────────────────────────────
	    st.markdown("### 📊 Overview")
	    kpis = get_overview_kpis(merged)

	    k1, k2, k3, k4, k5, k6 = st.columns(6)
	    k1.metric("Method Starters", f"{kpis.get('total_students', 0):,}")
	    k2.metric("Avg Completion", f"{kpis.get('avg_completion_pct', 0):.1f}%")
	    k3.metric("Avg Sentiment", f"{kpis.get('avg_sentiment_score', 0):.2f}",
	              help="Scale: −2 (very negative) to +2 (very positive)")
	    k4.metric("Total Comments", f"{kpis.get('total_comments', 0):,}")
	    k5.metric("Learning Paths", f"{kpis.get('n_paths', 0)}")
	    k6.metric("Total Lessons", f"{kpis.get('n_lessons', 0)}")

	    st.markdown("---")

	    # ── Headline: Dual-Axis Engagement ───────────────────────────────────────
	    st.markdown("### 🎯 Engagement Journey")

	    if view_mode == "Per-Path":
	        path_ids = sorted(merged["learning_path_id"].unique()) \
	            if "learning_path_id" in merged.columns else []
	        if len(path_ids) > 1:
	            # One tab per learning path.
	            tab_labels = [label_for_path(pid, cfg) for pid in path_ids]
	            tabs = st.tabs(tab_labels)
	            for tab, pid in zip(tabs, path_ids):
	                with tab:
	                    st.plotly_chart(
	                        charts.create_dual_axis_engagement(
	                            merged, path_id=pid,
	                            title=f"Completion vs Sentiment — {label_for_path(pid, cfg)}"),
	                        use_container_width=True, key=f"lp_dual_{pid}",
	                    )
	        elif path_ids:
	            st.plotly_chart(
	                charts.create_dual_axis_engagement(merged, path_id=path_ids[0]),
	                use_container_width=True, key="lp_dual_single",
	            )
	        else:
	            st.plotly_chart(
	                charts.create_dual_axis_engagement(merged),
	                use_container_width=True, key="lp_dual_all",
	            )
	    else:
	        col1, col2 = st.columns(2)
	        with col1:
	            st.plotly_chart(charts.create_method_funnel(merged),
	                            use_container_width=True, key="lp_method_funnel")
	        with col2:
	            st.plotly_chart(charts.create_method_sentiment_journey(merged),
	                            use_container_width=True, key="lp_method_sent")

	    st.markdown("---")

	    # ── Completion Funnel ─────────────────────────────────────────────────────
	    st.markdown("### 📉 Completion Funnel")
	    if view_mode == "Per-Path":
	        funnel_x = "lesson_number" if "lesson_number" in merged.columns else "lesson_order"
	        st.plotly_chart(charts.create_completion_funnel(merged, x_col=funnel_x),
	                        use_container_width=True, key="lp_completion_funnel")
	    else:
	        st.info("Method-Wide funnel shown in the Engagement Journey section above.")

	    st.markdown("---")

	    # ── Video Engagement ──────────────────────────────────────────────────────
	    st.markdown("### 🎬 Video Completion Rate")
	    st.caption(
	        "Of students who started a lesson video, what percentage finished it? "
	        "This isolates whether the content itself holds attention."
	    )
	    st.plotly_chart(charts.create_video_engagement(merged, x_col=order_col),
	                    use_container_width=True, key="lp_video_chart")

	    st.markdown("---")

	    # ── Volume Analysis ───────────────────────────────────────────────────────
	    st.markdown("### 📊 Volume Analysis")
	    st.caption("Total comments per lesson — shows where students are most engaged.")
	    st.plotly_chart(
	        charts.create_comment_volume_chart(merged, x_col=order_col),
	        use_container_width=True, key="lp_volume_chart",
	    )

	    st.markdown("---")

	    # ── Sentiment Journey ─────────────────────────────────────────────────────
	    st.markdown("### 💬 Sentiment Journey")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_sentiment_journey(merged, x_col=order_col),
	                        use_container_width=True, key="lp_sent_journey")
	    with col2:
	        # Stacked bar for the first path when a path id column exists;
	        # otherwise the chart receives path_id=None.
	        # `is not None` so that a path id of 0 still gets its own title.
	        stacked_title = (
	            f"Sentiment Breakdown — {label_for_path(focus_pid, cfg)}"
	            if focus_pid is not None else "Sentiment Breakdown"
	        )
	        st.plotly_chart(
	            charts.create_sentiment_stacked_bar(
	                merged, x_col=order_col, path_id=focus_pid, title=stacked_title),
	            use_container_width=True, key="lp_sent_stacked",
	        )

	    with st.expander("📊 Sentiment Heatmap", expanded=False):
	        st.plotly_chart(
	            charts.create_lesson_sentiment_heatmap(merged, path_id=focus_pid),
	            use_container_width=True, key="lp_heatmap",
	        )

	    st.markdown("---")

	    # Comment-level metadata, narrowed to the selected paths; shared by the
	    # intent and emotion sections.
	    metadata_df = _lp_metadata_for_paths(
	        st.session_state.get("lp_metadata", pd.DataFrame()),
	        view_mode, selected_paths,
	    )

	    # ── Intent Analysis ───────────────────────────────────────────────────────
	    st.markdown("### 🎭 Intent Analysis")
	    if metadata_df.empty or "intent" not in metadata_df.columns:
	        st.info("No intent data available. Load data first.")
	    else:
	        _render_intent_emotion_tabs(metadata_df, "intent", cfg, "lp_intent")

	    st.markdown("---")

	    # ── Emotion Analysis ──────────────────────────────────────────────────────
	    st.markdown("### 💭 Emotion Analysis")
	    has_emotions = (
	        not metadata_df.empty
	        and "emotions" in metadata_df.columns
	        and metadata_df["emotions"].notna().any()
	    )
	    if not has_emotions:
	        st.info("No emotion data available. Emotions are extracted for newly processed comments.")
	    else:
	        _render_intent_emotion_tabs(metadata_df, "emotion", cfg, "lp_emotion")

	    st.markdown("---")

	    # ── Drop-off Analysis ─────────────────────────────────────────────────────
	    st.markdown("### ⚠️ Top Drop-off Points")
	    dropoffs = find_top_dropoffs(merged, n=7, rate_col="completion_rate",
	                                 order_col=order_col)
	    if not dropoffs.empty:
	        st.plotly_chart(charts.create_dropoff_chart(dropoffs),
	                        use_container_width=True, key="lp_dropoffs")
	    else:
	        st.info("No significant lesson-to-lesson drop-offs detected.")

	    st.markdown("---")

	    # ── Demographics ──────────────────────────────────────────────────────────
	    st.markdown("### 👥 Demographics")

	    commenter_demo = st.session_state.get("lp_commenter_demo", pd.DataFrame())
	    student_demo = st.session_state.get("lp_student_demo", pd.DataFrame())
	    # Demographics use the unfiltered metadata (path selection is not applied).
	    meta_for_demo = st.session_state.get("lp_metadata", pd.DataFrame())

	    demo_tab1, demo_tab2 = st.tabs(["💬 Commenters", "🎓 All Students"])

	    with demo_tab1:
	        if commenter_demo.empty:
	            st.info("No commenter demographic data available.")
	        else:
	            _render_demographics(commenter_demo, meta_for_demo, "commenter")

	    with demo_tab2:
	        if student_demo.empty:
	            st.info("No student demographic data available.")
	        else:
	            _render_demographics(student_demo, pd.DataFrame(), "student")

	    st.markdown("---")

	    # ── AI Summary ────────────────────────────────────────────────────────────
	    st.markdown("### 🤖 AI Learning Journey Summary")
	    st.markdown(
	        "Generate an LLM-powered narrative describing the student experience "
	        "through this learning path — sentiment arcs, retention patterns, and "
	        "actionable recommendations for content designers."
	    )

	    # A cached summary is shown only for the exact filter combination it was
	    # generated for.
	    summary_key = (brand, view_mode, tuple(sorted(selected_paths)))
	    summary_available = (
	        "lp_summary" in st.session_state
	        and st.session_state.get("lp_summary_key") == summary_key
	        and st.session_state["lp_summary"] is not None
	    )

	    gen_col, _ = st.columns([1, 3])
	    with gen_col:
	        gen_clicked = st.button("🧠 Generate AI Summary", type="primary",
	                                use_container_width=True, key="lp_gen_summary")

	    if gen_clicked:
	        # NOTE(review): nothing in this file writes "lp_comments" (lesson cards
	        # cache under "lp_comments_<content_id>"), so unless another module
	        # sets it the agent receives an empty comments frame — confirm.
	        comments_df = st.session_state.get("lp_comments", pd.DataFrame())
	        with st.spinner("Analysing learning path data with AI… this may take 20–40 seconds…"):
	            agent = LearningPathsSummaryAgent()
	            summary_pid = focus_pid if view_mode == "Per-Path" else None
	            result = agent.process({
	                "metrics": merged,
	                "comments": comments_df,
	                "brand": brand,
	                "path_id": summary_pid,
	                "path_label": (
	                    label_for_path(summary_pid, cfg)
	                    if summary_pid is not None else "Full Method"
	                ),
	            })
	            st.session_state["lp_summary"] = result
	            st.session_state["lp_summary_key"] = summary_key
	        st.rerun()

	    if summary_available:
	        _render_summary(st.session_state["lp_summary"])

	    st.markdown("---")

	    # ── Per-Lesson Drill-down ─────────────────────────────────────────────────
	    st.markdown("### 📖 Per-Lesson Detail")
	    st.caption("Expand any lesson to see the sentiment breakdown and sample comments.")
	    _render_lesson_cards(merged, data_loader, brand, cfg)

	    st.markdown("---")

	    # ── Export CSV ────────────────────────────────────────────────────────────
	    st.markdown("### 💾 Export Data")
	    # Export only the known columns that are actually present, in this order.
	    export_cols = [c for c in [
	        "brand", "learning_path_id", "path_label", "lesson_order", "lesson_content_id",
	        "content_title", "lesson_number", "students_completed", "denominator_students",
	        "completion_rate", "total_starts", "total_completions", "video_completion_rate",
	        "total_comments", "very_positive", "positive", "neutral",
	        "negative", "very_negative", "avg_sentiment_score",
	    ] if c in merged.columns]
	    csv = merged[export_cols].to_csv(index=False)
	    st.download_button(
	        label="📥 Download as CSV",
	        data=csv,
	        file_name=f"learning_paths_{brand}.csv",
	        mime="text/csv",
	        key="lp_csv_download",
	    )


	# ─────────────────────────────────────────────────────────────────────────────
	# Private helpers
	# ─────────────────────────────────────────────────────────────────────────────

	def _fetch_all(loader: LearningPathsDataLoader, brand: str, fetch_key: tuple):
	    """Run all queries for *brand* and store the results in session state.

	    All loader calls run first and session state is written only after every
	    one of them has returned. If a query raises, the exception propagates and
	    the previously stored frames and ``lp_fetch_key`` stay untouched, so the
	    page never shows a mix of frames from two different fetches.

	    Args:
	        loader: Data loader providing the ``load_*`` query methods.
	        brand: Brand passed to every loader call.
	        fetch_key: Value stored under ``lp_fetch_key`` to mark what the
	            stored data was fetched for.
	    """
	    with st.spinner(f"Fetching learning path data for {brand}…"):
	        # Dict literals evaluate in order, so the queries run in the order listed.
	        fetched = {
	            "lp_lesson_map": loader.load_lesson_map(brand),
	            "lp_per_path": loader.load_per_path_completion(brand),
	            "lp_method_wide": loader.load_method_wide_completion(brand),
	            "lp_video": loader.load_video_engagement(brand),
	            "lp_sentiment": loader.load_lesson_sentiment(brand),
	            "lp_metadata": loader.load_lesson_metadata(brand),
	            "lp_commenter_demo": loader.load_lp_commenter_demographics(brand),
	            "lp_student_demo": loader.load_lp_student_demographics(brand),
	        }

	        # Commit everything together, then mark the data as current.
	        for state_key, frame in fetched.items():
	            st.session_state[state_key] = frame
	        st.session_state["lp_fetch_key"] = fetch_key

	        # Invalidate prior summary when brand/mode changes
	        st.session_state.pop("lp_summary", None)
	        st.session_state.pop("lp_summary_key", None)
	        # Restart the lesson-card pagination for the new data set.
	        st.session_state["lp_drill_page"] = 1


	def _render_lesson_pager(position: str, total_pages: int, caption: str):
	    """Render one Previous / caption / Next row for the lesson cards.

	    *position* ("top" or "bot") is embedded in the button keys so both rows
	    can coexist on the page.
	    """
	    page = st.session_state["lp_drill_page"]
	    prev_col, caption_col, next_col = st.columns([1, 2, 1])
	    with prev_col:
	        if st.button("⬅️ Previous", key=f"lp_prev_{position}", disabled=page <= 1):
	            st.session_state["lp_drill_page"] = page - 1
	            st.rerun()
	    with caption_col:
	        st.markdown(
	            f"<div style='text-align:center;padding-top:8px;'>"
	            f"{caption}</div>",
	            unsafe_allow_html=True,
	        )
	    with next_col:
	        if st.button("Next ➡️", key=f"lp_next_{position}", disabled=page >= total_pages):
	            st.session_state["lp_drill_page"] = page + 1
	            st.rerun()


	def _render_lesson_cards(merged: pd.DataFrame, loader: LearningPathsDataLoader,
	                         brand: str, cfg: dict):
	    """Render paginated lesson cards (10 per page).

	    The current page lives in ``st.session_state["lp_drill_page"]`` and is
	    clamped to the valid range on every run. Without the clamp, a page number
	    left over from a larger frame (e.g. before a path filter was applied)
	    would select an empty slice, and with 10 or fewer rows no pager would be
	    shown to navigate back.

	    Args:
	        merged: One row per lesson; rows are rendered in frame order.
	        loader: Passed through to each card for on-demand comment loading.
	        brand: Brand passed through to each card.
	        cfg: Learning-paths config passed through to each card.
	    """
	    if merged.empty:
	        st.info("No lesson data available.")
	        return

	    per_page = 10
	    total = len(merged)
	    total_pages = max(1, (total + per_page - 1) // per_page)

	    # Clamp the stored page into [1, total_pages].
	    page = st.session_state.get("lp_drill_page", 1)
	    page = min(max(page, 1), total_pages)
	    st.session_state["lp_drill_page"] = page

	    show_pager = total > per_page

	    if show_pager:
	        _render_lesson_pager(
	            "top", total_pages,
	            f"Page {page} / {total_pages} — {total:,} lessons",
	        )

	    start = (page - 1) * per_page
	    page_df = merged.iloc[start: start + per_page]

	    for _, row in page_df.iterrows():
	        _render_single_lesson_card(row, loader, brand, cfg)

	    if show_pager:
	        _render_lesson_pager("bot", total_pages, f"Page {page} / {total_pages}")


	def _lp_int_or(value, default=0):
	    """Convert a scalar to ``int``, returning *default* for None or NaN."""
	    if value is None or pd.isna(value):
	        return default
	    return int(value)


	def _render_single_lesson_card(row: pd.Series, loader: LearningPathsDataLoader,
	                               brand: str, cfg: dict):
	    """Render one lesson expander card with metrics and on-demand comments.

	    Missing (None/NaN) numeric fields no longer raise: counts fall back to 0
	    and rate/score metrics are shown as "—". Lessons without a content id get
	    no comment loader, because the widget and cache keys are derived from it.

	    Args:
	        row: One lesson row; fields are read with ``row.get``.
	        loader: Used to load sample comments when the button is clicked.
	        brand: Brand passed to ``loader.load_lesson_comments``.
	        cfg: Learning-paths config used to label the path.
	    """
	    path_label = label_for_path(row.get("learning_path_id"), cfg)
	    order = _lp_int_or(row.get("lesson_order", 0))
	    title = short_title(row.get("content_title"), 60)
	    comp = row.get("completion_rate")
	    sent = row.get("avg_sentiment_score")
	    vcr = row.get("video_completion_rate")
	    comments_n = _lp_int_or(row.get("total_comments", 0))

	    # Traffic-light marker for the average sentiment score; white when unknown.
	    sent_emoji = "⚪"
	    if pd.notna(sent):
	        if sent >= 1.0:
	            sent_emoji = "🟢"
	        elif sent >= 0.0:
	            sent_emoji = "🟡"
	        elif sent >= -1.0:
	            sent_emoji = "🟠"
	        else:
	            sent_emoji = "🔴"

	    # "\\|" yields a literal backslash + pipe, the same text the previous
	    # (invalid) "\|" escape produced, without the SyntaxWarning.
	    header = f"{sent_emoji} {path_label} › L{order:02d}: {title}"
	    if pd.notna(comp):
	        header += f" \\| Completion: {comp*100:.1f}%"

	    content_id = _lp_int_or(row.get("lesson_content_id", 0), default=None)

	    with st.expander(header, expanded=False):
	        m1, m2, m3, m4 = st.columns(4)
	        m1.metric("Completion", f"{comp*100:.1f}%" if pd.notna(comp) else "—")
	        m2.metric("Sentiment Score", f"{sent:.2f}" if pd.notna(sent) else "—")
	        m3.metric("Video Completion", f"{vcr*100:.1f}%" if pd.notna(vcr) else "—")
	        m4.metric("Comments", f"{comments_n:,}")

	        # Sentiment mini-bar
	        sent_cols = ["very_positive", "positive", "neutral", "negative", "very_negative"]
	        totals = {s: _lp_int_or(row.get(s, 0)) for s in sent_cols}
	        total_all = sum(totals.values())
	        if total_all > 0:
	            bar_parts = " \\| ".join(
	                f"{s.replace('_', ' ').title()}: {totals[s]:,} "
	                f"({totals[s]/total_all*100:.1f}%)"
	                for s in sent_cols if totals[s] > 0
	            )
	            st.caption(f"Sentiment distribution: {bar_parts}")

	        # On-demand sample comments
	        if comments_n > 0 and content_id is not None:
	            cache_key = f"lp_comments_{content_id}"
	            if st.button("💬 Load Sample Comments", key=f"lp_load_comments_{content_id}"):
	                with st.spinner("Loading comments…"):
	                    # Query once per lesson; later clicks reuse the cached frame.
	                    if cache_key not in st.session_state:
	                        st.session_state[cache_key] = loader.load_lesson_comments(
	                            brand, [content_id],
	                            max_per_lesson=20,
	                        )

	            if cache_key in st.session_state:
	                cdf = st.session_state[cache_key]
	                if not cdf.empty and "display_text" in cdf.columns:
	                    polarity_emoji = {
	                        "very_positive": "🟢", "positive": "🟩",
	                        "neutral": "🟡", "negative": "🟠",
	                        "very_negative": "🔴",
	                    }
	                    for _, crow in cdf.iterrows():
	                        sent_pol = crow.get("sentiment_polarity", "neutral")
	                        emoji = polarity_emoji.get(sent_pol, "⚪")
	                        txt = str(crow.get("display_text", "")).strip()
	                        if txt:
	                            st.markdown(f"{emoji} {txt}")


	def _render_intent_emotion_tabs(
	    metadata_df: pd.DataFrame,
	    analysis_type: str,
	    cfg: dict,
	    key_prefix: str,
	):
	    """Render "Overall" plus one tab per learning path with a bar and a pie chart.

	    Args:
	        metadata_df: Comment-level metadata; when it has a
	            ``learning_path_id`` column, one extra tab is added per path id.
	        analysis_type: ``"intent"`` selects the intent charts; any other
	            value selects the emotion charts.
	        cfg: Learning-paths config used to label the paths.
	        key_prefix: Prefix for the Streamlit chart keys, so that the intent
	            and emotion sections do not collide.
	    """
	    dist_charts = DistributionCharts()

	    # Pick the chart builders once instead of branching inside every tab.
	    if analysis_type == "intent":
	        heading = "Intent"
	        make_bar = dist_charts.create_intent_bar_chart
	        make_pie = dist_charts.create_intent_pie_chart
	    else:
	        heading = "Emotion"
	        make_bar = dist_charts.create_emotion_bar_chart
	        make_pie = dist_charts.create_emotion_pie_chart

	    # Null path ids are dropped: they cannot be sorted reliably next to real
	    # ids and would only produce an empty tab. Their rows still count
	    # towards "Overall".
	    if "learning_path_id" in metadata_df.columns:
	        path_ids = sorted(metadata_df["learning_path_id"].dropna().unique())
	    else:
	        path_ids = []

	    # The same labels serve as tab captions and chart title suffixes.
	    titles = ["Overall"] + [label_for_path(pid, cfg) for pid in path_ids]
	    subsets = [metadata_df] + [
	        metadata_df[metadata_df["learning_path_id"] == pid] for pid in path_ids
	    ]
	    tabs = st.tabs(titles)

	    for i, (tab, subset, title) in enumerate(zip(tabs, subsets, titles)):
	        with tab:
	            col1, col2 = st.columns(2)
	            with col1:
	                st.plotly_chart(
	                    make_bar(subset, f"{heading} — {title}"),
	                    use_container_width=True, key=f"{key_prefix}_bar_{i}",
	                )
	            with col2:
	                st.plotly_chart(
	                    make_pie(subset, f"{heading} — {title}"),
	                    use_container_width=True, key=f"{key_prefix}_pie_{i}",
	                )


	def _lp_known(series: pd.Series) -> pd.Series:
	    """Boolean mask of entries that are neither missing nor ``"Unknown"``."""
	    return series.notna() & (series != "Unknown")


	def _lp_share_table(series: pd.Series, column: str) -> pd.DataFrame:
	    """Count each value of *series* and add its percentage of the total."""
	    table = series.value_counts().reset_index()
	    table.columns = [column, "count"]
	    table["percentage"] = (table["count"] / table["count"].sum() * 100).round(2)
	    return table


	def _render_demographics(
	    demo_df: pd.DataFrame,
	    metadata_df: pd.DataFrame,
	    demo_type: str,
	):
	    """Render age and experience distribution charts for commenters or students.

	    Missing values are treated like the ``"Unknown"`` placeholder everywhere,
	    matching the sentiment cross-tab logic, so NaN rows are not counted as
	    "with data".

	    Args:
	        demo_df: One row per person with optional ``user_id``, ``age_group``,
	            ``experience_group`` and ``experience_level`` columns.
	        metadata_df: Comment metadata used for the sentiment cross-tabs; only
	            used when *demo_type* is ``"commenter"``.
	        demo_type: ``"commenter"`` or ``"student"``; selects the labels, the
	            chart keys and whether sentiment cross-tabs are attempted.
	    """
	    demo_charts = DemographicCharts()
	    has_sentiment = demo_type == "commenter" and not metadata_df.empty

	    # Merge metadata with demo data for sentiment cross-tabs (commenters only).
	    # Only demographic columns that exist are joined, and the demo side is
	    # de-duplicated per user so the left join cannot multiply comment rows.
	    merged_for_sent = pd.DataFrame()
	    demo_cols = [c for c in ("age_group", "experience_group") if c in demo_df.columns]
	    if (
	        has_sentiment
	        and demo_cols
	        and "author_id" in metadata_df.columns
	        and "user_id" in demo_df.columns
	    ):
	        meta = metadata_df.copy()
	        meta["_uid"] = meta["author_id"].astype(str)
	        dem = demo_df[demo_cols].copy()
	        dem["_uid"] = demo_df["user_id"].astype(str)
	        dem = dem.drop_duplicates("_uid")
	        merged_for_sent = meta.merge(dem, on="_uid", how="left")

	    # Cross-tabs need at least one merged row and a polarity column.
	    can_cross_tab = (
	        has_sentiment
	        and not merged_for_sent.empty
	        and "sentiment_polarity" in merged_for_sent.columns
	    )

	    # ── Summary metrics ───────────────────────────────────────────
	    label = "Commenters" if demo_type == "commenter" else "Students"
	    total = len(demo_df)
	    with_age = int(_lp_known(demo_df["age_group"]).sum()) \
	        if "age_group" in demo_df.columns else 0
	    with_exp = int(_lp_known(demo_df["experience_group"]).sum()) \
	        if "experience_group" in demo_df.columns else 0

	    m1, m2, m3 = st.columns(3)
	    m1.metric(f"Total {label}", f"{total:,}")
	    m2.metric("With Age Data", f"{with_age:,} ({with_age/total*100:.0f}%)" if total else "0")
	    m3.metric("With Experience Data", f"{with_exp:,} ({with_exp/total*100:.0f}%)" if total else "0")

	    st.markdown("---")

	    # ── Age Distribution ──────────────────────────────────────────
	    st.markdown("#### 🎂 Age Distribution")
	    if "age_group" in demo_df.columns:
	        age_valid = demo_df[_lp_known(demo_df["age_group"])]
	        if not age_valid.empty:
	            age_dist = _lp_share_table(age_valid["age_group"], "age_group")

	            if can_cross_tab and "age_group" in merged_for_sent.columns:
	                age_sent = _compute_demo_by_sentiment(merged_for_sent, "age_group")
	                col1, col2 = st.columns(2)
	                with col1:
	                    st.plotly_chart(
	                        demo_charts.create_age_distribution_chart(age_dist, f"Age Distribution — {label}"),
	                        use_container_width=True, key=f"lp_{demo_type}_age_dist",
	                    )
	                with col2:
	                    if not age_sent.empty:
	                        st.plotly_chart(
	                            demo_charts.create_age_sentiment_chart(age_sent, f"Sentiment by Age — {label}"),
	                            use_container_width=True, key=f"lp_{demo_type}_age_sent",
	                        )
	            else:
	                st.plotly_chart(
	                    demo_charts.create_age_distribution_chart(age_dist, f"Age Distribution — {label}"),
	                    use_container_width=True, key=f"lp_{demo_type}_age_dist",
	                )
	        else:
	            st.info("No age data available.")
	    else:
	        st.info("Age data not loaded.")

	    st.markdown("---")

	    # ── Experience Level Distribution ─────────────────────────────
	    st.markdown("#### 🎯 Experience Level Distribution")
	    if "experience_group" in demo_df.columns:
	        exp_valid = demo_df[_lp_known(demo_df["experience_group"])]
	        if not exp_valid.empty:
	            exp_grouped = _lp_share_table(exp_valid["experience_group"], "experience_group")

	            # Detailed per-level breakdown, when the raw level column exists.
	            exp_detailed = pd.DataFrame()
	            if "experience_level" in demo_df.columns:
	                exp_det_valid = demo_df[demo_df["experience_level"].notna()]
	                if not exp_det_valid.empty:
	                    exp_detailed = _lp_share_table(
	                        exp_det_valid["experience_level"], "experience_level"
	                    )

	            tab_det, tab_grp = st.tabs(["📊 Detailed (0–10)", "📊 Grouped"])

	            with tab_det:
	                if not exp_detailed.empty:
	                    st.plotly_chart(
	                        demo_charts.create_experience_distribution_chart(
	                            exp_detailed, f"Experience (0–10) — {label}", use_groups=False
	                        ),
	                        use_container_width=True, key=f"lp_{demo_type}_exp_det",
	                    )
	                else:
	                    st.info("No detailed experience data available.")

	            with tab_grp:
	                if can_cross_tab and "experience_group" in merged_for_sent.columns:
	                    exp_sent = _compute_demo_by_sentiment(merged_for_sent, "experience_group")
	                    col1, col2 = st.columns(2)
	                    with col1:
	                        st.plotly_chart(
	                            demo_charts.create_experience_distribution_chart(
	                                exp_grouped, f"Experience Groups — {label}", use_groups=True
	                            ),
	                            use_container_width=True, key=f"lp_{demo_type}_exp_grp",
	                        )
	                    with col2:
	                        if not exp_sent.empty:
	                            st.plotly_chart(
	                                demo_charts.create_experience_sentiment_chart(
	                                    exp_sent, f"Sentiment by Experience — {label}", use_groups=True
	                                ),
	                                use_container_width=True, key=f"lp_{demo_type}_exp_sent",
	                            )
	                else:
	                    st.plotly_chart(
	                        demo_charts.create_experience_distribution_chart(
	                            exp_grouped, f"Experience Groups — {label}", use_groups=True
	                        ),
	                        use_container_width=True, key=f"lp_{demo_type}_exp_grp_only",
	                    )
	        else:
	            st.info("No experience data available.")
	    else:
	        st.info("Experience data not loaded.")


	def _compute_demo_by_sentiment(merged_df: pd.DataFrame, field: str) -> pd.DataFrame:
	"""Return sentiment distribution per demographic group for a merged metadata+demo frame."""
	valid = merged_df[
	merged_df[field].notna() & (merged_df[field] != "Unknown")
	& merged_df["sentiment_polarity"].notna()
	]
	if valid.empty:
	return pd.DataFrame()
	grp = valid.groupby([field, "sentiment_polarity"], as_index=False).size().rename(columns={"size": "count"})
	grp["percentage"] = grp.groupby(field)["count"].transform(
	lambda x: (x / x.sum() * 100).round(2)
	)
	return grp


	def _render_summary(result: dict):
	    """Display the LLM summary produced by LearningPathsSummaryAgent.

	    A result without a truthy ``"success"`` entry is shown as an error.
	    Otherwise the executive summary is rendered first, followed by two
	    columns of sections (journey arc, sentiment insights and content
	    highlights on the left; retention insights and recommendations on the
	    right) and a collapsed metadata expander. Sections with no entries are
	    skipped.
	    """
	    succeeded = result.get("success")
	    if not succeeded:
	        st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}")
	        return

	    summary = result.get("summary", {})
	    metadata = result.get("metadata", {})

	    def bullet_section(summary_key, heading):
	        # Emit a heading plus one markdown bullet per entry, if any exist.
	        entries = summary.get(summary_key, [])
	        if not entries:
	            return
	        st.markdown(heading)
	        for entry in entries:
	            st.markdown(f"- {entry}")

	    st.markdown("---")
	    st.markdown("#### 📋 Executive Summary")
	    st.info(summary.get("executive_summary", ""))

	    left, right = st.columns(2)

	    with left:
	        journey = summary.get("journey_arc", [])
	        if journey:
	            st.markdown("#### 🗺️ Journey Arc")
	            for step in journey:
	                phase_name = step.get('phase', '')
	                phase_text = step.get('description', '')
	                st.markdown(f"{phase_name} \n{phase_text}")
	                # Empty markdown element after each phase, as a spacer.
	                st.markdown("")

	        bullet_section("sentiment_insights", "#### 💬 Sentiment Insights")
	        bullet_section("content_highlights", "#### ✨ Content Highlights")

	    with right:
	        bullet_section("retention_insights", "#### 📉 Retention Insights")
	        bullet_section("recommendations", "#### 🎯 Recommendations")

	    with st.expander("ℹ️ Analysis Metadata"):
	        lessons_col, model_col, tokens_col = st.columns(3)
	        lessons_col.metric("Lessons Analysed", metadata.get("lessons_analyzed", 0))
	        model_col.metric("Model Used", metadata.get("model_used", "N/A"))
	        tokens_col.metric("Tokens Used", metadata.get("tokens_used", 0))