| """ |
| Learning Paths utility helpers — pure functions, no Streamlit dependency. |
| """ |
| import json |
| from pathlib import Path |
| from typing import Dict, List, Optional, Tuple |
|
|
| import pandas as pd |
|
|
|
|
| |
|
|
def load_lp_config(config_path: Optional[str] = None) -> dict:
    """
    Load the "learning_paths" section of the visualization config JSON.

    Args:
        config_path: Path of the JSON config file. When None, the file
            ``config/viz_config.json`` located one directory above the
            directory containing this module is used.

    Returns:
        The value stored under the top-level "learning_paths" key, or an
        empty dict when that key is absent.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if config_path is None:
        config_path = Path(__file__).resolve().parent.parent / "config" / "viz_config.json"
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which breaks non-ASCII labels on non-UTF-8 systems.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f).get("learning_paths", {})
|
|
|
|
def get_brands(config: dict) -> List[str]:
    """Return the value stored under the "brands" key of *config*, or [] if absent."""
    brands = config.get("brands", [])
    return brands
|
|
|
|
def get_brand_color(brand: str, config: dict) -> str:
    """Look up *brand* in config["brand_colors"]; fall back to "#607D8B" when unmapped."""
    palette = config.get("brand_colors", {})
    return palette.get(brand, "#607D8B")
|
|
|
|
def label_for_path(path_id, config: dict) -> str:
    """
    Return the display label for a learning path.

    The label is looked up in config["path_labels"] under ``str(path_id)``;
    when no entry exists the fallback "Path <path_id>" is returned.
    """
    labels = config.get("path_labels", {})
    lookup_key = str(path_id)
    fallback = f"Path {path_id}"
    return labels.get(lookup_key, fallback)
|
|
|
|
| |
|
|
def merge_lesson_metrics(
    lesson_map: pd.DataFrame,
    per_path_df: pd.DataFrame,
    video_df: pd.DataFrame,
    sentiment_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Left-join the lesson-level metric frames onto the lesson map.

    Args:
        lesson_map: One row per lesson. Must contain the columns "brand",
            "learning_path_id", "first_lesson_content_id", "lesson_order",
            "lesson_content_id" and "content_title".
        per_path_df: Completion metrics keyed by ("learning_path_id",
            "content_id"). Optional.
        video_df: Video metrics keyed by ("learning_path_id", "content_id").
            Optional.
        sentiment_df: Sentiment metrics keyed by ("learning_path_id",
            "lesson_order"). Optional.

    An optional frame is skipped (its columns are simply absent from the
    result) when it is empty or does not carry its join-key columns.

    Returns:
        A frame sorted by (learning_path_id, lesson_order) with a fresh
        0..n-1 index. Count columns that are present are cast to int with
        missing values filled as 0; rate/score columns keep NaN for lessons
        without data. An empty frame is returned if lesson_map is empty.

    Raises:
        KeyError: If lesson_map lacks one of its required columns.
    """
    if lesson_map.empty:
        return pd.DataFrame()

    base = lesson_map[["brand", "learning_path_id", "first_lesson_content_id",
                       "lesson_order", "lesson_content_id", "content_title"]].copy()

    join_key = ["learning_path_id", "lesson_content_id"]

    # Frames keyed by content id, with the metric columns wanted from each.
    content_keyed = (
        (per_path_df, ["lesson_number", "students_completed",
                       "denominator_students", "completion_rate"]),
        (video_df, ["total_starts", "total_completions", "video_completion_rate"]),
    )
    for metrics_df, metric_cols in content_keyed:
        if metrics_df.empty or "content_id" not in metrics_df.columns:
            continue
        renamed = metrics_df.rename(columns={"content_id": "lesson_content_id"})
        # merge() raises KeyError when a key column is missing on either
        # side, so skip frames that cannot be joined.
        if any(key not in renamed.columns for key in join_key):
            continue
        cols = join_key + [c for c in metric_cols if c in renamed.columns]
        base = base.merge(renamed[cols], on=join_key, how="left")

    # Sentiment is keyed by lesson position rather than by content id.
    sent_key = ["learning_path_id", "lesson_order"]
    if not sentiment_df.empty and all(k in sentiment_df.columns for k in sent_key):
        sent_metrics = ["total_comments", "very_positive", "positive", "neutral",
                        "negative", "very_negative", "avg_sentiment_score"]
        sent_cols = sent_key + [c for c in sent_metrics if c in sentiment_df.columns]
        base = base.merge(sentiment_df[sent_cols], on=sent_key, how="left")

    # Left joins leave NaN for lessons without data; counts become 0.
    for col in ["students_completed", "denominator_students", "total_starts",
                "total_completions", "total_comments",
                "very_positive", "positive", "neutral", "negative", "very_negative"]:
        if col in base.columns:
            base[col] = base[col].fillna(0).astype(int)

    base = base.sort_values(["learning_path_id", "lesson_order"])
    return base.reset_index(drop=True)
|
|
|
|
def merge_method_wide(
    method_df: pd.DataFrame,
    video_df: pd.DataFrame,
    sentiment_df: pd.DataFrame,
    config: dict,
) -> pd.DataFrame:
    """
    Left-join video and sentiment metrics onto the method-wide completion frame.

    Args:
        method_df: Method-wide completion rows. Must contain
            "learning_path_id" and "method_lesson_number" (the column the
            result is sorted by); its "content_id" column, if present, is
            renamed to "lesson_content_id".
        video_df: Video metrics keyed by ("learning_path_id", "content_id").
            Optional.
        sentiment_df: Sentiment metrics keyed by ("learning_path_id",
            "lesson_order"). Optional.
        config: Learning-paths config passed to ``label_for_path`` to build
            the "path_label" column.

    An optional frame is skipped when it is empty or when either side of the
    join lacks the key columns.

    Returns:
        The merged frame with an added "path_label" column, count columns
        cast to int (missing filled as 0), sorted by "method_lesson_number"
        with a fresh 0..n-1 index. An empty frame if method_df is empty.

    Raises:
        KeyError: If method_df lacks "learning_path_id" or
            "method_lesson_number".
    """
    if method_df.empty:
        return pd.DataFrame()

    base = method_df.rename(columns={"content_id": "lesson_content_id"}).copy()
    join_key = ["learning_path_id", "lesson_content_id"]

    if (not video_df.empty and "content_id" in video_df.columns
            and all(k in base.columns for k in join_key)):
        vd = video_df.rename(columns={"content_id": "lesson_content_id"})
        # merge() raises KeyError if a key column is missing on either side.
        if all(k in vd.columns for k in join_key):
            video_metrics = ["total_starts", "total_completions", "video_completion_rate"]
            cols = join_key + [c for c in video_metrics if c in vd.columns]
            base = base.merge(vd[cols], on=join_key, how="left")

    # Sentiment is keyed by lesson position rather than by content id.
    sent_key = ["learning_path_id", "lesson_order"]
    if (not sentiment_df.empty
            and all(k in base.columns for k in sent_key)
            and all(k in sentiment_df.columns for k in sent_key)):
        sent_metrics = ["total_comments", "very_positive", "positive", "neutral",
                        "negative", "very_negative", "avg_sentiment_score"]
        sent_cols = sent_key + [c for c in sent_metrics if c in sentiment_df.columns]
        base = base.merge(sentiment_df[sent_cols], on=sent_key, how="left")

    base["path_label"] = base["learning_path_id"].apply(
        lambda pid: label_for_path(pid, config)
    )

    # Left joins leave NaN for lessons without data; counts become 0.
    for col in ["students_completed", "total_starts", "total_completions",
                "total_comments", "very_positive", "positive", "neutral",
                "negative", "very_negative"]:
        if col in base.columns:
            base[col] = base[col].fillna(0).astype(int)

    base = base.sort_values("method_lesson_number")
    return base.reset_index(drop=True)
|
|
|
|
| |
|
|
def find_top_dropoffs(df: pd.DataFrame, n: int = 5,
                      rate_col: str = "completion_rate",
                      order_col: str = "lesson_order") -> pd.DataFrame:
    """
    Return the top-N lessons with the largest completion-rate drop
    compared to the previous lesson (within the same learning_path_id).

    Args:
        df: Lesson-level frame containing "learning_path_id", *order_col*
            and *rate_col*.
        n: Maximum number of rows to return.
        rate_col: Column holding the rate to compare between lessons.
        order_col: Column giving the lesson order within a path.

    Returns:
        Up to *n* rows, largest drop first, restricted to the columns
        learning_path_id, order_col, content_title (if present), prev_rate,
        rate_col and dropoff. Only strictly positive drops are reported, so
        the first lesson of each path never appears. An empty frame is
        returned when df is empty or any of the three required columns is
        missing.
    """
    required = ("learning_path_id", order_col, rate_col)
    if df.empty or any(col not in df.columns for col in required):
        return pd.DataFrame()

    # sort_values returns a new frame, so the caller's df is never mutated.
    ordered = df.sort_values(["learning_path_id", order_col])
    prev_rate = ordered.groupby("learning_path_id")[rate_col].shift(1)
    ordered = ordered.assign(prev_rate=prev_rate,
                             dropoff=prev_rate - ordered[rate_col])

    # NaN (first lesson of a path) compares False here and is dropped too.
    drops = ordered[ordered["dropoff"] > 0]

    wanted = ["learning_path_id", order_col, "content_title",
              "prev_rate", rate_col, "dropoff"]
    keep = [c for c in wanted if c in drops.columns]
    return drops.nlargest(n, "dropoff")[keep].reset_index(drop=True)
|
|
|
|
def get_overview_kpis(merged: pd.DataFrame) -> dict:
    """
    Return a dict of high-level KPI values from the merged metrics frame.

    Returns:
        {} when *merged* is empty, otherwise a dict with:
          - "total_students": max of "denominator_students" (0 if the column
            is missing or holds no values)
          - "avg_completion_pct": mean of "completion_rate" times 100
          - "avg_sentiment_score": mean of "avg_sentiment_score"
          - "total_comments": sum of "total_comments"
          - "n_paths": number of distinct "learning_path_id" values
          - "n_lessons": number of rows in *merged*
        A missing column yields 0 / 0.0 for its KPI. The two averages are
        NaN when their column exists but contains no values.
    """
    if merged.empty:
        return {}

    def _int_or_zero(value) -> int:
        # max()/sum() of an all-NaN column is NaN, and int(NaN) raises.
        return 0 if pd.isna(value) else int(value)

    cols = merged.columns
    total_students = (_int_or_zero(merged["denominator_students"].max())
                      if "denominator_students" in cols else 0)
    avg_completion = (float(merged["completion_rate"].mean())
                      if "completion_rate" in cols else 0.0)
    avg_sentiment = (float(merged["avg_sentiment_score"].mean())
                     if "avg_sentiment_score" in cols else 0.0)
    total_comments = (_int_or_zero(merged["total_comments"].sum())
                      if "total_comments" in cols else 0)
    n_paths = int(merged["learning_path_id"].nunique()) if "learning_path_id" in cols else 0

    return {
        "total_students": total_students,
        "avg_completion_pct": avg_completion * 100,
        "avg_sentiment_score": avg_sentiment,
        "total_comments": total_comments,
        "n_paths": n_paths,
        "n_lessons": len(merged),
    }
|
|
|
|
def filter_by_paths(df: pd.DataFrame,
                    path_ids: Optional[List[int]]) -> pd.DataFrame:
    """
    Restrict *df* to rows whose learning_path_id is in *path_ids*.

    When *path_ids* is None or empty, *df* is empty, or *df* has no
    "learning_path_id" column, *df* itself is returned unchanged. Otherwise
    a filtered frame with a fresh 0..n-1 index is returned.
    """
    nothing_selected = not path_ids
    if nothing_selected or df.empty or "learning_path_id" not in df.columns:
        return df
    in_selection = df["learning_path_id"].isin(path_ids)
    subset = df[in_selection]
    return subset.reset_index(drop=True)
|
|
|
|
def short_title(title: Optional[str], max_len: int = 35) -> str:
    """
    Truncate a content title for display in labels.

    Args:
        title: The title text; may be None, empty, or NaN.
        max_len: Maximum number of title characters kept before the
            ellipsis is appended.

    Returns:
        An em dash placeholder when the title is missing or blank, the
        stripped title when it fits within *max_len*, otherwise its first
        *max_len* characters followed by a single ellipsis character.
    """
    # Written as escapes so the literals survive a bad re-encoding of this
    # file: U+2014 is an em dash, U+2026 a horizontal ellipsis.
    placeholder = "\u2014"
    suffix = "\u2026"

    if not title or pd.isna(title):
        return placeholder
    text = str(title).strip()
    if not text:
        # A whitespace-only title is as good as missing.
        return placeholder
    return text if len(text) <= max_len else text[:max_len] + suffix