From f5a2b0789201c78afde93b3675fe43d91d3e4c90 Mon Sep 17 00:00:00 2001
From: lsabor
Date: Sun, 14 Dec 2025 12:04:14 -0800
Subject: [PATCH 01/10] adds daily task to update global bot leaderboard, refactors command for convenience

---
 misc/management/commands/cron.py              |   8 +
 scoring/jobs.py                               |  17 +
 .../commands/update_global_bot_leaderboard.py | 562 +++++++++---------
 3 files changed, 306 insertions(+), 281 deletions(-)

diff --git a/misc/management/commands/cron.py b/misc/management/commands/cron.py
index cf9360552e..0734505f5d 100644
--- a/misc/management/commands/cron.py
+++ b/misc/management/commands/cron.py
@@ -26,6 +26,7 @@
 from scoring.jobs import (
     finalize_leaderboards,
     update_global_comment_and_question_leaderboards,
+    update_global_bot_leaderboard,
 )
 from scoring.utils import update_medal_points_and_ranks
 
@@ -200,6 +201,13 @@ def handle(self, *args, **options):
         max_instances=1,
         replace_existing=True,
     )
+    scheduler.add_job(
+        close_old_connections(update_global_bot_leaderboard),
+        trigger=CronTrigger.from_crontab("0 5 * * *"),  # Every day at 05:00 UTC
+        id="update_global_bot_leaderboard",
+        max_instances=1,
+        replace_existing=True,
+    )
 
 #
 # Comment Jobs
diff --git a/scoring/jobs.py b/scoring/jobs.py
index 89c6900a89..1d3b60dace 100644
--- a/scoring/jobs.py
+++ b/scoring/jobs.py
@@ -6,9 +6,26 @@
 from scoring.models import Leaderboard
 from scoring.utils import update_project_leaderboard
 
+from scoring.management.commands.update_global_bot_leaderboard import (
+    run_update_global_bot_leaderboard,
+)
+
 logger = logging.getLogger(__name__)
 
 
+def update_global_bot_leaderboard():
+    global_bot_leaderboard = Leaderboard.objects.filter(
+        name="Global Bot Leaderboard",
+    ).first()
+    if not global_bot_leaderboard:
+        logger.warning("Global Bot Leaderboard not found.")
+        return
+    try:
+        run_update_global_bot_leaderboard()
+    except Exception as e:
+        logger.error(f"Error updating Global Bot Leaderboard: {e}")
+
+
 def update_global_comment_and_question_leaderboards():
     global_leaderboards = Leaderboard.objects.filter(
         finalized=False,
diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py
index 479e57a3db..89594a307e 100644
--- a/scoring/management/commands/update_global_bot_leaderboard.py
+++ b/scoring/management/commands/update_global_bot_leaderboard.py
@@ -604,302 +604,302 @@ def bootstrap_skills(
     return ci_lower, ci_upper
 
 
-class Command(BaseCommand):
-    help = """
-    Update the global bots leaderboard
-    """
-
-    def handle(self, *args, **options) -> None:
-        baseline_player: int | str = 236038  # metac-gpt-4o+asknews
-        bootstrap_iterations = 30
-
-        # SETUP: users to evaluate & questions
-        print("Initializing...")
-        users: QuerySet[User] = User.objects.filter(
-            metadata__bot_details__metac_bot=True,
-            metadata__bot_details__include_in_calculations=True,
-            metadata__bot_details__display_in_leaderboard=True,
-            is_active=True,
-        ).order_by("id")
-        user_forecast_exists = Forecast.objects.filter(
-            question_id=OuterRef("pk"), author__in=users
-        )
-        questions: QuerySet[Question] = (
-            Question.objects.filter(
-                Q(
-                    related_posts__post__default_project__default_permission__in=[
-                        "viewer",
-                        "forecaster",
-                    ]
-                )
-                | Q(
-                    related_posts__post__default_project_id__in=[
-                        3349,  # aib q3 2024
-                        32506,  # aib q4 2024
-                        32627,  # aib q1 2025
-                        32721,  # aib q2 2025
-                        32813,  # aib fall 2025
-                    ]
-                ),
-                related_posts__post__curation_status=Post.CurationStatus.APPROVED,
-                resolution__isnull=False,
-                scheduled_close_time__lte=timezone.now(),
-            )
-
.exclude(related_posts__post__default_project__slug__startswith="minibench") - .exclude(resolution__in=UnsuccessfulResolutionType) - .filter(Exists(user_forecast_exists)) - .prefetch_related( # only prefetch forecasts from those users - Prefetch( - "user_forecasts", queryset=Forecast.objects.filter(author__in=users) - ) +def run_update_global_bot_leaderboard() -> None: + baseline_player: int | str = 236038 # metac-gpt-4o+asknews + bootstrap_iterations = 30 + + # SETUP: users to evaluate & questions + print("Initializing...") + users: QuerySet[User] = User.objects.filter( + metadata__bot_details__metac_bot=True, + metadata__bot_details__include_in_calculations=True, + metadata__bot_details__display_in_leaderboard=True, + is_active=True, + ).order_by("id") + user_forecast_exists = Forecast.objects.filter( + question_id=OuterRef("pk"), author__in=users + ) + questions: QuerySet[Question] = ( + Question.objects.filter( + Q( + related_posts__post__default_project__default_permission__in=[ + "viewer", + "forecaster", + ] ) - .order_by("id") - .distinct("id") + | Q( + related_posts__post__default_project_id__in=[ + 3349, # aib q3 2024 + 32506, # aib q4 2024 + 32627, # aib q1 2025 + 32721, # aib q2 2025 + 32813, # aib fall 2025 + ] + ), + related_posts__post__curation_status=Post.CurationStatus.APPROVED, + resolution__isnull=False, + scheduled_close_time__lte=timezone.now(), ) - ############### - # make sure they have at least 100 resolved questions - print("initialize list") - question_list = list(questions) - print("Filtering users.") - scored_question_counts: dict[int, int] = defaultdict(int) - c = users.count() - i = 0 - for user in users: - i += 1 - print(i, "/", c, end="\r") - scored_question_counts[user.id] = ( - Score.objects.filter(user=user, question__in=question_list) - .distinct("question_id") - .count() + .exclude(related_posts__post__default_project__slug__startswith="minibench") + .exclude(resolution__in=UnsuccessfulResolutionType) + .filter(Exists(user_forecast_exists)) + .prefetch_related( # only prefetch forecasts from those users + Prefetch( + "user_forecasts", queryset=Forecast.objects.filter(author__in=users) ) - excluded_ids = [ - uid for uid, count in scored_question_counts.items() if count < 100 - ] - users = users.exclude(id__in=excluded_ids) - ############### - print("Initializing... 
DONE") - - # Gather head to head scores - user1_ids, user2_ids, question_ids, scores, weights = gather_data( - users, questions ) - - # choose baseline player if not already chosen - if not baseline_player: - baseline_player = max( - set(user1_ids) | set(user2_ids), key=(user1_ids + user2_ids).count - ) - # get variance of average scores (used in rescaling) - avg_scores = get_avg_scores(user1_ids, user2_ids, scores, weights) - var_avg_scores = ( - np.var(np.array(list(avg_scores.values()))) if len(avg_scores) > 1 else 0 + .order_by("id") + .distinct("id") + ) + ############### + # make sure they have at least 100 resolved questions + print("initialize list") + question_list = list(questions) + print("Filtering users.") + scored_question_counts: dict[int, int] = defaultdict(int) + c = users.count() + i = 0 + for user in users: + i += 1 + print(i, "/", c, end="\r") + scored_question_counts[user.id] = ( + Score.objects.filter(user=user, question__in=question_list) + .distinct("question_id") + .count() ) - - # compute skills initially - skills = get_skills( - user1_ids=user1_ids, - user2_ids=user2_ids, - question_ids=question_ids, - scores=scores, - weights=weights, - baseline_player=baseline_player, - var_avg_scores=var_avg_scores, - verbose=False, + excluded_ids = [uid for uid, count in scored_question_counts.items() if count < 100] + users = users.exclude(id__in=excluded_ids) + ############### + print("Initializing... DONE") + + # Gather head to head scores + user1_ids, user2_ids, question_ids, scores, weights = gather_data(users, questions) + + # choose baseline player if not already chosen + if not baseline_player: + baseline_player = max( + set(user1_ids) | set(user2_ids), key=(user1_ids + user2_ids).count ) + # get variance of average scores (used in rescaling) + avg_scores = get_avg_scores(user1_ids, user2_ids, scores, weights) + var_avg_scores = ( + np.var(np.array(list(avg_scores.values()))) if len(avg_scores) > 1 else 0 + ) - # Compute bootstrap confidence intervals - ci_lower, ci_upper = bootstrap_skills( - user1_ids, - user2_ids, - question_ids, - scores, - weights, - var_avg_scores, - baseline_player=baseline_player, - bootstrap_iterations=bootstrap_iterations, - ) - print() + # compute skills initially + skills = get_skills( + user1_ids=user1_ids, + user2_ids=user2_ids, + question_ids=question_ids, + scores=scores, + weights=weights, + baseline_player=baseline_player, + var_avg_scores=var_avg_scores, + verbose=False, + ) - ordered_skills = sorted( - [(user, skill) for user, skill in skills.items()], key=lambda x: -x[1] - ) - player_stats: dict[int | str, list] = defaultdict(lambda: [0, set()]) - for u1id, u2id, qid in zip(user1_ids, user2_ids, question_ids): - player_stats[u1id][0] += 1 - player_stats[u1id][1].add(qid) - player_stats[u2id][0] += 1 - player_stats[u2id][1].add(qid) - - ########################################################################## - ########################################################################## - ########################################################################## - ########################################################################## - # UPDATE Leaderboard - print("Updating leaderboard...", end="\r") - leaderboard, _ = Leaderboard.objects.get_or_create( - name="Global Bot Leaderboard", - project=Project.objects.get(type=Project.ProjectTypes.SITE_MAIN), - score_type=LeaderboardScoreTypes.MANUAL, - bot_status=Project.BotLeaderboardStatus.BOTS_ONLY, - ) - entry_dict = { - entry.user_id or entry.aggregation_method: entry - for entry 
in list(leaderboard.entries.all()) - } - rank = 1 - question_count = len(set(question_ids)) - seen = set() - for uid, skill in ordered_skills: - contribution_count = len(player_stats[uid][1]) - - excluded = False - if isinstance(uid, int): - user = User.objects.get(id=uid) - bot_details = user.metadata["bot_details"] - if not bot_details.get("display_in_leaderboard"): - excluded = True - - entry: LeaderboardEntry = entry_dict.pop(uid, LeaderboardEntry()) - entry.user_id = uid if isinstance(uid, int) else None - entry.aggregation_method = uid if isinstance(uid, str) else None - entry.leaderboard = leaderboard - entry.score = skill - entry.rank = rank - entry.excluded = excluded - entry.show_when_excluded = False - entry.contribution_count = contribution_count - entry.coverage = contribution_count / question_count - entry.calculated_on = timezone.now() - entry.ci_lower = ci_lower.get(uid, None) - entry.ci_upper = ci_upper.get(uid, None) - # TODO: support for more efficient saving once this is implemented - # for leaderboards with more than 100 entries - entry.save() - seen.add(entry.id) - - if not excluded: - rank += 1 - print("Updating leaderboard... DONE") - # delete unseen entries - leaderboard.entries.exclude(id__in=seen).delete() - print() - - ########################################################################## - ########################################################################## - ########################################################################## - ########################################################################## - # DISPLAY - print("Results:") + # Compute bootstrap confidence intervals + ci_lower, ci_upper = bootstrap_skills( + user1_ids, + user2_ids, + question_ids, + scores, + weights, + var_avg_scores, + baseline_player=baseline_player, + bootstrap_iterations=bootstrap_iterations, + ) + print() + + ordered_skills = sorted( + [(user, skill) for user, skill in skills.items()], key=lambda x: -x[1] + ) + player_stats: dict[int | str, list] = defaultdict(lambda: [0, set()]) + for u1id, u2id, qid in zip(user1_ids, user2_ids, question_ids): + player_stats[u1id][0] += 1 + player_stats[u1id][1].add(qid) + player_stats[u2id][0] += 1 + player_stats[u2id][1].add(qid) + + ########################################################################## + ########################################################################## + ########################################################################## + ########################################################################## + # UPDATE Leaderboard + print("Updating leaderboard...", end="\r") + leaderboard, _ = Leaderboard.objects.get_or_create( + name="Global Bot Leaderboard", + project=Project.objects.get(type=Project.ProjectTypes.SITE_MAIN), + score_type=LeaderboardScoreTypes.MANUAL, + bot_status=Project.BotLeaderboardStatus.BOTS_ONLY, + ) + entry_dict = { + entry.user_id or entry.aggregation_method: entry + for entry in list(leaderboard.entries.all()) + } + rank = 1 + question_count = len(set(question_ids)) + seen = set() + for uid, skill in ordered_skills: + contribution_count = len(player_stats[uid][1]) + + excluded = False + if isinstance(uid, int): + user = User.objects.get(id=uid) + bot_details = user.metadata["bot_details"] + if not bot_details.get("display_in_leaderboard"): + excluded = True + + entry: LeaderboardEntry = entry_dict.pop(uid, LeaderboardEntry()) + entry.user_id = uid if isinstance(uid, int) else None + entry.aggregation_method = uid if isinstance(uid, str) else None + entry.leaderboard = 
leaderboard + entry.score = skill + entry.rank = rank + entry.excluded = excluded + entry.show_when_excluded = False + entry.contribution_count = contribution_count + entry.coverage = contribution_count / question_count + entry.calculated_on = timezone.now() + entry.ci_lower = ci_lower.get(uid, None) + entry.ci_upper = ci_upper.get(uid, None) + # TODO: support for more efficient saving once this is implemented + # for leaderboards with more than 100 entries + entry.save() + seen.add(entry.id) + + if not excluded: + rank += 1 + print("Updating leaderboard... DONE") + # delete unseen entries + leaderboard.entries.exclude(id__in=seen).delete() + print() + + ########################################################################## + ########################################################################## + ########################################################################## + ########################################################################## + # DISPLAY + print("Results:") + print( + "| 2.5% " + "| Skill " + "| 97.5% " + "| Match " + "| Quest. " + "| ID " + "| Username " + ) + print( + "| Match " + "| " + "| Match " + "| Count " + "| Count " + "| " + "| " + ) + print( + "==========================================" + "==========================================" + ) + unevaluated = ( + set(user1_ids) | set(user2_ids) | set(users.values_list("id", flat=True)) + ) + for uid, skill in ordered_skills: + if isinstance(uid, str): + username = uid + else: + username = User.objects.get(id=uid).username + unevaluated.remove(uid) + lower = ci_lower.get(uid, 0) + upper = ci_upper.get(uid, 0) print( - "| 2.5% " - "| Skill " - "| 97.5% " - "| Match " - "| Quest. " - "| ID " - "| Username " + f"| {round(lower, 2):>6} " + f"| {round(skill, 2):>6} " + f"| {round(upper, 2):>6} " + f"| {player_stats[uid][0]:>6} " + f"| {len(player_stats[uid][1]):>6} " + f"| {uid if isinstance(uid, int) else '':>6} " + f"| {username}" ) + for uid in unevaluated: + if isinstance(uid, str): + username = uid + else: + username = User.objects.get(id=uid).username print( - "| Match " - "| " - "| Match " - "| Count " - "| Count " - "| " - "| " + "| ------ " + "| ------ " + "| ------ " + "| ------ " + "| ------ " + f"| {uid if isinstance(uid, int) else '':>5} " + f"| {username}" ) + print() + + ########################################################################## + ########################################################################## + ########################################################################## + ########################################################################## + # TESTS + skills_array = np.array(list(skills.values())) + + # 1. Correllation between skill and avg_score (DO NOT HAVE YET - need avg_score) + x = [] + y = [] + for uid in user1_ids: + x.append(skills.get(uid, 0)) + y.append(avg_scores.get(uid, 0)) + correlation = np.corrcoef(x, y) + print(f"\nCorrelation between skill and avg_score: {correlation[0][1]}") + + # 2. 
Shapiro-Wilk test (good for small to medium samples) + if len(skills_array) >= 3: + shapiro_stat, shapiro_p = stats.shapiro(skills_array) print( - "==========================================" - "==========================================" + f" Shapiro-Wilk test: statistic={shapiro_stat:.4f}, p-value={shapiro_p:.4f}" ) - unevaluated = ( - set(user1_ids) | set(user2_ids) | set(users.values_list("id", flat=True)) - ) - for uid, skill in ordered_skills: - if isinstance(uid, str): - username = uid - else: - username = User.objects.get(id=uid).username - unevaluated.remove(uid) - lower = ci_lower.get(uid, 0) - upper = ci_upper.get(uid, 0) - print( - f"| {round(lower, 2):>6} " - f"| {round(skill, 2):>6} " - f"| {round(upper, 2):>6} " - f"| {player_stats[uid][0]:>6} " - f"| {len(player_stats[uid][1]):>6} " - f"| {uid if isinstance(uid, int) else '':>6} " - f"| {username}" - ) - for uid in unevaluated: - if isinstance(uid, str): - username = uid - else: - username = User.objects.get(id=uid).username - print( - "| ------ " - "| ------ " - "| ------ " - "| ------ " - "| ------ " - f"| {uid if isinstance(uid, int) else '':>5} " - f"| {username}" - ) - print() - - ########################################################################## - ########################################################################## - ########################################################################## - ########################################################################## - # TESTS - skills_array = np.array(list(skills.values())) - - # 1. Correllation between skill and avg_score (DO NOT HAVE YET - need avg_score) - x = [] - y = [] - for uid in user1_ids: - x.append(skills.get(uid, 0)) - y.append(avg_scores.get(uid, 0)) - correlation = np.corrcoef(x, y) - print(f"\nCorrelation between skill and avg_score: {correlation[0][1]}") - - # 2. Shapiro-Wilk test (good for small to medium samples) - if len(skills_array) >= 3: - shapiro_stat, shapiro_p = stats.shapiro(skills_array) - print( - f" Shapiro-Wilk test: statistic={shapiro_stat:.4f}, p-value={shapiro_p:.4f}" - ) - if shapiro_p > 0.05: - print(" → Skills appear normally distributed (p > 0.05)") - else: - print(" → Skills may not be normally distributed (p ≤ 0.05)") - - # 3. Anderson-Darling test (more sensitive to tails) - anderson_result = stats.anderson(skills_array, dist="norm") - print(f" Anderson-Darling test: statistic={anderson_result.statistic:.4f}") - # Check at 5% significance level - critical_5pct = anderson_result.critical_values[2] # Index 2 is 5% level - print(f" Critical value at 5%: {critical_5pct:.4f}") - if anderson_result.statistic < critical_5pct: - print(" → Skills appear normally distributed (stat < critical)") - else: - print(" → Skills may not be normally distributed (stat ≥ critical)") - - # 4. Kolmogorov-Smirnov test (compare to normal distribution) - ks_stat, ks_p = stats.kstest( - skills_array, "norm", args=(skills_array.mean(), skills_array.std()) - ) - print(f" Kolmogorov-Smirnov test: statistic={ks_stat:.4f}, p-value={ks_p:.4f}") - if ks_p > 0.05: + if shapiro_p > 0.05: print(" → Skills appear normally distributed (p > 0.05)") else: print(" → Skills may not be normally distributed (p ≤ 0.05)") - # 5. Summary statistics - print("\nSkill distribution summary:") - print(f" Mean: {skills_array.mean():.2f}") - print(f" Std: {skills_array.std():.2f}") - print(f" Skewness: {stats.skew(skills_array):.4f}") - print(f" Kurtosis: {stats.kurtosis(skills_array):.4f}") - print() + # 3. 
Anderson-Darling test (more sensitive to tails) + anderson_result = stats.anderson(skills_array, dist="norm") + print(f" Anderson-Darling test: statistic={anderson_result.statistic:.4f}") + # Check at 5% significance level + critical_5pct = anderson_result.critical_values[2] # Index 2 is 5% level + print(f" Critical value at 5%: {critical_5pct:.4f}") + if anderson_result.statistic < critical_5pct: + print(" → Skills appear normally distributed (stat < critical)") + else: + print(" → Skills may not be normally distributed (stat ≥ critical)") + + # 4. Kolmogorov-Smirnov test (compare to normal distribution) + ks_stat, ks_p = stats.kstest( + skills_array, "norm", args=(skills_array.mean(), skills_array.std()) + ) + print(f" Kolmogorov-Smirnov test: statistic={ks_stat:.4f}, p-value={ks_p:.4f}") + if ks_p > 0.05: + print(" → Skills appear normally distributed (p > 0.05)") + else: + print(" → Skills may not be normally distributed (p ≤ 0.05)") + + # 5. Summary statistics + print("\nSkill distribution summary:") + print(f" Mean: {skills_array.mean():.2f}") + print(f" Std: {skills_array.std():.2f}") + print(f" Skewness: {stats.skew(skills_array):.4f}") + print(f" Kurtosis: {stats.kurtosis(skills_array):.4f}") + print() + + +class Command(BaseCommand): + help = """ + Update the global bots leaderboard + """ + + def handle(self, *args, **options) -> None: + run_update_global_bot_leaderboard() From 4d2c0f76c64e857d82663837a7f9399a5c999141 Mon Sep 17 00:00:00 2001 From: lsabor Date: Fri, 23 Jan 2026 13:56:35 -0800 Subject: [PATCH 02/10] add partial cache support --- .../commands/update_global_bot_leaderboard.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index 89594a307e..3d44e3dbc9 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -104,8 +104,13 @@ def get_score_pair( def gather_data( users: QuerySet[User], questions: QuerySet[Question], - cache: bool = False, + cache: bool = True, ) -> tuple[list[int | str], list[int | str], list[int], list[float], list[float]]: + user1_ids: list[int | str] = [] + user2_ids: list[int | str] = [] + question_ids: list[int] = [] + scores: list[float] = [] + coverages: list[float] = [] if cache: csv_path = Path("HtH_score_data.csv") if csv_path.exists(): @@ -124,11 +129,6 @@ def _deserialize_user(value: str) -> int | str: except ValueError: return value - user1_ids: list[int | str] = [] - user2_ids: list[int | str] = [] - question_ids: list[int] = [] - scores: list[float] = [] - coverages: list[float] = [] with csv_path.open() as input_file: reader = csv.DictReader(input_file) for row in reader: @@ -138,7 +138,7 @@ def _deserialize_user(value: str) -> int | str: question_ids.append(int(row["questionid"])) scores.append(float(row["score"])) coverages.append(float(row["coverage"])) - return (user1_ids, user2_ids, question_ids, scores, coverages) + cached_question_ids = set(question_ids) # TODO: make authoritative mapping print("creating AIB <> Pro AIB question mapping...", end="\r") @@ -187,12 +187,10 @@ def _deserialize_user(value: str) -> int | str: print("| Question | ID | Pairing | Duration | Est. 
Duration |") t0 = datetime.now() question_count = len(questions) - user1_ids: list[int | str] = [] - user2_ids: list[int | str] = [] - question_ids: list[int] = [] - scores: list[float] = [] - coverages: list[float] = [] for question_number, question in enumerate(questions.iterator(chunk_size=10), 1): + if question.id in cached_question_ids: + # Skip questions that are already cached + continue # if question_number % 50 != 0: # continue question_print_str = ( From fb932d940979b42fa38eafdc588124bdfd8cc725 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 1 Feb 2026 07:29:44 -0800 Subject: [PATCH 03/10] save work --- .../commands/update_global_bot_leaderboard.py | 106 ++++++++++++------ 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index 73d6e45ba3..615ea20b67 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -105,12 +105,15 @@ def gather_data( users: QuerySet[User], questions: QuerySet[Question], cache: bool = True, -) -> tuple[list[int | str], list[int | str], list[int], list[float], list[float]]: +) -> tuple[ + list[int | str], list[int | str], list[int], list[float], list[float], list[float] +]: user1_ids: list[int | str] = [] user2_ids: list[int | str] = [] question_ids: list[int] = [] scores: list[float] = [] coverages: list[float] = [] + timestamps: list[float] = [] if cache: csv_path = Path("HtH_score_data.csv") if csv_path.exists(): @@ -138,6 +141,7 @@ def _deserialize_user(value: str) -> int | str: question_ids.append(int(row["questionid"])) scores.append(float(row["score"])) coverages.append(float(row["coverage"])) + timestamps.append(float(row["timestamp"])) cached_question_ids = set(question_ids) # TODO: make authoritative mapping @@ -186,37 +190,56 @@ def _deserialize_user(value: str) -> int | str: t0 = datetime.now() question_count = len(questions) for question_number, question in enumerate(questions.iterator(chunk_size=10), 1): - if question.id in cached_question_ids: - # Skip questions that are already cached - continue - # if question_number % 50 != 0: - # continue + # TODO: cache results every ~100 questions, clearing lists of values question_print_str = ( f"\033[K" f"| {question_number:>5}/{question_count:<5} " f"| {question.id:<5} " ) + if question.id in cached_question_ids: + # Skip questions that are already cached + duration = datetime.now() - t0 + est_duration = duration / question_number * question_count + print( + f"{question_print_str}" + f"| {"N":>5}/{"A":<5} " + f"| {duration} " + f"| {est_duration} " + "|", + end="\r", + ) + continue # Get forecasts forecast_dict: dict[int | str, list[Forecast | AggregateForecast]] = ( defaultdict(list) ) - # bot forecasts - simple - bot_forecasts = question.user_forecasts.filter(author_id__in=user_ids).order_by( - "start_time" - ) - for f in bot_forecasts: - # don't include forecasts made 1 year or more after model release - user = user_id_map[f.author_id] - primary_base_model = user.metadata["bot_details"]["base_models"][0] + # bot forecasts + old_bot_ids: set[int] = set() + for user in users: + base_models = ( + (user.metadata or dict()) + .get("bot_details", dict()) + .get("base_models", []) + ) + # don't include bots on question that resolved 1 year or more + # after model release + primary_base_model = None if not base_models else base_models[0] + if not primary_base_model: + continue if 
release_date := primary_base_model.get("model_release_date"): if len(release_date) == 7: release_date += "-01" release = datetime.fromisoformat(release_date).replace( tzinfo=dt_timezone.utc ) - if f.start_time > release + timedelta(days=365): - continue - + if question.resolution_set_time > release + timedelta(days=365): + old_bot_ids.add(user.id) + bot_forecasts = ( + question.user_forecasts.filter(author_id__in=user_ids) + .exclude(author_id__in=old_bot_ids) + .order_by("start_time") + ) + for f in bot_forecasts: forecast_dict[f.author_id].append(f) # human aggregate forecasts - conditional on a bunch of stuff human_question: Question | None = aib_question_map.get(question, question) @@ -230,10 +253,13 @@ def _deserialize_user(value: str) -> int | str: if question.default_score_type == ScoreTypes.SPOT_PEER else AggregationMethod.RECENCY_WEIGHTED ) + # aggregate_forecasts = human_question.aggregate_forecasts.filter( + # method=aggregation_method + # ).order_by("start_time") aggregate_forecasts = get_aggregation_history( human_question, [aggregation_method], - minimize=False, + minimize=True, include_stats=False, include_bots=False, include_future=False, @@ -279,6 +305,7 @@ def _deserialize_user(value: str) -> int | str: question_ids.append(q) scores.append(u1s) coverages.append(cov) + timestamps.append(question.actual_resolve_time.timestamp()) print("\n") weights = coverages @@ -287,11 +314,15 @@ def _deserialize_user(value: str) -> int | str: with open("HtH_score_data.csv", "w") as output_file: writer = csv.writer(output_file) - writer.writerow(["user1", "user2", "questionid", "score", "coverage"]) - for row in zip(user1_ids, user2_ids, question_ids, scores, weights): + writer.writerow( + ["user1", "user2", "questionid", "score", "coverage", "timestamp"] + ) + for row in zip( + user1_ids, user2_ids, question_ids, scores, weights, timestamps + ): writer.writerow(row) - return (user1_ids, user2_ids, question_ids, scores, weights) + return (user1_ids, user2_ids, question_ids, scores, weights, timestamps) def get_avg_scores( @@ -607,9 +638,10 @@ def run_update_global_bot_leaderboard() -> None: # SETUP: users to evaluate & questions print("Initializing...") users: QuerySet[User] = User.objects.filter( - metadata__bot_details__metac_bot=True, - metadata__bot_details__include_in_calculations=True, - metadata__bot_details__display_in_leaderboard=True, + is_bot=True, + # metadata__bot_details__metac_bot=True, + # metadata__bot_details__include_in_calculations=True, + # metadata__bot_details__display_in_leaderboard=True, is_active=True, ).order_by("id") user_forecast_exists = Forecast.objects.filter( @@ -617,14 +649,9 @@ def run_update_global_bot_leaderboard() -> None: ) questions: QuerySet[Question] = ( Question.objects.filter( - Q( - related_posts__post__default_project__default_permission__in=[ - "viewer", - "forecaster", - ] - ) + Q(post__default_project__default_permission__in=["viewer", "forecaster"]) | Q( - related_posts__post__default_project_id__in=[ + post__default_project_id__in=[ 3349, # aib q3 2024 32506, # aib q4 2024 32627, # aib q1 2025 @@ -632,11 +659,11 @@ def run_update_global_bot_leaderboard() -> None: 32813, # aib fall 2025 ] ), - related_posts__post__curation_status=Post.CurationStatus.APPROVED, + post__curation_status=Post.CurationStatus.APPROVED, resolution__isnull=False, scheduled_close_time__lte=timezone.now(), ) - .exclude(related_posts__post__default_project__slug__startswith="minibench") + .exclude(post__default_project__slug__startswith="minibench") 
.exclude(resolution__in=UnsuccessfulResolutionType) .filter(Exists(user_forecast_exists)) .prefetch_related( # only prefetch forecasts from those users @@ -659,17 +686,26 @@ def run_update_global_bot_leaderboard() -> None: i += 1 print(i, "/", c, end="\r") scored_question_counts[user.id] = ( - Score.objects.filter(user=user, question__in=question_list) + Score.objects.filter( + user=user, + score_type="peer", + question__in=question_list, + ) .distinct("question_id") .count() ) excluded_ids = [uid for uid, count in scored_question_counts.items() if count < 100] users = users.exclude(id__in=excluded_ids) ############### + print(f"Filtered {c} users down to {users.count()}.") print("Initializing... DONE") # Gather head to head scores - user1_ids, user2_ids, question_ids, scores, weights = gather_data(users, questions) + user1_ids, user2_ids, question_ids, scores, weights, timestamps = gather_data( + users, questions + ) + + # TODO: set up support for yearly updates for all non-metac bots # choose baseline player if not already chosen if not baseline_player: From fd222894770ec3602bda209c2d221b0ff8fc67f2 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 1 Feb 2026 13:12:42 -0800 Subject: [PATCH 04/10] bug fixes --- .../commands/update_global_bot_leaderboard.py | 180 +++++++++++++----- 1 file changed, 136 insertions(+), 44 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index 615ea20b67..e47a38ffff 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -1,5 +1,6 @@ import random from collections import defaultdict +import csv from pathlib import Path from datetime import datetime, timedelta, timezone as dt_timezone @@ -116,33 +117,18 @@ def gather_data( timestamps: list[float] = [] if cache: csv_path = Path("HtH_score_data.csv") - if csv_path.exists(): - userset = set([str(u.id) for u in users]) | { - "Pro Aggregate", - "Community Aggregate", - } - import csv - - def _deserialize_user(value: str) -> int | str: - value = value.strip() - if not value: - return value - try: - return int(value) - except ValueError: - return value - - with csv_path.open() as input_file: - reader = csv.DictReader(input_file) - for row in reader: - if (row["user1"] in userset) and (row["user2"] in userset): - user1_ids.append(_deserialize_user(row["user1"])) - user2_ids.append(_deserialize_user(row["user2"])) - question_ids.append(int(row["questionid"])) - scores.append(float(row["score"])) - coverages.append(float(row["coverage"])) - timestamps.append(float(row["timestamp"])) + if not csv_path.exists(): + with csv_path.open("w") as output_file: + writer = csv.writer(output_file) + writer.writerow( + ["user1", "user2", "questionid", "score", "coverage", "timestamp"] + ) + with csv_path.open("r") as input_file: + reader = csv.DictReader(input_file) + for row in reader: + question_ids.append(int(row["questionid"])) cached_question_ids = set(question_ids) + question_ids = [] # TODO: make authoritative mapping print("creating AIB <> Pro AIB question mapping...", end="\r") @@ -184,12 +170,13 @@ def _deserialize_user(value: str) -> int | str: print("creating AIB <> Pro AIB question mapping...DONE\n") # user_ids = users.values_list("id", flat=True) - user_id_map = {user.id: user for user in users} - print("Processing Pairwise Scoring:") - print("| Question | ID | Pairing | Duration | Est. 
Duration |") t0 = datetime.now() question_count = len(questions) - for question_number, question in enumerate(questions.iterator(chunk_size=10), 1): + questions = list(questions) + cache_interval = 100 + print("Processing Pairwise Scoring:") + print("| Question | ID | Pairing | Duration | Est. Duration |") + for question_number, question in enumerate(questions, 1): # TODO: cache results every ~100 questions, clearing lists of values question_print_str = ( f"\033[K" @@ -202,7 +189,7 @@ def _deserialize_user(value: str) -> int | str: est_duration = duration / question_number * question_count print( f"{question_print_str}" - f"| {"N":>5}/{"A":<5} " + f"| {'N':>5}/{'A':<5} " f"| {duration} " f"| {est_duration} " "|", @@ -306,23 +293,56 @@ def _deserialize_user(value: str) -> int | str: scores.append(u1s) coverages.append(cov) timestamps.append(question.actual_resolve_time.timestamp()) + if cache and question_number % cache_interval == 0: + print(f"\nCaching {len(user1_ids)} matches...") + with csv_path.open("a") as output_file: + writer = csv.writer(output_file) + for row in zip( + user1_ids, user2_ids, question_ids, scores, coverages, timestamps + ): + writer.writerow(row) + user1_ids = [] + user2_ids = [] + question_ids = [] + scores = [] + coverages = [] + timestamps = [] print("\n") - weights = coverages if cache: - import csv - - with open("HtH_score_data.csv", "w") as output_file: + with csv_path.open("a") as output_file: writer = csv.writer(output_file) - writer.writerow( - ["user1", "user2", "questionid", "score", "coverage", "timestamp"] - ) for row in zip( - user1_ids, user2_ids, question_ids, scores, weights, timestamps + user1_ids, user2_ids, question_ids, scores, coverages, timestamps ): writer.writerow(row) - - return (user1_ids, user2_ids, question_ids, scores, weights, timestamps) + user1_ids = [] + user2_ids = [] + question_ids = [] + scores = [] + coverages = [] + timestamps = [] + + def _deserialize_user(value: str) -> int | str: + value = value.strip() + if not value: + return value + try: + return int(value) + except ValueError: + return value + + with csv_path.open("r") as input_file: + reader = csv.DictReader(input_file) + for row in reader: + user1_ids.append(_deserialize_user(row["user1"])) + user2_ids.append(_deserialize_user(row["user2"])) + question_ids.append(int(row["questionid"])) + scores.append(float(row["score"])) + coverages.append(float(row["coverage"])) + timestamps.append(float(row["timestamp"])) + + return (user1_ids, user2_ids, question_ids, scores, coverages, timestamps) def get_avg_scores( @@ -434,7 +454,7 @@ def estimate_variances_from_head_to_head( ) print(f"σ_true (skill variance): {skill_variance:.4f}") print(f"alpha = (σ_error / σ_true)² = {alpha:.4f}") - return 2 + return alpha def compute_skills( @@ -610,7 +630,7 @@ def bootstrap_skills( boot_skills = get_skills( user1_ids=boot_user1_ids, user2_ids=boot_user2_ids, - question_ids=question_ids, + question_ids=boot_question_ids, scores=boot_scores, weights=boot_weights, baseline_player=baseline_player, @@ -705,7 +725,79 @@ def run_update_global_bot_leaderboard() -> None: users, questions ) - # TODO: set up support for yearly updates for all non-metac bots + # for pro aggregation, community aggregate, and any non-metac bot, + # duplicate rows indicating year-specific achievements + user_map = {user.id: user for user in users} + user_map["Pro Aggregate"] = "Pro Aggregate" + user_map["Community Aggregate"] = "Community Aggregate" + new_rows = [] + for user1_id, user2_id, question_id, score, weight, 
timestamp in zip( + user1_ids, user2_ids, question_ids, scores, weights, timestamps + ): + user1 = user_map[user1_id] + if isinstance(user1, User): + if ( + not (user1.metadata or dict()) + .get("bot_details", dict()) + .get("metac_bot") + ): + # non-metac bot + time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + new_rows.append( + ( + f"{user1.username} {time.year}", + user2_id, + question_id, + score, + weight, + timestamp, + ) + ) + else: + # aggregation methods + time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + new_rows.append( + ( + f"{user1} {time.year}", + user2_id, + question_id, + score, + weight, + timestamp, + ) + ) + user2 = user_map[user2_id] + if isinstance(user2, User): + if not ( + not (user2.metadata or dict()) + .get("bot_details", dict()) + .get("metac_bot") + ): + # non-metac bot + time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + new_rows.append( + ( + user1_id, + f"{user2.username} {time.year}", + question_id, + -score, + weight, + timestamp, + ) + ) + else: + # aggregation methods + time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + new_rows.append( + ( + user1_id, + f"{user2} {time.year}", + question_id, + -score, + weight, + timestamp, + ) + ) # choose baseline player if not already chosen if not baseline_player: From 33ebff2a6853e193aea3461a6c4f54c8bdde8e91 Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 4 Feb 2026 08:48:02 -0800 Subject: [PATCH 05/10] save work --- .../commands/update_global_bot_leaderboard.py | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index e47a38ffff..4f9a5f6757 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -255,6 +255,7 @@ def gather_data( pass elif question in aib_question_map: # set the last aggregate to be the one that gets scored + # TODO: instead grab the aggregate that was live at spot scoring time forecast = aggregate_forecasts[-1] forecast.start_time = question.get_spot_scoring_time() - timedelta( seconds=1 @@ -691,7 +692,7 @@ def run_update_global_bot_leaderboard() -> None: "user_forecasts", queryset=Forecast.objects.filter(author__in=users) ) ) - .order_by("id") + .order_by("?") .distinct("id") ) ############### @@ -721,7 +722,7 @@ def run_update_global_bot_leaderboard() -> None: print("Initializing... 
DONE") # Gather head to head scores - user1_ids, user2_ids, question_ids, scores, weights, timestamps = gather_data( + user1_ids, user2_ids, question_ids, scores, coverages, timestamps = gather_data( users, questions ) @@ -731,8 +732,8 @@ def run_update_global_bot_leaderboard() -> None: user_map["Pro Aggregate"] = "Pro Aggregate" user_map["Community Aggregate"] = "Community Aggregate" new_rows = [] - for user1_id, user2_id, question_id, score, weight, timestamp in zip( - user1_ids, user2_ids, question_ids, scores, weights, timestamps + for user1_id, user2_id, question_id, score, coverage, timestamp in zip( + user1_ids, user2_ids, question_ids, scores, coverages, timestamps ): user1 = user_map[user1_id] if isinstance(user1, User): @@ -749,7 +750,7 @@ def run_update_global_bot_leaderboard() -> None: user2_id, question_id, score, - weight, + coverage, timestamp, ) ) @@ -762,13 +763,13 @@ def run_update_global_bot_leaderboard() -> None: user2_id, question_id, score, - weight, + coverage, timestamp, ) ) user2 = user_map[user2_id] if isinstance(user2, User): - if not ( + if ( not (user2.metadata or dict()) .get("bot_details", dict()) .get("metac_bot") @@ -781,7 +782,7 @@ def run_update_global_bot_leaderboard() -> None: f"{user2.username} {time.year}", question_id, -score, - weight, + coverage, timestamp, ) ) @@ -794,10 +795,17 @@ def run_update_global_bot_leaderboard() -> None: f"{user2} {time.year}", question_id, -score, - weight, + coverage, timestamp, ) ) + for user1_id, user2_id, question_id, score, coverage, timestamp in new_rows: + user1_ids.append(user1_id) + user2_ids.append(user2_id) + question_ids.append(question_id) + scores.append(score) + coverages.append(coverage) + timestamps.append(timestamp) # choose baseline player if not already chosen if not baseline_player: @@ -805,7 +813,7 @@ def run_update_global_bot_leaderboard() -> None: set(user1_ids) | set(user2_ids), key=(user1_ids + user2_ids).count ) # get variance of average scores (used in rescaling) - avg_scores = get_avg_scores(user1_ids, user2_ids, scores, weights) + avg_scores = get_avg_scores(user1_ids, user2_ids, scores, coverages) var_avg_scores = ( np.var(np.array(list(avg_scores.values()))) if len(avg_scores) > 1 else 0 ) @@ -816,7 +824,7 @@ def run_update_global_bot_leaderboard() -> None: user2_ids=user2_ids, question_ids=question_ids, scores=scores, - weights=weights, + weights=coverages, baseline_player=baseline_player, var_avg_scores=var_avg_scores, verbose=False, @@ -828,7 +836,7 @@ def run_update_global_bot_leaderboard() -> None: user2_ids, question_ids, scores, - weights, + coverages, var_avg_scores, baseline_player=baseline_player, bootstrap_iterations=bootstrap_iterations, @@ -870,8 +878,8 @@ def run_update_global_bot_leaderboard() -> None: excluded = False if isinstance(uid, int): user = User.objects.get(id=uid) - bot_details = user.metadata["bot_details"] - if not bot_details.get("display_in_leaderboard"): + bot_details = (user.metadata or dict()).get("bot_details") + if bot_details and not bot_details.get("display_in_leaderboard"): excluded = True entry: LeaderboardEntry = entry_dict.pop(uid, LeaderboardEntry()) From fb72e119da70477f1778f880e3f7279610dd8aad Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 4 Feb 2026 09:14:43 -0800 Subject: [PATCH 06/10] save work --- .../commands/update_global_bot_leaderboard.py | 212 +++++++++--------- utils/the_math/aggregations.py | 9 +- 2 files changed, 112 insertions(+), 109 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py 
b/scoring/management/commands/update_global_bot_leaderboard.py index 4f9a5f6757..3dc61b3700 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -246,7 +246,7 @@ def gather_data( aggregate_forecasts = get_aggregation_history( human_question, [aggregation_method], - minimize=True, + minimize=100, include_stats=False, include_bots=False, include_future=False, @@ -293,7 +293,7 @@ def gather_data( question_ids.append(q) scores.append(u1s) coverages.append(cov) - timestamps.append(question.actual_resolve_time.timestamp()) + timestamps.append(question.actual_close_time.timestamp()) if cache and question_number % cache_interval == 0: print(f"\nCaching {len(user1_ids)} matches...") with csv_path.open("a") as output_file: @@ -695,30 +695,30 @@ def run_update_global_bot_leaderboard() -> None: .order_by("?") .distinct("id") ) - ############### - # make sure they have at least 100 resolved questions - print("initialize list") - question_list = list(questions) - print("Filtering users.") - scored_question_counts: dict[int, int] = defaultdict(int) - c = users.count() - i = 0 - for user in users: - i += 1 - print(i, "/", c, end="\r") - scored_question_counts[user.id] = ( - Score.objects.filter( - user=user, - score_type="peer", - question__in=question_list, - ) - .distinct("question_id") - .count() - ) - excluded_ids = [uid for uid, count in scored_question_counts.items() if count < 100] - users = users.exclude(id__in=excluded_ids) - ############### - print(f"Filtered {c} users down to {users.count()}.") + # ############### + # # make sure they have at least 100 resolved questions + # print("initialize list") + # question_list = list(questions) + # print("Filtering users.") + # scored_question_counts: dict[int, int] = defaultdict(int) + # c = users.count() + # i = 0 + # for user in users: + # i += 1 + # print(i, "/", c, end="\r") + # scored_question_counts[user.id] = ( + # Score.objects.filter( + # user=user, + # score_type="peer", + # question__in=question_list, + # ) + # .distinct("question_id") + # .count() + # ) + # excluded_ids = [uid for uid, count in scored_question_counts.items() if count < 100] + # users = users.exclude(id__in=excluded_ids) + # print(f"Filtered {c} users down to {users.count()}.") + # ############### print("Initializing... 
DONE") # Gather head to head scores @@ -726,86 +726,86 @@ def run_update_global_bot_leaderboard() -> None: users, questions ) - # for pro aggregation, community aggregate, and any non-metac bot, - # duplicate rows indicating year-specific achievements - user_map = {user.id: user for user in users} - user_map["Pro Aggregate"] = "Pro Aggregate" - user_map["Community Aggregate"] = "Community Aggregate" - new_rows = [] - for user1_id, user2_id, question_id, score, coverage, timestamp in zip( - user1_ids, user2_ids, question_ids, scores, coverages, timestamps - ): - user1 = user_map[user1_id] - if isinstance(user1, User): - if ( - not (user1.metadata or dict()) - .get("bot_details", dict()) - .get("metac_bot") - ): - # non-metac bot - time = datetime.fromtimestamp(timestamp, dt_timezone.utc) - new_rows.append( - ( - f"{user1.username} {time.year}", - user2_id, - question_id, - score, - coverage, - timestamp, - ) - ) - else: - # aggregation methods - time = datetime.fromtimestamp(timestamp, dt_timezone.utc) - new_rows.append( - ( - f"{user1} {time.year}", - user2_id, - question_id, - score, - coverage, - timestamp, - ) - ) - user2 = user_map[user2_id] - if isinstance(user2, User): - if ( - not (user2.metadata or dict()) - .get("bot_details", dict()) - .get("metac_bot") - ): - # non-metac bot - time = datetime.fromtimestamp(timestamp, dt_timezone.utc) - new_rows.append( - ( - user1_id, - f"{user2.username} {time.year}", - question_id, - -score, - coverage, - timestamp, - ) - ) - else: - # aggregation methods - time = datetime.fromtimestamp(timestamp, dt_timezone.utc) - new_rows.append( - ( - user1_id, - f"{user2} {time.year}", - question_id, - -score, - coverage, - timestamp, - ) - ) - for user1_id, user2_id, question_id, score, coverage, timestamp in new_rows: - user1_ids.append(user1_id) - user2_ids.append(user2_id) - question_ids.append(question_id) - scores.append(score) - coverages.append(coverage) - timestamps.append(timestamp) + # # for pro aggregation, community aggregate, and any non-metac bot, + # # duplicate rows indicating year-specific achievements + # user_map = {user.id: user for user in users} + # user_map["Pro Aggregate"] = "Pro Aggregate" + # user_map["Community Aggregate"] = "Community Aggregate" + # new_rows = [] + # for user1_id, user2_id, question_id, score, coverage, timestamp in zip( + # user1_ids, user2_ids, question_ids, scores, coverages, timestamps + # ): + # user1 = user_map[user1_id] + # if isinstance(user1, User): + # if ( + # not (user1.metadata or dict()) + # .get("bot_details", dict()) + # .get("metac_bot") + # ): + # # non-metac bot + # time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + # new_rows.append( + # ( + # f"{user1.username} {time.year}", + # user2_id, + # question_id, + # score, + # coverage, + # timestamp, + # ) + # ) + # else: + # # aggregation methods + # time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + # new_rows.append( + # ( + # f"{user1} {time.year}", + # user2_id, + # question_id, + # score, + # coverage, + # timestamp, + # ) + # ) + # user2 = user_map[user2_id] + # if isinstance(user2, User): + # if ( + # not (user2.metadata or dict()) + # .get("bot_details", dict()) + # .get("metac_bot") + # ): + # # non-metac bot + # time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + # new_rows.append( + # ( + # user1_id, + # f"{user2.username} {time.year}", + # question_id, + # -score, + # coverage, + # timestamp, + # ) + # ) + # else: + # # aggregation methods + # time = datetime.fromtimestamp(timestamp, dt_timezone.utc) + # 
new_rows.append( + # ( + # user1_id, + # f"{user2} {time.year}", + # question_id, + # -score, + # coverage, + # timestamp, + # ) + # ) + # for user1_id, user2_id, question_id, score, coverage, timestamp in new_rows: + # user1_ids.append(user1_id) + # user2_ids.append(user2_id) + # question_ids.append(question_id) + # scores.append(score) + # coverages.append(coverage) + # timestamps.append(timestamp) # choose baseline player if not already chosen if not baseline_player: diff --git a/utils/the_math/aggregations.py b/utils/the_math/aggregations.py index 4d920961e7..afc0c835c0 100644 --- a/utils/the_math/aggregations.py +++ b/utils/the_math/aggregations.py @@ -907,7 +907,7 @@ def minimize_history( def get_user_forecast_history( forecasts: Sequence[Forecast], - minimize: bool = False, + minimize: bool | int = False, cutoff: datetime | None = None, ) -> list[ForecastSet]: timestep_set: set[datetime] = set() @@ -919,7 +919,10 @@ def get_user_forecast_history( timestep_set.add(forecast.end_time) timesteps = sorted(timestep_set) if minimize: - timesteps = minimize_history(timesteps) + if isinstance(minimize, int): + timesteps = minimize_history(timesteps, minimize) + else: + timesteps = minimize_history(timesteps) forecast_sets: dict[datetime, ForecastSet] = { timestep: ForecastSet( forecasts_values=[], @@ -951,7 +954,7 @@ def get_aggregation_history( aggregation_methods: list[AggregationMethod], forecasts: QuerySet[Forecast] | None = None, only_include_user_ids: list[int] | set[int] | None = None, - minimize: bool = True, + minimize: bool | int = True, include_stats: bool = True, include_bots: bool = False, histogram: bool | None = None, From b7714cb0847e9e2a46fb193ae02402b7b90f5cde Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 4 Feb 2026 11:20:15 -0800 Subject: [PATCH 07/10] save work --- .../commands/update_global_bot_leaderboard.py | 83 +++++++++++++++++-- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index 3dc61b3700..81120ee66b 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -105,7 +105,7 @@ def get_score_pair( def gather_data( users: QuerySet[User], questions: QuerySet[Question], - cache: bool = True, + cache_use: str | None = "partial", ) -> tuple[ list[int | str], list[int | str], list[int], list[float], list[float], list[float] ]: @@ -115,7 +115,31 @@ def gather_data( scores: list[float] = [] coverages: list[float] = [] timestamps: list[float] = [] - if cache: + if cache_use == "full": + # load all from cache, don't calculate more + def _deserialize_user(value: str) -> int | str: + value = value.strip() + if not value: + return value + try: + return int(value) + except ValueError: + return value + + csv_path = Path("HtH_score_data.csv") + with csv_path.open("r") as input_file: + reader = csv.DictReader(input_file) + for row in reader: + user1_ids.append(_deserialize_user(row["user1"])) + user2_ids.append(_deserialize_user(row["user2"])) + question_ids.append(int(row["questionid"])) + scores.append(float(row["score"])) + coverages.append(float(row["coverage"])) + timestamps.append(float(row["timestamp"])) + + return (user1_ids, user2_ids, question_ids, scores, coverages, timestamps) + + if cache_use == "partial": csv_path = Path("HtH_score_data.csv") if not csv_path.exists(): with csv_path.open("w") as output_file: @@ -171,7 +195,7 @@ def gather_data( # 
user_ids = users.values_list("id", flat=True) t0 = datetime.now() - question_count = len(questions) + question_count = questions.count() questions = list(questions) cache_interval = 100 print("Processing Pairwise Scoring:") @@ -294,7 +318,7 @@ def gather_data( scores.append(u1s) coverages.append(cov) timestamps.append(question.actual_close_time.timestamp()) - if cache and question_number % cache_interval == 0: + if cache_use and question_number % cache_interval == 0: print(f"\nCaching {len(user1_ids)} matches...") with csv_path.open("a") as output_file: writer = csv.writer(output_file) @@ -310,7 +334,7 @@ def gather_data( timestamps = [] print("\n") - if cache: + if cache_use: with csv_path.open("a") as output_file: writer = csv.writer(output_file) for row in zip( @@ -652,7 +676,9 @@ def bootstrap_skills( return ci_lower, ci_upper -def run_update_global_bot_leaderboard() -> None: +def run_update_global_bot_leaderboard( + cache_use: str = "partial", +) -> None: baseline_player: int | str = 236038 # metac-gpt-4o+asknews bootstrap_iterations = 30 @@ -692,8 +718,8 @@ def run_update_global_bot_leaderboard() -> None: "user_forecasts", queryset=Forecast.objects.filter(author__in=users) ) ) - .order_by("?") .distinct("id") + # .order_by("?") ) # ############### # # make sure they have at least 100 resolved questions @@ -723,7 +749,7 @@ def run_update_global_bot_leaderboard() -> None: # Gather head to head scores user1_ids, user2_ids, question_ids, scores, coverages, timestamps = gather_data( - users, questions + users, questions, cache_use=cache_use ) # # for pro aggregation, community aggregate, and any non-metac bot, @@ -807,6 +833,45 @@ def run_update_global_bot_leaderboard() -> None: # coverages.append(coverage) # timestamps.append(timestamp) + # ############### + # Filter out entries we don't care about + print(f"Filtering {len(timestamps)} matches down to only relevant identities ...") + relevant_identities = set( + User.objects.filter( + metadata__bot_details__metac_bot=True, + metadata__bot_details__include_in_calculations=True, + metadata__bot_details__display_in_leaderboard=True, + is_active=True, + ).values_list("id", flat=True) + ) | { + "Pro Aggregate", + "Community Aggregate", + } + filtered_user1_ids = [] + filtered_user2_ids = [] + filtered_question_ids = [] + filtered_scores = [] + filtered_coverages = [] + filtered_timestamps = [] + for u1id, u2id, qid, score, coverage, timestamp in zip( + user1_ids, user2_ids, question_ids, scores, coverages, timestamps + ): + if u1id in relevant_identities and u2id in relevant_identities: + filtered_user1_ids.append(u1id) + filtered_user2_ids.append(u2id) + filtered_question_ids.append(qid) + filtered_scores.append(score) + filtered_coverages.append(coverage) + filtered_timestamps.append(timestamp) + user1_ids = filtered_user1_ids + user2_ids = filtered_user2_ids + question_ids = filtered_question_ids + scores = filtered_scores + coverages = filtered_coverages + timestamps = filtered_timestamps + print(f"Filtered down to {len(timestamps)} matches.\n") + # ############### + # choose baseline player if not already chosen if not baseline_player: baseline_player = max( @@ -1034,4 +1099,4 @@ class Command(BaseCommand): """ def handle(self, *args, **options) -> None: - run_update_global_bot_leaderboard() + run_update_global_bot_leaderboard(cache_use="full") From 64d2d7bf4d513ab9078214739168aa1458dbf87f Mon Sep 17 00:00:00 2001 From: lsabor Date: Thu, 5 Feb 2026 07:31:18 -0800 Subject: [PATCH 08/10] save work --- 
.../commands/update_global_bot_leaderboard.py | 106 ++++++++++-------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py index 81120ee66b..64ba387813 100644 --- a/scoring/management/commands/update_global_bot_leaderboard.py +++ b/scoring/management/commands/update_global_bot_leaderboard.py @@ -44,6 +44,7 @@ def get_score_pair( geometric_means = get_geometric_means(forecasts) if question.default_score_type == ScoreTypes.PEER: + breakpoint() # Coverage coverage = 0.0 cvs = [] @@ -79,10 +80,12 @@ def get_score_pair( if gm.timestamp <= spot_forecast_timestamp <= current_timestamp: if gm.num_forecasters == 2: # both have a forecast at spot scoring time - coverage = 1 / 3 # downweight spot score questions + coverage = 1.0 + # coverage = 1 / 3 # downweight spot score questions break current_timestamp = gm.timestamp if coverage == 0: + breakpoint() return None user1_scores = evaluate_forecasts_peer_spot_forecast( forecasts=user1_forecasts, # only evaluate user1 (user2 is opposite) @@ -225,30 +228,8 @@ def _deserialize_user(value: str) -> int | str: defaultdict(list) ) # bot forecasts - old_bot_ids: set[int] = set() - for user in users: - base_models = ( - (user.metadata or dict()) - .get("bot_details", dict()) - .get("base_models", []) - ) - # don't include bots on question that resolved 1 year or more - # after model release - primary_base_model = None if not base_models else base_models[0] - if not primary_base_model: - continue - if release_date := primary_base_model.get("model_release_date"): - if len(release_date) == 7: - release_date += "-01" - release = datetime.fromisoformat(release_date).replace( - tzinfo=dt_timezone.utc - ) - if question.resolution_set_time > release + timedelta(days=365): - old_bot_ids.add(user.id) - bot_forecasts = ( - question.user_forecasts.filter(author_id__in=user_ids) - .exclude(author_id__in=old_bot_ids) - .order_by("start_time") + bot_forecasts = question.user_forecasts.filter(author_id__in=user_ids).order_by( + "start_time" ) for f in bot_forecasts: forecast_dict[f.author_id].append(f) @@ -689,7 +670,6 @@ def run_update_global_bot_leaderboard( # metadata__bot_details__metac_bot=True, # metadata__bot_details__include_in_calculations=True, # metadata__bot_details__display_in_leaderboard=True, - is_active=True, ).order_by("id") user_forecast_exists = Forecast.objects.filter( question_id=OuterRef("pk"), author__in=users @@ -836,14 +816,13 @@ def run_update_global_bot_leaderboard( # ############### # Filter out entries we don't care about print(f"Filtering {len(timestamps)} matches down to only relevant identities ...") - relevant_identities = set( - User.objects.filter( - metadata__bot_details__metac_bot=True, - metadata__bot_details__include_in_calculations=True, - metadata__bot_details__display_in_leaderboard=True, - is_active=True, - ).values_list("id", flat=True) - ) | { + metac_bots = User.objects.filter( + metadata__bot_details__metac_bot=True, + # metadata__bot_details__include_in_calculations=True, # TODO: this should be + # but we don't have that data correct at the moment + ) + user_map = {user.id: user for user in metac_bots} + relevant_identities = set(metac_bots.values_list("id", flat=True)) | { "Pro Aggregate", "Community Aggregate", } @@ -856,6 +835,31 @@ def run_update_global_bot_leaderboard( for u1id, u2id, qid, score, coverage, timestamp in zip( user1_ids, user2_ids, question_ids, scores, coverages, timestamps ): 
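+        # NOTE: `timestamp` comes from the cached gather_data rows (written from
+        # question.actual_close_time); cache rows written before that change may
+        # still hold actual_resolve_time values.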
From ff1ce67b696e491d62c02f2d3a325b3de5ac0cf0 Mon Sep 17 00:00:00 2001
From: lsabor
Date: Thu, 5 Feb 2026 13:20:58 -0800
Subject: [PATCH 09/10] save work

---
 .../commands/update_global_bot_leaderboard.py | 95 +++++++++++--------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py
index 64ba387813..2667e07ae7 100644
--- a/scoring/management/commands/update_global_bot_leaderboard.py
+++ b/scoring/management/commands/update_global_bot_leaderboard.py
@@ -44,7 +44,6 @@ def get_score_pair(
     geometric_means = get_geometric_means(forecasts)

     if question.default_score_type == ScoreTypes.PEER:
-        breakpoint()
         # Coverage
         coverage = 0.0
         cvs = []
@@ -80,8 +79,7 @@ def get_score_pair(
             if gm.timestamp <= spot_forecast_timestamp <= current_timestamp:
                 if gm.num_forecasters == 2:
                     # both have a forecast at spot scoring time
-                    coverage = 1.0
-                    # coverage = 1 / 3  # downweight spot score questions
+                    coverage = 1 / 3  # downweight spot score questions
                     break
             current_timestamp = gm.timestamp
         if coverage == 0:
@@ -699,32 +697,7 @@ def run_update_global_bot_leaderboard(
            )
        )
        .distinct("id")
-        # .order_by("?")
    )
-    # ###############
-    # # make sure they have at least 100 resolved questions
-    # print("initialize list")
-    # question_list = list(questions)
-    # print("Filtering users.")
-    # scored_question_counts: dict[int, int] = defaultdict(int)
-    # c = users.count()
-    # i = 0
-    # for user in users:
-    #     i += 1
-    #     print(i, "/", c, end="\r")
-    #     scored_question_counts[user.id] = (
-    #         Score.objects.filter(
-    #             user=user,
-    #             score_type="peer",
-    #             question__in=question_list,
-    #         )
-    #         .distinct("question_id")
-    #         .count()
-    #     )
-    # excluded_ids = [uid for uid, count in scored_question_counts.items() if count < 100]
-    # users = users.exclude(id__in=excluded_ids)
-    # print(f"Filtered {c} users down to {users.count()}.")
-    # ###############
    print("Initializing... DONE")

    # Gather head to head scores
@@ -815,14 +788,48 @@ def run_update_global_bot_leaderboard(

    # ###############
    # Filter out entries we don't care about
+    # and map some users to other users
+    userid_mapping = {
+        189585: 236038,  # mf-bot-1 -> metac-gpt-4o+asknews
+        189588: 236041,  # mf-bot-3 -> metac-claude-3-5-sonnet-20240620+asknews
+        208405: 240416,  # mf-bot-4 -> metac-o1-preview
+        221727: 236040,  # mf-bot-5 -> metac-claude-3-5-sonnet-latest+asknews
+    }
    print(f"Filtering {len(timestamps)} matches down to only relevant identities ...")
-    metac_bots = User.objects.filter(
+    relevant_users = User.objects.filter(
        metadata__bot_details__metac_bot=True,
        # metadata__bot_details__include_in_calculations=True,  # TODO: this should be enabled,
        # but we don't have that data correct at the moment
    )
+    ###############
+    # make sure they have at least 'minimum_resolved_questions' resolved questions
+    print("Filtering users.")
+    minimum_resolved_questions = 100
+    scored_question_counts: dict[int, int] = defaultdict(int)
+    c = relevant_users.count()
+    i = 0
+    for user in relevant_users:
+        i += 1
+        print(i, "/", c, end="\r")
+        scored_question_counts[user.id] = (
+            Score.objects.filter(
+                user=user,
+                score_type="peer",
+                question__in=questions,
+            )
+            .distinct("question_id")
+            .count()
+        )
+    excluded_ids = [
+        uid
+        for uid, count in scored_question_counts.items()
+        if count < minimum_resolved_questions
+    ]
+    relevant_users = relevant_users.exclude(id__in=excluded_ids)
+    print(f"Filtered {c} users down to {relevant_users.count()}.")
+    ###############
-    user_map = {user.id: user for user in metac_bots}
-    relevant_identities = set(metac_bots.values_list("id", flat=True)) | {
+    user_map = {user.id: user for user in relevant_users}
+    relevant_identities = set(relevant_users.values_list("id", flat=True)) | {
        "Pro Aggregate",
        "Community Aggregate",
    }
@@ -835,8 +842,13 @@ def run_update_global_bot_leaderboard(
    for u1id, u2id, qid, score, coverage, timestamp in zip(
        user1_ids, user2_ids, question_ids, scores, coverages, timestamps
    ):
-        # skip if either user is not in relevant identities, or if their model
-        # is more than a year old at the question's actual close time
+        # replace user IDs according to the mapping
+        u1id = userid_mapping.get(u1id, u1id)
+        u2id = userid_mapping.get(u2id, u2id)
+        # skip if either user is not in relevant identities
+        if (u1id not in relevant_identities) or (u2id not in relevant_identities):
+            continue
+        # skip if either user's model is more than a year old at the time of 'timestamp'
        match_users = [user_map[u] for u in (u1id, u2id) if (u in user_map)]
        skip = False
        for user in match_users:
@@ -860,13 +872,14 @@ def run_update_global_bot_leaderboard(
        if skip:
            breakpoint()
            continue
-        if u1id in relevant_identities and u2id in relevant_identities:
-            filtered_user1_ids.append(u1id)
-            filtered_user2_ids.append(u2id)
-            filtered_question_ids.append(qid)
-            filtered_scores.append(score)
-            filtered_coverages.append(coverage)
-            filtered_timestamps.append(timestamp)
+
+        # passed all filters: keep the match
+        filtered_user1_ids.append(u1id)
+        filtered_user2_ids.append(u2id)
+        filtered_question_ids.append(qid)
+        filtered_scores.append(score)
+        filtered_coverages.append(coverage)
+        filtered_timestamps.append(timestamp)
    user1_ids = filtered_user1_ids
    user2_ids = filtered_user2_ids
    question_ids = filtered_question_ids
@@ -1107,4 +1120,4 @@ class Command(BaseCommand):
    """

    def handle(self, *args, **options) -> None:
-        run_update_global_bot_leaderboard(cache_use="full")
+        run_update_global_bot_leaderboard(cache_use="partial")
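The core change to the match loop in the patch above is two-stage: legacy bot IDs are first aliased to their canonical successors via userid_mapping, and only matches where both post-aliasing sides are relevant identities survive. Below is a minimal sketch of that logic over the same tuple layout; the 189585 -> 236038 pair comes from the patch, while the other IDs in the usage example are made up for illustration.

# Sketch of patch 09's alias-then-filter pass over head-to-head match tuples.
USERID_MAPPING = {189585: 236038}  # mf-bot-1 -> metac-gpt-4o+asknews (from the patch)


def filter_matches(matches, relevant_identities, userid_mapping=USERID_MAPPING):
    """matches: iterable of (u1id, u2id, qid, score, coverage, timestamp)."""
    kept = []
    for u1id, u2id, *rest in matches:
        u1id = userid_mapping.get(u1id, u1id)  # replace IDs per the mapping
        u2id = userid_mapping.get(u2id, u2id)
        # keep the match only when both aliased sides are relevant
        if u1id in relevant_identities and u2id in relevant_identities:
            kept.append((u1id, u2id, *rest))
    return kept


matches = [(189585, 999, 1, 10.0, 1.0, 0.0), (5, 6, 2, -3.0, 0.5, 0.0)]
# only the first match survives: both sides resolve to relevant identities
print(filter_matches(matches, relevant_identities={236038, 999}))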
From a0c679e5438375ba525532a0418e4c21c9e03cd3 Mon Sep 17 00:00:00 2001
From: lsabor
Date: Sat, 7 Feb 2026 10:44:55 -0800
Subject: [PATCH 10/10] save work

---
 scoring/management/commands/update_global_bot_leaderboard.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scoring/management/commands/update_global_bot_leaderboard.py b/scoring/management/commands/update_global_bot_leaderboard.py
index 2667e07ae7..85b95d249c 100644
--- a/scoring/management/commands/update_global_bot_leaderboard.py
+++ b/scoring/management/commands/update_global_bot_leaderboard.py
@@ -57,6 +57,8 @@ def get_score_pair(
             cvs.append(max(0, (end - start)) / total_duration)
             current_timestamp = gm.timestamp
         if coverage == 0:
+            # investigate!
+            breakpoint()
             return None
         user1_scores = evaluate_forecasts_peer_accuracy(
             forecasts=user1_forecasts,  # only evaluate user1 (user2 is opposite)
@@ -83,7 +85,6 @@ def get_score_pair(
                     break
             current_timestamp = gm.timestamp
         if coverage == 0:
-            breakpoint()
             return None
         user1_scores = evaluate_forecasts_peer_spot_forecast(
             forecasts=user1_forecasts,  # only evaluate user1 (user2 is opposite)
@@ -164,6 +165,7 @@ def _deserialize_user(value: str) -> int | str:
             32627,  # Q1 2025
             32721,  # Q2 2025
             32813,  # fall 2025
+            32916,  # Q1 2026
         ]
     )
     aib_to_pro_version = {
@@ -172,6 +174,7 @@ def _deserialize_user(value: str) -> int | str:
         32627: 32631,
         32721: 32761,
         32813: None,
+        32916: 32930,
     }
     aib_question_map: dict[Question, Question | None] = dict()
     for aib in aib_projects:
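The final patch registers the Q1 2026 AIB tournament (32916) and pairs it with its Pro counterpart (32930) in aib_to_pro_version. Below is a minimal sketch of how such a lookup table behaves, using only the pairs visible in the diff; the helper name pro_project_for is illustrative, and the real code goes on to build a Question-to-Question map from these project IDs.

# Pairs taken from the aib_to_pro_version table in the diff; None marks an
# AIB tournament with no pro counterpart.
AIB_TO_PRO_VERSION: dict[int, int | None] = {
    32627: 32631,  # Q1 2025
    32721: 32761,  # Q2 2025
    32813: None,   # fall 2025 had no pro tournament
    32916: 32930,  # Q1 2026
}


def pro_project_for(aib_project_id: int) -> int | None:
    # .get() returns None both for an unknown id and for a tournament that
    # genuinely has no pro version; callers treat both as "no counterpart"
    return AIB_TO_PRO_VERSION.get(aib_project_id)


assert pro_project_for(32916) == 32930
assert pro_project_for(32813) is None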