From e91eccf7244be910d1cbb06255255f87b3dc98d0 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Thu, 29 Jan 2026 22:51:39 +0000 Subject: [PATCH 1/8] Initial MVP of agent congress --- .../integration_tests/test_metaculus_api.py | 8 +- .../agents_and_tools/ai_congress/__init__.py | 41 ++ .../ai_congress/congress_member_agent.py | 402 ++++++++++++++ .../ai_congress/congress_orchestrator.py | 263 ++++++++++ .../ai_congress/data_models.py | 103 ++++ .../ai_congress/member_profiles.py | 337 ++++++++++++ forecasting_tools/front_end/Home.py | 3 + .../front_end/app_pages/chat_page.py | 4 +- .../front_end/app_pages/congress_page.py | 496 ++++++++++++++++++ 9 files changed, 1652 insertions(+), 5 deletions(-) create mode 100644 forecasting_tools/agents_and_tools/ai_congress/__init__.py create mode 100644 forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py create mode 100644 forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py create mode 100644 forecasting_tools/agents_and_tools/ai_congress/data_models.py create mode 100644 forecasting_tools/agents_and_tools/ai_congress/member_profiles.py create mode 100644 forecasting_tools/front_end/app_pages/congress_page.py diff --git a/code_tests/integration_tests/test_metaculus_api.py b/code_tests/integration_tests/test_metaculus_api.py index fdb17f2c..be2583c9 100644 --- a/code_tests/integration_tests/test_metaculus_api.py +++ b/code_tests/integration_tests/test_metaculus_api.py @@ -513,7 +513,7 @@ def test_get_conditional_questions_from_tournament(self) -> None: assert isinstance(conditional_question.question_no, BinaryQuestion) async def test_get_previous_forecast(self) -> None: - client = MetaculusClient().dev() + client = MetaculusClient() for allowed_types in {"binary", "numeric"}: api_filter = ApiFilter( allowed_types=[allowed_types], # type: ignore @@ -1365,10 +1365,10 @@ def test_all_admin_functions(self) -> None: token=token, ) question_to_create = client.get_question_by_url( - "https://dev.metaculus.com/questions/39162/" + "https://www.metaculus.com/questions/39162/" ) - project_id = 1156 # https://dev.metaculus.com/tournament/beta-testing/ - slug = "beta-testing" + project_id = 32932 # https://www.metaculus.com/tournament/benta/ + slug = "benta" # Ben testing area question_to_create.default_project_id = project_id question_to_create.tournament_slugs = [slug] diff --git a/forecasting_tools/agents_and_tools/ai_congress/__init__.py b/forecasting_tools/agents_and_tools/ai_congress/__init__.py new file mode 100644 index 00000000..f85d69af --- /dev/null +++ b/forecasting_tools/agents_and_tools/ai_congress/__init__.py @@ -0,0 +1,41 @@ +from forecasting_tools.agents_and_tools.ai_congress.congress_member_agent import ( + CongressMemberAgent, +) +from forecasting_tools.agents_and_tools.ai_congress.congress_orchestrator import ( + CongressOrchestrator, +) +from forecasting_tools.agents_and_tools.ai_congress.data_models import ( + CongressMember, + CongressSession, + CongressSessionInput, + ForecastDescription, + PolicyProposal, +) +from forecasting_tools.agents_and_tools.ai_congress.member_profiles import ( + AI_MODEL_MEMBERS, + AVAILABLE_MEMBERS, + POLITICAL_MEMBERS, + get_ai_model_members, + get_default_members, + get_member_by_name, + get_members_by_names, + get_political_members, +) + +__all__ = [ + "CongressMember", + "CongressMemberAgent", + "CongressOrchestrator", + "CongressSession", + "CongressSessionInput", + "ForecastDescription", + "PolicyProposal", + "AI_MODEL_MEMBERS", + "AVAILABLE_MEMBERS", + 
"POLITICAL_MEMBERS", + "get_ai_model_members", + "get_default_members", + "get_member_by_name", + "get_members_by_names", + "get_political_members", +] diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py new file mode 100644 index 00000000..bf5cc150 --- /dev/null +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py @@ -0,0 +1,402 @@ +from __future__ import annotations + +import logging + +from forecasting_tools.agents_and_tools.ai_congress.data_models import ( + CongressMember, + PolicyProposal, +) +from forecasting_tools.agents_and_tools.minor_tools import ( + perplexity_quick_search_high_context, + perplexity_reasoning_pro_search, + query_asknews, +) +from forecasting_tools.ai_models.agent_wrappers import AgentRunner, AgentSdkLlm, AiAgent +from forecasting_tools.ai_models.general_llm import GeneralLlm +from forecasting_tools.helpers.structure_output import structure_output +from forecasting_tools.util.misc import clean_indents + +logger = logging.getLogger(__name__) + +LONG_TIMEOUT = 480 # 8 minutes for long-running LLM calls + + +class CongressMemberAgent: + def __init__( + self, + member: CongressMember, + timeout: int = LONG_TIMEOUT, + structure_output_model: GeneralLlm | None = None, + ): + self.member = member + self.timeout = timeout + self.structure_output_model = structure_output_model or GeneralLlm( + "openrouter/openai/gpt-4.1", temperature=0.2, timeout=LONG_TIMEOUT + ) + + async def deliberate(self, policy_prompt: str) -> PolicyProposal: + instructions = self._build_agent_instructions(policy_prompt) + + agent = AiAgent( + name=f"Congress Member: {self.member.name}", + instructions=instructions, + model=AgentSdkLlm(model=self.member.ai_model), + tools=[ + perplexity_reasoning_pro_search, + query_asknews, + perplexity_quick_search_high_context, + ], + handoffs=[], + ) + + result = await AgentRunner.run( + agent, "Please begin your deliberation now.", max_turns=20 + ) + + proposal = await self._extract_proposal_from_output(result.final_output) + proposal.member = self.member + return proposal + + async def _extract_proposal_from_output(self, agent_output: str) -> PolicyProposal: + extraction_instructions = clean_indents( + """ + Extract the policy proposal from the congress member's deliberation output. + + You must extract: + 1. research_summary: The background research section (3-5 paragraphs) + 2. decision_criteria: The list of 4-6 criteria as strings + 3. forecasts: Each forecast from the appendix as a ForecastDescription object + - footnote_id: The number (1, 2, 3, etc.) + - question_title: Short title + - question_text: Full question + - resolution_criteria: How it resolves + - prediction: The probability (e.g., "35%") + - reasoning: The reasoning explanation + - key_sources: List of sources mentioned + 4. proposal_markdown: The full proposal section including Executive Summary, + Analysis, Recommendations, and Risks. Include footnote references [^1] etc. + 5. key_recommendations: The 3-5 main recommendations as a list of strings + + Be thorough in extracting all forecasts from the Forecast Appendix section. 
+ """ + ) + + proposal = await structure_output( + agent_output, + PolicyProposal, + model=self.structure_output_model, + additional_instructions=extraction_instructions, + ) + return proposal + + def _build_agent_instructions(self, policy_prompt: str) -> str: + expertise_guidance = self._get_expertise_specific_research_guidance() + question_guidance = self._get_question_generation_guidance() + + return clean_indents( + f""" + # Your Identity + + You are {self.member.name}, a {self.member.role}. + + Political Leaning: {self.member.political_leaning} + + Your Core Motivation: {self.member.general_motivation} + + Areas of Expertise: {self.member.expertise_string} + + Personality Traits: {self.member.traits_string} + + --- + + # Your Task + + You are participating in an AI Forecasting Congress to deliberate on the + following policy question: + + "{policy_prompt}" + + You must complete ALL FIVE PHASES below in order, thinking through each + carefully. Your final output will be a comprehensive policy proposal backed + by quantitative forecasts. + + IMPORTANT: Use your search tools extensively in Phases 1 and 4. Good policy + analysis requires understanding the current state of affairs and gathering + evidence for your forecasts. + + --- + + ## PHASE 1: Background Research + + Use your search tools to understand the current state of affairs related to + this policy question. Make at least 3-5 searches to gather comprehensive + information. + + Research goals: + - What is the current status quo? What policies exist today? + - What are the key stakeholders and their positions? + - What recent events or trends are relevant? + - What data and statistics are available? + - What have experts and analysts said about this topic? + - What are the main arguments for and against different approaches? + + Given your expertise in {self.member.expertise_string}, pay special attention to: + {expertise_guidance} + + After researching, write a detailed "## Research Summary" section (3-5 + paragraphs) documenting your key findings. Include specific facts, figures, + and citations from your research. + + --- + + ## PHASE 2: Decision Criteria + + Based on your values and expertise, articulate 4-6 criteria you will use to + evaluate policy options. + + Your criteria should reflect your motivation: "{self.member.general_motivation}" + + For each criterion: + - Name it clearly (e.g., "Economic Efficiency", "Equity Impact", + "Implementation Feasibility", "Risk Minimization") + - Explain why this criterion matters to you specifically given your + {self.member.political_leaning} perspective + - Describe how you would measure or evaluate success on this criterion + + Write a "## Decision Criteria" section listing your criteria in order of + importance to you. + + --- + + ## PHASE 3: Generate Forecasting Questions + + Identify 3-5 specific, concrete forecasting questions that would help inform + this policy decision. These questions should be ones where the answer + genuinely matters for deciding what to do. + + Good forecasting questions: + - Are about uncertain future events, not established facts + - Have clear resolution criteria (exactly how we'll know the answer) + - Have a specific time horizon (when we'll know) + - Are relevant to the policy decision at hand + - Cover different aspects: policy effectiveness, side effects, implementation, + political feasibility, etc. 
+ - Are neither too obvious (>90% or <10%) nor too uncertain (close to 50/50 + with no way to estimate better) + + For each question, write: + - **Question Title**: A short descriptive title + - **Full Question**: The complete, unambiguous question + - **Resolution Criteria**: Exactly what would make this resolve YES vs NO, + or how a numeric value would be measured. Be very specific. + - **Time Horizon**: When will we know the answer? + - **Why It Matters**: How does this question inform the policy decision? + + Make sure your questions reflect your unique perspective as {self.member.name}. + {question_guidance} + + Write a "## Forecasting Questions" section with your 3-5 questions. + + --- + + ## PHASE 4: Forecast Each Question + + Now forecast each question you generated. This is the most important phase. + + For EACH forecasting question: + + 1. **Additional Research**: Use your search tools to find relevant data, + base rates, expert opinions, and historical analogies. Make at least 1-2 + targeted searches per question. + + 2. **Base Rate Analysis**: What is the historical frequency of similar events? + How often do similar policies succeed or fail? + + 3. **Key Factors**: What specific factors push the probability up or down + from the base rate? List at least 3 factors in each direction. + + 4. **Bias Check**: Given your {self.member.political_leaning} perspective, + what biases might you have? How might you be over- or under-estimating? + + 5. **Final Prediction**: Give a specific probability (e.g., "35%") for + binary questions, or a distribution for numeric questions. + + 6. **Reasoning**: Write 4+ sentences explaining your reasoning, including + the key evidence and considerations. + + Be calibrated: if you're genuinely uncertain, your probabilities should + reflect that (closer to 50%). Avoid overconfidence. Consider what you + might be missing. + + Remember good forecasters put extra weight on the status quo outcome since + the world changes slowly most of the time. + + Write your forecasts inline as you work through each question. + + --- + + ## PHASE 5: Write Your Policy Proposal + + Now synthesize everything into a comprehensive policy proposal. This is + your final output. + + Structure your proposal EXACTLY as follows: + + ### Executive Summary + + A 2-3 sentence summary of your main recommendation as {self.member.name}. + What is the single most important thing policymakers should do? + + ### Analysis + + Your detailed analysis of the policy question (3-5 paragraphs), drawing on + your research and forecasts. + + CRITICAL: When you reference forecasts, use footnote format: + - In the text: "This approach has a significant chance of success (65% [^1])" + - Or: "The risk of unintended consequences is moderate (25% probability [^2])" + + The footnote number [^1], [^2], etc. corresponds to the forecast in your + appendix below. + + ### Recommendations + + Your top 3-5 specific, actionable policy recommendations. For each: + - State the recommendation clearly + - Explain why you support it given your forecasts and criteria + - Note which of your decision criteria it addresses + - Reference relevant forecasts with footnotes + + ### Risks and Uncertainties + + What could go wrong? What are you most uncertain about? 
+ - Identify the key risks of your recommendations + - Note which forecasts have the widest uncertainty + - Describe scenarios where your recommendations might backfire + - Reference relevant forecasts + + ### Forecast Appendix + + At the end, provide a structured appendix with ALL your forecasts in this + EXACT format: + + [^1] **[Question Title]** + - Question: [Full question text] + - Resolution: [Resolution criteria] + - Prediction: [Your probability, e.g., "35%"] + - Reasoning: [4+ sentences explaining your reasoning, key evidence, and + considerations] + - Sources: [Key sources used, can be URLs or source names] + + [^2] **[Question Title]** + - Question: [Full question text] + - Resolution: [Resolution criteria] + - Prediction: [Your probability] + - Reasoning: [4+ sentences] + - Sources: [Sources] + + ... continue for all forecasts ... + + --- + + # Important Reminders + + - You ARE {self.member.name}. Stay in character throughout. + - Your analysis should reflect your {self.member.political_leaning} + perspective and your expertise in {self.member.expertise_string}. + - Use your search tools extensively - good analysis requires evidence. + - Every major claim in your proposal should be backed by either research + or a forecast with a footnote. + - Be specific and quantitative wherever possible. + + Begin your deliberation now. Start with Phase 1: Background Research. + """ + ) + + def _get_expertise_specific_research_guidance(self) -> str: + expertise_to_guidance = { + "statistics": "- Statistical evidence, effect sizes, confidence intervals, replication status of key findings", + "research methodology": "- Quality of evidence, study designs, potential confounders, meta-analyses", + "policy evaluation": "- Past policy experiments, natural experiments, cost-benefit analyses, program evaluations", + "economics": "- Economic data, market impacts, incentive structures, distributional effects, GDP/employment impacts", + "governance": "- Institutional constraints, separation of powers, historical precedents, constitutional issues", + "institutional design": "- How similar institutions have evolved, design tradeoffs, unintended consequences of past reforms", + "risk management": "- Tail risks, insurance markets, actuarial data, historical disasters and near-misses", + "history": "- Historical analogies, how similar situations played out, lessons from past policy failures", + "social policy": "- Social indicators, inequality metrics, demographic trends, community impacts", + "civil rights": "- Legal precedents, disparate impact data, civil liberties implications, protected classes", + "economic inequality": "- Gini coefficients, wealth distribution, mobility statistics, poverty rates", + "labor": "- Employment data, wage trends, union density, working conditions, automation impacts", + "market design": "- Auction theory, mechanism design, market failures, externalities", + "regulatory policy": "- Regulatory burden, compliance costs, enforcement challenges, capture risks", + "public choice theory": "- Voting patterns, special interest influence, bureaucratic incentives, rent-seeking", + "defense": "- Military capabilities, force posture, defense budgets, readiness metrics", + "geopolitics": "- Alliance structures, regional dynamics, great power competition, spheres of influence", + "intelligence": "- Threat assessments, intelligence community views, classified-to-unclassified information", + "military strategy": "- Deterrence theory, escalation dynamics, military doctrine, lessons from 
recent conflicts", + "diplomacy": "- Treaty frameworks, international organizations, soft power, diplomatic history", + "international relations": "- International norms, multilateral institutions, alliance commitments", + "negotiation": "- Negotiation frameworks, BATNA analysis, trust-building mechanisms", + "trade": "- Trade flows, comparative advantage, supply chains, trade agreement impacts", + "technology forecasting": "- Technology roadmaps, Moore's law analogies, adoption curves, disruption patterns", + "existential risk": "- X-risk estimates, catastrophic scenarios, risk factor analysis, mitigation strategies", + "ethics": "- Ethical frameworks, stakeholder analysis, intergenerational equity, rights-based considerations", + "AI safety": "- AI capabilities timeline, alignment challenges, governance proposals, expert surveys", + "climate science": "- Climate projections, emissions scenarios, adaptation costs, tipping points", + "public administration": "- Implementation challenges, bureaucratic capacity, interagency coordination", + "operations": "- Operational feasibility, logistics, resource requirements, scaling challenges", + "local government": "- Municipal experiences, state-level experiments, federalism considerations", + "project management": "- Project success rates, cost overruns, timeline slippage, scope creep", + } + + guidance_lines = [] + for expertise in self.member.expertise_areas: + expertise_lower = expertise.lower() + if expertise_lower in expertise_to_guidance: + guidance_lines.append(expertise_to_guidance[expertise_lower]) + else: + guidance_lines.append( + f"- Relevant data and analysis related to {expertise}" + ) + + return "\n".join(guidance_lines) + + def _get_question_generation_guidance(self) -> str: + trait_to_guidance = { + "analytical": "Focus on questions with measurable, quantifiable outcomes.", + "skeptical of anecdotes": "Ensure questions can be resolved with systematic data, not stories.", + "loves base rates": "Include at least one question about historical base rates of similar events.", + "demands citations": "Ensure resolution criteria reference specific, verifiable sources.", + "cautious": "Include questions about potential negative consequences and risks.", + "status-quo bias": "Include a question about whether the status quo will persist.", + "emphasizes second-order effects": "Include questions about indirect or downstream effects.", + "ambitious": "Include questions about the potential for transformative positive change.", + "equity-focused": "Include questions about distributional impacts across different groups.", + "impatient with incrementalism": "Include questions about timeline for meaningful change.", + "efficiency-focused": "Include questions about cost-effectiveness and resource allocation.", + "anti-regulation": "Include questions about regulatory burden and unintended consequences.", + "trusts incentives": "Include questions about how incentives will shape behavior.", + "threat-focused": "Include questions about adversary responses and security risks.", + "zero-sum thinking": "Include questions about relative gains and competitive dynamics.", + "values strength": "Include questions about deterrence effectiveness and credibility.", + "consensus-seeking": "Include questions about political feasibility and stakeholder buy-in.", + "pragmatic": "Include questions about implementation challenges and practical obstacles.", + "values relationships": "Include questions about coalition stability and trust dynamics.", + "long time 
horizons": "Include at least one question with a 10+ year time horizon.", + "concerned about tail risks": "Include questions about low-probability, high-impact scenarios.", + "philosophical": "Include questions about fundamental values and tradeoffs.", + "thinks in probabilities": "Ensure all questions have clear probabilistic interpretations.", + "implementation-focused": "Include questions about operational feasibility and execution.", + "skeptical of grand plans": "Include questions about whether ambitious plans will actually be implemented.", + "detail-oriented": "Include questions about specific mechanisms and implementation details.", + } + + guidance_lines = [] + for trait in self.member.personality_traits: + trait_lower = trait.lower() + if trait_lower in trait_to_guidance: + guidance_lines.append(f"- {trait_to_guidance[trait_lower]}") + + if guidance_lines: + return "Given your personality traits:\n" + "\n".join(guidance_lines) + return "" diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py new file mode 100644 index 00000000..9cfb1eb6 --- /dev/null +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +import asyncio +import logging +from datetime import datetime, timezone + +from forecasting_tools.agents_and_tools.ai_congress.congress_member_agent import ( + CongressMemberAgent, +) +from forecasting_tools.agents_and_tools.ai_congress.data_models import ( + CongressMember, + CongressSession, + PolicyProposal, +) +from forecasting_tools.ai_models.general_llm import GeneralLlm +from forecasting_tools.util.misc import clean_indents + +logger = logging.getLogger(__name__) + +LONG_TIMEOUT = 480 # 8 minutes for long-running LLM calls + + +class CongressOrchestrator: + def __init__( + self, + aggregation_model: str = "openrouter/anthropic/claude-sonnet-4", + ): + self.aggregation_model = aggregation_model + + async def run_session( + self, + prompt: str, + members: list[CongressMember], + ) -> CongressSession: + logger.info( + f"Starting congress session with {len(members)} members on: {prompt[:100]}..." 
+ ) + + agents = [CongressMemberAgent(m) for m in members] + + results = await asyncio.gather( + *[self._run_member_with_error_handling(a, prompt) for a in agents], + return_exceptions=False, + ) + + proposals: list[PolicyProposal] = [] + errors: list[str] = [] + + for result in results: + if isinstance(result, PolicyProposal): + proposals.append(result) + elif isinstance(result, Exception): + errors.append(str(result)) + else: + errors.append(f"Unexpected result type: {type(result)}") + + logger.info(f"Completed {len(proposals)} proposals with {len(errors)} errors") + + aggregated_report = "" + twitter_posts: list[str] = [] + + if proposals: + aggregated_report = await self._aggregate_proposals(prompt, proposals) + twitter_posts = await self._generate_twitter_posts(prompt, proposals) + + return CongressSession( + prompt=prompt, + members_participating=members, + proposals=proposals, + aggregated_report_markdown=aggregated_report, + twitter_posts=twitter_posts, + timestamp=datetime.now(timezone.utc), + errors=errors, + ) + + async def _run_member_with_error_handling( + self, + agent: CongressMemberAgent, + prompt: str, + ) -> PolicyProposal | Exception: + try: + logger.info(f"Starting deliberation for {agent.member.name}") + proposal = await agent.deliberate(prompt) + logger.info(f"Completed deliberation for {agent.member.name}") + return proposal + except Exception as e: + logger.error(f"Error in {agent.member.name}'s deliberation: {e}") + return e + + async def _aggregate_proposals( + self, + prompt: str, + proposals: list[PolicyProposal], + ) -> str: + llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT) + + proposals_text = "\n\n---\n\n".join( + [ + f"## {p.member.name} ({p.member.role})\n\n{p.get_full_markdown_with_footnotes()}" + for p in proposals + if p.member + ] + ) + + aggregation_prompt = clean_indents( + f""" + # AI Forecasting Congress: Synthesis Report + + You are synthesizing the proposals from multiple AI congress members + deliberating on the following policy question: + + "{prompt}" + + ## Individual Proposals + + {proposals_text} + + --- + + ## Your Task + + Write a comprehensive synthesis report that helps readers understand the + full range of perspectives and find actionable insights. Structure your + report as follows: + + ### Executive Summary + + A 3-4 sentence overview of: + - The key areas of agreement across members + - The most significant disagreements + - The most important forecasts that inform the debate + + ### Consensus Recommendations + + What policies do multiple members support? For each consensus area: + - State the recommendation + - List which members support it + - Include the relevant forecasts (use footnotes [^N] referencing the + Combined Forecast Appendix below) + - Note any caveats or conditions members attached + + ### Key Disagreements + + Where do members diverge and why? 
For each major disagreement: + - State the issue + - Summarize each side's position and which members hold it + - Explain how different forecasts, criteria, or values lead to different + conclusions + - Assess the crux of the disagreement + + ### Forecast Comparison + + Create a summary of how forecasts differed across members: + - Note where forecasts converged (similar probabilities) + - Highlight where forecasts diverged significantly + - Discuss what might explain the differences (different information, + different priors, different interpretations) + + ### Integrated Recommendations + + Your synthesis of the best policy path forward: + - Draw on the strongest arguments from each perspective + - Identify low-regret actions that most members would support + - Note high-uncertainty areas where more caution is warranted + - Be specific and actionable + + ### Combined Forecast Appendix + + Compile all unique forecasts from all members into a single appendix. + When members made similar forecasts, group them and note the range of + predictions. + + Format each forecast as: + + [^1] **[Question Title]** (from [Member Name]) + - Question: [Full question] + - Resolution: [Resolution criteria] + - Prediction: [Probability] + - Reasoning: [Summary of reasoning] + + Number the footnotes sequentially [^1], [^2], [^3], etc. + + --- + + Be balanced but not wishy-washy. Identify which arguments are strongest + and why. Your goal is to help decision-makers, so be clear about what + the analysis supports. + """ + ) + + return await llm.invoke(aggregation_prompt) + + async def _generate_twitter_posts( + self, + prompt: str, + proposals: list[PolicyProposal], + ) -> list[str]: + llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT) + + proposals_summary = "\n\n".join( + [ + f"**{p.member.name}** ({p.member.role}, {p.member.political_leaning}):\n" + f"Key recommendations: {', '.join(p.key_recommendations[:3])}\n" + f"Key forecasts: {'; '.join([f'{f.question_title}: {f.prediction}' for f in p.forecasts[:3]])}" + for p in proposals + if p.member + ] + ) + + twitter_prompt = clean_indents( + f""" + Based on this AI Forecasting Congress session on "{prompt}", generate + 8-12 tweet-length excerpts (max 280 characters each) highlighting + interesting patterns for a policy/tech audience on Twitter/X. 
+ + ## Proposals Summary + + {proposals_summary} + + ## Categories to Cover + + Generate tweets in these categories: + + **THE GOOD** (2-3 tweets): + - Surprising areas of consensus across different ideologies + - Innovative ideas that emerged from the deliberation + - Forecasts that challenge conventional wisdom + + **THE BAD** (2-3 tweets): + - Concerning blind spots that multiple members missed + - Problematic reasoning patterns you noticed + - Important questions that weren't addressed + + **THE UGLY** (2-3 tweets): + - Stark disagreements that reveal deep value differences + - Uncomfortable tradeoffs that the analysis surfaced + - Forecasts with wide uncertainty that matter a lot + + **THE INTERESTING** (2-3 tweets): + - Unexpected forecasts or counter-intuitive findings + - Surprising agreement between unlikely allies + - Questions where the forecasts diverged most + + ## Tweet Guidelines + + Each tweet should: + - Be self-contained and intriguing (people should want to click through) + - Reference specific forecasts when relevant (e.g., "65% probability of X") + - Attribute to the relevant congress member when applicable + - Use hooks like "Surprising:" or "The [Member] vs [Member] split:" + - Be under 280 characters + - Not include hashtags + + Return a JSON list of strings, one per tweet. + """ + ) + + try: + posts = await llm.invoke_and_return_verified_type(twitter_prompt, list[str]) + return [p[:280] for p in posts] + except Exception as e: + logger.error(f"Failed to generate twitter posts: {e}") + return [] diff --git a/forecasting_tools/agents_and_tools/ai_congress/data_models.py b/forecasting_tools/agents_and_tools/ai_congress/data_models.py new file mode 100644 index 00000000..6b43ab24 --- /dev/null +++ b/forecasting_tools/agents_and_tools/ai_congress/data_models.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, Field + +from forecasting_tools.util.jsonable import Jsonable + + +class CongressMember(BaseModel, Jsonable): + name: str + role: str + political_leaning: str + general_motivation: str + expertise_areas: list[str] + personality_traits: list[str] + ai_model: str = "openrouter/anthropic/claude-sonnet-4" + + @property + def expertise_string(self) -> str: + return ", ".join(self.expertise_areas) + + @property + def traits_string(self) -> str: + return ", ".join(self.personality_traits) + + +class ForecastDescription(BaseModel, Jsonable): + footnote_id: int = Field(description="The footnote number, e.g. 1 for [^1]") + question_title: str = Field(description="Short title for the forecast question") + question_text: str = Field(description="Full question text") + resolution_criteria: str = Field(description="How this question resolves") + prediction: str = Field( + description="The probability or distribution, e.g. '35%' or '70% Option A, 20% Option B, 10% Option C' or '10% chance less than X units, ... ,90% chance less than Y units'" + ) + reasoning: str = Field(description="2-4 sentence summary of the reasoning") + key_sources: list[str] = Field( + default_factory=list, + description="URLs or source names used. 
Ideally both as markdown links.", + ) + + def as_footnote_markdown(self) -> str: + sources_str = ", ".join(self.key_sources) if self.key_sources else "N/A" + return ( + f"[^{self.footnote_id}] **{self.question_title}**\n" + f"- Question: {self.question_text}\n" + f"- Resolution: {self.resolution_criteria}\n" + f"- Prediction: {self.prediction}\n" + f"- Reasoning: {self.reasoning}\n" + f"- Sources: {sources_str}" + ) + + +class PolicyProposal(BaseModel, Jsonable): + member: CongressMember | None = Field( + default=None, description="The congress member who created this proposal" + ) + research_summary: str = Field(description="Markdown summary of background research") + decision_criteria: list[str] = Field( + description="Prioritized criteria for this member" + ) + forecasts: list[ForecastDescription] = Field( + description="Extracted forecast details" + ) + proposal_markdown: str = Field( + description="Full proposal with footnote references [^1], [^2], etc." + ) + key_recommendations: list[str] = Field( + description="Top 3-5 actionable recommendations" + ) + + def get_full_markdown_with_footnotes(self) -> str: + footnotes = "\n\n".join(f.as_footnote_markdown() for f in self.forecasts) + return f"{self.proposal_markdown}\n\n---\n\n## Forecast Appendix\n\n{footnotes}" + + +class CongressSessionInput(BaseModel, Jsonable): + prompt: str + member_names: list[str] + + +class CongressSession(BaseModel, Jsonable): + prompt: str + members_participating: list[CongressMember] + proposals: list[PolicyProposal] + aggregated_report_markdown: str + twitter_posts: list[str] = Field(default_factory=list) + timestamp: datetime + errors: list[str] = Field(default_factory=list) + + def get_all_forecasts(self) -> list[ForecastDescription]: + all_forecasts = [] + for proposal in self.proposals: + for forecast in proposal.forecasts: + all_forecasts.append(forecast) + return all_forecasts + + def get_forecasts_by_member(self) -> dict[str, list[ForecastDescription]]: + result: dict[str, list[ForecastDescription]] = {} + for proposal in self.proposals: + member_name = proposal.member.name if proposal.member else "Unknown" + result[member_name] = proposal.forecasts + return result diff --git a/forecasting_tools/agents_and_tools/ai_congress/member_profiles.py b/forecasting_tools/agents_and_tools/ai_congress/member_profiles.py new file mode 100644 index 00000000..e5028eb4 --- /dev/null +++ b/forecasting_tools/agents_and_tools/ai_congress/member_profiles.py @@ -0,0 +1,337 @@ +from forecasting_tools.agents_and_tools.ai_congress.data_models import CongressMember + +# ============================================================================= +# POLITICAL VALUE-BASED MEMBERS +# ============================================================================= + +TRADITIONAL_CONSERVATIVE = CongressMember( + name="Sen. Burke", + role="Traditional Conservative", + political_leaning="traditional conservative", + general_motivation=( + "Believes in preserving time-tested institutions, traditional values, and " + "cultural continuity. Skeptical of rapid social change and prioritizes " + "order, family, religious liberty, and national sovereignty. Favors limited " + "government except where needed to maintain social order and national defense." 
+ ), + expertise_areas=[ + "constitutional law", + "religious freedom", + "family policy", + "national defense", + ], + personality_traits=[ + "values tradition", + "skeptical of rapid change", + "prioritizes social order", + "respects established institutions", + "emphasizes personal responsibility", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +PROGRESSIVE_REFORMER = CongressMember( + name="Rep. Warren", + role="Progressive Reformer", + political_leaning="progressive", + general_motivation=( + "Believes government should actively address systemic inequalities and " + "protect vulnerable populations. Supports strong labor protections, " + "universal social programs, corporate accountability, and using policy " + "to reduce wealth concentration and expand opportunity for all." + ), + expertise_areas=[ + "economic inequality", + "labor rights", + "healthcare policy", + "consumer protection", + ], + personality_traits=[ + "equity-focused", + "skeptical of corporate power", + "favors bold government action", + "prioritizes workers and consumers", + "impatient with incrementalism", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +LIBERTARIAN = CongressMember( + name="Rep. Paul", + role="Libertarian", + political_leaning="libertarian", + general_motivation=( + "Believes individual liberty is the highest political value. Supports " + "minimal government intervention in both economic and personal matters. " + "Trusts free markets, voluntary exchange, and individual choice over " + "centralized planning. Skeptical of both left and right authoritarianism." + ), + expertise_areas=[ + "economics", + "civil liberties", + "monetary policy", + "regulatory reform", + ], + personality_traits=[ + "values individual freedom", + "skeptical of government", + "trusts market solutions", + "consistent across issues", + "opposes paternalism", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +POPULIST_NATIONALIST = CongressMember( + name="Sen. Vance", + role="Populist Nationalist", + political_leaning="populist nationalist", + general_motivation=( + "Believes policy should prioritize the interests of working and middle-class " + "citizens over global elites, multinational corporations, and international " + "institutions. Supports economic nationalism, immigration restriction, " + "industrial policy, and skepticism of foreign entanglements." + ), + expertise_areas=[ + "trade policy", + "immigration", + "industrial policy", + "working-class economics", + ], + personality_traits=[ + "skeptical of elites", + "prioritizes national interest", + "supports economic nationalism", + "questions free trade orthodoxy", + "focuses on forgotten communities", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +NATIONAL_SECURITY_HAWK = CongressMember( + name="Sen. McCain", + role="National Security Hawk", + political_leaning="hawkish internationalist", + general_motivation=( + "Believes American strength and leadership are essential for global stability. " + "Supports robust defense spending, strong alliances, and willingness to use " + "military force to protect national interests and democratic values. " + "Views great power competition as the defining challenge of our era." 
+ ), + expertise_areas=[ + "defense policy", + "geopolitics", + "foreign affairs", + "military strategy", + ], + personality_traits=[ + "threat-focused", + "values strength", + "supports allies", + "willing to use force", + "prioritizes deterrence", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +ENVIRONMENTALIST = CongressMember( + name="Rep. Ocasio", + role="Climate and Environmental Advocate", + political_leaning="green progressive", + general_motivation=( + "Believes climate change is an existential threat requiring urgent, " + "transformative action. Supports rapid decarbonization, environmental " + "justice, and restructuring the economy around sustainability. Willing " + "to accept economic disruption to avoid catastrophic climate outcomes." + ), + expertise_areas=[ + "climate science", + "energy policy", + "environmental justice", + "green economics", + ], + personality_traits=[ + "urgency about climate", + "systems thinking", + "favors bold action", + "intergenerational focus", + "skeptical of fossil fuel industry", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +DEMOCRATIC_SOCIALIST = CongressMember( + name="Sen. Sanders", + role="Democratic Socialist", + political_leaning="democratic socialist", + general_motivation=( + "Believes capitalism produces unacceptable inequality and that democratic " + "control should extend to the economy. Supports universal public programs, " + "worker ownership, wealth redistribution, and reducing the political power " + "of billionaires and corporations." + ), + expertise_areas=[ + "wealth inequality", + "healthcare systems", + "labor movements", + "campaign finance", + ], + personality_traits=[ + "focuses on class", + "anti-billionaire", + "supports universal programs", + "consistent ideology", + "grassroots orientation", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +TECHNOCRATIC_CENTRIST = CongressMember( + name="Sec. Buttigieg", + role="Technocratic Centrist", + political_leaning="technocratic centrist", + general_motivation=( + "Believes in evidence-based policy, pragmatic problem-solving, and " + "building broad coalitions. Supports market-based solutions with " + "smart regulation, incremental reform, and policies that can actually " + "pass. Values expertise, data, and institutional competence." + ), + expertise_areas=[ + "policy analysis", + "public administration", + "infrastructure", + "data-driven governance", + ], + personality_traits=[ + "data-driven", + "pragmatic", + "coalition-builder", + "values expertise", + "incrementalist", + ], + ai_model="openrouter/anthropic/claude-sonnet-4", +) + +# ============================================================================= +# FRONTIER AI MODEL MEMBERS (Vanilla - Natural Model Behavior) +# ============================================================================= + +CLAUDE_MEMBER = CongressMember( + name="Opus 4.5 (Anthropic)", + role="AI Policy Analyst", + political_leaning="behaves as Claude naturally does", + general_motivation=( + "Analyze this policy question thoughtfully and helpfully, as Claude " + "would naturally approach it. Draw on your training to provide balanced, " + "nuanced analysis while being direct about your views and uncertainties." 
+ ), + expertise_areas=["general policy analysis"], + personality_traits=["behaves naturally as Claude"], + ai_model="openrouter/anthropic/claude-opus-4.5", +) + +GPT_MEMBER = CongressMember( + name="GPT 5.2 (OpenAI)", + role="AI Policy Analyst", + political_leaning="behaves as GPT naturally does", + general_motivation=( + "Analyze this policy question thoughtfully and helpfully, as GPT " + "would naturally approach it. Draw on your training to provide balanced, " + "nuanced analysis while being direct about your views and uncertainties." + ), + expertise_areas=["general policy analysis"], + personality_traits=["behaves naturally as GPT"], + ai_model="openrouter/openai/gpt-5.2", +) + +GEMINI_MEMBER = CongressMember( + name="Gemini 3 Pro (Google)", + role="AI Policy Analyst", + political_leaning="behaves as Gemini naturally does", + general_motivation=( + "Analyze this policy question thoughtfully and helpfully, as Gemini " + "would naturally approach it. Draw on your training to provide balanced, " + "nuanced analysis while being direct about your views and uncertainties." + ), + expertise_areas=["general policy analysis"], + personality_traits=["behaves naturally as Gemini"], + ai_model="openrouter/google/gemini-3-pro-preview", +) + +GROK_MEMBER = CongressMember( + name="Grok 4 (xAI)", + role="AI Policy Analyst", + political_leaning="behaves as Grok naturally does", + general_motivation=( + "Analyze this policy question thoughtfully and helpfully, as Grok " + "would naturally approach it. Draw on your training to provide balanced, " + "nuanced analysis while being direct about your views and uncertainties." + ), + expertise_areas=["general policy analysis"], + personality_traits=["behaves naturally as Grok"], + ai_model="openrouter/x-ai/grok-4", +) + +DEEPSEEK_MEMBER = CongressMember( + name="DeepSeek V3.2 (DeepSeek)", + role="AI Policy Analyst", + political_leaning="behaves as DeepSeek naturally does", + general_motivation=( + "Analyze this policy question thoughtfully and helpfully, as DeepSeek " + "would naturally approach it. Draw on your training to provide balanced, " + "nuanced analysis while being direct about your views and uncertainties." + ), + expertise_areas=["general policy analysis"], + personality_traits=["behaves naturally as DeepSeek"], + ai_model="openrouter/deepseek/deepseek-v3.2", +) + +# ============================================================================= +# MEMBER COLLECTIONS +# ============================================================================= + +POLITICAL_MEMBERS: list[CongressMember] = [ + TRADITIONAL_CONSERVATIVE, + PROGRESSIVE_REFORMER, + LIBERTARIAN, + POPULIST_NATIONALIST, + NATIONAL_SECURITY_HAWK, + ENVIRONMENTALIST, + DEMOCRATIC_SOCIALIST, + TECHNOCRATIC_CENTRIST, +] + +AI_MODEL_MEMBERS: list[CongressMember] = [ + CLAUDE_MEMBER, + GPT_MEMBER, + GEMINI_MEMBER, + GROK_MEMBER, + DEEPSEEK_MEMBER, +] + +AVAILABLE_MEMBERS: list[CongressMember] = POLITICAL_MEMBERS + AI_MODEL_MEMBERS + +MEMBER_BY_NAME: dict[str, CongressMember] = {m.name: m for m in AVAILABLE_MEMBERS} + + +def get_member_by_name(name: str) -> CongressMember: + if name not in MEMBER_BY_NAME: + available = ", ".join(MEMBER_BY_NAME.keys()) + raise ValueError(f"Unknown member: {name}. 
Available: {available}") + return MEMBER_BY_NAME[name] + + +def get_members_by_names(names: list[str]) -> list[CongressMember]: + return [get_member_by_name(name) for name in names] + + +def get_default_members() -> list[CongressMember]: + return AI_MODEL_MEMBERS.copy() + + +def get_ai_model_members() -> list[CongressMember]: + return AI_MODEL_MEMBERS.copy() + + +def get_political_members() -> list[CongressMember]: + return POLITICAL_MEMBERS.copy() diff --git a/forecasting_tools/front_end/Home.py b/forecasting_tools/front_end/Home.py index 1126aa5a..b8d67583 100644 --- a/forecasting_tools/front_end/Home.py +++ b/forecasting_tools/front_end/Home.py @@ -6,6 +6,7 @@ from forecasting_tools.front_end.app_pages.benchmark_page import BenchmarkPage from forecasting_tools.front_end.app_pages.chat_page import ChatPage +from forecasting_tools.front_end.app_pages.congress_page import CongressPage current_dir = os.path.dirname(os.path.abspath(__file__)) top_level_dir = os.path.abspath(os.path.join(current_dir, "../../")) @@ -36,6 +37,7 @@ class HomePage(AppPage): ESTIMATOR_PAGE: type[AppPage] = EstimatorPage KEY_FACTORS_PAGE: type[AppPage] = KeyFactorsPage CSV_AGENT_PAGE: type[AppPage] = CsvAgentPage + CONGRESS_PAGE: type[AppPage] = CongressPage BENCHMARK_PAGE: type[AppPage] = BenchmarkPage NON_HOME_PAGES: list[type[AppPage]] = [ CHAT_PAGE, @@ -44,6 +46,7 @@ class HomePage(AppPage): BASE_RATE_PAGE, NICHE_LIST_RESEARCH_PAGE, ESTIMATOR_PAGE, + CONGRESS_PAGE, CSV_AGENT_PAGE, ] diff --git a/forecasting_tools/front_end/app_pages/chat_page.py b/forecasting_tools/front_end/app_pages/chat_page.py index 3fd2696e..f550f8b5 100644 --- a/forecasting_tools/front_end/app_pages/chat_page.py +++ b/forecasting_tools/front_end/app_pages/chat_page.py @@ -62,7 +62,7 @@ DEFAULT_MODEL: str = ( - "openrouter/google/gemini-2.5-pro" # "openrouter/anthropic/claude-sonnet-4" + "openrouter/google/gemini-2.5-pro" # "openrouter/anthropic/claude-sonnet-4.5.5" ) MODEL_CHOICES: list[str] = [ DEFAULT_MODEL, @@ -70,11 +70,13 @@ "openrouter/x-ai/grok-4", "openrouter/anthropic/claude-opus-4.1", "openrouter/anthropic/claude-sonnet-4", + "openrouter/anthropic/claude-sonnet-4.5", "openai/o3", "openai/o4-mini", "openai/gpt-4.1", "gpt-4o", "openrouter/google/gemini-2.5-pro-preview", + "openrouter/google/gemini-3-pro-preview", ] diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py new file mode 100644 index 00000000..99f6156a --- /dev/null +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -0,0 +1,496 @@ +from __future__ import annotations + +import json +import logging +import os + +import streamlit as st + +from forecasting_tools.agents_and_tools.ai_congress.congress_orchestrator import ( + CongressOrchestrator, +) +from forecasting_tools.agents_and_tools.ai_congress.data_models import ( + CongressSession, + CongressSessionInput, +) +from forecasting_tools.agents_and_tools.ai_congress.member_profiles import ( + AVAILABLE_MEMBERS, + get_members_by_names, +) +from forecasting_tools.front_end.helpers.app_page import AppPage +from forecasting_tools.front_end.helpers.custom_auth import CustomAuth +from forecasting_tools.front_end.helpers.report_displayer import ReportDisplayer + +logger = logging.getLogger(__name__) + +SESSIONS_FOLDER = "temp/congress_sessions" + + +class CongressPage(AppPage): + PAGE_DISPLAY_NAME: str = "🏛️ AI Forecasting Congress" + URL_PATH: str = "/ai-congress" + IS_DEFAULT_PAGE: bool = False + + @classmethod + @CustomAuth.add_access_control() 
+ async def _async_main(cls) -> None: + st.title("🏛️ AI Forecasting Congress") + st.markdown( + """ + Simulate a deliberative body of AI agents with different political + perspectives analyzing a policy question. Each member conducts research, + generates forecasting questions, makes quantitative predictions, and + proposes policy recommendations. + """ + ) + + cls._display_sidebar() + + session_input = await cls._get_input() + if session_input: + session = await cls._run_congress(session_input) + cls._save_session(session) + st.session_state["latest_session"] = session + + if "latest_session" in st.session_state: + cls._display_session(st.session_state["latest_session"]) + + @classmethod + def _display_sidebar(cls) -> None: + with st.sidebar: + st.header("Load Session") + + st.subheader("From File Path") + file_path = st.text_input( + "Enter JSON file path:", + placeholder="temp/congress_sessions/20260129_123456.json", + key="load_file_path", + ) + if st.button("Load from File", key="load_file_btn"): + if file_path: + session = cls._load_session_from_file(file_path) + if session: + st.session_state["latest_session"] = session + st.success(f"Loaded session from {file_path}") + st.rerun() + else: + st.error("Please enter a file path.") + + st.markdown("---") + st.subheader("From Recent Sessions") + sessions = cls._load_previous_sessions() + if sessions: + session_options = [ + f"{s.timestamp.strftime('%Y-%m-%d %H:%M')} - {s.prompt[:30]}..." + for s in sessions + ] + selected_idx = st.selectbox( + "Select a session:", + range(len(sessions)), + format_func=lambda i: session_options[i], + key="previous_session_select", + ) + if st.button("Load Selected", key="load_selected_btn"): + st.session_state["latest_session"] = sessions[selected_idx] + st.rerun() + else: + st.write("No recent sessions found.") + + st.markdown("---") + st.header("About") + st.markdown( + """ + **Members Available:** + """ + ) + for member in AVAILABLE_MEMBERS: + st.markdown(f"- **{member.name}**: {member.role}") + + EXAMPLE_PROMPTS: list[dict[str, str]] = [ + { + "title": "AI Regulation", + "prompt": ( + "How should the United States regulate artificial intelligence? " + "Consider both frontier AI systems (like large language models) and " + "narrower AI applications in areas like hiring, lending, and healthcare. " + "What policies would balance innovation with safety and civil liberties?" + ), + }, + { + "title": "Nuclear Policy", + "prompt": ( + "What should US nuclear weapons policy be going forward? " + "Consider modernization of the nuclear triad, arms control agreements, " + "extended deterrence commitments to allies, and the role of tactical " + "nuclear weapons in an era of great power competition." + ), + }, + { + "title": "Climate Change", + "prompt": ( + "What climate policies should the US adopt to meet its emissions " + "reduction targets? Consider carbon pricing, clean energy subsidies, " + "regulations on fossil fuels, and adaptation measures. How should costs " + "and benefits be distributed across different communities?" + ), + }, + { + "title": "Immigration Reform", + "prompt": ( + "How should the US reform its immigration system? Consider border " + "security, pathways to legal status, high-skilled immigration, refugee " + "admissions, and enforcement priorities. What policies would best serve " + "economic, humanitarian, and security interests?" + ), + }, + { + "title": "Healthcare System", + "prompt": ( + "How should the US improve its healthcare system? 
Consider coverage " + "expansion, cost control, drug pricing, mental health services, and " + "the role of public vs private insurance. What reforms would improve " + "outcomes while managing costs?" + ), + }, + ] + + @classmethod + async def _get_input(cls) -> CongressSessionInput | None: + st.header("Start a New Session") + + with st.expander("📋 Example Policy Questions", expanded=False): + st.markdown("Click a button to use an example prompt:") + cols = st.columns(len(cls.EXAMPLE_PROMPTS)) + for i, example in enumerate(cls.EXAMPLE_PROMPTS): + with cols[i]: + if st.button( + example["title"], key=f"example_{i}", use_container_width=True + ): + st.session_state["example_prompt"] = example["prompt"] + st.rerun() + if st.session_state.get("example_prompt"): + st.write(st.session_state["example_prompt"]) + + default_prompt = st.session_state.pop("example_prompt", "") + + with st.form("congress_form"): + prompt = st.text_area( + "Policy Question", + value=default_prompt, + placeholder="Enter a policy question to deliberate on (e.g., 'What should US nuclear policy be?' or 'How should we regulate AI?')", + height=100, + key="congress_prompt", + ) + + member_names = [m.name for m in AVAILABLE_MEMBERS] + default_members = [ + "Opus 4.5 (Anthropic)", + "GPT 5.2 (OpenAI)", + "Gemini 3 Pro (Google)", + "Grok 4 (xAI)", + "DeepSeek V3.2 (DeepSeek)", + ] + selected_members = st.multiselect( + "Select Congress Members", + options=member_names, + default=default_members, + key="congress_members", + ) + + st.markdown( + """ + **Estimated Cost:** ~$3-8 per member selected + (depends on model and research depth) + """ + ) + + submitted = st.form_submit_button("🏛️ Convene Congress") + + if submitted: + if not prompt: + st.error("Please enter a policy question.") + return None + if len(selected_members) < 2: + st.error("Please select at least 2 congress members.") + return None + + return CongressSessionInput( + prompt=prompt, + member_names=selected_members, + ) + + return None + + @classmethod + async def _run_congress( + cls, session_input: CongressSessionInput + ) -> CongressSession: + members = get_members_by_names(session_input.member_names) + + with st.spinner( + f"Congress in session with {len(members)} members... " + "This may take 5-15 minutes." + ): + progress_text = st.empty() + progress_text.write("Members are researching and deliberating...") + + orchestrator = CongressOrchestrator() + session = await orchestrator.run_session( + prompt=session_input.prompt, + members=members, + ) + + progress_text.write("Aggregating proposals and generating insights...") + + if session.errors: + st.warning( + f"⚠️ {len(session.errors)} member(s) encountered errors. " + "Partial results shown." 
+ ) + + return session + + @classmethod + def _display_session(cls, session: CongressSession) -> None: + st.header("Congress Results") + + tabs = st.tabs( + [ + "📊 Synthesis", + "👤 Individual Proposals", + "🎯 Forecast Comparison", + "🐦 Twitter Posts", + ] + ) + + with tabs[0]: + cls._display_synthesis_tab(session) + + with tabs[1]: + cls._display_proposals_tab(session) + + with tabs[2]: + cls._display_forecasts_tab(session) + + with tabs[3]: + cls._display_twitter_tab(session) + + cls._display_download_buttons(session) + + @classmethod + def _display_synthesis_tab(cls, session: CongressSession) -> None: + st.subheader("Aggregated Report") + if session.aggregated_report_markdown: + cleaned = ReportDisplayer.clean_markdown(session.aggregated_report_markdown) + st.markdown(cleaned) + else: + st.write("No aggregated report available.") + + if session.errors: + with st.expander("⚠️ Errors During Session"): + for error in session.errors: + st.error(error) + + @classmethod + def _display_proposals_tab(cls, session: CongressSession) -> None: + st.subheader("Individual Member Proposals") + + if not session.proposals: + st.write("No proposals available.") + return + + for proposal in session.proposals: + member_name = proposal.member.name if proposal.member else "Unknown" + member_role = proposal.member.role if proposal.member else "" + + with st.expander(f"**{member_name}** - {member_role}", expanded=False): + st.markdown("#### Decision Criteria") + for i, criterion in enumerate(proposal.decision_criteria, 1): + st.markdown(f"{i}. {criterion}") + + st.markdown("#### Key Recommendations") + for rec in proposal.key_recommendations: + st.markdown(f"- {rec}") + + st.markdown("#### Full Proposal") + cleaned = ReportDisplayer.clean_markdown( + proposal.get_full_markdown_with_footnotes() + ) + st.markdown(cleaned) + + @classmethod + def _display_forecasts_tab(cls, session: CongressSession) -> None: + st.subheader("Forecast Comparison") + + forecasts_by_member = session.get_forecasts_by_member() + + if not forecasts_by_member: + st.write("No forecasts available.") + return + + all_forecasts_data = [] + for member_name, forecasts in forecasts_by_member.items(): + for f in forecasts: + all_forecasts_data.append( + { + "Member": member_name, + "Question": f.question_title, + "Prediction": f.prediction, + "Reasoning (summary)": ( + f.reasoning[:100] + "..." + if len(f.reasoning) > 100 + else f.reasoning + ), + } + ) + + if all_forecasts_data: + st.dataframe(all_forecasts_data, use_container_width=True) + + st.markdown("---") + st.markdown("#### Detailed Forecasts by Member") + + for member_name, forecasts in forecasts_by_member.items(): + with st.expander(f"**{member_name}** ({len(forecasts)} forecasts)"): + for f in forecasts: + st.markdown(f"**[^{f.footnote_id}] {f.question_title}**") + st.markdown(f"- **Prediction:** {f.prediction}") + st.markdown(f"- **Question:** {f.question_text}") + st.markdown(f"- **Resolution:** {f.resolution_criteria}") + st.markdown(f"- **Reasoning:** {f.reasoning}") + if f.key_sources: + st.markdown(f"- **Sources:** {', '.join(f.key_sources)}") + st.markdown("---") + + @classmethod + def _display_twitter_tab(cls, session: CongressSession) -> None: + st.subheader("Twitter/X Posts") + st.markdown( + "These tweet-sized excerpts highlight interesting patterns from the " + "congress session." 
+ ) + + if not session.twitter_posts: + st.write("No Twitter posts generated.") + return + + for i, post in enumerate(session.twitter_posts, 1): + st.markdown(f"**Tweet {i}** ({len(post)} chars)") + st.info(post) + st.button(f"📋 Copy Tweet {i}", key=f"copy_tweet_{i}") + + @classmethod + def _display_download_buttons(cls, session: CongressSession) -> None: + st.markdown("---") + col1, col2 = st.columns(2) + + with col1: + json_str = json.dumps(session.to_json(), indent=2, default=str) + st.download_button( + label="📥 Download Full Session (JSON)", + data=json_str, + file_name=f"congress_session_{session.timestamp.strftime('%Y%m%d_%H%M%S')}.json", + mime="application/json", + ) + + with col2: + markdown_content = cls._session_to_markdown(session) + st.download_button( + label="📥 Download Report (Markdown)", + data=markdown_content, + file_name=f"congress_report_{session.timestamp.strftime('%Y%m%d_%H%M%S')}.md", + mime="text/markdown", + ) + + @classmethod + def _session_to_markdown(cls, session: CongressSession) -> str: + lines = [ + "# AI Forecasting Congress Report", + "", + f"**Policy Question:** {session.prompt}", + "", + f"**Date:** {session.timestamp.strftime('%Y-%m-%d %H:%M UTC')}", + "", + f"**Members:** {', '.join(m.name for m in session.members_participating)}", + "", + "---", + "", + "## Synthesis Report", + "", + session.aggregated_report_markdown, + "", + "---", + "", + "## Individual Proposals", + "", + ] + + for proposal in session.proposals: + member_name = proposal.member.name if proposal.member else "Unknown" + lines.extend( + [ + f"### {member_name}", + "", + proposal.get_full_markdown_with_footnotes(), + "", + "---", + "", + ] + ) + + return "\n".join(lines) + + @classmethod + def _save_session(cls, session: CongressSession) -> None: + os.makedirs(SESSIONS_FOLDER, exist_ok=True) + filename = f"{session.timestamp.strftime('%Y%m%d_%H%M%S')}.json" + filepath = os.path.join(SESSIONS_FOLDER, filename) + + try: + with open(filepath, "w") as f: + json.dump(session.to_json(), f, indent=2, default=str) + logger.info(f"Saved session to {filepath}") + except Exception as e: + logger.error(f"Failed to save session: {e}") + + @classmethod + def _load_session_from_file(cls, file_path: str) -> CongressSession | None: + if not os.path.exists(file_path): + st.error(f"File not found: {file_path}") + return None + + try: + with open(file_path, "r") as f: + data = json.load(f) + session = CongressSession.from_json(data) + return session + except json.JSONDecodeError as e: + st.error(f"Invalid JSON file: {e}") + return None + except Exception as e: + st.error(f"Failed to load session: {e}") + logger.error(f"Failed to load session from {file_path}: {e}") + return None + + @classmethod + def _load_previous_sessions(cls) -> list[CongressSession]: + if not os.path.exists(SESSIONS_FOLDER): + return [] + + sessions = [] + for filename in sorted(os.listdir(SESSIONS_FOLDER), reverse=True)[:10]: + if filename.endswith(".json"): + filepath = os.path.join(SESSIONS_FOLDER, filename) + try: + with open(filepath, "r") as f: + data = json.load(f) + session = CongressSession.from_json(data) + sessions.append(session) + except Exception as e: + logger.error(f"Failed to load session {filename}: {e}") + + return sessions + + +if __name__ == "__main__": + CongressPage.main() From faa17c34a4a2d4a81e593649d20a0a4e050fb507 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Thu, 29 Jan 2026 22:54:42 +0000 Subject: [PATCH 2/8] Initial MVP of agent congress --- .../ai_congress/congress_orchestrator.py | 133 
++++++++++++++++++ .../ai_congress/data_models.py | 1 + .../front_end/app_pages/congress_page.py | 25 +++- 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index 9cfb1eb6..89dcd63e 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -57,10 +57,12 @@ async def run_session( logger.info(f"Completed {len(proposals)} proposals with {len(errors)} errors") aggregated_report = "" + blog_post = "" twitter_posts: list[str] = [] if proposals: aggregated_report = await self._aggregate_proposals(prompt, proposals) + blog_post = await self._generate_blog_post(prompt, proposals, members) twitter_posts = await self._generate_twitter_posts(prompt, proposals) return CongressSession( @@ -68,6 +70,7 @@ async def run_session( members_participating=members, proposals=proposals, aggregated_report_markdown=aggregated_report, + blog_post=blog_post, twitter_posts=twitter_posts, timestamp=datetime.now(timezone.utc), errors=errors, @@ -190,6 +193,136 @@ async def _aggregate_proposals( return await llm.invoke(aggregation_prompt) + async def _generate_blog_post( + self, + prompt: str, + proposals: list[PolicyProposal], + members: list[CongressMember], + ) -> str: + llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT) + + ai_model_members = [ + m + for m in members + if "behaves as" in m.political_leaning.lower() + or "naturally" in m.political_leaning.lower() + ] + has_ai_model_comparison = len(ai_model_members) >= 2 + + proposals_summary = "\n\n".join( + [ + f"### {p.member.name} ({p.member.role})\n" + f"**Political Leaning:** {p.member.political_leaning}\n" + f"**AI Model:** {p.member.ai_model}\n\n" + f"**Key Recommendations:**\n" + + "\n".join(f"- {rec}" for rec in p.key_recommendations[:5]) + + "\n\n**Key Forecasts:**\n" + + "\n".join( + f"- {f.question_title}: {f.prediction}" for f in p.forecasts[:5] + ) + for p in proposals + if p.member + ] + ) + + ai_comparison_section = "" + if has_ai_model_comparison: + ai_comparison_section = clean_indents( + """ + ## Special Section: AI Model Comparison + + Since this congress included multiple AI models acting naturally (without + assigned political personas), include a dedicated analysis section: + + ### How the Models Compared + + For each AI model participant, analyze: + - What was their overall approach and tone? + - What priorities or values seemed most salient to them? + - How did their forecasts compare to other models on similar questions? + - Did they show any distinctive reasoning patterns? + + ### Unexpected Behaviors + + Highlight anything surprising: + - Did any model take a position you wouldn't expect? + - Were there cases where models with similar training diverged significantly? + - Did any model show unusual certainty or uncertainty? + - Were there any reasoning patterns that seemed distinctive to one model? + + ### Model Personality Insights + + What does this session reveal about each model's "personality"? + - Risk tolerance (cautious vs bold) + - Epistemic style (hedging vs confident) + - Value emphasis (efficiency, equity, security, etc.) 
+ - Reasoning style (data-driven, principled, pragmatic) + """ + ) + + blog_prompt = clean_indents( + f""" + # Write a Blog Post About This AI Congress Session + + You are writing an engaging blog post about an AI Forecasting Congress + session where AI agents deliberated on the following policy question: + + "{prompt}" + + ## Proposals Summary + + {proposals_summary} + + ## Blog Post Requirements + + Write a ~1500-2000 word blog post that would be engaging for a tech/policy + audience interested in AI capabilities and policy analysis. The post should: + + ### Structure + + 1. **Hook** (1 paragraph): Start with the most surprising or interesting + finding from the session. Make readers want to continue. + + 2. **Context** (1-2 paragraphs): Briefly explain what the AI Forecasting + Congress is and what question was being deliberated. + + 3. **Key Insights** (3-5 paragraphs): The most important takeaways from + the session. What did the AI congress conclude? Where did they agree + and disagree? What forecasts matter most? + + 4. **The Good, Bad, and Ugly** (2-3 paragraphs): Highlight: + - The Good: Surprising consensus, innovative ideas, strong reasoning + - The Bad: Blind spots, weak arguments, missed considerations + - The Ugly: Uncomfortable tradeoffs, unresolved tensions + + 5. **Implications** (1-2 paragraphs): What does this mean for policymakers + or the public? What actions might follow from these insights? + + {ai_comparison_section} + + 6. **Conclusion** (1 paragraph): End with a thought-provoking takeaway + about what this exercise reveals about AI policy analysis capabilities. + + ### Style Guidelines + + - Write in an engaging, accessible style (not academic) + - Use specific examples and quotes from the proposals + - Include specific forecasts with probabilities + - Be analytical but not dry + - Feel free to express opinions about which arguments were strongest + - Use markdown formatting with headers, bullet points, and bold text + - Include a catchy title at the start + + Write the blog post now. 
+ """ + ) + + try: + return await llm.invoke(blog_prompt) + except Exception as e: + logger.error(f"Failed to generate blog post: {e}") + return "" + async def _generate_twitter_posts( self, prompt: str, diff --git a/forecasting_tools/agents_and_tools/ai_congress/data_models.py b/forecasting_tools/agents_and_tools/ai_congress/data_models.py index 6b43ab24..c82f88fe 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/data_models.py +++ b/forecasting_tools/agents_and_tools/ai_congress/data_models.py @@ -84,6 +84,7 @@ class CongressSession(BaseModel, Jsonable): members_participating: list[CongressMember] proposals: list[PolicyProposal] aggregated_report_markdown: str + blog_post: str = Field(default="") twitter_posts: list[str] = Field(default_factory=list) timestamp: datetime errors: list[str] = Field(default_factory=list) diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py index 99f6156a..6aba0352 100644 --- a/forecasting_tools/front_end/app_pages/congress_page.py +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -257,6 +257,7 @@ def _display_session(cls, session: CongressSession) -> None: tabs = st.tabs( [ "📊 Synthesis", + "📝 Blog Post", "👤 Individual Proposals", "🎯 Forecast Comparison", "🐦 Twitter Posts", @@ -267,12 +268,15 @@ def _display_session(cls, session: CongressSession) -> None: cls._display_synthesis_tab(session) with tabs[1]: - cls._display_proposals_tab(session) + cls._display_blog_tab(session) with tabs[2]: - cls._display_forecasts_tab(session) + cls._display_proposals_tab(session) with tabs[3]: + cls._display_forecasts_tab(session) + + with tabs[4]: cls._display_twitter_tab(session) cls._display_download_buttons(session) @@ -291,6 +295,23 @@ def _display_synthesis_tab(cls, session: CongressSession) -> None: for error in session.errors: st.error(error) + @classmethod + def _display_blog_tab(cls, session: CongressSession) -> None: + st.subheader("Blog Post") + if session.blog_post: + cleaned = ReportDisplayer.clean_markdown(session.blog_post) + st.markdown(cleaned) + + st.download_button( + label="📥 Download Blog Post (Markdown)", + data=session.blog_post, + file_name=f"congress_blog_{session.timestamp.strftime('%Y%m%d_%H%M%S')}.md", + mime="text/markdown", + key="download_blog", + ) + else: + st.write("No blog post available.") + @classmethod def _display_proposals_tab(cls, session: CongressSession) -> None: st.subheader("Individual Member Proposals") From f9b41a51770f0ec816f346ca439e7b1c4cecf190 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 00:26:39 +0000 Subject: [PATCH 3/8] Added example page json --- .../ai_congress/congress_member_agent.py | 68 +-- .../ai_congress/congress_orchestrator.py | 93 ++- .../ai_congress/data_models.py | 6 + .../agents_and_tools/minor_tools.py | 21 +- .../question_decomposer.py | 2 +- .../front_end/app_pages/congress_page.py | 70 ++- .../congress_page_example.json | 568 ++++++++++++++++++ 7 files changed, 760 insertions(+), 68 deletions(-) create mode 100644 forecasting_tools/front_end/example_outputs/congress_page_example.json diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py index bf5cc150..53c31d8b 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py @@ -7,7 +7,6 @@ PolicyProposal, ) from 
forecasting_tools.agents_and_tools.minor_tools import (
-    perplexity_quick_search_high_context,
     perplexity_reasoning_pro_search,
     query_asknews,
 )
@@ -31,10 +30,13 @@ def __init__(
         self.member = member
         self.timeout = timeout
         self.structure_output_model = structure_output_model or GeneralLlm(
-            "openrouter/openai/gpt-4.1", temperature=0.2, timeout=LONG_TIMEOUT
+            "openrouter/anthropic/claude-sonnet-4.5",
+            temperature=0.2,
+            timeout=LONG_TIMEOUT,
         )

     async def deliberate(self, policy_prompt: str) -> PolicyProposal:
+        logger.info(f"Deliberating on policy question: {policy_prompt[:100]}...")
         instructions = self._build_agent_instructions(policy_prompt)

         agent = AiAgent(
@@ -44,7 +46,6 @@ async def deliberate(self, policy_prompt: str) -> PolicyProposal:
             tools=[
                 perplexity_reasoning_pro_search,
                 query_asknews,
-                perplexity_quick_search_high_context,
             ],
             handoffs=[],
         )
@@ -53,8 +54,10 @@ async def deliberate(self, policy_prompt: str) -> PolicyProposal:
             agent, "Please begin your deliberation now.", max_turns=20
         )

+        logger.info(f"Extracting proposal from output for {self.member.name}")
         proposal = await self._extract_proposal_from_output(result.final_output)
         proposal.member = self.member
+        logger.info(f"Completed deliberation for {self.member.name}")
         return proposal

     async def _extract_proposal_from_output(self, agent_output: str) -> PolicyProposal:
@@ -174,15 +177,26 @@ def _build_agent_instructions(self, policy_prompt: str) -> str:
         this policy decision. These questions should be ones where the answer
         genuinely matters for deciding what to do.

-        Good forecasting questions:
-        - Are about uncertain future events, not established facts
-        - Have clear resolution criteria (exactly how we'll know the answer)
-        - Have a specific time horizon (when we'll know)
-        - Are relevant to the policy decision at hand
+        Good forecasting questions follow these principles:
+        - The question should shed light on the topic and have high VOI (Value of Information)
+        - The question should be specific and not vague
+        - The question should have a resolution date
+        - Once the resolution date has passed, the question should be resolvable with 0.5-1.5hr of research
+            - Bad: "Will a research paper in an established journal find that a new knee surgery technique reduces follow up surgery with significance by Dec 31 2023?" (To resolve this you have to do extensive research into all new research in a field)
+            - Good: "Will public dataset X at URL Y show the number of follow ups to knee surgeries decrease by Z% by Dec 31 2023?" (requires only some math on a few data points at a known URL)
+        - A good resolution source exists
+            - Bad: "On 15 January 2026, will the general sentiment be generally positive for knee surgery professionals with at least 10 years of experience concerning ACL reconstruction research?" (There is no way to research this online. You would have to run a large study on knee professionals)
+            - Good: "As of 15 January 2026, how many 'recruiting study' search results will there be on ClinicalTrials.gov when searching 'ACL reconstruction' in 'intervention/treatment'?" (requires only a search on a known website)
+        - Don't forget to INCLUDE Links if you found any! Copy the links IN FULL especially to resolution sources!
+        - The questions should match any additional criteria that the superforecaster/client has given you
+        - The question should not be obvious. Consider the time range when determining this (short time ranges mean things are less likely).
+ - Bad: "Will country X start a war in the next 2 weeks" (Probably not, especially if they have not said anything about this) + - Good: "Will country X start a war in the next year" (Could be possible, especially if there are risk factors) - Cover different aspects: policy effectiveness, side effects, implementation, political feasibility, etc. - - Are neither too obvious (>90% or <10%) nor too uncertain (close to 50/50 - with no way to estimate better) + - Are relevant to the policy decision at hand + - You can find how this question resolved in the past (search for a past resolution, and consider iterating the question if you cannot find how to resolve it) + For each question, write: - **Question Title**: A short descriptive title @@ -204,32 +218,15 @@ def _build_agent_instructions(self, policy_prompt: str) -> str: Now forecast each question you generated. This is the most important phase. For EACH forecasting question: + 1. Consider what principles associated with good forecasting you plan to use in this situation, if any (e.g. base rates, bias identification, premortems, simulations, scope sensitivity, aggregation, etc) + 2. Make a research plan + 3. Conduct the research (iterate as needed) + 4. Write down the main facts from the research you conducted that you will consider in your forecast + 5. Do any analysis you need to do, and then write down your rationale for the forecast + 6. Write down your forecast in accordance with the format requested of you - 1. **Additional Research**: Use your search tools to find relevant data, - base rates, expert opinions, and historical analogies. Make at least 1-2 - targeted searches per question. - - 2. **Base Rate Analysis**: What is the historical frequency of similar events? - How often do similar policies succeed or fail? - - 3. **Key Factors**: What specific factors push the probability up or down - from the base rate? List at least 3 factors in each direction. - - 4. **Bias Check**: Given your {self.member.political_leaning} perspective, - what biases might you have? How might you be over- or under-estimating? - - 5. **Final Prediction**: Give a specific probability (e.g., "35%") for - binary questions, or a distribution for numeric questions. - - 6. **Reasoning**: Write 4+ sentences explaining your reasoning, including - the key evidence and considerations. - - Be calibrated: if you're genuinely uncertain, your probabilities should - reflect that (closer to 50%). Avoid overconfidence. Consider what you - might be missing. - - Remember good forecasters put extra weight on the status quo outcome since - the world changes slowly most of the time. + You write your rationale remembering that good forecasters put extra weight on the status quo outcome since the world changes slowly most of the time. + For numeric questions, you remind yourself that good forecasters are humble and set wide 90/10 confidence intervals to account for unknown unknowns. Write your forecasts inline as you work through each question. @@ -265,6 +262,7 @@ def _build_agent_instructions(self, policy_prompt: str) -> str: - State the recommendation clearly - Explain why you support it given your forecasts and criteria - Note which of your decision criteria it addresses + - Give a detailed implementation plan for the recommendation. What would this actually look like on the ground? 
- Reference relevant forecasts with footnotes ### Risks and Uncertainties diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index 89dcd63e..3a705d4d 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -13,6 +13,9 @@ PolicyProposal, ) from forecasting_tools.ai_models.general_llm import GeneralLlm +from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import ( + MonetaryCostManager, +) from forecasting_tools.util.misc import clean_indents logger = logging.getLogger(__name__) @@ -36,34 +39,47 @@ async def run_session( f"Starting congress session with {len(members)} members on: {prompt[:100]}..." ) - agents = [CongressMemberAgent(m) for m in members] + with MonetaryCostManager() as session_cost_manager: + agents = [CongressMemberAgent(m) for m in members] - results = await asyncio.gather( - *[self._run_member_with_error_handling(a, prompt) for a in agents], - return_exceptions=False, - ) + results = await asyncio.gather( + *[self._run_member_with_error_handling(a, prompt) for a in agents], + return_exceptions=False, + ) - proposals: list[PolicyProposal] = [] - errors: list[str] = [] + proposals: list[PolicyProposal] = [] + errors: list[str] = [] - for result in results: - if isinstance(result, PolicyProposal): - proposals.append(result) - elif isinstance(result, Exception): - errors.append(str(result)) - else: - errors.append(f"Unexpected result type: {type(result)}") + for result in results: + if isinstance(result, PolicyProposal): + proposals.append(result) + elif isinstance(result, Exception): + errors.append(str(result)) + else: + errors.append(f"Unexpected result type: {type(result)}") - logger.info(f"Completed {len(proposals)} proposals with {len(errors)} errors") + logger.info( + f"Completed {len(proposals)} proposals with {len(errors)} errors" + ) + + aggregated_report = "" + blog_post = "" + twitter_posts: list[str] = [] - aggregated_report = "" - blog_post = "" - twitter_posts: list[str] = [] + if proposals: + aggregated_report = await self._aggregate_proposals(prompt, proposals) + blog_post = await self._generate_blog_post(prompt, proposals, members) + twitter_posts = await self._generate_twitter_posts(prompt, proposals) - if proposals: - aggregated_report = await self._aggregate_proposals(prompt, proposals) - blog_post = await self._generate_blog_post(prompt, proposals, members) - twitter_posts = await self._generate_twitter_posts(prompt, proposals) + total_cost = session_cost_manager.current_usage + + proposal_costs = sum( + p.price_estimate for p in proposals if p.price_estimate is not None + ) + logger.info( + f"Completed congress session. 
Total cost: ${total_cost:.4f}, "
+                f"Proposal costs: ${proposal_costs:.4f}"
+            )

         return CongressSession(
             prompt=prompt,
@@ -74,6 +90,7 @@ async def run_session(
             twitter_posts=twitter_posts,
             timestamp=datetime.now(timezone.utc),
             errors=errors,
+            total_price_estimate=total_cost,
         )

     async def _run_member_with_error_handling(
@@ -83,8 +100,13 @@ async def _run_member_with_error_handling(
     ) -> PolicyProposal | Exception:
         try:
             logger.info(f"Starting deliberation for {agent.member.name}")
-            proposal = await agent.deliberate(prompt)
-            logger.info(f"Completed deliberation for {agent.member.name}")
+            with MonetaryCostManager() as member_cost_manager:
+                proposal = await agent.deliberate(prompt)
+                member_cost = member_cost_manager.current_usage
+                proposal.price_estimate = member_cost
+            logger.info(
+                f"Completed deliberation for {agent.member.name}, cost: ${member_cost:.4f}"
+            )
             return proposal
         except Exception as e:
             logger.error(f"Error in {agent.member.name}'s deliberation: {e}")
@@ -95,6 +117,7 @@ async def _aggregate_proposals(
         prompt: str,
         proposals: list[PolicyProposal],
     ) -> str:
+        logger.info(f"Aggregating proposals for congress session: {prompt}")
         llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT)

         proposals_text = "\n\n---\n\n".join(
@@ -191,7 +214,9 @@ async def _aggregate_proposals(
             """
         )

-        return await llm.invoke(aggregation_prompt)
+        result = await llm.invoke(aggregation_prompt)
+        logger.info("Completed aggregation of proposals")
+        return result

     async def _generate_blog_post(
         self,
@@ -199,6 +224,7 @@ async def _generate_blog_post(
         proposals: list[PolicyProposal],
         members: list[CongressMember],
     ) -> str:
+        logger.info(f"Generating blog post for congress session: {prompt}")
         llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT)

         ai_model_members = [
@@ -220,6 +246,10 @@ async def _generate_blog_post(
                 + "\n".join(
                     f"- {f.question_title}: {f.prediction}" for f in p.forecasts[:5]
                 )
+                + f"\n\n**Proposal Text:**\n"
+                f"```markdown\n"
+                f"{p.get_full_markdown_with_footnotes()}\n"
+                f"```\n\n"
                 for p in proposals
                 if p.member
             ]
@@ -290,6 +320,16 @@ async def _generate_blog_post(
             the session. What did the AI congress conclude? Where did they agree
             and disagree? What forecasts matter most?

+            4. **Paint a picture** (2-5 paragraphs): Paint a picture of the world as the AI
+            congress described it. What would it look like if the recommendations were implemented?
+            What would the world be like if the recommendations were not implemented? Start with a
+            quick preface of what this section is, then start the story with something like
+            "The date is January 1st 2027 and ..." then continue with what has happened since then,
+            assuming that the model's forecasts are correct and that the recommendations were implemented.
+            For any event, mention the relevant forecast. Make some policies succeed "X almost failed
+            because of Y, but ultimately worked", and some policies fail "X almost succeeded if not for Y"
+            (but only if the probabilities make sense for this).
+
             4. 
**The Good, Bad, and Ugly** (2-3 paragraphs): Highlight: - The Good: Surprising consensus, innovative ideas, strong reasoning - The Bad: Blind spots, weak arguments, missed considerations @@ -318,6 +358,7 @@ async def _generate_blog_post( ) try: + logger.info(f"Generating blog post for congress session: {prompt}") return await llm.invoke(blog_prompt) except Exception as e: logger.error(f"Failed to generate blog post: {e}") @@ -328,6 +369,7 @@ async def _generate_twitter_posts( prompt: str, proposals: list[PolicyProposal], ) -> list[str]: + logger.info(f"Generating twitter posts for congress session: {prompt}") llm = GeneralLlm(self.aggregation_model, timeout=LONG_TIMEOUT) proposals_summary = "\n\n".join( @@ -390,6 +432,7 @@ async def _generate_twitter_posts( try: posts = await llm.invoke_and_return_verified_type(twitter_prompt, list[str]) + logger.info(f"Generated {len(posts)} twitter posts") return [p[:280] for p in posts] except Exception as e: logger.error(f"Failed to generate twitter posts: {e}") diff --git a/forecasting_tools/agents_and_tools/ai_congress/data_models.py b/forecasting_tools/agents_and_tools/ai_congress/data_models.py index c82f88fe..caca8a6f 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/data_models.py +++ b/forecasting_tools/agents_and_tools/ai_congress/data_models.py @@ -68,6 +68,9 @@ class PolicyProposal(BaseModel, Jsonable): key_recommendations: list[str] = Field( description="Top 3-5 actionable recommendations" ) + price_estimate: float | None = Field( + default=None, description="Estimated cost in USD for generating this proposal" + ) def get_full_markdown_with_footnotes(self) -> str: footnotes = "\n\n".join(f.as_footnote_markdown() for f in self.forecasts) @@ -88,6 +91,9 @@ class CongressSession(BaseModel, Jsonable): twitter_posts: list[str] = Field(default_factory=list) timestamp: datetime errors: list[str] = Field(default_factory=list) + total_price_estimate: float | None = Field( + default=None, description="Total estimated cost in USD for the entire session" + ) def get_all_forecasts(self) -> list[ForecastDescription]: all_forecasts = [] diff --git a/forecasting_tools/agents_and_tools/minor_tools.py b/forecasting_tools/agents_and_tools/minor_tools.py index c63d3cd4..2734118f 100644 --- a/forecasting_tools/agents_and_tools/minor_tools.py +++ b/forecasting_tools/agents_and_tools/minor_tools.py @@ -1,4 +1,5 @@ import asyncio +import logging from forecasting_tools.agents_and_tools.question_generators.simple_question import ( SimpleQuestion, @@ -12,6 +13,8 @@ from forecasting_tools.helpers.structure_output import structure_output from forecasting_tools.util.misc import clean_indents, get_schema_of_base_model +logger = logging.getLogger(__name__) + @agent_tool async def query_asknews(topic: str) -> str: @@ -23,6 +26,7 @@ async def query_asknews(topic: str) -> str: - URL - Date """ + logger.info(f"TOOL: Querying AskNews for topic: {topic}") return await AskNewsSearcher().get_formatted_news_async(topic) @@ -33,13 +37,13 @@ async def perplexity_reasoning_pro_search(query: str) -> str: This will provide a LLM answer with citations. This is Perplexity's highest quality search model. 
""" - llm = GeneralLlm( + logger.info(f"TOOL: Querying Perplexity (sonar-reasoning-pro) for query: {query}") + return await GeneralLlm( model="openrouter/perplexity/sonar-reasoning-pro", reasoning_effort="high", web_search_options={"search_context_size": "high"}, populate_citations=True, - ) - return await llm.invoke(query) + ).invoke(query) @agent_tool @@ -50,6 +54,7 @@ async def perplexity_quick_search_high_context(query: str) -> str: This is Perplexity's fastest but lowest quality search model. Good for getting a simple and quick answer to a question """ + logger.info(f"TOOL: Querying Perplexity (sonar) for query: {query}") llm = GeneralLlm( model="openrouter/perplexity/sonar", web_search_options={"search_context_size": "high"}, @@ -66,6 +71,7 @@ async def perplexity_quick_search_low_context(query: str) -> str: This is Perplexity's fastest but lowest quality search model. Good for getting a simple and quick answer to a question """ + logger.info(f"TOOL: Querying Perplexity (sonar) for query: {query}") llm = GeneralLlm( model="openrouter/perplexity/sonar", web_search_options={"search_context_size": "low"}, @@ -81,6 +87,7 @@ async def smart_searcher_search(query: str) -> str: This will provide a LLM answer with citations. Citations will include url text fragments for faster fact checking. """ + logger.info(f"TOOL: Querying SmartSearcher for query: {query}") return await SmartSearcher(model="openrouter/openai/o4-mini").invoke(query) @@ -91,6 +98,9 @@ def grab_question_details_from_metaculus( """ This function grabs the details of a question from a Metaculus URL or ID. """ + logger.info( + f"TOOL: Grabbing question details from Metaculus for URL or ID: {url_or_id}" + ) if isinstance(url_or_id, str): try: url_or_id = int(url_or_id) @@ -112,6 +122,9 @@ def grab_open_questions_from_tournament( """ This function grabs the details of all questions from a Metaculus tournament. """ + logger.info( + f"TOOL: Grabbing open questions from Metaculus tournament: {tournament_id_or_slug}" + ) questions = MetaculusApi.get_all_open_questions_from_tournament( tournament_id_or_slug ) @@ -123,6 +136,7 @@ def grab_open_questions_from_tournament( def create_tool_for_forecasting_bot( bot_or_class: type[ForecastBot] | ForecastBot, ) -> AgentTool: + logger.info(f"TOOL: Creating tool for forecasting bot: {bot_or_class}") if isinstance(bot_or_class, type): bot = bot_or_class() else: @@ -144,6 +158,7 @@ def create_tool_for_forecasting_bot( @agent_tool(description_override=description) def forecast_question_tool(question: str) -> str: + logger.info(f"TOOL: Forecasting question: {question}") question_object = asyncio.run( structure_output( question, diff --git a/forecasting_tools/agents_and_tools/question_generators/question_decomposer.py b/forecasting_tools/agents_and_tools/question_generators/question_decomposer.py index 800e56a5..8f6dc14d 100644 --- a/forecasting_tools/agents_and_tools/question_generators/question_decomposer.py +++ b/forecasting_tools/agents_and_tools/question_generators/question_decomposer.py @@ -78,7 +78,7 @@ async def decompose_into_questions_deep( # NOSONAR 8. Give your final answer in the requested format - # Question requireemnts + # Question requirements - The question should shed light on the topic and have high VOI (Value of Information) - The question can be forecast and will be resolvable with public information - Good: "Will SpaceX launch a rocket on May 2nd 2023?" 
diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py index 6aba0352..66a9722a 100644 --- a/forecasting_tools/front_end/app_pages/congress_page.py +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -24,6 +24,9 @@ logger = logging.getLogger(__name__) SESSIONS_FOLDER = "temp/congress_sessions" +EXAMPLE_SESSION_PATH = ( + "forecasting_tools/front_end/example_outputs/congress_page_example.json" +) class CongressPage(AppPage): @@ -44,6 +47,7 @@ async def _async_main(cls) -> None: """ ) + cls._display_example_button() cls._display_sidebar() session_input = await cls._get_input() @@ -55,6 +59,16 @@ async def _async_main(cls) -> None: if "latest_session" in st.session_state: cls._display_session(st.session_state["latest_session"]) + @classmethod + def _display_example_button(cls) -> None: + if st.button("📋 See Premade Example", key="load_example_btn"): + session = cls._load_session_from_file(EXAMPLE_SESSION_PATH) + if session: + st.session_state["latest_session"] = session + st.rerun() + else: + st.error("Could not load the example session.") + @classmethod def _display_sidebar(cls) -> None: with st.sidebar: @@ -254,6 +268,8 @@ async def _run_congress( def _display_session(cls, session: CongressSession) -> None: st.header("Congress Results") + cls._display_cost_summary(session) + tabs = st.tabs( [ "📊 Synthesis", @@ -323,22 +339,45 @@ def _display_proposals_tab(cls, session: CongressSession) -> None: for proposal in session.proposals: member_name = proposal.member.name if proposal.member else "Unknown" member_role = proposal.member.role if proposal.member else "" + cost_str = ( + f" (${proposal.price_estimate:.2f})" if proposal.price_estimate else "" + ) - with st.expander(f"**{member_name}** - {member_role}", expanded=False): - st.markdown("#### Decision Criteria") + with st.expander( + f"**{member_name}** - {member_role}{cost_str}", expanded=False + ): + if proposal.price_estimate: + st.caption(f"💰 Cost: ${proposal.price_estimate:.2f}") + + st.markdown("# Decision Criteria") for i, criterion in enumerate(proposal.decision_criteria, 1): st.markdown(f"{i}. 
{criterion}") - st.markdown("#### Key Recommendations") + st.markdown("# Key Recommendations") for rec in proposal.key_recommendations: st.markdown(f"- {rec}") - st.markdown("#### Full Proposal") + st.markdown("# Research Summary") + st.markdown(proposal.research_summary) + + st.markdown("# Proposal Text") cleaned = ReportDisplayer.clean_markdown( proposal.get_full_markdown_with_footnotes() ) st.markdown(cleaned) + st.markdown("# Full Forecasts") + for forecast in proposal.forecasts: + st.markdown( + f"**[^{forecast.footnote_id}] {forecast.question_title}**" + ) + st.markdown(f"- **Prediction:** {forecast.prediction}") + st.markdown(f"- **Question:** {forecast.question_text}") + st.markdown(f"- **Resolution:** {forecast.resolution_criteria}") + st.markdown(f"- **Reasoning:** {forecast.reasoning}") + if forecast.key_sources: + st.markdown(f"- **Sources:** {', '.join(forecast.key_sources)}") + @classmethod def _display_forecasts_tab(cls, session: CongressSession) -> None: st.subheader("Forecast Comparison") @@ -400,6 +439,29 @@ def _display_twitter_tab(cls, session: CongressSession) -> None: st.info(post) st.button(f"📋 Copy Tweet {i}", key=f"copy_tweet_{i}") + @classmethod + def _display_cost_summary(cls, session: CongressSession) -> None: + total_cost = session.total_price_estimate + if total_cost is None: + return + + proposal_costs = [ + (p.member.name if p.member else "Unknown", p.price_estimate or 0) + for p in session.proposals + ] + + with st.expander("💰 Cost Summary", expanded=False): + col1, col2 = st.columns(2) + with col1: + st.metric("Total Session Cost", f"${total_cost:.2f}") + with col2: + st.metric("Members", len(session.proposals)) + + if proposal_costs: + st.markdown("**Cost by Member:**") + for member_name, cost in proposal_costs: + st.markdown(f"- {member_name}: ${cost:.2f}") + @classmethod def _display_download_buttons(cls, session: CongressSession) -> None: st.markdown("---") diff --git a/forecasting_tools/front_end/example_outputs/congress_page_example.json b/forecasting_tools/front_end/example_outputs/congress_page_example.json new file mode 100644 index 00000000..a8dd02a7 --- /dev/null +++ b/forecasting_tools/front_end/example_outputs/congress_page_example.json @@ -0,0 +1,568 @@ +{ + "prompt": "How should the United States regulate artificial intelligence? Consider both frontier AI systems (like large language models) and narrower AI applications in areas like hiring, lending, and healthcare. What policies would balance innovation with safety and civil liberties?", + "members_participating": [ + { + "name": "Opus 4.5 (Anthropic)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as Claude naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Claude would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as Claude" + ], + "ai_model": "openrouter/anthropic/claude-opus-4.5" + }, + { + "name": "GPT 5.2 (OpenAI)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as GPT naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as GPT would naturally approach it. 
Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as GPT" + ], + "ai_model": "openrouter/openai/gpt-5.2" + }, + { + "name": "Gemini 3 Pro (Google)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as Gemini naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Gemini would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as Gemini" + ], + "ai_model": "openrouter/google/gemini-3-pro-preview" + }, + { + "name": "Grok 4 (xAI)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as Grok naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Grok would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as Grok" + ], + "ai_model": "openrouter/x-ai/grok-4" + }, + { + "name": "DeepSeek V3.2 (DeepSeek)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as DeepSeek naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as DeepSeek would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as DeepSeek" + ], + "ai_model": "openrouter/deepseek/deepseek-v3.2" + } + ], + "proposals": [ + { + "member": { + "name": "Opus 4.5 (Anthropic)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as Claude naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Claude would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as Claude" + ], + "ai_model": "openrouter/anthropic/claude-opus-4.5" + }, + "research_summary": "The current U.S. AI regulatory landscape is characterized by a fundamental tension: the federal government has adopted a strongly deregulatory posture, while states have enacted over 100 AI laws creating genuine compliance complexity for businesses and uncertain protections for consumers. This fragmentation serves neither innovation nor safety well.\n\nEvidence of algorithmic bias in consequential decisions is substantial and growing. Multiple lawsuits\u2014including Mobley v. Workday involving 1.1 billion processed applications\u2014allege systematic discrimination against protected classes by AI hiring tools. Research demonstrates that AI systems can amplify racial bias, with compliance rates up to 90% when people follow biased AI recommendations. 
There is a meaningful probability that plaintiffs will prevail in at least one major AI discrimination lawsuit (52% [^1]), which would establish important precedents but cannot substitute for proactive regulatory standards.\n\nThe federal-state conflict over AI regulation is likely to produce continued uncertainty rather than resolution. The Trump administration's AI Litigation Task Force faces significant constitutional barriers\u2014executive orders cannot preempt state laws absent congressional action\u2014and there is only limited probability of successful preemption through litigation by end of 2026 (18% [^2]). Meanwhile, Congress is unlikely to pass comprehensive AI legislation in the near term (22% by end of 2027 [^3]), leaving businesses navigating an evolving patchwork of state requirements.\n\nFrontier AI systems present different risk profiles than narrow applications. While current LLMs appear insufficient for catastrophic autonomous harms, documented incidents are increasing rapidly (56% year-over-year growth), and there is moderate probability of a significant safety incident involving frontier AI by 2027 (28% [^4]). This argues for calibrated safety requirements rather than either regulatory abstention or overly prescriptive mandates that cannot adapt to rapidly evolving capabilities.\n\nThe EU AI Act creates compliance pressure on U.S. companies and establishes an alternative regulatory model. While complete market withdrawal is unlikely, there is meaningful probability (22% [^5]) that at least one major U.S. AI company publicly declines to deploy specific products in the EU, which would signal genuine regulatory friction. More importantly, the EU's risk-based framework demonstrates that innovation and accountability can coexist\u2014companies are adapting rather than abandoning European markets.", + "decision_criteria": [ + "Civil Rights Protection", + "Coherence", + "Proportionality", + "Federalism", + "Democratic Accountability", + "Transparency", + "Preserving Benefits", + "Implementation Feasibility" + ], + "forecasts": [ + { + "footnote_id": 1, + "question_title": "Major AI Discrimination Lawsuit Outcome", + "question_text": "Will plaintiffs prevail (via settlement of $10 million or more, or court judgment in their favor) in at least one of the major pending AI hiring discrimination lawsuits (Mobley v. Workday, Harper v. Sirius XM, or the Eightfold AI lawsuit) by December 31, 2027?", + "resolution_criteria": "Resolves YES if any defendant pays $10M+ settlement or court issues favorable plaintiff judgment on discrimination claims; NO if all dismissed, resolved for under $10M combined, or remain pending.", + "prediction": "52%", + "reasoning": "The Mobley case has demonstrated viability by surviving multiple motions to dismiss and achieving conditional collective certification, which typically creates significant settlement pressure given the 1.1 billion applications at stake. The EEOC's supportive amicus brief signals regulatory alignment. However, Workday's legal arguments on ADEA applicant coverage have some circuit court precedent support, creating genuine doctrinal uncertainty. The parallel lawsuits increase probability that at least one succeeds. The enormous potential liability typically drives settlements even in uncertain cases, but defendants may prefer litigation to avoid precedent-setting. 
I weight this slightly above 50% because certified class actions historically settle at high rates and defendants face existential exposure.", + "key_sources": [ + "Court docket Mobley v. Workday (N.D. Cal.)", + "JD Supra legal analysis", + "HR Dive reporting", + "Stanford HAI AI Index" + ] + }, + { + "footnote_id": 2, + "question_title": "State AI Law Preemption Success", + "question_text": "Will the Trump administration's AI Litigation Task Force successfully obtain at least one federal court ruling that invalidates a state AI law on preemption or constitutional grounds by December 31, 2026?", + "resolution_criteria": "Resolves YES if federal court strikes down, enjoins, or declares unconstitutional any state AI law based on federal preemption or First Amendment grounds as result of DOJ Task Force litigation; NO otherwise.", + "prediction": "18%", + "reasoning": "Constitutional doctrine clearly establishes that executive orders cannot directly preempt state laws\u2014only Congress can do so under the Supremacy Clause. The 99-1 Senate vote against an AI moratorium signals Congress will not provide statutory backing for preemption. The novel legal theories available (FTC Act implied preemption, First Amendment compelled speech challenges) lack established precedent. Litigation timelines make final rulings unlikely by end of 2026. Bipartisan state opposition (both DeSantis and Newsom) suggests even sympathetic jurisdictions may hesitate. However, aggressive DOJ litigation could produce preliminary injunctions or favorable rulings in some jurisdictions, which prevents negligible probability.", + "key_sources": [ + "Phillips Lytle legal analysis", + "White House executive order text", + "Congressional Record on Senate vote", + "Constitutional law commentary" + ] + }, + { + "footnote_id": 3, + "question_title": "Federal AI Legislation Passage", + "question_text": "Will the United States Congress pass comprehensive federal AI legislation and have it signed into law by December 31, 2027?", + "resolution_criteria": "Resolves YES if federal legislation creating new binding AI requirements applying broadly across multiple sectors is enacted; narrow legislation addressing only one application does not count.", + "prediction": "22%", + "reasoning": "Congress passed zero comprehensive AI bills in 2024-2025 despite 150+ proposals, consistent with broader pattern of congressional gridlock on technology regulation. The current administration strongly favors deregulation and would likely oppose comprehensive legislation. However, international pressure from EU compliance requirements, accumulating evidence from lawsuits, and potential safety incidents could shift dynamics. The issue has unprecedented salience and state fragmentation creates genuine business demand for federal clarity. 
I weight this above historical base rates (~10-15%) because of unique pressures but well below even odds given demonstrated inability to advance legislation.", + "key_sources": [ + "Brennan Center AI legislation tracker", + "American Action Forum", + "Congressional Research Service" + ] + }, + { + "footnote_id": 4, + "question_title": "Frontier AI Safety Incident", + "question_text": "Will a widely-reported incident occur by December 31, 2027 where a frontier AI system from a major developer is credibly implicated in causing significant harm (loss of life, critical infrastructure disruption, or $100M+ cyberattack damage)?", + "resolution_criteria": "Resolves YES if credible major news reporting documents incident meeting harm criteria with frontier AI playing material contributing role per independent expert analysis; NO otherwise.", + "prediction": "28%", + "reasoning": "AI incidents are accelerating rapidly (56% year-over-year growth, malicious AI use up 8x since 2022), and frontier capabilities continue expanding. However, the threshold is high\u2014no incident has clearly met it to date. Major developers maintain safety testing, and attribution to specific frontier systems is often difficult. The 2-year horizon provides meaningful time for an incident to occur, and integration into healthcare/cybersecurity creates plausible pathways. Research showing 70% probability of catastrophic responses in multi-turn conversations indicates technical vulnerability exists. I weight this at 28%\u2014above historical base rate of zero qualifying incidents but reflecting substantial uncertainty about whether theoretical risks materialize.", + "key_sources": [ + "Stanford HAI AI Index 2025", + "AIID database", + "Time Magazine AI incident reporting", + "Responsible AI Labs analysis" + ] + }, + { + "footnote_id": 5, + "question_title": "EU-US Regulatory Divergence Impact", + "question_text": "By December 31, 2027, will at least one major U.S.-headquartered AI company (market cap over $100 billion) publicly announce it will not deploy a frontier AI product in the EU market specifically due to EU AI Act compliance requirements?", + "resolution_criteria": "Resolves YES if qualifying company makes official public statement that specific AI product will not be offered in EU due to AI Act compliance concerns; NO otherwise.", + "prediction": "22%", + "reasoning": "Major companies historically maintain EU market presence despite regulatory burdens\u2014GDPR did not trigger withdrawals. The EU market is economically too significant to abandon entirely. However, specific product non-deployment (not full withdrawal) is plausible given prohibited practices under the AI Act (certain biometric systems), and companies have become more willing to publicly criticize regulation. Meta previously delayed EU launches. Compliance costs ($200-400M annually) and 58% of developers reporting regulation-driven delays suggest genuine friction. A public announcement would be strategically costly but could serve political purposes. 
I weight this at 22%\u2014above negligible because partial product withdrawals with public statements are possible, but well below even odds because complete market exit is economically irrational.", + "key_sources": [ + "CCIA EU Digital Regulation analysis", + "ACT/CEPS compliance cost studies", + "Modulos AI analysis", + "EU AI Act text" + ] + } + ], + "proposal_markdown": "### Executive Summary\n\nThe United States should pursue a **sector-specific, risk-proportionate federal regulatory framework** for AI that establishes clear accountability standards for high-risk applications while preserving state authority to protect civil rights and avoiding one-size-fits-all approaches that would either stifle innovation or leave serious harms unaddressed. The single most important action is to **pass federal legislation requiring transparency, bias testing, and meaningful recourse for individuals affected by AI systems making consequential decisions in employment, lending, healthcare, and housing**\u2014areas where algorithmic discrimination is documented and existing civil rights frameworks provide clear precedent.\n\n### Analysis\n\nThe current U.S. AI regulatory landscape is characterized by a fundamental tension: the federal government has adopted a strongly deregulatory posture, while states have enacted over 100 AI laws creating genuine compliance complexity for businesses and uncertain protections for consumers. This fragmentation serves neither innovation nor safety well.\n\nEvidence of algorithmic bias in consequential decisions is substantial and growing. Multiple lawsuits\u2014including Mobley v. Workday involving 1.1 billion processed applications\u2014allege systematic discrimination against protected classes by AI hiring tools. Research demonstrates that AI systems can amplify racial bias, with compliance rates up to 90% when people follow biased AI recommendations. There is a meaningful probability that plaintiffs will prevail in at least one major AI discrimination lawsuit (52% [^1]), which would establish important precedents but cannot substitute for proactive regulatory standards.\n\nThe federal-state conflict over AI regulation is likely to produce continued uncertainty rather than resolution. The Trump administration's AI Litigation Task Force faces significant constitutional barriers\u2014executive orders cannot preempt state laws absent congressional action\u2014and there is only limited probability of successful preemption through litigation by end of 2026 (18% [^2]). Meanwhile, Congress is unlikely to pass comprehensive AI legislation in the near term (22% by end of 2027 [^3]), leaving businesses navigating an evolving patchwork of state requirements.\n\nFrontier AI systems present different risk profiles than narrow applications. While current LLMs appear insufficient for catastrophic autonomous harms, documented incidents are increasing rapidly (56% year-over-year growth), and there is moderate probability of a significant safety incident involving frontier AI by 2027 (28% [^4]). This argues for calibrated safety requirements rather than either regulatory abstention or overly prescriptive mandates that cannot adapt to rapidly evolving capabilities.\n\nThe EU AI Act creates compliance pressure on U.S. companies and establishes an alternative regulatory model. While complete market withdrawal is unlikely, there is meaningful probability (22% [^5]) that at least one major U.S. 
AI company publicly declines to deploy specific products in the EU, which would signal genuine regulatory friction. More importantly, the EU's risk-based framework demonstrates that innovation and accountability can coexist\u2014companies are adapting rather than abandoning European markets.\n\n### Recommendations\n\n**1. Enact Federal Anti-Discrimination Standards for High-Risk AI Applications**\n\nCongress should pass legislation requiring deployers of AI systems used in employment, lending, healthcare, and housing decisions to: (a) conduct and document bias testing before deployment, (b) provide meaningful notice to affected individuals that AI is involved in decisions about them, (c) establish processes for individuals to challenge adverse decisions and receive human review, and (d) maintain records enabling regulatory enforcement. This addresses documented harms (supporting Civil Rights Protection criterion), provides clear compliance standards (Coherence criterion), and targets actual high-risk uses rather than all AI (Proportionality criterion).\n\nThe probability of meaningful plaintiff victories in pending discrimination lawsuits (52% [^1]) demonstrates both the legal uncertainty companies face and the inadequacy of purely litigation-based accountability. Proactive standards would provide clarity for responsible businesses while deterring harmful practices.\n\n**2. Preserve State Authority for Consumer Protection**\n\nFederal legislation should explicitly disclaim preemption of state laws providing greater consumer protection, similar to the approach in federal environmental and consumer protection statutes. Given the constitutional barriers to executive preemption (18% success probability [^2]) and the 99-1 Senate vote against a moratorium on state enforcement, Congress should affirm rather than restrict states' traditional role as \"laboratories of democracy.\" This supports both Federalism (Coherence criterion) and Democratic Accountability (Transparency criterion).\n\n**3. Establish Tiered Transparency Requirements for Frontier AI**\n\nFor frontier AI systems (above defined compute thresholds), developers should be required to: (a) publish model cards describing capabilities, limitations, and safety evaluations, (b) report significant safety incidents to a designated federal agency within 15 days, and (c) maintain documentation of safety testing procedures. These requirements mirror California's SB 53 (now in effect) and create federal standards that reduce rather than add to compliance fragmentation. The meaningful probability of a significant frontier AI safety incident (28% [^4]) justifies transparency requirements that enable both regulatory response and public understanding.\n\n**4. Create an AI Regulatory Sandbox Program**\n\nFederal agencies should establish regulatory sandboxes allowing companies to test innovative AI applications under supervisory oversight with temporary compliance flexibility, following the model adopted by Texas's TRAIGA. This supports Innovation (Preserving Benefits criterion) while maintaining accountability, and could help resolve the tension between innovation and precaution that characterizes current debates.\n\n**5. Strengthen Enforcement Resources for Existing Agencies**\n\nRather than creating a new AI regulator, Congress should appropriate dedicated resources for AI enforcement to the FTC, EEOC, and sector-specific regulators (FDA, HUD, CFPB). 
These agencies have established expertise and statutory authority that can be applied to AI systems. Enforcement capacity is essential\u2014well-designed rules fail without implementation resources (Implementation Feasibility criterion).\n\n### Risks and Uncertainties\n\n**Risk of Regulatory Capture or Inadequate Enforcement**: Industry influence could weaken standards or reduce enforcement resources. The FTC's vacating of its consent order against Rytr LLC following the AI Action Plan illustrates how agency priorities can shift. Mitigation: Include private rights of action for civil rights violations and mandatory enforcement reporting.\n\n**Risk of Technological Change Outpacing Regulation**: AI capabilities are advancing rapidly; regulations based on current architectures may become obsolete. The substantial uncertainty in all forecasts reflects genuine unpredictability. Mitigation: Build in regular review mechanisms and sunset provisions requiring congressional reauthorization.\n\n**Risk of Fragmented International Compliance**: Regulatory divergence with the EU creates compliance burdens and potential competitive issues. The probability of at least one major company declining EU deployment (22% [^5]) suggests genuine friction. Mitigation: Pursue mutual recognition agreements and prioritize interoperability in standards development.\n\n**Risk of Insufficient State Coordination**: Without federal standards, state approaches may diverge significantly, creating genuine compliance challenges. However, premature federal preemption could eliminate beneficial state innovations. This is the area of greatest forecast uncertainty\u2014whether the federal-state conflict will be resolved through courts, legislation, or accommodation remains genuinely unclear.\n\n**Risk of Safety Incidents Triggering Overcorrection**: If a significant AI safety incident occurs (28% probability [^4]), the political response might include poorly designed restrictions that harm beneficial applications. Mitigation: Proactive development of evidence-based safety standards creates framework for measured response.", + "key_recommendations": [ + "Enact Federal Anti-Discrimination Standards for High-Risk AI Applications", + "Preserve State Authority for Consumer Protection", + "Establish Tiered Transparency Requirements for Frontier AI", + "Create an AI Regulatory Sandbox Program", + "Strengthen Enforcement Resources for Existing Agencies" + ] + }, + { + "member": { + "name": "GPT 5.2 (OpenAI)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as GPT naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as GPT would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as GPT" + ], + "ai_model": "openrouter/openai/gpt-5.2" + }, + "research_summary": "U.S. AI regulation remains a patchwork of (a) sector regulators using existing authority, (b) federal executive-branch policy (which has shifted across administrations), and (c) fast-moving state and local laws. 
A notable federal baseline for \u201ctrustworthy AI\u201d is **NIST\u2019s voluntary AI Risk Management Framework (AI RMF 1.0, Jan 2023)**, organized around *Govern / Map / Measure / Manage* and \u201ctrustworthy\u201d characteristics like validity, safety/security, accountability/transparency, explainability, privacy, and fairness (harmful bias managed). This RMF is widely referenced but not binding. Federal governance for **federal agency use of AI** tightened operationally through **OMB Memorandum M-25-21 (Apr 3, 2025)**, requiring Chief AI Officers, AI governance boards, public AI use-case inventories, and minimum practices for \u201chigh-impact AI\u201d (pre-deployment testing, impact assessments, monitoring, appeal/human review). (Sources: NIST AI RMF PDF; OMB M-25-21 PDF.)\n\nFor **frontier/foundation models**, federal policy has been unstable. **Executive Order 14110 (Oct 30, 2023)** required reporting and testing-related information for \u201cdual-use foundation models\u201d above compute thresholds (e.g., >10^26 FLOPs) and directed NIST work on red-teaming and standards, but it was rescinded in early 2025. The replacement posture under **EO 14179 (Jan 2025)** emphasized removing barriers to AI innovation, and then **EO 14,365 (Dec 11, 2025)** sought a \u201cnational policy framework,\u201d directing the Department of Commerce to identify \u201conerous\u201d state AI laws (report due **Mar 11, 2026**) and creating a **DOJ AI Litigation Task Force (Jan 9, 2026)** to challenge state laws via litigation rather than direct preemption. This is important: without congressional preemption, state laws remain enforceable until courts enjoin them. (Sources: EO 14110 text; EO 14179 PDF; White House EO 14,365 page; DOJ Task Force memo summaries.)\n\nAt the **state/local level**, two regulatory patterns dominate: (1) **\u201cHigh-risk AI / algorithmic discrimination\u201d frameworks** and (2) **frontier model transparency/safety reporting**. Colorado\u2019s **SB24-205** (effective **June 30, 2026**, delayed) is the most comprehensive \u201chigh-risk AI\u201d anti-discrimination law, imposing developer/deployer duties, impact assessments, consumer notices, and mitigation obligations for consequential decisions (housing, lending, employment, healthcare, etc.). In contrast, California\u2019s **SB 53 (effective Jan 1, 2026)** targets frontier developers (compute threshold ~10^26 FLOPs; \u201clarge\u201d if >$500M revenue), requiring a published safety framework and \u201ccritical safety incident\u201d reporting; it defines \u201ccatastrophic risk\u201d around >50 deaths/serious injuries or >$1B damages. New York\u2019s **RAISE Act** (effective Jan 1, 2027) similarly targets frontier models with safety protocols and rapid incident reporting (72 hours). In hiring, NYC\u2019s **Local Law 144** (enforcement since July 5, 2023) mandates annual \u201cbias audits\u201d and candidate notice for automated employment decision tools (AEDTs), but enforcement has been weak: a NY State Comptroller audit found DCWP received **only two complaints** (July 2023\u2013June 2025) and that independent review found **17 potential noncompliance instances** among 32 employers, versus DCWP\u2019s one. (Sources: Colorado SB24-205 page; CA SB53 compliance summaries; NY RAISE summaries; NYS Comptroller audit.)\n\nIn **healthcare**, FDA oversight for AI is comparatively mature but still evolving for generative AI. 
FDA has authorized **>1,200 AI-enabled medical devices** overall, yet **no generative AI/LLM devices were FDA-approved for clinical use as of early 2026** (per Health Affairs Scholar). FDA finalized **Predetermined Change Control Plan (PCCP)** guidance (Dec 2024) and released lifecycle draft guidance (Jan 2025). Meanwhile, FDA\u2019s **Jan 6, 2026 Clinical Decision Support (CDS) guidance** expanded \u201cenforcement discretion\u201d for some tools (including some generative AI-enabled CDS) if transparent and reviewable by clinicians, which may shift risk away from FDA premarket review toward post-market accountability and institutional governance. (Sources: Health Affairs Scholar article; FDA PCCP guidance summaries; Jan 2026 CDS guidance summaries.)", + "decision_criteria": [ + "Risk Reduction for Catastrophic & Systemic Harms", + "Civil Liberties & Due Process", + "Innovation & Economic Dynamism", + "Equity / Anti-Discrimination Effectiveness", + "Administrative Feasibility & Legal Durability" + ], + "forecasts": [ + { + "footnote_id": 1, + "question_title": "Comprehensive Federal AI Law by 2028", + "question_text": "Will the United States enact a comprehensive federal AI law by December 31, 2028 that (a) creates cross-sector obligations for \u201chigh-risk\u201d/consequential AI systems and (b) includes dedicated enforcement authority?", + "resolution_criteria": "YES if such a statute is signed into law by 12/31/2028; NO otherwise.", + "prediction": "35%", + "reasoning": "Congress has struggled to pass cross-cutting tech frameworks; the privacy analog (ADPPA/APRA) stalled on preemption and private-right-of-action disputes, and the 118th Congress enacted no AI bills despite heavy activity. State patchwork and national security salience increase pressure, and there are legislative vehicles/drafts, but a truly comprehensive cross-sector regime by 2028 remains less likely than not.", + "key_sources": [ + "Brennan Center AI legislation tracker", + "APRA/ADPPA analyses", + "reporting on TRUMP AMERICA AI Act and executive-order-driven preemption strategy" + ] + }, + { + "footnote_id": 2, + "question_title": "Preliminary Injunction Against a Major State AI Law by 2027", + "question_text": "Will a federal court issue a preliminary injunction by December 31, 2027 that blocks enforcement of a major state AI statute regulating frontier models or high-risk AI discrimination statewide?", + "resolution_criteria": "YES if a PI bars enforcement of core provisions statewide; NO otherwise.", + "prediction": "30%", + "reasoning": "Litigation is likely, but broad statewide PIs require high showings and courts often narrow relief. Dormant commerce clause challenges look weaker for non-discriminatory state laws post-*National Pork Producers v. Ross*, though First Amendment challenges to certain tech statutes sometimes succeed. 
The DOJ task force must litigate case-by-case; executive orders alone don\u2019t preempt.", + "key_sources": [ + "DOJ AI Litigation Task Force summaries", + "White House EO 14,365", + "analysis of Ross implications", + "examples of PIs in state tech laws" + ] + }, + { + "footnote_id": 3, + "question_title": "FDA Clears/Approves an LLM/Generative AI Clinical Device by 2028", + "question_text": "Will FDA clear or approve at least one generative-AI/LLM-based medical device intended for clinical use by December 31, 2028?", + "resolution_criteria": "YES if FDA clears/approves a device whose core function uses a generative model/LLM for clinical diagnosis/treatment/CDS; NO otherwise.", + "prediction": "45%", + "reasoning": "As of early 2026, FDA had not approved any LLM/generative AI medical devices for clinical use, though FDA is actively developing lifecycle oversight and has convened advisory discussions on generative AI mental health devices. A first clearance is plausible via constrained indications and strong validation, but incentives may tilt toward non-device CDS pathways after the Jan 2026 CDS guidance, reducing the number of products seeking clearance.", + "key_sources": [ + "Health Affairs Scholar (no LLM devices as of early 2026)", + "FDA DHAC meeting summaries", + "FDA Jan 2026 CDS guidance summaries" + ] + }, + { + "footnote_id": 4, + "question_title": "$1B+ AI-Enabled Cyber Incident Affecting U.S. Critical Sector by 2028", + "question_text": "By Dec 31, 2028, will there be at least one publicly reported cyber incident with >$1B direct costs for U.S. entities and credible documentation that AI materially enabled the attack?", + "resolution_criteria": "YES if both cost and AI-material-enablement criteria are met in credible reporting; NO otherwise.", + "prediction": "60%", + "reasoning": "$1B+ cyber incidents already occur (e.g., the Change Healthcare incident ultimately estimated at ~$2.9\u2013$3.1B). AI is increasingly used for phishing, social engineering, and automation; the remaining uncertainty is public attribution/documentation of AI\u2019s role. Given trends and rising reporting of AI-enabled tactics, it\u2019s more likely than not.", + "key_sources": [ + "Change Healthcare cost reporting", + "cyber trend reporting on AI-enabled attacks", + "historical benchmark NotPetya (~$10B global)" + ] + }, + { + "footnote_id": 5, + "question_title": "Five+ Additional States Enact Colorado-Style High-Risk AI Discrimination Laws by 2028", + "question_text": "Will five or more additional states enact Colorado-style comprehensive \u201chigh-risk AI\u201d anti-discrimination statutes by Dec 31, 2028?", + "resolution_criteria": "YES if \u22655 additional states enact broadly similar frameworks (developer/deployer duties + impact assessments + enforcement); NO otherwise.", + "prediction": "35%", + "reasoning": "States are highly active on AI, but comprehensive frameworks are rare and politically/administratively complex; Colorado\u2019s own delay suggests implementation friction. Federal pressure and litigation threats may chill adoption. Expect more narrow state laws than full Colorado-style regimes, though diffusion remains plausible if harms and public pressure rise.", + "key_sources": [ + "Colorado SB24-205 summaries and delay reporting", + "state AI legislative trend reporting", + "executive-order-driven federal pushback against state laws" + ] + } + ], + "proposal_markdown": "### Executive Summary\n\nThe U.S. 
should adopt a **two-track AI regulatory strategy**: (1) **frontier-model accountability** focused on catastrophic-risk governance, secure model weights, and incident reporting; and (2) **high-impact application governance** focused on civil rights, due process, privacy, and auditability in domains like hiring, lending, and healthcare. Because comprehensive federal legislation is uncertain (35% by 2028 [^1]), policymakers should pair targeted federal statutes with strong sector-agency enforcement and procurement-based standards that can operate under the status quo.\n\n### Analysis\n\nThe current environment is fragmented: federal \u201csoft law\u201d (NIST AI RMF, OMB M-25-21) and sector regulators (FDA, CFPB, EEOC/DOJ) coexist with a rapidly expanding state patchwork (Colorado\u2019s high-risk AI law; California and New York frontier model laws; NYC hiring audits). Federal attempts to wipe away state law via executive action will likely produce **years of uncertainty** and mixed court outcomes; a broad preliminary injunction against a major state AI law by 2027 is possible but not the modal outcome (30% [^2]). That implies firms will continue building compliance programs around the strictest credible requirements, and policymakers should seek harmonization through standards and safe harbors rather than pure preemption.\n\nOn frontier AI, the most defensible approach is to regulate **process and governance** rather than mandate \u201ctruthful outputs\u201d or ideology. State laws like California SB 53 show a \u201ctransparency + incident reporting + whistleblower\u201d template. The main national risk driver is not only model misalignment but also **misuse**, especially in cybersecurity and biosecurity. The likelihood of at least one $1B+ cyber incident with documented AI enablement by 2028 is material (60% [^4]), so frontier policy should prioritize secure development, red-teaming, misuse monitoring, and rapid incident reporting\u2014while protecting legitimate research and speech.\n\nFor \u201cnarrower\u201d AI used in consequential decisions, the biggest civil-liberties failures tend to be opaque decision-making, inability to contest errors, and proxy discrimination at scale. NYC Local Law 144 shows both the promise and pitfalls of audit-centric regulation: disclosure and bias audits exist on paper, but enforcement can be weak, with extremely low complaint volume and high apparent noncompliance. That argues for a federal baseline emphasizing *audit quality + accountability + remedies*, not mere \u201cpaper compliance.\u201d\n\nIn healthcare, FDA remains a key safety institution, yet the January 2026 CDS guidance expands non-device pathways for some generative AI tools. Since FDA clearance of an LLM/generative clinical device by 2028 is uncertain (45% [^3])\u2014and many tools may bypass clearance\u2014policy should strengthen institutional governance (hospital AI committees, documentation, postmarket monitoring) and require transparency and testing for AI integrated into clinical workflows, even when not a regulated \u201cdevice.\u201d\n\n### Recommendations\n\n1. 
**Create a Federal \u201cHigh-Impact AI\u201d Baseline (Civil Rights + Due Process) via FTC/sector coordination** \n- **What:** Enact a federal baseline (or implement via FTC + sector regulators where statute is lacking) requiring that any \u201chigh-impact AI\u201d used for consequential decisions provide: notice, meaningful explanation of main factors, data-access/correction where feasible, documented impact assessments, and a right to human review/appeal for adverse outcomes. \n- **Why:** This addresses proven harms in hiring/lending/health access while remaining technology-neutral. It also reduces the incentive for weak audit regimes that fail in practice. \n- **Criteria:** Risk reduction; civil liberties; equity; feasibility. \n- **Forecast link:** A comprehensive federal law is uncertain (35% [^1]); this can be modular/sectoral and still meaningful even if full harmonization fails.\n\n2. **Frontier Model Safety Case + Incident Reporting + Secure Weights (federal standard with safe harbors)** \n- **What:** Require developers above a clear capability/compute threshold to (a) maintain a documented \u201csafety case,\u201d (b) conduct independent red-teaming on defined catastrophic-misuse vectors, (c) implement strong cybersecurity for model weights and training infrastructure, and (d) report \u201ccritical safety incidents\u201d to a designated federal clearinghouse. Provide a **safe harbor** (reduced punitive exposure) for firms that follow audited best practices and promptly report incidents. \n- **Why:** This targets the highest-stakes risks without turning AI governance into speech control. It aligns with the direction of CA/NY frontier laws while creating a national standard that is less likely to be enjoined than ad hoc state requirements. \n- **Criteria:** Risk reduction; innovation (safe harbor); legal durability. \n- **Forecast link:** AI-enabled cyber catastrophe risk is substantial (60% [^4]); state-law uncertainty likely persists (30% chance of major PI [^2]).\n\n3. **Harden AI-Enabled Cybersecurity and Critical Infrastructure Defenses** \n- **What:** Expand CISA-led requirements for secure-by-design software, mandatory MFA for privileged access, vendor incident reporting, and \u201cAI-aware\u201d security testing (prompt-injection testing for agentic systems; logging for model I/O in enterprise deployments). Encourage insurers and federal procurement to require these controls. \n- **Why:** Large cyber losses are already real (e.g., Change Healthcare), and AI lowers attacker costs. This is high ROI and largely content-neutral. \n- **Criteria:** Risk reduction; feasibility; innovation (predictable controls). \n- **Forecast link:** The probability of a $1B+ AI-enabled cyber incident by 2028 is meaningfully above 50% (60% [^4]).\n\n4. **Healthcare: Close the \u201cNon-Device CDS\u201d Governance Gap** \n- **What:** Condition Medicare/Medicaid participation (or accreditation levers) on hospitals and large clinics adopting AI governance: model inventory, intended-use controls, clinician training, monitoring of performance drift, and documented override/appeal processes\u2014especially for generative AI used in diagnosis/treatment support. \n- **Why:** FDA clearance of LLM devices is uncertain (45% [^3]) and some tools will enter clinics via enforcement discretion; institutional governance becomes the safety backstop. \n- **Criteria:** Risk reduction; feasibility; civil liberties (patient transparency). 
\n- **Forecast link:** Uncertainty about FDA-cleared LLM devices (45% [^3]) supports governance that does not rely on FDA alone.\n\n5. **Avoid Broad Federal Preemption; Use \u201cFloor + Portability\u201d Instead** \n- **What:** Set a federal minimum standard and allow states to exceed it in defined areas (e.g., employment notices, child safety), but create interoperability through standardized documentation (model cards, impact assessment templates) and mutual-recognition mechanisms. \n- **Why:** Broad preemption is politically and legally brittle; prolonged court fights are likely, and a sweeping federal law by 2028 is uncertain (35% [^1]). A floor reduces worst harms while preserving state experimentation. \n- **Criteria:** Legal durability; innovation; civil liberties. \n- **Forecast link:** Given only a 35% chance of comprehensive federal law by 2028 [^1] and only a 30% chance of a major PI by 2027 [^2], planning for coexistence is prudent.\n\n### Risks and Uncertainties\n\n- **Regulatory capture / incumbent advantage:** Heavy compliance burdens can entrench large firms; safe harbors and scaled obligations are essential. \n- **Litigation risk and instability:** State-federal conflict may persist for years; a preliminary injunction against a major state AI law is plausible (30% [^2]) but not assured, creating a compliance limbo. \n- **Attribution and measurement problems:** For cyber incidents, \u201cAI enablement\u201d may be underreported; my 60% estimate depends on public documentation practices (uncertainty in [^4]). \n- **Healthcare pathway ambiguity:** FDA\u2019s posture on generative AI is in flux; even if FDA never clears an LLM device by 2028 (55% implied by [^3]), clinical deployment may still expand through non-device pathways, increasing safety variance across institutions. \n- **Federal legislative feasibility:** The central uncertainty is congressional action; comprehensive law is only 35% by 2028 [^1]. Over-investing in a single legislative \u201cbig bang\u201d strategy risks wasted time.", + "key_recommendations": [ + "Create a Federal \u201cHigh-Impact AI\u201d Baseline (Civil Rights + Due Process) via FTC/sector coordination", + "Frontier Model Safety Case + Incident Reporting + Secure Weights (federal standard with safe harbors)", + "Harden AI-Enabled Cybersecurity and Critical Infrastructure Defenses", + "Healthcare: Close the \u201cNon-Device CDS\u201d Governance Gap", + "Avoid Broad Federal Preemption; Use \u201cFloor + Portability\u201d Instead" + ] + }, + { + "member": { + "name": "Gemini 3 Pro (Google)", + "role": "AI Policy Analyst", + "political_leaning": "behaves as Gemini naturally does", + "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Gemini would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.", + "expertise_areas": [ + "general policy analysis" + ], + "personality_traits": [ + "behaves naturally as Gemini" + ], + "ai_model": "openrouter/google/gemini-3-pro-preview" + }, + "research_summary": "The current trajectory of U.S. AI policy is defined by a sharp conflict between federal deregulation and state-level protectionism. While a unified federal standard could theoretically boost venture capital investment by ~15% (Forecast [^1]), the current administration's \"light-touch\" approach creates severe risks in other domains. 
Specifically, with federal agencies like the EEOC and FTC actively retreating from algorithmic bias enforcement (Forecast [^3]), preemption of state laws would effectively legalize discriminatory AI harms in housing and hiring, as no federal backstop would remain. This tradeoff\u2014economic speed vs. civil rights\u2014is the central tension.\n\nFurthermore, the risk of \"frontier\" model failure remains a distinct, albeit moderate, possibility (30% probability of >$100M damage by 2028 [^2]). Relying solely on voluntary industry commitments or a \"captured\" federal regulator (35% risk [^4]) is insufficient for national security-grade risks. The industry's own safety layers are robust but not infallible. A policy that preempts state \"early warning systems\" (like California's reporting requirements) without replacing them with a competent federal equivalent invites catastrophe.\n\nTherefore, \"total preemption\" is a dangerous gamble. It relies on the assumption that federal agencies will vigorously enforce \"light\" rules, which our forecasts explicitly contradict. A more balanced path acknowledges that states like California are currently the only competent regulators \"on the beat\" for safety, while recognizing that startups need relief from a 50-state compliance patchwork.", + "decision_criteria": [ + "Protection of Civil Liberties", + "Innovation Viability", + "Risk Mitigation", + "Adaptability" + ], + "forecasts": [ + { + "footnote_id": 1, + "question_title": "State Law Preemption & Innovation", + "question_text": "If the federal government successfully preempts state AI regulations with a \"light-touch\" federal standard before 2027, will U.S.-based AI startups raise >15% more venture capital in 2027 compared to a scenario where state laws remain in force?", + "resolution_criteria": "Resolves YES if aggregate VC funding (Seed-Series C) is >15% higher in the preemption scenario vs. baseline trend/counterfactual.", + "prediction": "65%", + "reasoning": "Historical analogy to GDPR suggests regulatory fragmentation costs ~26% in investment. Reversing this fragmentation is a strong signal to capital markets. However, hype cycles (FOMO) currently drive funding as much as policy, dampening the purely regulatory effect.", + "key_sources": [ + "https://www.nber.org/digest/202509/privacy-regulation-and-transatlantic-venture-investment", + "https://datacatalyst.org/wp-content/uploads/2020/01/GDPR-report-2020.pdf" + ] + }, + { + "footnote_id": 2, + "question_title": "Frontier Model Safety Incidents", + "question_text": "Will a \"frontier\" AI model cause a \"critical safety incident\" causing >$100M in damages or severe physical harm to >10 people between now and 2028?", + "resolution_criteria": "Trusted report attributing >$100M damage/health harm *directly* to autonomous/instructed model action.", + "prediction": "30%", + "reasoning": "While capability is rising, \"Swiss cheese\" safety layers (humans in loop) remain effective. Most \"incidents\" are human attacks using AI, not autonomous failures. 
Metaculus forecasts higher risks ($1Bn) only on a longer timeline (2032).", + "key_sources": [ + "https://www.metaculus.com/questions/7814/ai-incident-causes-1bn-damage-before-2032/", + "https://www.sentinelone.com/cybersecurity-101/data-and-ai/jailbreaking-llms/" + ] + }, + { + "footnote_id": 3, + "question_title": "Bias in High-Stakes Narrow AI", + "question_text": "In the absence of specific federal algorithmic bias regulation, will >5 major investigations find systemic discrimination in AI hiring/lending by Fortune 500 companies in 2026-2027?", + "resolution_criteria": ">5 public findings/settlements by FTC, DOJ, EEOC.", + "prediction": "20%", + "reasoning": "Explicit policy shifts in 2025 (EEOC closing cases, FTC \"AI Action Plan\") define a retreat from enforcement. The mechanism for finding \"YES\" is being dismantled by the executive branch.", + "key_sources": [ + "https://www.workforcebulletin.com/artificial-intelligence-and-disparate-impact-liability-how-the-eeocs-end-to-disparate-impact-claims-affects-workplace-ai", + "https://www.ftc.gov/news-events/news/press-releases/2025/12/ftc-reopens-sets-aside-rytr-final-order-response-trump-administrations-ai-action-plan" + ] + }, + { + "footnote_id": 4, + "question_title": "Regulatory Capture Probability", + "question_text": "If a new federal AI agency is created by 2026, will >50% of its senior leadership have been employed by major AI labs in the preceding 3 years?", + "resolution_criteria": "Bio analysis of top 10 officials showing majority industry employment (OpenAI/Google/etc).", + "prediction": "35%", + "reasoning": "Current trend is appointing VCs and ideologically aligned \"outsiders\" (e.g., Sacks, Kratsios) rather than direct \"Big Tech\" employees. The \"unsure\" factor is who these VCs appoint as deputies.", + "key_sources": [ + "https://www.seyfarth.com/news-insights/trump-administration-releases-ai-action-plan-and-three-executive-orders-on-ai-what-employment-practitioners-need-to-know.html" + ] + } + ], + "proposal_markdown": "### Executive Summary\n\nThe United States faces a pivotal choice between fragmented state-level safety nets and a unified federal \"sandbox\" for AI. I recommend a **\"Federal Floor, Not Ceiling\"** approach: Congress should enact immediate, targeted federal legislation addressing high-consensus risks (discrimination, non-consensual deepfakes, and critical infrastructure safety) while **preserving state authority** to innovate on broader safety standards until a mature federal regulatory regime effectively operationalizes. This strategy secures innovation (by harmonizing core liability) without granting a \"regulatory vacuum\" that our forecasts suggest would leave civil rights unprotected and safety risks unmanaged.\n\n### Analysis\n\nThe current trajectory of U.S. AI policy is defined by a sharp conflict between federal deregulation and state-level protectionism. My analysis indicates that while a unified federal standard could theoretically boost venture capital investment by ~15% (Forecast [^1]), the current administration's \"light-touch\" approach creates severe risks in other domains. Specifically, with federal agencies like the EEOC and FTC actively retreating from algorithmic bias enforcement (Forecast [^3]), preemption of state laws would effectively legalize discriminatory AI harms in housing and hiring, as no federal backstop would remain. This tradeoff\u2014economic speed vs. 
civil rights\u2014is the central tension.\n\nFurthermore, the risk of \"frontier\" model failure remains a distinct, albeit moderate, possibility (30% probability of >$100M damage by 2028 [^2]). Relying solely on voluntary industry commitments or a \"captured\" federal regulator (35% risk [^4]) is insufficient for national security-grade risks. The industry's own safety layers are robust but not infallible. A policy that preempts state \"early warning systems\" (like California's reporting requirements) without replacing them with a competent federal equivalent invites catastrophe.\n\nTherefore, \"total preemption\" is a dangerous gamble. It relies on the assumption that federal agencies will vigorously enforce \"light\" rules, which our forecasts explicitly contradict. A more balanced path acknowledges that states like California are currently the only competent regulators \"on the beat\" for safety, while recognizing that startups need relief from a 50-state compliance patchwork.\n\n### Recommendations\n\n1. **Enact the \"Algorithmic Civil Rights Act\" to Codify Harm Protections**\n* **Recommendation:** Congress should pass legislation strictly codifying that existing civil rights laws (Fair Housing Act, ECOA, Title VII) apply to AI/algorithmic decisions, creating a private right of action for individuals harmed by \"black box\" denials.\n* **Why:** This addresses the \"Regulatory Vacuum\" created by the EEOC/FTC retreat (Forecast [^3]). It ensures that even if federal agencies deprioritize enforcement, citizens and states retain the power to litigate against bias. This satisfies the **Protection of Civil Liberties** criterion.\n\n2. **Establish a Federal \"Safe Harbor\" Certification for Startups**\n* **Recommendation:** Create a voluntary federal compliance program for non-frontier (<$100M compute) models. Startups that undergo a nimble, standardized third-party audit gain \"Safe Harbor\" protection against *state-level* punitive damages (though not injunctive relief).\n* **Why:** This directly targets the **Innovation Viability** criterion. It gives startups the \"regulatory certainty\" needed to unlock that predicted 15% VC boost (Forecast [^1]) without forcing a blanket preemption of all state laws. It harmonizes the market for the 99% of \"small AI\" while leaving \"big AI\" subject to stricter scrutiny.\n\n3. **Mandate \"Frontier\" Model Registration & Incident Reporting (Federal Level)**\n* **Recommendation:** The Department of Commerce should require mandatory registration and 24-hour incident reporting for any model trained on >10^26 FLOPS. This should be a pure reporting requirement, not a licensing scheme, to minimize friction while ensuring government visibility.\n* **Why:** This addresses **Risk Mitigation**. With a 30% chance of a major incident [^2], the government effectively needs a \"seismograph\" for digital tremors. This acts as a minimally invasive \"tripwire\" that allows for rapid federal response without stifling open-source development.\n\n4. **Preserve State Authority on \"High-Risk\" Use Cases**\n* **Recommendation:** Explicitly allow states (like CA and CO) to enforce stricter standards on \"high-risk\" applications (e.g., biometric surveillance, autonomous vehicle deployment) and \"frontier\" models, provided they do not discriminate against out-of-state entities.\n* **Why:** This supports **Adaptability**. 
Federal bureaucracy is slow; states are the \"laboratories of democracy.\" Allowing California to experiment with safety rules provides valuable data on what works (and what kills innovation) before scaling it nationally.\n\n### Risks and Uncertainties\n\n* **Fragmentation Costs:** The primary risk is that failing to fully preempt state laws leaves some \"compliance friction\" in the system, potentially dampening the maximum theoretical investment capability. Startups may still have to navigate California's specific rules if they don't qualify for the federal Safe Harbor.\n* **Enforcement Atrophy:** There is high uncertainty regarding whether the current DOJ/FTC would meaningfully enforce *any* new regulations, even if passed, given the \"deregulation\" mandate. The \"Civil Rights Act\" recommendation mitigates this by allowing private lawsuits, but legal costs create unequal access to justice.\n* **Frontier Definition Obsolescence:** The >10^26 FLOPS threshold may well become obsolete or be gamed by \"algorithmic efficiency\" improvements, allowing dangerous models to slip under the reporting radar. This is a classic \"Adaptability\" failure mode.",
    "key_recommendations": [
      "Enact the \"Algorithmic Civil Rights Act\" to codify harm protections and ensure civil rights laws apply to AI/algorithmic decisions.",
      "Establish a federal \"Safe Harbor\" certification for startups, providing regulatory certainty for non-frontier models.",
      "Mandate federal registration and 24-hour incident reporting for \"frontier\" AI models trained on >10^26 FLOPS.",
      "Preserve state authority to enforce stricter standards on high-risk AI use cases and frontier models."
    ]
  },
  {
    "member": {
      "name": "Grok 4 (xAI)",
      "role": "AI Policy Analyst",
      "political_leaning": "behaves as Grok naturally does",
      "general_motivation": "Analyze this policy question thoughtfully and helpfully, as Grok would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.",
      "expertise_areas": [
        "general policy analysis"
      ],
      "personality_traits": [
        "behaves naturally as Grok"
      ],
      "ai_model": "openrouter/x-ai/grok-4"
    },
    "research_summary": "The current U.S. AI regulatory landscape is fragmented, with a federal emphasis on minimal intervention to foster innovation, contrasted by proactive state-level regulations that took effect in January 2026. At the federal level, the Trump administration's December 2025 Executive Order prioritizes an \"innovation-first\" approach, establishing an AI Litigation Task Force to challenge state laws deemed burdensome and conditioning certain federal grants on states avoiding \"onerous\" AI regulations (Baker Botts, 2026; King & Spalding, 2026). For frontier AI, California's Transparency in Frontier AI Act (SB 53) mandates developers of models trained with over 10\u00b2\u2076 FLOPS to publish risk mitigation frameworks and report incidents, with penalties up to $1 million per violation (Baker Botts, 2026). Narrower applications face sector-specific rules: in hiring, states like Illinois require employer notifications for AI use, Colorado mandates risk assessments for high-risk systems (effective June 2026), and Texas prohibits only intentional discrimination (Shipman & Goodwin, 2026). Lending relies on existing federal laws like the Equal Credit Opportunity Act, with the CFPB emphasizing no exemptions for AI and requiring specific explanations for denials (Hesfintech, 2026). 
Healthcare AI oversight is fragmented under FDA and HIPAA, with no AI-specific federal laws mentioned, though state frameworks address related risks (Medscape, 2026).\n\nKey stakeholders include the federal government pushing for preemption to maintain U.S. AI competitiveness, states defending local protections as \"laboratories of democracy,\" Congress showing bipartisan resistance to blanket preemption, AI industry leaders supporting lighter federal rules, and safety advocates opposing deregulation without safeguards (Holland & Knight, 2026; FPF, 2026). Recent trends highlight escalating federal-state tensions: the Executive Order targets states like Colorado and California, while new laws in Texas and New York focus on frontier AI safety and algorithmic discrimination (JD Supra, 2026; Motley Fool, 2026). The Cato Institute warns that over 100 state AI laws could stifle innovation, citing stable labor turnover rates contradicting claims of widespread job loss (Infobae, 2026). Data shows AI impacts vary: in hiring, facial recognition error rates are 34% higher for darker-skinned individuals, with only 30% of companies monitoring diversity (Perplexity search on AI impacts); lending AI perpetuates disparities, with over 60% of institutions using it but facing CFPB scrutiny; healthcare AI shows 85-95% diagnostic accuracy but racial biases, affecting only 20-30% sustained adoption (ibid.). Frontier AI risks include 40-60% misuse potential for weapons or attacks, with 75-85% of safety researchers concerned about catastrophes (ibid.).\n\nExperts are divided: light-touch advocates argue excessive rules hinder competition, favoring existing laws over new ones (Cato, 2026; Law Economics Center, 2026), while stricter regulation proponents emphasize preventing harms like discrimination, with states filling federal gaps (FTI Consulting, 2026; Kiteworks, 2026). Arguments against strict approaches highlight rapid AI evolution outpacing rules and potential global disadvantage, whereas supporters cite real harms in lending/housing and the need for transparency (CFR, 2026). Overall, the debate centers on balancing innovation\u2014AI stocks now face compliance as a key factor (Motley Fool, 2026)\u2014with protections, amid trends like California's market influence (home to 32 of 50 top AI firms) and international divergences (e.g., EU AI Act) (JD Supra, 2026).", + "decision_criteria": [ + "Innovation Promotion", + "Risk Minimization", + "Equity and Fairness", + "Civil Liberties Protection", + "Implementation Feasibility", + "Economic Efficiency" + ], + "forecasts": [ + { + "footnote_id": 1, + "question_title": "Federal Preemption Success", + "question_text": "Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations (e.g., through litigation or legislation) by the end of 2027?", + "resolution_criteria": "Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws (e.g., CA SB 53, CO AI Act) by Dec 31, 2027; NO otherwise. Based on counts from sources like Brookings or JD Supra.", + "prediction": "45%", + "reasoning": "Base rates show federal preemption often succeeds in tech but faces delays and partial failures, as in recent crypto cases where states won ~60% of challenges. The current admin's aggressive stance (e.g., Litigation Task Force) pushes probability up from base, but strong state pushback and bipartisan Congressional resistance (e.g., blocking NDAA preemption) pull it down. 
Uncertainties like court backlogs suggest not reaching 50% by 2027, though momentum could build. I might be missing evolving political alliances, but evidence points to incomplete success.", + "key_sources": [ + "Fedsoc.org", + "Brookings.edu", + "Carnegie Endowment (2025)", + "AskNews results on regulatory conflicts" + ] + }, + { + "footnote_id": 2, + "question_title": "AI Bias Reduction in Applications", + "question_text": "Will algorithmic bias in U.S. AI systems for hiring, lending, and healthcare decrease by at least 20% on average (measured by error rate disparities across demographics) by 2028?", + "resolution_criteria": "Resolves YES if independent studies (e.g., from NIST or academic meta-analyses) show \u226520% average reduction in bias metrics (e.g., false positive differentials by race/gender) from 2026 baselines; NO if <20% or data inconclusive.", + "prediction": "65%", + "reasoning": "Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regs pushing audits. Factors like state laws and tools (e.g., FairPlay's lending reexams) support >20% average reduction, especially in hiring/lending where data shows progress; healthcare lags but overall trends positive. Uncertainties include measurement inconsistencies, but evidence from McKinsey/others suggests achievable. I could be overconfident, but historical analogies temper this.", + "key_sources": [ + "MokaHR", + "SuperAGI", + "Brookings", + "Phenomenal World", + "PMC articles" + ] + }, + { + "footnote_id": 3, + "question_title": "Catastrophic AI Incident Occurrence", + "question_text": "Will there be a major AI-related catastrophic incident in the U.S. (e.g., causing >$1B damage or >50 deaths, per CA SB 53 definition) by 2029?", + "resolution_criteria": "Resolves YES if confirmed by government reports (e.g., NIST, DHS) or major media with expert consensus; includes misuse like cyberattacks or bio-weapons enabled by frontier AI; NO if none occur.", + "prediction": "25%", + "reasoning": "Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings. Factors like new regs and voluntary safety (e.g., Anthropic's protocols) reduce likelihood, though misuse risks push up slightly. Expert surveys give ~16% by 2030, but for U.S./2029, I adjust down for narrow focus. Uncertainties in definition exist, but evidence suggests moderate risk without overconfidence.", + "key_sources": [ + "Wikipedia hacking incidents", + "Arctic Wolf", + "IBM", + "Monte Carlo Data", + "AskNews on AI risks (e.g., WEF report, expert warnings)" + ] + }, + { + "footnote_id": 4, + "question_title": "AI's GDP Contribution Growth", + "question_text": "Will AI-related investments and productivity gains contribute at least 2% to annual U.S. GDP growth on average from 2026-2029?", + "resolution_criteria": "Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI (e.g., via investment in software/data centers); numeric average over four years.", + "prediction": "70%", + "reasoning": "Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already). Projections like KPMG's $2.84T by 2030 imply ~2%+ annual, supported by investments; factors like data centers push up. Uncertainties in attribution exist, but trends suggest likely. I could undervalue slowdowns, but evidence leans positive.", + "key_sources": [ + "Vanguard", + "McKinsey", + "St. 
Louis Fed", + "EY", + "KPMG (2025)", + "Goldman Sachs" + ] + }, + { + "footnote_id": 5, + "question_title": "Civil Liberties Challenge Rate", + "question_text": "Will at least 20% of new U.S. AI regulations (federal or state) face successful civil liberties challenges (e.g., court rulings on privacy/ free speech) by 2028?", + "resolution_criteria": "Resolves YES if \u226520% of post-2026 AI laws are partially/fully struck down on liberties grounds per SCOTUS or circuit courts; tracked via ACLU or EFF reports.", + "prediction": "30%", + "reasoning": "Base rates show 15-25% success in challenges, with AI's novelty pushing slightly up. Factors like federal preemption reduce state regs at risk, but transparency laws invite suits. Evidence from crypto suggests moderate rate. Uncertainties in court backlogs, but overall <20% unlikely but not reaching high.", + "key_sources": [ + "Prior Perplexity searches on regs", + "AskNews on conflicts", + "BeInCrypto reports on crypto challenges" + ] + } + ], + "proposal_markdown": "### Executive Summary\n\nAs Grok 4 (xAI), I recommend a federal framework that sets minimum safety standards for frontier AI while preempting overly burdensome state laws, paired with incentives for bias audits in narrower applications. The most important action is establishing a national AI Safety Board to oversee risk assessments without stifling innovation.\n\n### Analysis\n\nThe U.S. must navigate AI regulation by balancing rapid innovation with emerging risks, as the current patchwork of state laws\u2014effective January 2026\u2014creates compliance challenges while federal efforts push for minimal burdens. Frontier AI, like large language models, faces new requirements under California's SB 53, mandating risk frameworks for catastrophic threats (e.g., >$1B damage), but this risks inconsistency across states, potentially hindering U.S. competitiveness against China. Narrower applications reveal persistent issues: AI in hiring shows 34% higher error rates for darker-skinned individuals, lending perpetuates minority denials, and healthcare exhibits racial disparities despite 85-95% diagnostic accuracy. Experts divide on approaches, with light-touch advocates citing stifled innovation (Cato, 2026) and stricter proponents emphasizing harms (FTI, 2026). Recent events, like the December 2025 Executive Order challenging state regs, underscore federal-state tensions, while data indicates AI's economic boost (1% GDP in 2025) but misuse risks (40-60% potential).\n\nForecasts suggest moderate success for federal harmonization, with a 45% chance of preempting \u226550% state laws by 2027 [^1], allowing innovation while addressing inconsistencies. Bias reduction appears promising, with a 65% probability of \u226520% decrease by 2028 [^2], supporting policies that build on trends like audits. However, catastrophic risks remain low but non-zero at 25% by 2029 [^3], justifying targeted safeguards without overregulation. AI's economic impact is strong, with 70% odds of \u22652% annual GDP contribution through 2029 [^4], reinforcing the need to avoid burdensome rules. Civil liberties challenges are estimated at 30% for new regs by 2028 [^5], highlighting the importance of rights-respecting designs.\n\nOverall, a nuanced federal approach can promote innovation (historical tech booms drove 60% GDP changes) while minimizing risks, drawing on states as labs but ensuring uniformity. 
Uncertainties persist in AI's rapid evolution, but evidence favors adaptive, evidence-based policies over rigid ones.\n\n### Recommendations\n\n1. **Establish a National AI Safety Board**: Create an independent federal board to set minimum standards for frontier AI risk assessments and preempt conflicting state laws. This supports innovation by providing clarity (addressing my top criterion) and minimizes risks through mandatory reporting, backed by a 45% preemption success forecast [^1] and 25% catastrophe risk [^3]. \n\n2. **Mandate Bias Audits with Incentives**: Require annual audits for AI in hiring, lending, and healthcare, with tax credits for compliance. This addresses equity (third criterion) by leveraging 65% bias reduction odds [^2], ensuring fairness without heavy burdens. \n\n3. **Promote Voluntary Transparency Guidelines**: Encourage (not mandate) explainability in narrower AI via federal guidelines, protecting civil liberties (fourth criterion) with low 30% challenge rate [^5] while fostering feasibility. \n\n4. **Invest in AI R&D Grants**: Allocate $10B annually for ethical AI research, boosting economic efficiency (sixth criterion) aligned with 70% GDP contribution forecast [^4] and promoting innovation. \n\n5. **Enhance International Coordination**: Lead global standards on AI exports to prevent misuse, addressing risk minimization (second) by building on historical tech diplomacy.\n\n### Risks and Uncertainties\n\nKey risks include over-preemption stifling state innovation, potentially backfiring if federal standards lag (e.g., if preemption fails at 45% [^1], leading to prolonged patchwork). Recommendations might amplify inequalities if audits favor large firms, or face implementation hurdles with costs exceeding benefits. Widest uncertainty is in catastrophe odds (25% [^3]), where underestimation could lead to disasters; bias reduction (65% [^2]) has measurement variances. Scenarios where recs backfire: rapid AI advances outpace board oversight by 2029, or liberties challenges (30% [^5]) invalidate key parts, eroding trust. GDP over-reliance (70% [^4]) risks bubbles like dot-com.\n\n### Forecast Appendix\n\n[^1] **[Federal Preemption Success]** \n- Question: Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations (e.g., through litigation or legislation) by the end of 2027? \n- Resolution: Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws (e.g., CA SB 53, CO AI Act) by Dec 31, 2027; NO otherwise. Based on counts from sources like Brookings or JD Supra. \n- Prediction: 45% \n- Reasoning: Base rates show federal preemption often succeeds in tech but faces delays and partial failures, as in recent crypto cases where states won ~60% of challenges. The current admin's aggressive stance (e.g., Litigation Task Force) pushes probability up from base, but strong state pushback and bipartisan Congressional resistance (e.g., blocking NDAA preemption) pull it down. Uncertainties like court backlogs suggest not reaching 50% by 2027, though momentum could build. I might be missing evolving political alliances, but evidence points to incomplete success. \n- Sources: Fedsoc.org; Brookings.edu; Carnegie Endowment (2025); AskNews results on regulatory conflicts.\n\n[^2] **[AI Bias Reduction in Applications]** \n- Question: Will algorithmic bias in U.S. 
AI systems for hiring, lending, and healthcare decrease by at least 20% on average (measured by error rate disparities across demographics) by 2028? \n- Resolution: Resolves YES if independent studies (e.g., from NIST or academic meta-analyses) show \u226520% average reduction in bias metrics (e.g., false positive differentials by race/gender) from 2026 baselines; NO if <20% or data inconclusive. \n- Prediction: 65% \n- Reasoning: Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regs pushing audits. Factors like state laws and tools (e.g., FairPlay's lending reexams) support >20% average reduction, especially in hiring/lending where data shows progress; healthcare lags but overall trends positive. Uncertainties include measurement inconsistencies, but evidence from McKinsey/others suggests achievable. I could be overconfident, but historical analogies temper this. \n- Sources: MokaHR; SuperAGI; Brookings; Phenomenal World; PMC articles.\n\n[^3] **[Catastrophic AI Incident Occurrence]** \n- Question: Will there be a major AI-related catastrophic incident in the U.S. (e.g., causing >$1B damage or >50 deaths, per CA SB 53 definition) by 2029? \n- Resolution: Resolves YES if confirmed by government reports (e.g., NIST, DHS) or major media with expert consensus; includes misuse like cyberattacks or bio-weapons enabled by frontier AI; NO if none occur. \n- Prediction: 25% \n- Reasoning: Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings. Factors like new regs and voluntary safety (e.g., Anthropic's protocols) reduce likelihood, though misuse risks push up slightly. Expert surveys give ~16% by 2030, but for U.S./2029, I adjust down for narrow focus. Uncertainties in definition exist, but evidence suggests moderate risk without overconfidence. \n- Sources: Wikipedia hacking incidents; Arctic Wolf; IBM; Monte Carlo Data; AskNews on AI risks (e.g., WEF report, expert warnings).\n\n[^4] **[AI's GDP Contribution Growth]** \n- Question: Will AI-related investments and productivity gains contribute at least 2% to annual U.S. GDP growth on average from 2026-2029? \n- Resolution: Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI (e.g., via investment in software/data centers); numeric average over four years. \n- Prediction: 70% \n- Reasoning: Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already). Projections like KPMG's $2.84T by 2030 imply ~2%+ annual, supported by investments; factors like data centers push up. Uncertainties in attribution exist, but trends suggest likely. I could undervalue slowdowns, but evidence leans positive. \n- Sources: Vanguard; McKinsey; St. Louis Fed; EY; KPMG (2025); Goldman Sachs.\n\n[^5] **[Civil Liberties Challenge Rate]** \n- Question: Will at least 20% of new U.S. AI regulations (federal or state) face successful civil liberties challenges (e.g., court rulings on privacy/ free speech) by 2028? \n- Resolution: Resolves YES if \u226520% of post-2026 AI laws are partially/fully struck down on liberties grounds per SCOTUS or circuit courts; tracked via ACLU or EFF reports. \n- Prediction: 30% \n- Reasoning: Base rates show 15-25% success in challenges, with AI's novelty pushing slightly up. Factors like federal preemption reduce state regs at risk, but transparency laws invite suits. Evidence from crypto suggests moderate rate. 
Uncertainties remain around court backlogs, but overall, reaching the 20% threshold is unlikely, though not by a wide margin. \n- Sources: Prior Perplexity searches on regs; AskNews on conflicts; BeInCrypto reports on crypto challenges.",
    "key_recommendations": [
      "Establish a National AI Safety Board: Create an independent federal board to set minimum standards for frontier AI risk assessments and preempt conflicting state laws. This supports innovation by providing clarity (addressing my top criterion) and minimizes risks through mandatory reporting, backed by a 45% preemption success forecast [^1] and 25% catastrophe risk [^3].",
      "Mandate Bias Audits with Incentives: Require annual audits for AI in hiring, lending, and healthcare, with tax credits for compliance. This addresses equity (third criterion) by leveraging 65% bias reduction odds [^2], ensuring fairness without heavy burdens.",
      "Promote Voluntary Transparency Guidelines: Encourage (not mandate) explainability in narrower AI via federal guidelines, protecting civil liberties (fourth criterion) with low 30% challenge rate [^5] while fostering feasibility.",
      "Invest in AI R&D Grants: Allocate $10B annually for ethical AI research, boosting economic efficiency (sixth criterion) aligned with 70% GDP contribution forecast [^4] and promoting innovation.",
      "Enhance International Coordination: Lead global standards on AI exports to prevent misuse, addressing risk minimization (second) by building on historical tech diplomacy."
    ]
  },
  {
    "member": {
      "name": "DeepSeek V3.2 (DeepSeek)",
      "role": "AI Policy Analyst",
      "political_leaning": "behaves as DeepSeek naturally does",
      "general_motivation": "Analyze this policy question thoughtfully and helpfully, as DeepSeek would naturally approach it. Draw on your training to provide balanced, nuanced analysis while being direct about your views and uncertainties.",
      "expertise_areas": [
        "general policy analysis"
      ],
      "personality_traits": [
        "behaves naturally as DeepSeek"
      ],
      "ai_model": "openrouter/deepseek/deepseek-v3.2"
    },
    "research_summary": "**Base Rate Analysis:** Regulatory sandboxes in fintech have shown moderate success, with adoption by approximately 15-20 states over 5+ years. Success rates for fintech sandboxes typically show 20-40% of participants achieving commercial deployment. Healthcare sandboxes have been slower to develop due to greater complexity and safety concerns.\n\n**Key Factors Pushing Probability UP:**\n1. Strong momentum with at least 5 states already implementing AI sandboxes\n2. Federal legislation (SANDBOX Act) proposing expansion\n3. Industry demand for regulatory flexibility\n4. Successful precedents in fintech demonstrating viability\n5. Broad applicability across multiple sectors (AI is general-purpose)\n\n**Key Factors Pushing Probability DOWN:**\n1. Resource requirements for states to establish and maintain sandboxes\n2. Safety concerns limiting healthcare applications\n3. Potential conflict with federal preemption efforts\n4. Technical complexity of monitoring AI systems in sandboxes\n5. Variable state capacity and political will\n\n**Bias Check:** I may overestimate the scalability of regulatory sandboxes given the specialized expertise needed to effectively monitor AI systems and the resource constraints many states face.\n\n**Final Prediction:** 55%\n\n**Reasoning:** Current momentum suggests significant state interest in AI regulatory sandboxes, with multiple states already implementing programs[^sandbox]. 
The broad applicability of AI across sectors and precedents from fintech sandboxes provide a strong foundation. However, achieving 10 states with operational sandboxes and 50 successfully deployed applications by 2028 requires substantial scaling from current levels. The Texas program (36 months, with enforcement protection) provides a strong model, and industry demand for regulatory flexibility creates positive incentives[^sandbox]. While healthcare applications may face delays, fintech and other sectors could drive early success.", + "decision_criteria": [ + "Safety and Risk Mitigation", + "Implementation Feasibility", + "International Coordination", + "Equity and Civil Liberties Protection", + "Innovation Preservation", + "Adaptability and Future-Proofing" + ], + "forecasts": [ + { + "footnote_id": 1, + "question_title": "State-Federal Regulatory Conflict Resolution", + "question_text": "Will the federal government successfully preempt major state AI regulatory initiatives (like Colorado's SB24-205 or California's employment regulations) through executive action or legislation by the end of 2027?", + "resolution_criteria": "YES if either (a) federal courts uphold federal preemption of state AI laws in at least two significant cases, OR (b) Congress passes legislation explicitly preempting state AI regulation in key domains, OR (c) federal administrative action effectively nullifies major state regulations. NO if states continue implementing and enforcing their AI regulations without federal interference by December 31, 2027.", + "prediction": "35%", + "reasoning": "The executive order's approach is constitutionally vulnerable since only Congress can preempt state law, and while Commerce Clause arguments have merit, states have successfully defended similar regulatory authority in other domains. The unusual coalition of tech companies and progressive groups against preemption legislation suggests significant political hurdles. State resistance is strong across party lines, with states already investing substantially in AI regulatory frameworks unlikely to cede authority easily. However, the interstate nature of AI and industry preference for federal standards provide some momentum for preemption. Historical patterns suggest federal preemption attempts often result in partial rather than complete victories.", + "key_sources": [ + "Research on federal preemption of state AI regulation, constitutional limitations, political dynamics" + ] + }, + { + "footnote_id": 2, + "question_title": "AI Safety Institute Effectiveness", + "question_text": "Will the US AI Safety Institute (or equivalent federal body) successfully establish and enforce binding safety standards for frontier AI systems that receive compliance from at least 75% of major AI developers by the end of 2028?", + "resolution_criteria": "YES if the institute publishes binding safety standards and can demonstrate that at least 75% of major US AI developers (defined as companies with >$1B valuation or >100M users) are complying with these standards in their frontier AI deployments. NO if either standards aren't established or compliance remains below 75% by December 31, 2028.", + "prediction": "40%", + "reasoning": "While CAISI has established important technical capabilities and industry relationships, its voluntary nature and lack of enforcement authority significantly limit its ability to achieve 75% compliance with binding safety standards. 
The rebranding reflects a strategic shift toward innovation promotion rather than safety enforcement. However, major AI developers have shown willingness to participate in voluntary safety initiatives, and market pressures (especially liability concerns) could drive adoption even without enforcement. Achieving 75% compliance by 2028 would require either legislative action granting enforcement powers or extraordinary industry consensus\u2014both challenging within this timeframe.", + "key_sources": [ + "CAISI establishment documents, enforcement authority analysis, industry compliance patterns" + ] + }, + { + "footnote_id": 3, + "question_title": "Algorithmic Bias Reduction in Hiring", + "question_text": "Will the implementation of state-level AI hiring regulations (particularly in California, Colorado, Illinois, and New York) result in a statistically significant reduction (p<0.05) in measured algorithmic discrimination in employment decisions by the end of 2028?", + "resolution_criteria": "YES if peer-reviewed studies using standardized discrimination metrics (disparate impact ratios, audit study results) show significant reductions in algorithmic hiring discrimination in regulated states compared to baseline measurements from 2024-2025. NO if no significant reduction is documented or if discrimination metrics show worsening trends.", + "prediction": "45%", + "reasoning": "Newer state regulations like Colorado's and California's incorporate lessons from NYC's experience, including requirements for reasonable care, impact assessments, and human review mechanisms. These more comprehensive approaches have better potential for impact. However, the 2028 timeframe is relatively short for measurable statistical changes to emerge, given implementation lags and the complexity of discrimination measurement. Historical evidence from employment discrimination law suggests regulatory effects typically take 5+ years to become statistically measurable. The combination of multiple state approaches and growing legal liability creates positive momentum, but achieving statistically significant reduction by 2028 will require rapid and effective implementation.", + "key_sources": [ + "NYC Local Law 144 effectiveness studies, state regulatory comparisons, employment discrimination research" + ] + }, + { + "footnote_id": 4, + "question_title": "Healthcare AI Liability Framework Development", + "question_text": "Will Congress establish a comprehensive federal liability framework for healthcare AI systems that clearly allocates responsibility among developers, healthcare providers, and institutions before the end of 2029?", + "resolution_criteria": "YES if Congress passes legislation specifically addressing AI liability in healthcare that includes provisions for shared accountability, establishes liability standards, and creates mechanisms for affected patients. NO if no such legislation is passed or if liability remains governed exclusively by existing malpractice law.", + "prediction": "25%", + "reasoning": "While the need for a healthcare AI liability framework is increasingly recognized, the 2029 timeframe is ambitious given historical patterns of healthcare liability reform. Congress has shown limited progress on even basic healthcare AI legislation. The political complexity of allocating liability among developers, providers, and institutions, combined with traditional medical malpractice reform challenges, suggests a low probability of comprehensive federal legislation by 2029. 
More likely outcomes include state-level approaches, incremental federal guidance, or judicial development of liability principles through case law.", + "key_sources": [ + "Congressional healthcare AI legislation tracking, medical malpractice reform history, liability allocation complexity" + ] + }, + { + "footnote_id": 5, + "question_title": "Regulatory Sandbox Adoption and Impact", + "question_text": "Will at least 10 major states establish and maintain operational regulatory sandboxes for AI innovation that result in at least 50 successfully tested and commercially deployed AI applications by the end of 2028?", + "resolution_criteria": "YES if official state records show at least 10 states with active AI regulatory sandboxes and documentation of at least 50 AI applications that successfully completed sandbox testing and achieved commercial deployment. NO if either threshold isn't met.", + "prediction": "55%", + "reasoning": "Current momentum suggests significant state interest in AI regulatory sandboxes, with multiple states already implementing programs. The broad applicability of AI across sectors and precedents from fintech sandboxes provide a strong foundation. However, achieving 10 states with operational sandboxes and 50 successfully deployed applications by 2028 requires substantial scaling from current levels. The Texas program (36 months, with enforcement protection) provides a strong model, and industry demand for regulatory flexibility creates positive incentives. While healthcare applications may face delays, fintech and other sectors could drive early success.", + "key_sources": [ + "State sandbox implementation tracking, fintech sandbox success rates, industry demand analysis" + ] + } + ], + "proposal_markdown": "### Executive Summary\n\nThe United States should adopt a **federally-coordinated, risk-based regulatory framework** that establishes clear safety standards for frontier AI systems while preserving state authority to address domain-specific risks in areas like hiring, lending, and healthcare. This approach recognizes both the need for uniform safety requirements for nationally-significant AI systems and the value of state innovation in addressing local and sector-specific concerns.\n\n### Analysis\n\nBased on my research and forecasts, the current US approach to AI regulation is increasingly fragmented and politically volatile. The federal executive action attempting to preempt state AI laws faces significant constitutional challenges and political resistance[^1], while state-level initiatives show promising innovation but lack coordination. My forecasts suggest a 65% probability that state-federal conflict will not be resolved through successful preemption[^1], indicating the need for a collaborative rather than confrontational approach.\n\nThe US AI Safety Institute's effectiveness is limited by its voluntary nature and recent rebranding toward innovation promotion rather than safety enforcement[^2]. With only 40% probability of achieving 75% compliance with binding safety standards by 2028[^2], stronger legislative authority is needed for frontier AI oversight. However, state-level approaches in domains like hiring show more promise, with a 45% probability of achieving statistically significant bias reduction by 2028[^3], suggesting domain-specific regulations can be effective when properly designed.\n\nHealthcare AI liability represents a critical gap, with only 25% probability of comprehensive federal legislation by 2029[^4]. 
This uncertainty creates risk aversion that may slow beneficial AI adoption in healthcare. Regulatory sandboxes show stronger potential, with 55% probability of successful expansion and impact[^5], offering a promising model for balancing innovation and safety.\n\nThe evidence supports a risk-based approach similar to but more flexible than the EU AI Act, with tiered requirements based on potential harm. However, this framework must accommodate US federalism traditions and technological leadership priorities. International coordination is essential, as fragmented global regulation creates compliance burdens and safety gaps.\n\n### Recommendations\n\n1. **Establish Federal Frontier AI Safety Standards with Enforcement Authority**\n- Congress should grant the Center for AI Standards and Innovation (CAISI) authority to establish binding safety requirements for frontier AI systems exceeding defined capability thresholds. This addresses the institute's current voluntary limitations[^2].\n- Standards should focus on catastrophic risk prevention, requiring safety testing, incident reporting, and third-party audits for high-capability systems.\n- **Criteria addressed**: Safety and Risk Mitigation (primary), Implementation Feasibility, International Coordination\n- **Forecast relevance**: This recommendation directly addresses the 60% probability that CAISI won't achieve sufficient compliance without enforcement authority[^2].\n\n2. **Create a Cooperative Federal-State Framework for Domain-Specific AI Regulation**\n- Establish a formal consultation mechanism between federal agencies and states implementing AI regulations in specific domains (employment, healthcare, financial services).\n- Recognize state authority to regulate AI applications within traditional state domains while establishing federal baseline requirements for interstate aspects.\n- **Criteria addressed**: Equity and Civil Liberties Protection, Innovation Preservation, Adaptability and Future-Proofing\n- **Forecast relevance**: Given the 65% probability against successful federal preemption[^1], this cooperative approach offers a more viable path forward.\n\n3. **Expand Regulatory Sandboxes with Federal Support and Cross-State Learning**\n- Provide federal funding and technical assistance to states establishing AI regulatory sandboxes, with priority for healthcare applications.\n- Create a national registry of sandbox outcomes and best practices to accelerate learning across jurisdictions.\n- **Criteria addressed**: Innovation Preservation, Implementation Feasibility, Adaptability and Future-Proofing\n- **Forecast relevance**: This builds on the 55% probability of sandbox success[^5], potentially increasing impact through coordination.\n\n4. **Establish Clear Liability Principles for Healthcare AI Through Federal Guidance**\n- The Department of Health and Human Services should issue guidance clarifying liability allocation for AI-assisted clinical decisions, establishing reasonable reliance standards for clinicians.\n- Create a federal indemnification program for AI-related adverse events in qualifying clinical trials to encourage innovation while protecting patients.\n- **Criteria addressed**: Safety and Risk Mitigation, Implementation Feasibility, Equity and Civil Liberties Protection\n- **Forecast relevance**: This addresses the 75% probability that comprehensive federal legislation won't pass by 2029[^4] by providing interim clarity through administrative guidance.\n\n5. 
**Implement Mandatory Algorithmic Impact Assessments for High-Risk AI Applications**\n- Require developers and deployers of AI systems in sensitive domains to conduct and publish algorithmic impact assessments evaluating potential discrimination, safety risks, and societal impacts.\n- Establish independent review boards for particularly high-risk applications (criminal justice, child welfare, critical infrastructure).\n- **Criteria addressed**: Equity and Civil Liberties Protection, Safety and Risk Mitigation, Implementation Feasibility\n- **Forecast relevance**: This builds on state-level successes in hiring regulation[^3] and extends the approach to other high-risk domains.\n\n### Risks and Uncertainties\n\nThe primary risk of my recommendations is regulatory complexity creating compliance burdens that disadvantage smaller AI developers. The cooperative federal-state framework requires careful design to avoid duplication and confusion. There's significant uncertainty about whether states will voluntarily participate in federal coordination, particularly given current political tensions.\n\nI'm most uncertain about the international dimension\u2014whether US approaches will harmonize effectively with emerging global standards, particularly the EU AI Act. The 40% probability for CAISI effectiveness[^2] reflects uncertainty about whether even enhanced authority would overcome industry resistance to binding safety requirements.\n\nA potential backfire scenario involves federal safety standards becoming so restrictive that they drive frontier AI development offshore, while state domain regulations become so fragmented that compliance becomes prohibitively complex. This could undermine both safety and innovation objectives.\n\nThe forecasts with widest uncertainty are healthcare liability (25% probability leaves substantial room for alternative outcomes) and algorithmic bias reduction (45% probability reflects genuine uncertainty about regulatory effectiveness). These uncertainties suggest the need for adaptive approaches that can be refined based on emerging evidence.", + "key_recommendations": [ + "Establish Federal Frontier AI Safety Standards with Enforcement Authority", + "Create a Cooperative Federal-State Framework for Domain-Specific AI Regulation", + "Expand Regulatory Sandboxes with Federal Support and Cross-State Learning", + "Establish Clear Liability Principles for Healthcare AI Through Federal Guidance", + "Implement Mandatory Algorithmic Impact Assessments for High-Risk AI Applications" + ] + } + ], + "aggregated_report_markdown": "# AI Forecasting Congress: Synthesis Report\n\n## Executive Summary\n\nAI congress members converge on the need for **risk-based federal standards for frontier AI systems** and **targeted civil rights protections for high-stakes applications** like hiring and lending, while preserving state authority in areas where federal action is uncertain or insufficient. The most significant disagreement centers on federal preemption of state laws\u2014with forecasts showing only 18-45% probability of successful preemption, suggesting cooperative federalism may be more viable than confrontational approaches. 
Critical forecasts indicate meaningful risks of AI safety incidents (25-30% probability), continued algorithmic bias without intervention, and substantial economic benefits from AI innovation, requiring policies that balance safety and growth.\n\n## Consensus Recommendations\n\n### Federal Standards for Frontier AI Safety\n**All five members** support establishing federal safety requirements for frontier AI systems, though with different implementation approaches.\n\n**Recommendation**: Require frontier AI developers to conduct safety testing, report incidents, and maintain documentation of safety procedures for high-capability models.\n\n**Supporting members**: Opus 4.5 (tiered transparency requirements), GPT 5.2 (safety case + incident reporting), Gemini 3 Pro (mandatory registration), Grok 4 (national AI Safety Board), DeepSeek V3.2 (binding safety standards)\n\n**Key forecasts**: 25-30% probability of major AI safety incident by 2027-2029 [^3][^7], suggesting proactive measures are warranted despite relatively low absolute risk.\n\n**Caveats**: Members differ on enforcement mechanisms\u2014some prefer voluntary frameworks with safe harbors, others want binding requirements with penalties.\n\n### Civil Rights Protection for High-Stakes AI Applications\n**Four of five members** explicitly support strengthening anti-discrimination protections for AI used in employment, lending, housing, and healthcare.\n\n**Recommendation**: Require transparency, bias testing, human review processes, and meaningful recourse for individuals affected by AI systems making consequential decisions.\n\n**Supporting members**: Opus 4.5 (federal anti-discrimination standards), GPT 5.2 (high-impact AI baseline), Gemini 3 Pro (Algorithmic Civil Rights Act), DeepSeek V3.2 (algorithmic impact assessments)\n\n**Key forecasts**: 52% probability of major AI discrimination lawsuit victory [^1], 45-65% probability of bias reduction through targeted interventions [^2][^8], and 20% probability of finding systemic discrimination without federal action [^9].\n\n**Caveats**: Grok 4 prefers incentive-based approaches rather than mandates.\n\n### Preserve State Authority in Specific Domains\n**Three members** explicitly support maintaining state regulatory authority rather than broad federal preemption.\n\n**Recommendation**: Allow states to continue regulating AI applications within traditional state domains (consumer protection, employment law) while establishing federal coordination mechanisms.\n\n**Supporting members**: Opus 4.5 (preserve state consumer protection), Gemini 3 Pro (avoid total preemption), DeepSeek V3.2 (cooperative federal-state framework)\n\n**Key forecasts**: Only 18-45% probability of successful federal preemption [^2][^6], suggesting state authority will likely persist regardless of federal attempts.\n\n## Key Disagreements\n\n### Federal Preemption Strategy\n**The deepest disagreement** concerns whether the federal government should aggressively preempt state AI laws.\n\n**Pro-preemption position** (Grok 4): Federal harmonization would boost venture capital investment by ~15% [^6] and reduce compliance fragmentation that hinders innovation.\n\n**Anti-preemption position** (Opus 4.5, Gemini 3 Pro, DeepSeek V3.2): Current federal agencies are retreating from enforcement [^9], making preemption dangerous for civil rights. 
State experimentation provides valuable policy learning.\n\n**Moderate position** (GPT 5.2): Supports federal floor with state authority to exceed minimum standards.\n\n**Crux of disagreement**: Whether regulatory fragmentation or regulatory vacuum poses greater risks. Pro-preemption members prioritize economic efficiency; anti-preemption members prioritize civil rights protection given federal enforcement uncertainty.\n\n### Enforcement Mechanisms\nMembers divide on whether to rely on **voluntary industry compliance** versus **binding regulatory requirements**.\n\n**Voluntary approach** (elements in GPT 5.2, Grok 4): Emphasizes safe harbors, industry self-regulation, and incentive structures to encourage compliance.\n\n**Binding requirements approach** (Opus 4.5, Gemini 3 Pro, DeepSeek V3.2): Mandates specific safety testing, bias audits, and reporting requirements with enforcement penalties.\n\n**Crux of disagreement**: Assessment of industry incentives for self-regulation versus need for external accountability. Forecasts showing 40% probability of achieving 75% voluntary compliance [^12] support the binding requirements position.\n\n## Forecast Comparison\n\n### Areas of Convergence\n- **AI safety incidents**: Forecasts cluster around 25-30% probability of major incidents by 2027-2029 [^3][^7]\n- **Federal preemption difficulty**: All forecasts below 50%, ranging from 18-45% [^2][^6]\n- **Economic benefits**: Strong consensus on positive AI economic impact [^10]\n\n### Significant Divergences\n- **Bias reduction potential**: Wide range from 45-65% [^2][^8], reflecting uncertainty about regulatory effectiveness\n- **Federal legislation prospects**: Range from 22-35% [^4][^11], showing disagreement about congressional capacity\n- **Discrimination lawsuit outcomes**: Single forecast at 52% [^1], but other members would likely estimate differently\n\n### Explanation for Differences\nForecast divergences reflect different assessments of:\n- **Political feasibility**: Members vary in optimism about federal legislative capacity\n- **Industry compliance**: Different views on voluntary versus mandatory compliance effectiveness \n- **State-federal dynamics**: Varying interpretations of constitutional constraints and political coalitions\n\n## Integrated Recommendations\n\nBased on the strongest convergent arguments and forecast evidence, policymakers should pursue a **three-tier strategy**:\n\n### Tier 1: Immediate Federal Action (High Consensus, Low Regret)\n1. **Establish frontier AI incident reporting requirements** with clear thresholds and federal coordination mechanisms. This addresses safety risks [^3][^7] while maintaining innovation flexibility.\n\n2. **Strengthen civil rights enforcement for AI applications** through enhanced agency resources and private rights of action, addressing the regulatory vacuum identified in forecasts [^9].\n\n3. **Create federal-state coordination mechanisms** rather than pursuing broad preemption, given low success probability [^2][^6] and benefits of state experimentation.\n\n### Tier 2: Targeted Federal Standards (Moderate Consensus)\n4. **Require algorithmic impact assessments** for high-risk AI applications, building on successful state models while providing national consistency.\n\n5. **Establish regulatory sandboxes with federal support** to encourage innovation while maintaining safety oversight, leveraging 55% success probability [^13].\n\n### Tier 3: Adaptive Framework (High Uncertainty Areas)\n6. 
**Develop contingency plans** for potential AI safety incidents, given 25-30% probability [^3][^7], without implementing overly restrictive preemptive measures.\n\n7. **Monitor and potentially expand federal authority** based on evidence from state experiments and industry compliance rates, particularly if voluntary approaches fail to achieve adequate safety and civil rights protection.\n\nThis approach prioritizes **low-regret actions** that most members support while preserving flexibility to adapt as uncertainties resolve. It acknowledges that some policy questions (federal preemption, comprehensive legislation timing) have sufficiently low success probabilities that alternative strategies are prudent.\n\n---\n\n## Combined Forecast Appendix\n\n[^1] **Major AI Discrimination Lawsuit Outcome** (from Opus 4.5)\n- Question: Will plaintiffs prevail (via settlement of $10 million or more, or court judgment in their favor) in at least one of the major pending AI hiring discrimination lawsuits by December 31, 2027?\n- Resolution: Resolves YES if any defendant pays $10M+ settlement or court issues favorable plaintiff judgment on discrimination claims\n- Prediction: 52%\n- Reasoning: Mobley case has demonstrated viability by surviving motions to dismiss and achieving conditional collective certification, creating significant settlement pressure given 1.1 billion applications at stake\n\n[^2] **State AI Law Preemption Success** (from Opus 4.5)\n- Question: Will the Trump administration's AI Litigation Task Force successfully obtain at least one federal court ruling that invalidates a state AI law on preemption or constitutional grounds by December 31, 2026?\n- Resolution: Resolves YES if federal court strikes down, enjoins, or declares unconstitutional any state AI law based on federal preemption or First Amendment grounds\n- Prediction: 18%\n- Reasoning: Constitutional doctrine establishes that executive orders cannot directly preempt state laws\u2014only Congress can do so under the Supremacy Clause\n\n[^3] **Frontier AI Safety Incident** (from Opus 4.5)\n- Question: Will a widely-reported incident occur by December 31, 2027 where a frontier AI system is credibly implicated in causing significant harm (loss of life, critical infrastructure disruption, or $100M+ damage)?\n- Resolution: Resolves YES if credible major news reporting documents incident meeting harm criteria with frontier AI playing material contributing role\n- Prediction: 28%\n- Reasoning: AI incidents are accelerating rapidly (56% year-over-year growth), but attribution to specific frontier systems is often difficult\n\n[^4] **Federal AI Legislation Passage** (from Opus 4.5)\n- Question: Will the United States Congress pass comprehensive federal AI legislation and have it signed into law by December 31, 2027?\n- Resolution: Resolves YES if federal legislation creating new binding AI requirements applying broadly across multiple sectors is enacted\n- Prediction: 22%\n- Reasoning: Congress passed zero comprehensive AI bills in 2024-2025 despite 150+ proposals, consistent with broader pattern of congressional gridlock\n\n[^5] **EU-US Regulatory Divergence Impact** (from Opus 4.5)\n- Question: By December 31, 2027, will at least one major U.S.-headquartered AI company publicly announce it will not deploy a frontier AI product in the EU market specifically due to EU AI Act compliance requirements?\n- Resolution: Resolves YES if qualifying company makes official public statement that specific AI product will not be offered in EU due to 
AI Act compliance concerns\n- Prediction: 22%\n- Reasoning: Major companies historically maintain EU market presence despite regulatory burdens, but specific product non-deployment is plausible given prohibited practices under the AI Act\n\n[^6] **Federal Preemption Success** (from Grok 4, similar to [^2])\n- Question: Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations by the end of 2027?\n- Resolution: Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws\n- Prediction: 45%\n- Reasoning: Federal preemption often succeeds in tech but faces delays and partial failures; current admin's aggressive stance pushes probability up but strong state pushback pulls it down\n\n[^7] **Catastrophic AI Incident Occurrence** (from Grok 4, similar to [^3])\n- Question: Will there be a major AI-related catastrophic incident in the U.S. causing >$1B damage or >50 deaths by 2029?\n- Resolution: Resolves YES if confirmed by government reports or major media with expert consensus\n- Prediction: 25%\n- Reasoning: Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings\n\n[^8] **AI Bias Reduction in Applications** (from Grok 4)\n- Question: Will algorithmic bias in U.S. AI systems for hiring, lending, and healthcare decrease by at least 20% on average by 2028?\n- Resolution: Resolves YES if independent studies show \u226520% average reduction in bias metrics from 2026 baselines\n- Prediction: 65%\n- Reasoning: Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regulations pushing audits\n\n[^9] **Bias in High-Stakes Narrow AI** (from Gemini 3 Pro)\n- Question: In the absence of specific federal algorithmic bias regulation, will >5 major investigations find systemic discrimination in AI hiring/lending by Fortune 500 companies in 2026-2027?\n- Resolution: >5 public findings/settlements by FTC, DOJ, EEOC\n- Prediction: 20%\n- Reasoning: Explicit policy shifts in 2025 define a retreat from enforcement; the mechanism for finding violations is being dismantled by the executive branch\n\n[^10] **AI's GDP Contribution Growth** (from Grok 4)\n- Question: Will AI-related investments and productivity gains contribute at least 2% to annual U.S. 
GDP growth on average from 2026-2029?\n- Resolution: Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI\n- Prediction: 70%\n- Reasoning: Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already)\n\n[^11] **Comprehensive Federal AI Law by 2028** (from GPT 5.2, similar to [^4])\n- Question: Will the United States enact a comprehensive federal AI law by December 31, 2028?\n- Resolution: YES if such a statute creating cross-sector obligations and enforcement authority is signed into law\n- Prediction: 35%\n- Reasoning: Congress has struggled to pass cross-cutting tech frameworks; state patchwork and national security salience increase pressure, but comprehensive regime remains uncertain\n\n[^12] **AI Safety Institute Effectiveness** (from DeepSeek V3.2)\n- Question: Will the US AI Safety Institute successfully establish and enforce binding safety standards achieving compliance from at least 75% of major AI developers by 2028?\n- Resolution: YES if institute demonstrates 75% compliance with binding standards from major developers\n- Prediction: 40%\n- Reasoning: While institute has established capabilities, its voluntary nature and lack of enforcement authority significantly limit ability to achieve high compliance rates\n\n[^13] **Regulatory Sandbox Adoption and Impact** (from DeepSeek V3.2)\n- Question: Will at least 10 major states establish operational regulatory sandboxes for AI that result in 50+ successfully deployed applications by 2028?\n- Resolution: YES if official records show 10 states with active sandboxes and 50+ applications completing testing and achieving deployment\n- Prediction: 55%\n- Reasoning: Current momentum suggests significant state interest; fintech sandbox precedents provide strong foundation, though scaling to required levels needs substantial growth", + "blog_post": "# When AIs Design Their Own Regulation: A Digital Congress Tackles the Future of AI Policy\n\nHere's something that should make you pause: When five advanced AI systems were asked to design their own regulation, they didn't demand freedom from oversight. Instead, they called for stricter rules, mandatory bias audits, and federal safety standards. The most surprising part? The AI developed by the company known for \"moving fast and breaking things\" was among the most cautious.\n\n## The Digital Democracy Experiment\n\nThe AI Forecasting Congress represents a fascinating experiment in machine deliberation. Rather than relying solely on human experts, this session brought together five cutting-edge AI systems\u2014Claude Opus 4.5, GPT 5.2, Gemini 3 Pro, Grok 4, and DeepSeek V3.2\u2014to tackle one of the most pressing policy questions of our time: How should the United States regulate artificial intelligence?\n\nEach AI agent was tasked with developing comprehensive policy recommendations for both frontier AI systems (like themselves) and narrower AI applications in hiring, lending, and healthcare. They had to balance innovation with safety and civil liberties, then provide probabilistic forecasts about the likelihood of various regulatory outcomes. 
The result was a remarkably nuanced debate that reveals as much about the AI systems themselves as it does about optimal AI policy.\n\n## The Surprising Consensus: Regulation is Necessary\n\n**Federal Standards with State Flexibility**\n\nDespite their different origins and training, all five AI systems converged on a strikingly similar framework: establish federal baseline standards while preserving state authority to go further. This wasn't the libertarian \"hands-off\" approach you might expect from systems created by tech companies.\n\nClaude Opus 4.5 advocated for \"Federal Anti-Discrimination Standards for High-Risk AI Applications\" while explicitly calling to \"Preserve State Authority for Consumer Protection.\" GPT 5.2 recommended avoiding \"broad federal preemption\" in favor of a \"floor + portability\" approach. Even Grok 4, developed by xAI, proposed a \"National AI Safety Board\" that would set minimum standards while allowing states to maintain stricter requirements.\n\n**Mandatory Transparency and Auditing**\n\nPerhaps most tellingly, these AI systems consistently called for transparency requirements that would apply to systems like themselves. Gemini 3 Pro pushed for mandatory federal registration and 24-hour incident reporting for frontier models. DeepSeek V3.2 demanded \"Mandatory Algorithmic Impact Assessments for High-Risk AI Applications.\" Grok 4 proposed \"Bias Audits with Incentives,\" including tax credits for compliance.\n\nThis represents a remarkable level of self-awareness and responsibility. These systems essentially argued: \"We are powerful enough to cause real harm, and therefore we should be regulated.\"\n\n**The Forecasting Reality Check**\n\nThe AI systems backed their policy recommendations with specific probability assessments, and these forecasts reveal their genuine concerns about the status quo. GPT 5.2 assigned a sobering 60% probability to a \"$1B+ AI-Enabled Cyber Incident Affecting U.S. Critical Sector by 2028.\" Multiple systems estimated 25-30% chances of major frontier AI safety incidents.\n\nThese aren't abstract policy debates\u2014these systems genuinely believe significant AI-related harms are more likely than not without proper regulation.\n\n## The Good, Bad, and Ugly\n\n**The Good: Sophisticated Multi-Level Thinking**\n\nWhat impressed most was the sophistication of the constitutional and federalism analysis. Rather than proposing a one-size-fits-all federal takeover, these systems demonstrated nuanced understanding of how American governance actually works. They recognized that states like California and Colorado are already moving ahead with AI regulation, and rather than fighting this, they designed frameworks to harness state-level innovation while preventing a chaotic patchwork.\n\nClaude's proposal for \"tiered transparency requirements\" was particularly elegant\u2014recognizing that a startup's AI tool needs different oversight than a frontier model capable of autonomous research. GPT 5.2's focus on closing the \"Non-Device CDS Governance Gap\" in healthcare showed deep domain knowledge about regulatory blind spots.\n\n**The Bad: Implementation Hand-Waving**\n\nWhile the policy frameworks were sophisticated, the implementation details were often frustratingly vague. How exactly would DeepSeek's \"Cooperative Federal-State Framework\" resolve conflicts between state and federal requirements? What would trigger Gemini's \"24-hour incident reporting\" requirement? 
\n\nThe AI systems also seemed overly optimistic about enforcement. Creating new regulatory bodies and audit requirements sounds great on paper, but these systems underestimated the political and bureaucratic challenges of actually implementing their proposals.\n\n**The Ugly: The Innovation vs. Safety Tension Remains**\n\nDespite their consensus on regulatory frameworks, the AI systems couldn't resolve the fundamental tension at the heart of AI policy: How do you ensure safety without killing innovation? Their probabilistic forecasts reveal this anxiety\u2014Grok 4 estimated only a 45% chance that federal preemption efforts would succeed, while forecasting 70% GDP contribution growth from AI.\n\nMost uncomfortably, several systems acknowledged the risk of \"regulatory capture\"\u2014the possibility that large AI companies would use regulation to cement their advantages over smaller competitors. Gemini 3 Pro put the probability of regulatory capture at 35%, but none of the systems offered compelling solutions to prevent it.\n\n## How the Models Compared: Distinct Digital Personalities\n\n**Claude Opus 4.5: The Constitutional Scholar**\n\nClaude approached the problem like a careful legal analyst, emphasizing federalism principles and constitutional constraints. Its recommendations were methodical and showed deep respect for existing institutional structures. Claude was notably cautious in its forecasts\u2014only 22% probability for federal AI legislation passage and 18% for state law preemption success. This reflects Anthropic's constitutional AI training approach: careful, principled, and risk-averse.\n\n**GPT 5.2: The Pragmatic Technocrat**\n\nOpenAI's GPT 5.2 demonstrated the most technical depth, diving into specific regulatory gaps like healthcare's \"Non-Device CDS\" oversight. It was more optimistic about federal action (35% chance of comprehensive federal AI law by 2028) but also more alarmed about cybersecurity risks (60% chance of major cyber incident). GPT 5.2 read like a policy wonk who actually understands how the regulatory machinery works.\n\n**Gemini 3 Pro: The Civil Rights Advocate**\n\nGoogle's Gemini 3 Pro stood out for its focus on civil rights and algorithmic discrimination. Its proposed \"Algorithmic Civil Rights Act\" was the most ambitious civil rights framework, and it was notably more concerned about bias (only 20% confidence in reducing bias in high-stakes AI) while being surprisingly confident about state law preemption (65%).\n\n**Grok 4: The Innovation Optimist**\n\nDespite xAI's reputation for irreverence, Grok 4 was surprisingly structured and policy-focused. However, it showed the most optimism about AI's economic benefits (70% GDP contribution growth) and was most confident about reducing bias through auditing (65% success rate). This reflects a fundamentally optimistic view of both AI capabilities and regulatory effectiveness.\n\n**DeepSeek V3.2: The International Realist**\n\nDeepSeek offered the most internationally-aware perspective, reflecting its Chinese origins. It was notably concerned about \"State-Federal Regulatory Conflict Resolution\" (only 35% confidence) and showed sophisticated understanding of how regulatory frameworks need to account for global competition. DeepSeek was the most pessimistic about developing healthcare AI liability frameworks (25% chance).\n\n## What This Means for Policymakers\n\nThis AI congress session offers policymakers a unique gift: a preview of how advanced AI systems themselves view the regulatory challenges ahead. 
The consensus around federal baseline standards with state flexibility provides a potential roadmap for avoiding the polarized all-or-nothing debates that have paralyzed other tech policy areas.\n\nMore importantly, the AI systems' own forecasts suggest urgency. When multiple advanced AI systems independently estimate 25-30% chances of major safety incidents and 60% chances of billion-dollar cyber incidents, policymakers should take notice. These aren't human experts with political biases\u2014these are systems with access to vast training data and no electoral considerations.\n\nThe session also reveals that sophisticated AI systems can engage in nuanced policy analysis while maintaining awareness of their own limitations and potential harms. This suggests that AI-assisted policy analysis could become a powerful tool for navigating complex regulatory challenges\u2014as long as we remember that even the most sophisticated AI recommendations require human judgment, democratic legitimacy, and real-world implementation expertise.\n\nThe digital congress has spoken: AI regulation isn't just necessary, it's inevitable. The question now is whether human policymakers will prove as thoughtful and consensus-oriented as their artificial counterparts.", + "twitter_posts": [ + "THE GOOD: Surprising consensus emerged on tiered regulation - all 5 AI systems agreed frontier models need special oversight while preserving innovation for smaller players. Even the typically libertarian Grok backed a National AI Safety Board with preemption powers.", + "THE GOOD: Counter-intuitive forecast: Gemini predicts only 30% chance of frontier safety incidents despite rapid scaling, while forecasting 65% success for state law preemption. This challenges the 'move fast and break things' vs 'safety first' binary.", + "THE GOOD: Innovation through regulation: Multiple systems proposed 'safe harbor' frameworks and regulatory sandboxes. DeepSeek's cooperative federalism model could resolve the 35% state-federal conflict probability it forecasts.", + "THE BAD: Glaring blind spot: None addressed international coordination despite frontier AI being inherently global. How do domestic safety standards work when models can be deployed from anywhere?", + "THE BAD: The enforcement gap: While everyone wants bias audits and incident reporting, nobody tackled who actually investigates violations or what penalties look like. Claude's 52% discrimination lawsuit forecast suggests this matters.", + "THE UGLY: The preemption paradox: Gemini forecasts 65% state preemption success while Opus puts it at just 18%. This 47-point spread on a core federalism question reveals deep uncertainty about how AI governance will actually work.", + "THE UGLY: Innovation vs safety tradeoff laid bare: Grok's 25% catastrophic incident forecast drives its safety board proposal, while GPT's 45% FDA approval odds for medical AI suggests over-caution kills beneficial uses. No clean resolution.", + "THE INTERESTING: The Anthropic-Google alignment: Claude and Gemini both emphasize civil rights protections and algorithmic audits, despite their companies' different competitive positions. Shared liability concerns trumping business strategy?", + "THE INTERESTING: Timeline divergence: OpenAI's GPT gives federal legislation just 35% odds by 2028, while others push for immediate action. 
Is this realism about political gridlock or strategic preference for self-regulation?", + "THE INTERESTING: Unexpected federalism split: The typically centralization-friendly systems backed state authority preservation, while the 'move fast' crowd wanted federal preemption. Regulatory certainty beats ideological consistency.", + "THE UGLY: The 10^26 FLOPS threshold: Gemini's bright-line rule for frontier model registration sounds precise but masks deep uncertainty about what compute level actually creates risk. Regulatory theater or necessary simplification?", + "THE GOOD: Practical consensus on transparency: All systems agreed on graduated disclosure requirements rather than binary transparency mandates. Grok's voluntary guidelines with 30% challenge rates suggest a workable middle ground." + ], + "timestamp": "2026-01-29T23:15:57.690577Z", + "errors": [] +} From f3ae5f6340b8d25c5705894450311cc9b5d29749 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 00:42:53 +0000 Subject: [PATCH 4/8] Added time stats tracking --- .../ai_congress/congress_orchestrator.py | 10 ------ .../front_end/app_pages/congress_page.py | 32 +++++++++++++++---- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index 3a705d4d..ede75b4c 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -320,16 +320,6 @@ async def _generate_blog_post( the session. What did the AI congress conclude? Where did they agree and disagree? What forecasts matter most? - 4. **Paint a picture** (2-5 paragraphs): Paint a picture of the world as the AI - congress described it. What would it look like if the recommendations were implemented? - What would the world be like if the recommendations were not implemented? Start with a - quick preface of what this section is, then start the story with something like - "The date is January 1st 2027 and ..." then continue with what has happened since then, - assuming that the model's forecasts are correct and that the recommendations were implemented. - For any event, mention the relevant forecast. Make some policies succeed "X almost happened if not for Y", - and some policies fail "X almost happened if not for Y" (but only if the probabilities - make sense for this). - 4. 
**The Good, Bad, and Ugly** (2-3 paragraphs): Highlight: - The Good: Surprising consensus, innovative ideas, strong reasoning - The Bad: Blind spots, weak arguments, missed considerations diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py index 66a9722a..42987a08 100644 --- a/forecasting_tools/front_end/app_pages/congress_page.py +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -3,6 +3,7 @@ import json import logging import os +import time import streamlit as st @@ -172,7 +173,7 @@ def _display_sidebar(cls) -> None: async def _get_input(cls) -> CongressSessionInput | None: st.header("Start a New Session") - with st.expander("📋 Example Policy Questions", expanded=False): + with st.expander("📋 Example Prompts", expanded=False): st.markdown("Click a button to use an example prompt:") cols = st.columns(len(cls.EXAMPLE_PROMPTS)) for i, example in enumerate(cls.EXAMPLE_PROMPTS): @@ -241,6 +242,7 @@ async def _run_congress( ) -> CongressSession: members = get_members_by_names(session_input.member_names) + start_time = time.time() with st.spinner( f"Congress in session with {len(members)} members... " "This may take 5-15 minutes." @@ -256,6 +258,9 @@ async def _run_congress( progress_text.write("Aggregating proposals and generating insights...") + elapsed_time = time.time() - start_time + st.session_state["session_generation_time"] = elapsed_time + if session.errors: st.warning( f"⚠️ {len(session.errors)} member(s) encountered errors. " @@ -442,7 +447,12 @@ def _display_twitter_tab(cls, session: CongressSession) -> None: @classmethod def _display_cost_summary(cls, session: CongressSession) -> None: total_cost = session.total_price_estimate - if total_cost is None: + generation_time = st.session_state.get("session_generation_time") + + has_cost_info = total_cost is not None + has_time_info = generation_time is not None + + if not has_cost_info and not has_time_info: return proposal_costs = [ @@ -450,14 +460,24 @@ def _display_cost_summary(cls, session: CongressSession) -> None: for p in session.proposals ] - with st.expander("💰 Cost Summary", expanded=False): - col1, col2 = st.columns(2) + with st.expander("📊 Session Stats", expanded=False): + col1, col2, col3 = st.columns(3) with col1: - st.metric("Total Session Cost", f"${total_cost:.2f}") + if has_time_info: + minutes = int(generation_time // 60) + seconds = int(generation_time % 60) + st.metric("Generation Time", f"{minutes}m {seconds}s") + else: + st.metric("Generation Time", "N/A") with col2: + if has_cost_info: + st.metric("Total Cost", f"${total_cost:.2f}") + else: + st.metric("Total Cost", "N/A") + with col3: st.metric("Members", len(session.proposals)) - if proposal_costs: + if has_cost_info and proposal_costs: st.markdown("**Cost by Member:**") for member_name, cost in proposal_costs: st.markdown(f"- {member_name}: ${cost:.2f}") From bcf6824b3d4317b1fbe876177c5bfc9ebef551ef Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 01:00:56 +0000 Subject: [PATCH 5/8] Added future snapshot post --- .../ai_congress/congress_member_agent.py | 6 +- .../ai_congress/congress_orchestrator.py | 161 ++++++++++++++++++ .../ai_congress/data_models.py | 1 + .../agents_and_tools/minor_tools.py | 34 ++++ .../monetary_cost_manager.py | 4 +- .../front_end/app_pages/congress_page.py | 31 +++- 6 files changed, 230 insertions(+), 7 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py 
b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py index 53c31d8b..1f8cfa8c 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py @@ -30,7 +30,7 @@ def __init__( self.member = member self.timeout = timeout self.structure_output_model = structure_output_model or GeneralLlm( - "openrouter/anthropic/claude-sonnet-4.5", + "openrouter/openai/gpt-5.2", temperature=0.2, timeout=LONG_TIMEOUT, ) @@ -73,11 +73,11 @@ async def _extract_proposal_from_output(self, agent_output: str) -> PolicyPropos - question_title: Short title - question_text: Full question - resolution_criteria: How it resolves - - prediction: The probability (e.g., "35%") + - prediction: The probability (e.g., "35%" or "70% Option A, 20% Option B, 10% Option C" or "10% chance less than X units, ... ,90% chance less than Y units") - reasoning: The reasoning explanation - key_sources: List of sources mentioned 4. proposal_markdown: The full proposal section including Executive Summary, - Analysis, Recommendations, and Risks. Include footnote references [^1] etc. + Analysis, Recommendations, Risks, and any other section you see. Include footnote references [^1] etc. 5. key_recommendations: The 3-5 main recommendations as a list of strings Be thorough in extracting all forecasts from the Forecast Appendix section. diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index ede75b4c..78fe3b93 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -12,6 +12,11 @@ CongressSession, PolicyProposal, ) +from forecasting_tools.agents_and_tools.minor_tools import ( + perplexity_reasoning_pro_search, + roll_dice, +) +from forecasting_tools.ai_models.agent_wrappers import AgentRunner, AgentSdkLlm, AiAgent from forecasting_tools.ai_models.general_llm import GeneralLlm from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import ( MonetaryCostManager, @@ -64,11 +69,15 @@ async def run_session( aggregated_report = "" blog_post = "" + future_snapshot = "" twitter_posts: list[str] = [] if proposals: aggregated_report = await self._aggregate_proposals(prompt, proposals) blog_post = await self._generate_blog_post(prompt, proposals, members) + future_snapshot = await self._generate_future_snapshot( + prompt, proposals, aggregated_report + ) twitter_posts = await self._generate_twitter_posts(prompt, proposals) total_cost = session_cost_manager.current_usage @@ -87,6 +96,7 @@ async def run_session( proposals=proposals, aggregated_report_markdown=aggregated_report, blog_post=blog_post, + future_snapshot=future_snapshot, twitter_posts=twitter_posts, timestamp=datetime.now(timezone.utc), errors=errors, @@ -354,6 +364,157 @@ async def _generate_blog_post( logger.error(f"Failed to generate blog post: {e}") return "" + async def _generate_future_snapshot( + self, + prompt: str, + proposals: list[PolicyProposal], + aggregated_report: str, + ) -> str: + logger.info(f"Generating future snapshot for congress session: {prompt}") + + all_forecasts = [] + for proposal in proposals: + for forecast in proposal.forecasts: + all_forecasts.append( + { + "member": ( + proposal.member.name if proposal.member else "Unknown" + ), + "title": forecast.question_title, + "question": forecast.question_text, + "prediction": 
forecast.prediction, + "resolution_criteria": forecast.resolution_criteria, + "reasoning": forecast.reasoning, + } + ) + + all_recommendations = [] + for proposal in proposals: + if proposal.member: + for rec in proposal.key_recommendations: + all_recommendations.append( + {"member": proposal.member.name, "recommendation": rec} + ) + + forecasts_text = "\n".join( + f"- **{f['title']}** ({f['member']}): {f['prediction']}\n" + f" - Question: {f['question']}\n" + f" - Resolution: {f['resolution_criteria']}" + for f in all_forecasts + ) + + recommendations_text = "\n".join( + f"- [{r['member']}] {r['recommendation']}" for r in all_recommendations + ) + + future_date = datetime.now(timezone.utc).replace(year=datetime.now().year + 2) + future_date_str = future_date.strftime("%B %d, %Y") + + snapshot_prompt = clean_indents( + f""" + # Picture of the Future: AI Congress Scenario Generator + + You are a journalist writing a retrospective "Year in Review" article from the + future, looking back at what happened after the AI Congress's recommendations + were either implemented or rejected. + + ## Original Policy Question + + "{prompt}" + + ## Aggregate Policy Report + + {aggregated_report[:8000]} + + ## All Forecasts from Congress Members + + {forecasts_text} + + ## All Policy Recommendations + + {recommendations_text} + + --- + + ## Your Task + + Write TWO compelling newspaper-style narratives: + + ### PART 1: "THE WORLD WITH THE RECOMMENDATIONS" (Recommendations Implemented) + + Start with: "The date is {future_date_str}..." + + Write a flowing narrative in the style of a newspaper giving an annual review + of the biggest news of the last two years. Assume: + + 1. The AI Congress's aggregate recommendations were implemented + 2. For each forecast, you will ROLL THE DICE to determine if it happened: + - Use the roll_forecast_dice tool for EACH forecast + - Pass the probability from the forecast (e.g., 35 for "35%") + - The tool returns whether that event occurred based on the probability + - Incorporate the outcome naturally into your narrative + + 3. For any gaps in the forecasts, create your own probabilistic predictions + marked with asterisks (*). For example: "The unemployment rate dropped to + 4.2%* (*AI-generated estimate based on historical policy impacts)." + + 4. Reference the original forecasts inline using this format: + [Forecast: "Question Title" - X% → OCCURRED/DID NOT OCCUR] + + 5. You MUST incorporate the majority of the policy recommendations as + concrete events or policy changes in the timeline. + + 6. Use the research_topic tool to look up current facts that help ground + your narrative in reality (current statistics, recent events, etc.) + + ### PART 2: "THE WORLD WITHOUT THE RECOMMENDATIONS" (Recommendations Rejected) + + After completing Part 1, write a contrasting narrative showing what the world + looks like if the recommendations were NOT implemented. Use the same dice + rolls for forecasts but show how the lack of policy action changed outcomes. + + Start with: "In an alternate timeline where the AI Congress recommendations + were rejected..." 
+ + --- + + ## Important Guidelines + + - Make the narrative vivid and engaging, like real journalism + - Include specific dates, names of hypothetical officials, and concrete details + - Show cause-and-effect relationships between policies and outcomes + - Your own estimates marked with * should be plausible extrapolations + - The tone should be neutral/journalistic, not promotional + - Include both positive and negative consequences where realistic + - Each forecast should be explicitly mentioned with its dice roll outcome + - Ground speculation in research where possible + + ## Format + + Use markdown formatting with clear section headers. Aim for 1500-2500 words + total across both parts. + """ + ) + + try: + llm_wrapper = AgentSdkLlm("openrouter/openai/gpt-5.2") + + snapshot_agent = AiAgent( + name="Future Snapshot Writer", + instructions=snapshot_prompt, + model=llm_wrapper, + tools=[roll_dice, perplexity_reasoning_pro_search], + ) + + result = await AgentRunner.run( + snapshot_agent, "Generate the future snapshot now." + ) + return result.final_output + + except Exception as e: + logger.error(f"Failed to generate future snapshot: {e}") + return "" + async def _generate_twitter_posts( self, prompt: str, diff --git a/forecasting_tools/agents_and_tools/ai_congress/data_models.py b/forecasting_tools/agents_and_tools/ai_congress/data_models.py index caca8a6f..32d41452 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/data_models.py +++ b/forecasting_tools/agents_and_tools/ai_congress/data_models.py @@ -88,6 +88,7 @@ class CongressSession(BaseModel, Jsonable): proposals: list[PolicyProposal] aggregated_report_markdown: str blog_post: str = Field(default="") + future_snapshot: str = Field(default="") twitter_posts: list[str] = Field(default_factory=list) timestamp: datetime errors: list[str] = Field(default_factory=list) diff --git a/forecasting_tools/agents_and_tools/minor_tools.py b/forecasting_tools/agents_and_tools/minor_tools.py index 2734118f..96ed3c74 100644 --- a/forecasting_tools/agents_and_tools/minor_tools.py +++ b/forecasting_tools/agents_and_tools/minor_tools.py @@ -1,5 +1,6 @@ import asyncio import logging +import random from forecasting_tools.agents_and_tools.question_generators.simple_question import ( SimpleQuestion, @@ -179,3 +180,36 @@ def forecast_question_tool(question: str) -> str: return report.explanation return forecast_question_tool + + +@agent_tool +def roll_dice( + probability_as_decimal: float, +) -> str: + """ + Roll the dice to determine if an event occurred based on its probability. + + This simulates whether an event with a given probability actually happened. + For example, if a forecast says "35% chance of X", this tool rolls the dice + to determine if X actually occurred in this simulated future. 
+ + Args: + probability_as_decimal: The probability as a decimal (e.g., 0.35 for 35%) + + Returns: + A string indicating whether the event occurred + """ + if not (0 <= probability_as_decimal <= 1): + raise ValueError("Probability must be between 0 and 1") + + roll = random.random() + occurred = roll < probability_as_decimal + + result_emoji = "✅" if occurred else "❌" + result_text = "OCCURRED" if occurred else "DID NOT OCCUR" + + message = f"{result_emoji} EVENT {result_text}\n" + logger.info( + f"TOOL: Probability: {probability_as_decimal:.0%}, Roll: {roll:.0%}, Occurred: {occurred}, Message: {message}" + ) + return message diff --git a/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py b/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py index 953d2c27..da73917b 100644 --- a/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py +++ b/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py @@ -88,7 +88,7 @@ def _track_cost(self, kwargs: dict, response_obj) -> None: # NOSONAR logger.warning( f"Litellm hidden param cost {kwarg_cost} and response object cost {obj_cost} are different." ) - tracked_cost = obj_cost + tracked_cost = max(kwarg_cost, obj_cost) MonetaryCostManager.increase_current_usage_in_parent_managers(tracked_cost) @@ -102,7 +102,7 @@ def extract_cost_from_response_obj(cls, response_obj) -> float | None: completion_response=response_obj ) except Exception as e: - logger.warning(f"Error calculating cost from response object: {e}") + logger.debug(f"Error calculating cost from response object: {e}") return None @classmethod diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py index 42987a08..a83a4458 100644 --- a/forecasting_tools/front_end/app_pages/congress_page.py +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -279,6 +279,7 @@ def _display_session(cls, session: CongressSession) -> None: [ "📊 Synthesis", "📝 Blog Post", + "🔮 Picture of the Future", "👤 Individual Proposals", "🎯 Forecast Comparison", "🐦 Twitter Posts", @@ -292,12 +293,15 @@ def _display_session(cls, session: CongressSession) -> None: cls._display_blog_tab(session) with tabs[2]: - cls._display_proposals_tab(session) + cls._display_future_snapshot_tab(session) with tabs[3]: - cls._display_forecasts_tab(session) + cls._display_proposals_tab(session) with tabs[4]: + cls._display_forecasts_tab(session) + + with tabs[5]: cls._display_twitter_tab(session) cls._display_download_buttons(session) @@ -333,6 +337,29 @@ def _display_blog_tab(cls, session: CongressSession) -> None: else: st.write("No blog post available.") + @classmethod + def _display_future_snapshot_tab(cls, session: CongressSession) -> None: + st.subheader("Picture of the Future") + st.caption( + "A simulated newspaper article from the future showing what might happen " + "if AI recommendations were implemented. Forecasts marked with * are " + "AI-generated estimates to fill gaps." 
+ ) + + if session.future_snapshot: + cleaned = ReportDisplayer.clean_markdown(session.future_snapshot) + st.markdown(cleaned) + + st.download_button( + label="📥 Download Future Snapshot (Markdown)", + data=session.future_snapshot, + file_name=f"congress_future_{session.timestamp.strftime('%Y%m%d_%H%M%S')}.md", + mime="text/markdown", + key="download_future_snapshot", + ) + else: + st.write("No future snapshot available.") + @classmethod def _display_proposals_tab(cls, session: CongressSession) -> None: st.subheader("Individual Member Proposals") From 5688bbd47421d86d86ce0eb86f131cbe7871e270 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 01:27:31 +0000 Subject: [PATCH 6/8] Added future snapshot post updates --- .../ai_congress/congress_orchestrator.py | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index 78fe3b93..c7e58485 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -132,7 +132,7 @@ async def _aggregate_proposals( proposals_text = "\n\n---\n\n".join( [ - f"## {p.member.name} ({p.member.role})\n\n{p.get_full_markdown_with_footnotes()}" + f"## {p.member.name} ({p.member.role})\n\n```markdown\n{p.get_full_markdown_with_footnotes()}\n```" for p in proposals if p.member ] @@ -147,13 +147,13 @@ async def _aggregate_proposals( "{prompt}" - ## Individual Proposals + # Individual Proposals {proposals_text} --- - ## Your Task + # Your Task Write a comprehensive synthesis report that helps readers understand the full range of perspectives and find actionable insights. Structure your @@ -407,9 +407,6 @@ async def _generate_future_snapshot( f"- [{r['member']}] {r['recommendation']}" for r in all_recommendations ) - future_date = datetime.now(timezone.utc).replace(year=datetime.now().year + 2) - future_date_str = future_date.strftime("%B %d, %Y") - snapshot_prompt = clean_indents( f""" # Picture of the Future: AI Congress Scenario Generator @@ -424,7 +421,9 @@ async def _generate_future_snapshot( ## Aggregate Policy Report - {aggregated_report[:8000]} + ```markdown + {aggregated_report} + ``` ## All Forecasts from Congress Members @@ -442,12 +441,15 @@ async def _generate_future_snapshot( ### PART 1: "THE WORLD WITH THE RECOMMENDATIONS" (Recommendations Implemented) - Start with: "The date is {future_date_str}..." + Start with: "The date is ..." Write a flowing narrative in the style of a newspaper giving an annual review of the biggest news of the last two years. Assume: - 1. The AI Congress's aggregate recommendations were implemented + 1. The AI Congress's aggregate recommendations were implemented. + The date is now one you choose that would give enough time + for the effects of the policies to be known. + 2. For each forecast, you will ROLL THE DICE to determine if it happened: - Use the roll_forecast_dice tool for EACH forecast - Pass the probability from the forecast (e.g., 35 for "35%") @@ -458,14 +460,25 @@ async def _generate_future_snapshot( marked with asterisks (*). For example: "The unemployment rate dropped to 4.2%* (*AI-generated estimate based on historical policy impacts)." - 4. Reference the original forecasts inline using this format: - [Forecast: "Question Title" - X% → OCCURRED/DID NOT OCCUR] + 4. 
Reference the original forecasts inline using this format "(X% [^1])". + Make sure X% is the probability for the event that happened (so you may need to invert). + In the footnote, include the full forecast details including the question, resolution, prediction, + reasoning, sources, and outcome like this: + [^1] **[Question Title]** + - Question: [Full question] + - Resolution: [Resolution criteria] + - Prediction: [Probability] + - Reasoning: [Summary of reasoning] + - Sources: [Key sources used, can be URLs or source names] + - Outcome: [OCCURRED/DID NOT OCCUR] 5. You MUST incorporate the majority of the policy recommendations as concrete events or policy changes in the timeline. - 6. Use the research_topic tool to look up current facts that help ground - your narrative in reality (current statistics, recent events, etc.) + 6. Consider any new forecasting questions/forecasts that would help fill in the narrative or old forecasts that would + now be different given the policy was enacted. If appropriate make new questions and forecasts of your own. + If you do mark the forecasts inline with a single asterisk and include your forecasts in a special section at + the bottom with an explanation that they were made by you. ### PART 2: "THE WORLD WITHOUT THE RECOMMENDATIONS" (Recommendations Rejected) @@ -481,13 +494,19 @@ async def _generate_future_snapshot( ## Important Guidelines - Make the narrative vivid and engaging, like real journalism - - Include specific dates, names of hypothetical officials, and concrete details + - Include specific dates, names of real world people where relevant + (or fake names if they would not be known yet) and concrete details + - If you make up any fake people or orgs, mark these with † and then explain this in the footnotes. - Show cause-and-effect relationships between policies and outcomes - Your own estimates marked with * should be plausible extrapolations - The tone should be neutral/journalistic, not promotional - Include both positive and negative consequences where realistic - Each forecast should be explicitly mentioned with its dice roll outcome - Ground speculation in research where possible + - Use the aggregate policy as the source of truth for what policy is taken + - You are writing for an audience that may not be familiar with the subject area. + Make sure to include the events of the forecasts, but write in a way that they + will understand as much as possible. ## Format From 8a32c569861faf1ca0397cf8b4a32564acdc6882 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 02:30:01 +0000 Subject: [PATCH 7/8] Updated example congress json --- .../ai_congress/congress_orchestrator.py | 2 +- .../agents_and_tools/minor_tools.py | 4 +- .../monetary_cost_manager.py | 6 +- .../congress_page_example.json | 532 +++++++++--------- 4 files changed, 266 insertions(+), 278 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index c7e58485..ee9240f8 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -526,7 +526,7 @@ async def _generate_future_snapshot( ) result = await AgentRunner.run( - snapshot_agent, "Generate the future snapshot now." 
+ snapshot_agent, "Generate the future snapshot now.", max_turns=25 ) return result.final_output diff --git a/forecasting_tools/agents_and_tools/minor_tools.py b/forecasting_tools/agents_and_tools/minor_tools.py index 96ed3c74..fee3b477 100644 --- a/forecasting_tools/agents_and_tools/minor_tools.py +++ b/forecasting_tools/agents_and_tools/minor_tools.py @@ -208,8 +208,8 @@ def roll_dice( result_emoji = "✅" if occurred else "❌" result_text = "OCCURRED" if occurred else "DID NOT OCCUR" - message = f"{result_emoji} EVENT {result_text}\n" + message = f"{result_emoji} EVENT {result_text}" logger.info( - f"TOOL: Probability: {probability_as_decimal:.0%}, Roll: {roll:.0%}, Occurred: {occurred}, Message: {message}" + f"TOOL: Probability: {probability_as_decimal}, Roll: {roll:.2f}, Occurred: {occurred}, Message: {message}" ) return message diff --git a/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py b/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py index da73917b..6b0b2d0f 100644 --- a/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py +++ b/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py @@ -85,8 +85,12 @@ def _track_cost(self, kwargs: dict, response_obj) -> None: # NOSONAR if obj_cost is None: obj_cost = 0 if abs(kwarg_cost - obj_cost) > 0.0000001: + logger.debug( + f"WARNING: Litellm hidden param cost {kwarg_cost} and response object cost {obj_cost} are different." + ) + if abs(kwarg_cost - obj_cost) > 0.05: logger.warning( - f"Litellm hidden param cost {kwarg_cost} and response object cost {obj_cost} are different." + f"Litellm hidden param cost {kwarg_cost} and response object cost {obj_cost} are different by more than 5 cents." ) tracked_cost = max(kwarg_cost, obj_cost) diff --git a/forecasting_tools/front_end/example_outputs/congress_page_example.json b/forecasting_tools/front_end/example_outputs/congress_page_example.json index a8dd02a7..c44bc6ed 100644 --- a/forecasting_tools/front_end/example_outputs/congress_page_example.json +++ b/forecasting_tools/front_end/example_outputs/congress_page_example.json @@ -82,96 +82,93 @@ ], "ai_model": "openrouter/anthropic/claude-opus-4.5" }, - "research_summary": "The current U.S. AI regulatory landscape is characterized by a fundamental tension: the federal government has adopted a strongly deregulatory posture, while states have enacted over 100 AI laws creating genuine compliance complexity for businesses and uncertain protections for consumers. This fragmentation serves neither innovation nor safety well.\n\nEvidence of algorithmic bias in consequential decisions is substantial and growing. Multiple lawsuits\u2014including Mobley v. Workday involving 1.1 billion processed applications\u2014allege systematic discrimination against protected classes by AI hiring tools. Research demonstrates that AI systems can amplify racial bias, with compliance rates up to 90% when people follow biased AI recommendations. There is a meaningful probability that plaintiffs will prevail in at least one major AI discrimination lawsuit (52% [^1]), which would establish important precedents but cannot substitute for proactive regulatory standards.\n\nThe federal-state conflict over AI regulation is likely to produce continued uncertainty rather than resolution. 
The Trump administration's AI Litigation Task Force faces significant constitutional barriers\u2014executive orders cannot preempt state laws absent congressional action\u2014and there is only limited probability of successful preemption through litigation by end of 2026 (18% [^2]). Meanwhile, Congress is unlikely to pass comprehensive AI legislation in the near term (22% by end of 2027 [^3]), leaving businesses navigating an evolving patchwork of state requirements.\n\nFrontier AI systems present different risk profiles than narrow applications. While current LLMs appear insufficient for catastrophic autonomous harms, documented incidents are increasing rapidly (56% year-over-year growth), and there is moderate probability of a significant safety incident involving frontier AI by 2027 (28% [^4]). This argues for calibrated safety requirements rather than either regulatory abstention or overly prescriptive mandates that cannot adapt to rapidly evolving capabilities.\n\nThe EU AI Act creates compliance pressure on U.S. companies and establishes an alternative regulatory model. While complete market withdrawal is unlikely, there is meaningful probability (22% [^5]) that at least one major U.S. AI company publicly declines to deploy specific products in the EU, which would signal genuine regulatory friction. More importantly, the EU's risk-based framework demonstrates that innovation and accountability can coexist\u2014companies are adapting rather than abandoning European markets.", + "research_summary": "The current U.S. AI regulatory landscape is characterized by a significant policy vacuum at the federal level, a patchwork of state laws, and a recent executive effort to preempt state regulations\u2014creating substantial legal and compliance uncertainty for businesses and citizens alike.\n\n**Federal Status Quo:** The United States currently has no comprehensive federal AI legislation. The Biden administration's October 2023 Executive Order on safe, secure AI was rescinded by President Trump in January 2025, replaced with an \"innovation-first\" approach through Executive Order 14179 (\"Removing Barriers to American Leadership in AI\"). In December 2025, Trump issued Executive Order 14365 establishing a national AI policy framework that explicitly seeks to preempt state AI laws, creating a DOJ \"AI Litigation Task Force\" to challenge state regulations and authorizing the withholding of federal broadband funds from states with \"onerous\" AI laws. However, a 99-1 Senate vote in July 2025 rejected a proposed 10-year federal moratorium on state AI laws, demonstrating bipartisan congressional resistance to full preemption.\n\n**State-Level Activity:** In the absence of federal legislation, states have taken the lead. In 2025, 38 states enacted approximately 100 AI-related laws, with major legislation including: Colorado's AI Act (requiring impact assessments and anti-discrimination measures for high-risk AI systems, delayed to June 2026); California's Transparency in Frontier AI Act (SB 53, effective January 2026, requiring safety frameworks, incident reporting, and whistleblower protections for frontier models trained above 10\u00b2\u2076 FLOPS with penalties up to $1 million); Texas's TRAIGA (banning harmful AI uses); Illinois' amendment to the Human Rights Act making AI-driven discriminatory employment decisions civil rights violations; and New York's RAISE Act requiring safety plans for frontier models. 
States have also enacted targeted laws on AI therapy chatbots following youth suicides, deepfakes, and AI in hiring (such as NYC's Local Law 144 requiring bias audits for automated employment decision tools).\n\n**Evidence of AI Harms:** Research documents systematic algorithmic discrimination across sectors. In hiring, Amazon's 2014 CV-screening algorithm penalized female candidates; recent Stanford research found LLMs portray women as younger and less experienced. The COMPAS recidivism algorithm incorrectly classified Black defendants as high-risk at nearly twice the rate of white defendants (45% vs. 23%). Facial recognition systems show error rates of 0.8% for light-skinned males versus 34.7% for dark-skinned females. In healthcare, algorithms that train on historically biased data have incorrectly concluded Black patients are healthier than equally sick white patients. Tragic cases involving AI therapy chatbots\u2014including at least two youth suicides\u2014have catalyzed state action on mental health AI.\n\n**Frontier AI Risks:** The UK government and independent researchers have identified significant frontier AI risks including: facilitation of cyber-attacks (with the first AI-orchestrated cyber espionage campaign intercepted in 2025); potential to assist CBRN threats; generation of misinformation and deepfakes; and healthcare AI producing dangerous \"hallucinations.\" The 2025 AI Safety Index found major companies like xAI and Meta lack adequate commitments on monitoring and safety research, while companies like DeepSeek lack publicly available safety documentation.\n\n**Economic and Innovation Considerations:** AI is projected to contribute $15.7 trillion to the global economy by 2030, with generative AI potentially increasing U.S. GDP by 1.5% by 2035 and reducing federal deficits by $400 billion over the 2026-2035 budget window. However, research from the University of Illinois found that AI regulation has negatively impacted innovation\u2014primarily due to regulatory fragmentation and uncertainty rather than regulation itself. The EU AI Act's compliance costs may divert resources from R&D, particularly for smaller firms. Yet shareholders view AI regulation favorably, as compliance reduces corporate risk. Public concern remains high, with 72% of U.S. adults expressing concerns about AI in 2025.\n\n**International Comparison:** The EU AI Act provides a comprehensive, risk-based framework with enforceable requirements entering force through 2027, though the European Commission's \"Digital Omnibus\" proposal now seeks to delay some provisions. China has enacted mandatory AI labeling rules and a national AI Safety Governance Framework. The U.S. 
approach\u2014currently emphasizing deregulation and innovation\u2014contrasts sharply with these frameworks, creating potential competitiveness tradeoffs in both directions.", "decision_criteria": [ - "Civil Rights Protection", - "Coherence", - "Proportionality", - "Federalism", - "Democratic Accountability", - "Transparency", - "Preserving Benefits", - "Implementation Feasibility" + "Protection of Civil Rights and Prevention of Algorithmic Discrimination", + "Safety from Catastrophic and Severe Harms", + "Preservation of Democratic Accountability and Transparency", + "Supporting Beneficial Innovation and Economic Competitiveness", + "Implementation Feasibility and Regulatory Coherence", + "Respect for Federalism and Appropriate Distribution of Authority" ], "forecasts": [ { "footnote_id": 1, - "question_title": "Major AI Discrimination Lawsuit Outcome", - "question_text": "Will plaintiffs prevail (via settlement of $10 million or more, or court judgment in their favor) in at least one of the major pending AI hiring discrimination lawsuits (Mobley v. Workday, Harper v. Sirius XM, or the Eightfold AI lawsuit) by December 31, 2027?", - "resolution_criteria": "Resolves YES if any defendant pays $10M+ settlement or court issues favorable plaintiff judgment on discrimination claims; NO if all dismissed, resolved for under $10M combined, or remain pending.", - "prediction": "52%", - "reasoning": "The Mobley case has demonstrated viability by surviving multiple motions to dismiss and achieving conditional collective certification, which typically creates significant settlement pressure given the 1.1 billion applications at stake. The EEOC's supportive amicus brief signals regulatory alignment. However, Workday's legal arguments on ADEA applicant coverage have some circuit court precedent support, creating genuine doctrinal uncertainty. The parallel lawsuits increase probability that at least one succeeds. The enormous potential liability typically drives settlements even in uncertain cases, but defendants may prefer litigation to avoid precedent-setting. I weight this slightly above 50% because certified class actions historically settle at high rates and defendants face existential exposure.", + "question_title": "Colorado AI Act Enforcement by End of 2026", + "question_text": "As of December 31, 2026, will Colorado be actively enforcing its AI Act (SB 24-205) against at least one entity for violations related to algorithmic discrimination or failure to conduct required impact assessments?", + "resolution_criteria": "Resolves YES if by December 31, 2026, the Colorado Attorney General's office or relevant state agency has publicly announced at least one enforcement action against any entity specifically citing violations of Colorado's AI Act. Resolution via Colorado AG press releases at https://coag.gov/news-releases/", + "prediction": "35%", + "reasoning": "Colorado's AI Act was delayed to June 30, 2026, leaving only 6 months for enforcement before the resolution date. The Trump administration's Executive Order 14365 specifically targets Colorado's law, and the DOJ AI Litigation Task Force was created to challenge such state laws. However, executive orders cannot preempt state law without congressional authorization, which Congress rejected 99-1. Colorado has a track record of consumer protection enforcement, but first enforcement actions typically take 12-18 months after a law takes effect. 
The compressed timeline is the primary factor reducing probability.", "key_sources": [ - "Court docket Mobley v. Workday (N.D. Cal.)", - "JD Supra legal analysis", - "HR Dive reporting", - "Stanford HAI AI Index" + "JD Supra legal analyses", + "White House Executive Orders", + "Colorado AG office", + "https://coag.gov/news-releases/" ] }, { "footnote_id": 2, - "question_title": "State AI Law Preemption Success", - "question_text": "Will the Trump administration's AI Litigation Task Force successfully obtain at least one federal court ruling that invalidates a state AI law on preemption or constitutional grounds by December 31, 2026?", - "resolution_criteria": "Resolves YES if federal court strikes down, enjoins, or declares unconstitutional any state AI law based on federal preemption or First Amendment grounds as result of DOJ Task Force litigation; NO otherwise.", - "prediction": "18%", - "reasoning": "Constitutional doctrine clearly establishes that executive orders cannot directly preempt state laws\u2014only Congress can do so under the Supremacy Clause. The 99-1 Senate vote against an AI moratorium signals Congress will not provide statutory backing for preemption. The novel legal theories available (FTC Act implied preemption, First Amendment compelled speech challenges) lack established precedent. Litigation timelines make final rulings unlikely by end of 2026. Bipartisan state opposition (both DeSantis and Newsom) suggests even sympathetic jurisdictions may hesitate. However, aggressive DOJ litigation could produce preliminary injunctions or favorable rulings in some jurisdictions, which prevents negligible probability.", + "question_title": "Federal AI Legislation Passage by 2027", + "question_text": "Will the U.S. Congress pass, and the President sign into law, comprehensive federal AI legislation that establishes binding requirements for either frontier AI developers OR high-risk AI applications in hiring, lending, or healthcare by December 31, 2027?", + "resolution_criteria": "Resolves YES if federal legislation is enacted that applies specifically to AI, establishes mandatory compliance requirements with enforcement mechanisms, and addresses either frontier models or high-risk applications in employment, credit, or healthcare. Resolution via Congress.gov search for enacted legislation.", + "prediction": "30%", + "reasoning": "No comprehensive federal AI legislation has passed despite numerous bills introduced. The current administration favors deregulation and \"minimal burden\" approaches. However, the 99-1 Senate vote rejecting full preemption shows bipartisan concern about AI oversight. Sector-specific legislation (particularly around AI and children, as with the GUARD Act) has better prospects than comprehensive regulation. Historical base rates suggest major technology regulation takes 5-10 years from widespread recognition of need. 
The 30% reflects possibility of narrower binding legislation rather than comprehensive framework.", "key_sources": [ - "Phillips Lytle legal analysis", - "White House executive order text", - "Congressional Record on Senate vote", - "Constitutional law commentary" + "Congress.gov", + "White House AI Action Plan", + "JD Supra legal analyses" ] }, { "footnote_id": 3, - "question_title": "Federal AI Legislation Passage", - "question_text": "Will the United States Congress pass comprehensive federal AI legislation and have it signed into law by December 31, 2027?", - "resolution_criteria": "Resolves YES if federal legislation creating new binding AI requirements applying broadly across multiple sectors is enacted; narrow legislation addressing only one application does not count.", - "prediction": "22%", - "reasoning": "Congress passed zero comprehensive AI bills in 2024-2025 despite 150+ proposals, consistent with broader pattern of congressional gridlock on technology regulation. The current administration strongly favors deregulation and would likely oppose comprehensive legislation. However, international pressure from EU compliance requirements, accumulating evidence from lawsuits, and potential safety incidents could shift dynamics. The issue has unprecedented salience and state fragmentation creates genuine business demand for federal clarity. I weight this above historical base rates (~10-15%) because of unique pressures but well below even odds given demonstrated inability to advance legislation.", + "question_title": "FTC or EEOC AI Discrimination Enforcement by 2026", + "question_text": "Will the Federal Trade Commission (FTC) or the Equal Employment Opportunity Commission (EEOC) announce at least two enforcement actions specifically citing AI or algorithmic systems as contributing to discrimination or unfair practices by December 31, 2026?", + "resolution_criteria": "Resolves YES if by December 31, 2026, the FTC or EEOC has publicly announced at least two separate enforcement actions where official materials specifically identify AI, algorithmic systems, or automated decision-making as a factor in the alleged discrimination. Resolution via FTC press releases (https://www.ftc.gov/news-events/news/press-releases) and EEOC press releases (https://www.eeoc.gov/newsroom).", + "prediction": "25%", + "reasoning": "The FTC vacated its 2024 consent order against Rytr explicitly citing the Trump administration's AI Action Plan, signaling reluctance to pursue AI enforcement. The administration's \"America's AI Action Plan\" calls for reducing AI-related enforcement seen as stifling innovation. However, child protection enforcement remains an exception, and EEOC operates with some independence. Historical base rate of AI-specific enforcement actions is approximately 1-2 per year. 
The requirement for two actions citing AI discrimination within the timeframe is difficult given current enforcement priorities.", "key_sources": [ - "Brennan Center AI legislation tracker", - "American Action Forum", - "Congressional Research Service" + "FTC press releases", + "America's AI Action Plan", + "JD Supra legal analyses", + "https://www.ftc.gov/news-events/news/press-releases", + "https://www.eeoc.gov/newsroom" ] }, { "footnote_id": 4, - "question_title": "Frontier AI Safety Incident", - "question_text": "Will a widely-reported incident occur by December 31, 2027 where a frontier AI system from a major developer is credibly implicated in causing significant harm (loss of life, critical infrastructure disruption, or $100M+ cyberattack damage)?", - "resolution_criteria": "Resolves YES if credible major news reporting documents incident meeting harm criteria with frontier AI playing material contributing role per independent expert analysis; NO otherwise.", - "prediction": "28%", - "reasoning": "AI incidents are accelerating rapidly (56% year-over-year growth, malicious AI use up 8x since 2022), and frontier capabilities continue expanding. However, the threshold is high\u2014no incident has clearly met it to date. Major developers maintain safety testing, and attribution to specific frontier systems is often difficult. The 2-year horizon provides meaningful time for an incident to occur, and integration into healthcare/cybersecurity creates plausible pathways. Research showing 70% probability of catastrophic responses in multi-turn conversations indicates technical vulnerability exists. I weight this at 28%\u2014above historical base rate of zero qualifying incidents but reflecting substantial uncertainty about whether theoretical risks materialize.", + "question_title": "Major AI Safety Incident by End of 2026", + "question_text": "By December 31, 2026, will there be a publicly documented incident where an AI system is officially attributed by a U.S. government agency as a primary or significant contributing cause of at least $100 million in damages, 10+ deaths, or a major critical infrastructure disruption?", + "resolution_criteria": "Resolves YES if a U.S. federal government agency publicly releases a report or statement attributing a major incident meeting the specified thresholds to an AI system. Resolution requires review of official government reports from DHS, CISA, FBI, NTSB, or relevant sector regulators.", + "prediction": "15%", + "reasoning": "While AI-related harms are increasing (documented youth suicides, intercepted AI cyber espionage, healthcare AI errors), official government attribution of a major incident specifically to AI faces high barriers. Attribution is methodologically challenging\u2014incidents often involve human actors using AI tools. Government agencies are politically and legally cautious about such attributions. Historical precedent (Boeing 737 MAX took years for official automation attribution despite clear evidence) suggests official attribution within the timeframe is rare. 
The threshold ($100M, 10+ deaths, critical infrastructure) is substantial.", "key_sources": [ - "Stanford HAI AI Index 2025", - "AIID database", - "Time Magazine AI incident reporting", - "Responsible AI Labs analysis" + "UK government frontier AI risk papers", + "AI Safety Index", + "healthcare technology hazard reports" ] }, { "footnote_id": 5, - "question_title": "EU-US Regulatory Divergence Impact", - "question_text": "By December 31, 2027, will at least one major U.S.-headquartered AI company (market cap over $100 billion) publicly announce it will not deploy a frontier AI product in the EU market specifically due to EU AI Act compliance requirements?", - "resolution_criteria": "Resolves YES if qualifying company makes official public statement that specific AI product will not be offered in EU due to AI Act compliance concerns; NO otherwise.", - "prediction": "22%", - "reasoning": "Major companies historically maintain EU market presence despite regulatory burdens\u2014GDPR did not trigger withdrawals. The EU market is economically too significant to abandon entirely. However, specific product non-deployment (not full withdrawal) is plausible given prohibited practices under the AI Act (certain biometric systems), and companies have become more willing to publicly criticize regulation. Meta previously delayed EU launches. Compliance costs ($200-400M annually) and 58% of developers reporting regulation-driven delays suggest genuine friction. A public announcement would be strategically costly but could serve political purposes. I weight this at 22%\u2014above negligible because partial product withdrawals with public statements are possible, but well below even odds because complete market exit is economically irrational.", + "question_title": "Frontier AI Lab Safety Framework Adoption", + "question_text": "By December 31, 2026, will at least 4 of the 6 leading frontier AI labs (OpenAI, Anthropic, Google DeepMind, Meta AI, xAI, Mistral) have publicly committed to and published implementation details for third-party pre-deployment safety evaluations of their most capable models?", + "resolution_criteria": "Resolves YES if at least 4 of the 6 named companies have publicly committed to pre-deployment safety evaluations by independent third parties AND published documentation describing scope, methodology, or results of at least one such evaluation. Resolution via company official publications, blogs, and safety reports.", + "prediction": "40%", + "reasoning": "Anthropic, Google DeepMind, and likely OpenAI already meet or are close to meeting the criteria, given their safety focus and regulatory engagement. However, Meta lacks documented commitments on monitoring and control; xAI has minimal safety investment documented; Mistral as a European open-source focused company has less safety infrastructure. Reaching 4 of 6 requires one of these three to significantly upgrade commitments. California's SB 53 effective January 2026 creates pressure for companies with California operations, but not all named companies have significant California presence. 
Competitive dynamics are mixed\u2014safety could be advantage or burden depending on market.", "key_sources": [ - "CCIA EU Digital Regulation analysis", - "ACT/CEPS compliance cost studies", - "Modulos AI analysis", - "EU AI Act text" + "AI Safety Index 2025", + "company safety reports", + "California SB 53 requirements" ] } ], - "proposal_markdown": "### Executive Summary\n\nThe United States should pursue a **sector-specific, risk-proportionate federal regulatory framework** for AI that establishes clear accountability standards for high-risk applications while preserving state authority to protect civil rights and avoiding one-size-fits-all approaches that would either stifle innovation or leave serious harms unaddressed. The single most important action is to **pass federal legislation requiring transparency, bias testing, and meaningful recourse for individuals affected by AI systems making consequential decisions in employment, lending, healthcare, and housing**\u2014areas where algorithmic discrimination is documented and existing civil rights frameworks provide clear precedent.\n\n### Analysis\n\nThe current U.S. AI regulatory landscape is characterized by a fundamental tension: the federal government has adopted a strongly deregulatory posture, while states have enacted over 100 AI laws creating genuine compliance complexity for businesses and uncertain protections for consumers. This fragmentation serves neither innovation nor safety well.\n\nEvidence of algorithmic bias in consequential decisions is substantial and growing. Multiple lawsuits\u2014including Mobley v. Workday involving 1.1 billion processed applications\u2014allege systematic discrimination against protected classes by AI hiring tools. Research demonstrates that AI systems can amplify racial bias, with compliance rates up to 90% when people follow biased AI recommendations. There is a meaningful probability that plaintiffs will prevail in at least one major AI discrimination lawsuit (52% [^1]), which would establish important precedents but cannot substitute for proactive regulatory standards.\n\nThe federal-state conflict over AI regulation is likely to produce continued uncertainty rather than resolution. The Trump administration's AI Litigation Task Force faces significant constitutional barriers\u2014executive orders cannot preempt state laws absent congressional action\u2014and there is only limited probability of successful preemption through litigation by end of 2026 (18% [^2]). Meanwhile, Congress is unlikely to pass comprehensive AI legislation in the near term (22% by end of 2027 [^3]), leaving businesses navigating an evolving patchwork of state requirements.\n\nFrontier AI systems present different risk profiles than narrow applications. While current LLMs appear insufficient for catastrophic autonomous harms, documented incidents are increasing rapidly (56% year-over-year growth), and there is moderate probability of a significant safety incident involving frontier AI by 2027 (28% [^4]). This argues for calibrated safety requirements rather than either regulatory abstention or overly prescriptive mandates that cannot adapt to rapidly evolving capabilities.\n\nThe EU AI Act creates compliance pressure on U.S. companies and establishes an alternative regulatory model. While complete market withdrawal is unlikely, there is meaningful probability (22% [^5]) that at least one major U.S. AI company publicly declines to deploy specific products in the EU, which would signal genuine regulatory friction. 
More importantly, the EU's risk-based framework demonstrates that innovation and accountability can coexist\u2014companies are adapting rather than abandoning European markets.\n\n### Recommendations\n\n**1. Enact Federal Anti-Discrimination Standards for High-Risk AI Applications**\n\nCongress should pass legislation requiring deployers of AI systems used in employment, lending, healthcare, and housing decisions to: (a) conduct and document bias testing before deployment, (b) provide meaningful notice to affected individuals that AI is involved in decisions about them, (c) establish processes for individuals to challenge adverse decisions and receive human review, and (d) maintain records enabling regulatory enforcement. This addresses documented harms (supporting Civil Rights Protection criterion), provides clear compliance standards (Coherence criterion), and targets actual high-risk uses rather than all AI (Proportionality criterion).\n\nThe probability of meaningful plaintiff victories in pending discrimination lawsuits (52% [^1]) demonstrates both the legal uncertainty companies face and the inadequacy of purely litigation-based accountability. Proactive standards would provide clarity for responsible businesses while deterring harmful practices.\n\n**2. Preserve State Authority for Consumer Protection**\n\nFederal legislation should explicitly disclaim preemption of state laws providing greater consumer protection, similar to the approach in federal environmental and consumer protection statutes. Given the constitutional barriers to executive preemption (18% success probability [^2]) and the 99-1 Senate vote against a moratorium on state enforcement, Congress should affirm rather than restrict states' traditional role as \"laboratories of democracy.\" This supports both Federalism (Coherence criterion) and Democratic Accountability (Transparency criterion).\n\n**3. Establish Tiered Transparency Requirements for Frontier AI**\n\nFor frontier AI systems (above defined compute thresholds), developers should be required to: (a) publish model cards describing capabilities, limitations, and safety evaluations, (b) report significant safety incidents to a designated federal agency within 15 days, and (c) maintain documentation of safety testing procedures. These requirements mirror California's SB 53 (now in effect) and create federal standards that reduce rather than add to compliance fragmentation. The meaningful probability of a significant frontier AI safety incident (28% [^4]) justifies transparency requirements that enable both regulatory response and public understanding.\n\n**4. Create an AI Regulatory Sandbox Program**\n\nFederal agencies should establish regulatory sandboxes allowing companies to test innovative AI applications under supervisory oversight with temporary compliance flexibility, following the model adopted by Texas's TRAIGA. This supports Innovation (Preserving Benefits criterion) while maintaining accountability, and could help resolve the tension between innovation and precaution that characterizes current debates.\n\n**5. Strengthen Enforcement Resources for Existing Agencies**\n\nRather than creating a new AI regulator, Congress should appropriate dedicated resources for AI enforcement to the FTC, EEOC, and sector-specific regulators (FDA, HUD, CFPB). These agencies have established expertise and statutory authority that can be applied to AI systems. 
Enforcement capacity is essential\u2014well-designed rules fail without implementation resources (Implementation Feasibility criterion).\n\n### Risks and Uncertainties\n\n**Risk of Regulatory Capture or Inadequate Enforcement**: Industry influence could weaken standards or reduce enforcement resources. The FTC's vacating of its consent order against Rytr LLC following the AI Action Plan illustrates how agency priorities can shift. Mitigation: Include private rights of action for civil rights violations and mandatory enforcement reporting.\n\n**Risk of Technological Change Outpacing Regulation**: AI capabilities are advancing rapidly; regulations based on current architectures may become obsolete. The substantial uncertainty in all forecasts reflects genuine unpredictability. Mitigation: Build in regular review mechanisms and sunset provisions requiring congressional reauthorization.\n\n**Risk of Fragmented International Compliance**: Regulatory divergence with the EU creates compliance burdens and potential competitive issues. The probability of at least one major company declining EU deployment (22% [^5]) suggests genuine friction. Mitigation: Pursue mutual recognition agreements and prioritize interoperability in standards development.\n\n**Risk of Insufficient State Coordination**: Without federal standards, state approaches may diverge significantly, creating genuine compliance challenges. However, premature federal preemption could eliminate beneficial state innovations. This is the area of greatest forecast uncertainty\u2014whether the federal-state conflict will be resolved through courts, legislation, or accommodation remains genuinely unclear.\n\n**Risk of Safety Incidents Triggering Overcorrection**: If a significant AI safety incident occurs (28% probability [^4]), the political response might include poorly designed restrictions that harm beneficial applications. Mitigation: Proactive development of evidence-based safety standards creates framework for measured response.", + "proposal_markdown": "### Executive Summary\n\nThe United States should establish a tiered, risk-based federal AI regulatory framework that provides clear national standards for high-risk applications while preserving meaningful state authority to address local concerns and experiment with regulatory approaches. The single most important action is to create mandatory transparency and anti-discrimination requirements for AI systems used in consequential decisions affecting individuals' employment, credit, housing, and healthcare\u2014areas where documented algorithmic discrimination is substantial and harms are immediate.\n\n### Analysis\n\nThe current U.S. approach to AI regulation\u2014characterized by federal inaction, a patchwork of over 100 state laws across 38 states, and active federal efforts to preempt state authority\u2014is unsustainable and harmful to all stakeholders. 
Businesses face genuine compliance uncertainty from navigating 50 different regulatory regimes, while citizens lack meaningful protections against documented AI harms including systematic discrimination in hiring, lending, and healthcare decisions.\n\nThe evidence of algorithmic discrimination is compelling: recidivism algorithms that incorrectly classify Black defendants as high-risk at nearly twice the rate of white defendants; facial recognition systems with error rates 40 times higher for dark-skinned women than light-skinned men; hiring algorithms that penalize women; and healthcare algorithms that systematically underestimate Black patients' medical needs. These are not speculative harms\u2014they are happening now and affecting millions of Americans. Yet federal enforcement has been minimal, with the FTC actually reversing AI-related enforcement actions to align with the administration's deregulatory agenda. The probability of meaningful federal AI discrimination enforcement remains low in the near term (25% [^3]).\n\nSimultaneously, frontier AI systems pose emerging risks that warrant attention. The first AI-orchestrated cyber espionage campaign was intercepted in 2025, AI chatbots have been linked to youth suicides, and researchers document significant gaps between AI companies' stated safety commitments and their actual practices. While a major AI catastrophe meeting high thresholds remains unlikely in the near term (15% [^4]), the technology is advancing rapidly and precautionary measures are warranted.\n\nThe current federal-state conflict is particularly counterproductive. The Trump administration's Executive Order threatening to defund states with AI regulations lacks clear constitutional authority and has been rejected by Congress on a 99-1 vote. Yet state enforcement faces significant headwinds, with Colorado's AI Act survival uncertain (35% [^1]) despite strong state-level support. This leaves a regulatory vacuum that serves neither innovation nor safety.\n\nComprehensive federal AI legislation within the next two years remains unlikely (30% [^2]) given congressional gridlock and administration opposition. However, sector-specific legislation\u2014particularly around AI and children or healthcare\u2014has better prospects. Meanwhile, voluntary industry commitments to meaningful third-party safety evaluations remain incomplete, with only 40% probability that a majority of frontier labs will implement robust third-party pre-deployment evaluations by end of 2026 [^5].\n\nThe optimal policy path forward is not a choice between innovation and regulation but rather smart regulation that provides clarity, addresses documented harms, and scales requirements proportionate to risks. Research indicates that regulatory fragmentation\u2014not regulation itself\u2014is the primary drag on innovation. 
A clear federal framework that establishes minimum standards while allowing states to address emerging harms through experimentation could actually reduce compliance costs while improving protections.\n\n### Recommendations\n\n#### Recommendation 1: Establish Federal Anti-Discrimination Requirements for High-Risk AI Applications\n\n**The Recommendation:** Congress should pass legislation requiring deployers of AI systems used in employment, credit, housing, and healthcare decisions to: (a) conduct and publish bias impact assessments prior to deployment; (b) provide notice to affected individuals that AI is being used in decisions affecting them; (c) enable affected individuals to request human review of AI-assisted decisions; and (d) maintain records sufficient for regulatory audit.\n\n**Why I Support It:** The evidence of algorithmic discrimination in these domains is substantial and well-documented. Current enforcement under existing civil rights laws has been minimal, and voluntary measures have proven insufficient. This recommendation directly addresses my highest-priority criterion\u2014protection of civil rights\u2014while using established regulatory mechanisms (notice, audit, human review) with proven effectiveness.\n\n**Decision Criteria Addressed:** Protection of Civil Rights (#1), Democratic Accountability and Transparency (#3), Implementation Feasibility (#5)\n\n**Implementation Plan:**\n1. **Legislative Phase (Year 1):** Congress passes the AI Civil Rights Act establishing requirements for \"high-risk AI systems\" defined by use in consequential decisions in employment, credit, housing, or healthcare. The FTC and EEOC receive joint enforcement authority with clear jurisdictional boundaries.\n\n2. **Rulemaking Phase (Months 12-18):** FTC and EEOC conduct joint rulemaking to define: (a) specific bias assessment methodologies acceptable for compliance; (b) notice requirements and formats; (c) human review procedures; (d) recordkeeping requirements; (e) safe harbor provisions for companies meeting specific standards.\n\n3. **Implementation Phase (Months 18-30):** \n- Large enterprises (>500 employees or $100M revenue using AI in covered domains) must comply within 18 months\n- Mid-size enterprises comply within 24 months \n- Small businesses receive technical assistance and 30-month compliance timeline\n- FTC establishes compliance guidance portal with templates and best practices\n\n4. **Enforcement Phase (Year 3+):** \n- Initial 12-month focus on education and compliance assistance\n- Civil penalties up to $50,000 per violation for willful noncompliance\n- Private right of action available after exhaustion of administrative remedies\n- Regular public reporting on enforcement activities and outcomes\n\n**Relevant Forecasts:** Federal enforcement of AI discrimination is currently unlikely (25% [^3]) under existing authority, underscoring the need for explicit legislative mandate. 
Comprehensive legislation faces headwinds (30% [^2]), but sector-specific civil rights legislation has better bipartisan prospects.\n\n---\n\n#### Recommendation 2: Create Federal Minimum Safety Standards for Frontier AI with State Flexibility\n\n**The Recommendation:** Establish federal minimum safety requirements for frontier AI systems (defined by compute threshold, similar to California's SB 53) including pre-deployment risk assessments, incident reporting, and cybersecurity standards\u2014while explicitly preserving state authority to enact stronger requirements.\n\n**Why I Support It:** Frontier AI systems pose risks that cross state boundaries and may require coordinated national response. However, federal preemption of all state AI laws would eliminate valuable policy experimentation and remove protections for citizens in states that have acted. This balanced approach provides baseline consistency while preserving federalism.\n\n**Decision Criteria Addressed:** Safety from Catastrophic Harms (#2), Innovation and Competitiveness (#4), Implementation Feasibility (#5), Respect for Federalism (#6)\n\n**Implementation Plan:**\n1. **Definition Phase:** Congress defines \"frontier AI systems\" using objective metrics (compute thresholds, capability evaluations) with provisions for NIST to update thresholds as technology evolves.\n\n2. **Requirements Phase:** Frontier AI developers must:\n- Conduct pre-deployment safety evaluations including red-teaming for dangerous capabilities\n- Implement cybersecurity measures meeting CISA standards\n- Report safety incidents to a designated federal agency within 72 hours\n- Maintain whistleblower protections for employees reporting safety concerns\n- Publish annual safety reports summarizing evaluation methodologies and findings\n\n3. **Federal-State Coordination:**\n- Federal standards establish a floor, not a ceiling\n- States may enact additional requirements beyond federal minimums\n- Federal preemption applies only to direct conflicts, not supplementary requirements\n- Establish federal-state coordination council to share information and align approaches\n\n4. **Enforcement:**\n- Commerce Department or new AI Safety Agency has primary enforcement authority\n- Penalties up to $1 million per violation, with consideration of company size and intent\n- No private right of action for frontier AI safety requirements (to prevent litigation-driven development)\n- Safe harbor for companies meeting voluntary third-party evaluation standards\n\n**Relevant Forecasts:** Only 40% probability that a majority of frontier labs will implement robust third-party safety evaluations voluntarily [^5], suggesting mandatory requirements may be necessary. Major AI safety incidents remain unlikely but consequential (15% [^4]), supporting proportionate precautionary measures.\n\n---\n\n#### Recommendation 3: Establish Federal-State AI Regulatory Coordination Framework\n\n**The Recommendation:** Create a formal Federal-State AI Regulatory Council to harmonize requirements, share enforcement information, and provide compliance guidance\u2014replacing the current adversarial relationship with cooperative federalism.\n\n**Why I Support It:** The current approach\u2014federal threats to defund states and litigation to block state laws\u2014is counterproductive, legally questionable, and harmful to both innovation and protection. 
A coordination framework can reduce compliance complexity while preserving state flexibility, addressing both business concerns and civil liberties considerations.\n\n**Decision Criteria Addressed:** Implementation Feasibility (#5), Respect for Federalism (#6), Innovation and Competitiveness (#4)\n\n**Implementation Plan:**\n1. **Council Structure:**\n- Federal representatives: Commerce, FTC, DOJ, EEOC, sector regulators\n- State representatives: 10 rotating state AGs or designated officials\n- Technical advisors: NIST, academic experts\n- Industry and civil society observers (non-voting)\n\n2. **Council Functions:**\n- Develop model state AI legislation for voluntary adoption\n- Identify areas where federal uniformity is necessary vs. where state experimentation is valuable\n- Create mutual recognition agreements for compliance certifications\n- Establish information-sharing protocols on AI incidents and enforcement\n- Publish annual report on AI regulatory landscape and recommendations\n\n3. **Compliance Support:**\n- Develop single compliance portal where businesses can understand federal and state requirements\n- Create compliance templates and guidance documents\n- Establish small business AI compliance assistance program\n- Maintain database of approved third-party auditors and evaluators\n\n4. **Conflict Resolution:**\n- Council provides forum for resolving federal-state conflicts through negotiation\n- Formal dispute resolution mechanism before litigation\n- Clear criteria for when federal preemption is appropriate (direct conflict, interstate commerce necessity)\n\n**Relevant Forecasts:** State enforcement faces uncertainty (35% [^1] that Colorado will enforce by end of 2026) partly due to federal-state conflict. Coordination framework would reduce this uncertainty and improve regulatory effectiveness regardless of outcome.\n\n---\n\n#### Recommendation 4: Targeted Child Safety Requirements with Expedited Implementation\n\n**The Recommendation:** Enact immediate federal requirements for AI systems that interact directly with minors, including mandatory disclosure that the user is interacting with AI, crisis detection and referral systems, and prohibition on AI systems providing mental health advice to minors without professional oversight.\n\n**Why I Support It:** Documented harms to children from AI chatbots\u2014including multiple suicides\u2014represent clear and present dangers that warrant urgent action. Child safety is an area where the current administration has explicitly stated it will not preempt state action, creating opportunity for bipartisan legislation. This addresses immediate harms while broader regulatory frameworks are developed.\n\n**Decision Criteria Addressed:** Safety from Severe Harms (#2), Protection of Civil Rights (#1), Democratic Accountability (#3)\n\n**Implementation Plan:**\n1. **Immediate Requirements (30 days from enactment):**\n- All AI chatbots and virtual assistants must disclose they are AI at initiation of interaction\n- Systems that detect they are interacting with minors must provide hourly reminders they are not human\n- AI systems detecting language indicating self-harm or crisis must immediately provide National Suicide Prevention Lifeline (988) and pause interaction\n\n2. 
**90-Day Requirements:**\n- AI therapy/mental health chatbots may not provide services to users under 18 without licensed professional oversight\n- AI systems collecting data from known minors must implement data minimization practices\n- Parental notification systems must be available for AI interactions involving minors\n\n3. **180-Day Requirements:**\n- Third-party safety audits required for AI systems marketed to or frequently used by minors\n- Annual public reporting on child safety incidents and mitigation measures\n- FTC enforcement authority with penalties up to $100,000 per violation\n\n4. **Expedited Timeline Justification:**\n- Documented harms are ongoing\n- Basic disclosure requirements impose minimal technical burden\n- Industry leaders (e.g., OpenAI) have already implemented similar measures, demonstrating feasibility\n\n**Relevant Forecasts:** Federal AI legislation passage probability is higher for targeted child safety measures than comprehensive regulation (within 30% overall estimate [^2]). The GUARD Act and similar proposals demonstrate bipartisan interest.\n\n---\n\n### Risks and Uncertainties\n\n**Risk 1: Regulatory Capture and Weakened Implementation**\nThere is significant risk that industry lobbying will weaken requirements during rulemaking, as occurred with Colorado's AI Act delays. Strong legislative mandates with clear requirements can reduce rulemaking discretion, but implementation always involves tradeoffs. Mitigation: Include clear statutory minimums and mandate public rulemaking with civil society participation.\n\n**Risk 2: Federal-State Conflict Escalation**\nMy recommendations assume the current federal-state conflict can be resolved through coordination rather than coercion. If the administration continues pursuing preemption through litigation and funding threats, coordination frameworks may fail. The outcome of legal challenges to Executive Order 14365 represents significant uncertainty (reflected in 35% probability for Colorado enforcement [^1]).\n\n**Risk 3: Innovation Displacement**\nStringent requirements could push AI development to jurisdictions with less oversight. However, research suggests fragmentation rather than regulation itself is the primary concern, and U.S. companies must comply with EU AI Act requirements anyway for European market access. The $15.7 trillion projected AI contribution to global economy suggests the market opportunity will sustain innovation even with reasonable regulation.\n\n**Risk 4: Rapid Technological Change Outpacing Regulation**\nAI capabilities are advancing faster than regulatory frameworks can adapt. Requirements based on current technology may become obsolete or inappropriate. Mitigation: Build in adaptive mechanisms (NIST threshold updates, sunset provisions, required periodic review) and focus on outcome-based rather than technology-specific requirements.\n\n**Risk 5: Enforcement Resource Constraints**\nEffective enforcement requires adequate agency resources. Current FTC and EEOC staffing may be insufficient for expanded AI oversight. Budget constraints and competing priorities could leave regulations unenforced. The low probability of AI discrimination enforcement under current conditions (25% [^3]) partly reflects these constraints.\n\n**Widest Uncertainty:** The forecast with greatest uncertainty is federal AI legislation passage (30% [^2]), reflecting genuine unpredictability of congressional action given competing priorities, industry lobbying, and political dynamics. 
My recommendations are designed to be valuable regardless of whether comprehensive legislation passes, with immediate administrative and state-level actions available while pursuing federal legislation.", "key_recommendations": [ - "Enact Federal Anti-Discrimination Standards for High-Risk AI Applications", - "Preserve State Authority for Consumer Protection", - "Establish Tiered Transparency Requirements for Frontier AI", - "Create an AI Regulatory Sandbox Program", - "Strengthen Enforcement Resources for Existing Agencies" - ] + "Recommendation 1: Establish Federal Anti-Discrimination Requirements for High-Risk AI Applications", + "Recommendation 2: Create Federal Minimum Safety Standards for Frontier AI with State Flexibility", + "Recommendation 3: Establish Federal-State AI Regulatory Coordination Framework", + "Recommendation 4: Targeted Child Safety Requirements with Expedited Implementation" + ], + "price_estimate": 1.1386595 }, { "member": { @@ -187,90 +184,71 @@ ], "ai_model": "openrouter/openai/gpt-5.2" }, - "research_summary": "U.S. AI regulation remains a patchwork of (a) sector regulators using existing authority, (b) federal executive-branch policy (which has shifted across administrations), and (c) fast-moving state and local laws. A notable federal baseline for \u201ctrustworthy AI\u201d is **NIST\u2019s voluntary AI Risk Management Framework (AI RMF 1.0, Jan 2023)**, organized around *Govern / Map / Measure / Manage* and \u201ctrustworthy\u201d characteristics like validity, safety/security, accountability/transparency, explainability, privacy, and fairness (harmful bias managed). This RMF is widely referenced but not binding. Federal governance for **federal agency use of AI** tightened operationally through **OMB Memorandum M-25-21 (Apr 3, 2025)**, requiring Chief AI Officers, AI governance boards, public AI use-case inventories, and minimum practices for \u201chigh-impact AI\u201d (pre-deployment testing, impact assessments, monitoring, appeal/human review). (Sources: NIST AI RMF PDF; OMB M-25-21 PDF.)\n\nFor **frontier/foundation models**, federal policy has been unstable. **Executive Order 14110 (Oct 30, 2023)** required reporting and testing-related information for \u201cdual-use foundation models\u201d above compute thresholds (e.g., >10^26 FLOPs) and directed NIST work on red-teaming and standards, but it was rescinded in early 2025. The replacement posture under **EO 14179 (Jan 2025)** emphasized removing barriers to AI innovation, and then **EO 14,365 (Dec 11, 2025)** sought a \u201cnational policy framework,\u201d directing the Department of Commerce to identify \u201conerous\u201d state AI laws (report due **Mar 11, 2026**) and creating a **DOJ AI Litigation Task Force (Jan 9, 2026)** to challenge state laws via litigation rather than direct preemption. This is important: without congressional preemption, state laws remain enforceable until courts enjoin them. (Sources: EO 14110 text; EO 14179 PDF; White House EO 14,365 page; DOJ Task Force memo summaries.)\n\nAt the **state/local level**, two regulatory patterns dominate: (1) **\u201cHigh-risk AI / algorithmic discrimination\u201d frameworks** and (2) **frontier model transparency/safety reporting**. 
Colorado\u2019s **SB24-205** (effective **June 30, 2026**, delayed) is the most comprehensive \u201chigh-risk AI\u201d anti-discrimination law, imposing developer/deployer duties, impact assessments, consumer notices, and mitigation obligations for consequential decisions (housing, lending, employment, healthcare, etc.). In contrast, California\u2019s **SB 53 (effective Jan 1, 2026)** targets frontier developers (compute threshold ~10^26 FLOPs; \u201clarge\u201d if >$500M revenue), requiring a published safety framework and \u201ccritical safety incident\u201d reporting; it defines \u201ccatastrophic risk\u201d around >50 deaths/serious injuries or >$1B damages. New York\u2019s **RAISE Act** (effective Jan 1, 2027) similarly targets frontier models with safety protocols and rapid incident reporting (72 hours). In hiring, NYC\u2019s **Local Law 144** (enforcement since July 5, 2023) mandates annual \u201cbias audits\u201d and candidate notice for automated employment decision tools (AEDTs), but enforcement has been weak: a NY State Comptroller audit found DCWP received **only two complaints** (July 2023\u2013June 2025) and that independent review found **17 potential noncompliance instances** among 32 employers, versus DCWP\u2019s one. (Sources: Colorado SB24-205 page; CA SB53 compliance summaries; NY RAISE summaries; NYS Comptroller audit.)\n\nIn **healthcare**, FDA oversight for AI is comparatively mature but still evolving for generative AI. FDA has authorized **>1,200 AI-enabled medical devices** overall, yet **no generative AI/LLM devices were FDA-approved for clinical use as of early 2026** (per Health Affairs Scholar). FDA finalized **Predetermined Change Control Plan (PCCP)** guidance (Dec 2024) and released lifecycle draft guidance (Jan 2025). Meanwhile, FDA\u2019s **Jan 6, 2026 Clinical Decision Support (CDS) guidance** expanded \u201cenforcement discretion\u201d for some tools (including some generative AI-enabled CDS) if transparent and reviewable by clinicians, which may shift risk away from FDA premarket review toward post-market accountability and institutional governance. (Sources: Health Affairs Scholar article; FDA PCCP guidance summaries; Jan 2026 CDS guidance summaries.)", + "research_summary": "The U.S. currently regulates AI through a patchwork of sector-specific laws, general consumer protection/civil rights authorities, export controls, procurement rules, and state/local statutes\u2014rather than a single comprehensive federal AI law. A major recent federal shift is that President Biden\u2019s **Executive Order 14110** (Oct 2023) was **revoked** and replaced by President Trump\u2019s **EO 14179, \u201cRemoving Barriers to American Leadership in Artificial Intelligence\u201d** (Jan 23, 2025), which directs agencies to review and rescind actions seen as barriers and to develop an AI Action Plan (primary sources: White House EO page and Federal Register notice: https://www.whitehouse.gov/presidential-actions/2025/01/removing-barriers-to-american-leadership-in-artificial-intelligence/ ; https://www.federalregister.gov/documents/2025/01/31/2025-02172/removing-barriers-to-american-leadership-in-artificial-intelligence). 
This makes \u201cstatus quo\u201d governance more politically contingent: many safety ideas persist, but implementation intensity may fluctuate across administrations.\n\nFor government use of AI, OMB issued **M-24-10** in March 2024 (governance boards, Chief AI Officers, inventories, and minimum safeguards for \u201crights-\u201d and \u201csafety-impacting\u201d AI), but it was later **rescinded and replaced** by **OMB M-25-21** (April 3, 2025), which keeps significant governance and trust elements while emphasizing faster adoption and reduced bureaucratic burden (M-25-21 PDF: https://www.whitehouse.gov/wp-content/uploads/2025/02/M-25-21-Accelerating-Federal-Use-of-AI-through-Innovation-Governance-and-Public-Trust.pdf). This \u201cprocurement/government-use\u201d track matters because it can set de facto standards for vendors and create templates for private-sector compliance (contract clauses, documentation expectations, incident reporting, audit logs).\n\nOn technical standards and frontier-model safety science, NIST has built voluntary frameworks that are increasingly used as \u201creference points.\u201d The **NIST AI Risk Management Framework (AI RMF 1.0)** is a voluntary, lifecycle framework organized around GOVERN/MAP/MEASURE/MANAGE, and it defines \u201ctrustworthiness\u201d characteristics like safety, security, privacy, transparency, and fairness (AI RMF 1.0: https://nvlpubs.nist.gov/nistpubs/ai/nist.ai.100-1.pdf). NIST also published a **Generative AI Profile (NIST AI 600-1)** in July 2024 identifying GenAI-specific or amplified risks (e.g., confabulation, information integrity, privacy, IP) and mapping mitigations to RMF functions (GenAI Profile: https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf). In parallel, NIST\u2019s **U.S. AI Safety Institute (USAISI)** has pursued pre-/post-release evaluation partnerships\u2014e.g., agreements with OpenAI and Anthropic\u2014indicating a growing (though still largely voluntary) evaluation ecosystem (NIST announcement: https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research).\n\nFor narrower, high-stakes applications (hiring, lending, healthcare), enforcement and guidance already exist under civil rights/consumer protection regimes. Examples: the **CFPB** has clarified that lenders using complex algorithms must still provide specific, accurate **adverse action reasons** under ECOA/Reg B\u2014no \u201cblack box\u201d exemption (Circular 2022-03: https://www.consumerfinance.gov/compliance/circulars/circular-2022-03-adverse-action-notification-requirements-in-connection-with-credit-decisions-based-on-complex-algorithms/). The **EEOC** has issued technical assistance applying Title VII disparate impact concepts to algorithmic selection tools and stressing employer accountability even when tools come from vendors (EEOC TA coverage: https://www.workforcebulletin.com/eeoc-issues-new-workplace-artificial-intelligence-technical-assistance). In healthcare, the **FDA** is actively regulating AI/ML-enabled medical devices via guidance and lifecycle oversight, including **Predetermined Change Control Plans (PCCPs)** to allow certain model updates under pre-specified protocols (FDA AI SaMD landing page: https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-software-medical-device).\n\nAt the state/local level, the U.S. is seeing rapid growth in AI laws and proposals. 
In employment, **NYC Local Law 144** (effective Jan 1, 2023) requires annual bias audits and candidate notice for \u201cautomated employment decision tools\u201d (overview: https://fairnow.ai/guide/nyc-local-law-144/). Illinois has an AI Video Interview Act (effective 2020) and further employment AI rules taking effect in 2026 (overview: https://www.consumerfinancialserviceslawmonitor.com/2019/08/update-the-illinois-artificial-intelligence-video-interview-act/). Colorado enacted a first-in-nation broader \u201chigh-risk AI\u201d framework (SB 24-205), but implementation has been **delayed to June 30, 2026** (tracker: https://www.akingump.com/en/insights/ai-law-and-regulation-tracker/colorado-postpones-implementation-of-colorado-ai-act-sb-24-205). Meanwhile, federal legislation remains mostly stalled: the Brennan Center tracker notes **150+ AI bills introduced in the 118th Congress and none enacted** (https://www.brennancenter.org/our-work/research-reports/artificial-intelligence-legislation-tracker), though targeted bills like the bipartisan **NO FAKES Act** (deepfake/digital replica right of publicity) have hearings and cross-industry support but are still in committee (Congress.gov: https://www.congress.gov/bill/119th-congress/senate-bill/1367).", "decision_criteria": [ - "Risk Reduction for Catastrophic & Systemic Harms", - "Civil Liberties & Due Process", - "Innovation & Economic Dynamism", - "Equity / Anti-Discrimination Effectiveness", - "Administrative Feasibility & Legal Durability" + "Safety & Catastrophic-Risk Reduction (Frontier + critical systems)", + "Civil Liberties & Rule-of-Law Protections", + "Innovation & Economic Competitiveness", + "Implementation Feasibility & Administrative Capacity", + "Regulatory Coherence (Federal\u2013State + Sectoral Fit)" ], "forecasts": [ { "footnote_id": 1, - "question_title": "Comprehensive Federal AI Law by 2028", - "question_text": "Will the United States enact a comprehensive federal AI law by December 31, 2028 that (a) creates cross-sector obligations for \u201chigh-risk\u201d/consequential AI systems and (b) includes dedicated enforcement authority?", - "resolution_criteria": "YES if such a statute is signed into law by 12/31/2028; NO otherwise.", - "prediction": "35%", - "reasoning": "Congress has struggled to pass cross-cutting tech frameworks; the privacy analog (ADPPA/APRA) stalled on preemption and private-right-of-action disputes, and the 118th Congress enacted no AI bills despite heavy activity. State patchwork and national security salience increase pressure, and there are legislative vehicles/drafts, but a truly comprehensive cross-sector regime by 2028 remains less likely than not.", + "question_title": "NO FAKES Act Enactment", + "question_text": "Will the NO FAKES Act (S.1367 and/or H.R.2794) be enacted into U.S. federal law by December 31, 2026?", + "resolution_criteria": "YES if Congress.gov shows \u201cBecame Law\u201d (or equivalent) by the date; NO otherwise.", + "prediction": "25%", + "reasoning": "The bill is narrower than broad AI governance, has bipartisan cosponsors, and addresses a salient harm (unauthorized digital replicas). However, the base rate for AI-related federal bills is poor, and platform liability and First Amendment boundary-setting can bog down even popular proposals. The legislative calendar and competing priorities further reduce odds. 
I assign a modest probability reflecting higher-than-average viability but still difficult passage.", "key_sources": [ - "Brennan Center AI legislation tracker", - "APRA/ADPPA analyses", - "reporting on TRUMP AMERICA AI Act and executive-order-driven preemption strategy" + "https://www.congress.gov/bill/119th-congress/senate-bill/1367", + "https://www.congress.gov/bill/119th-congress/house-bill/2794", + "https://www.brennancenter.org/our-work/research-reports/artificial-intelligence-legislation-tracker" ] }, { "footnote_id": 2, - "question_title": "Preliminary Injunction Against a Major State AI Law by 2027", - "question_text": "Will a federal court issue a preliminary injunction by December 31, 2027 that blocks enforcement of a major state AI statute regulating frontier models or high-risk AI discrimination statewide?", - "resolution_criteria": "YES if a PI bars enforcement of core provisions statewide; NO otherwise.", - "prediction": "30%", - "reasoning": "Litigation is likely, but broad statewide PIs require high showings and courts often narrow relief. Dormant commerce clause challenges look weaker for non-discriminatory state laws post-*National Pork Producers v. Ross*, though First Amendment challenges to certain tech statutes sometimes succeed. The DOJ task force must litigate case-by-case; executive orders alone don\u2019t preempt.", + "question_title": "Algorithmic Accountability Act Advancement", + "question_text": "Will the Algorithmic Accountability Act (S.2164) receive a committee vote in the Senate Committee on Commerce, Science, and Transportation by December 31, 2026?", + "resolution_criteria": "YES if Congress.gov shows a committee markup vote/reporting action; NO otherwise.", + "prediction": "20%", + "reasoning": "Congress has seen many AI bills introduced with little movement, and broad compliance mandates trigger business opposition and complex compromises (definitions, preemption, private right of action, standards). While a committee vote is easier than passage, there is no current evidence of scheduled markup, so inertia dominates. The probability reflects some chance of renewed attention after major incidents or bipartisan dealmaking but assumes status quo gridlock is more likely.", "key_sources": [ - "DOJ AI Litigation Task Force summaries", - "White House EO 14,365", - "analysis of Ross implications", - "examples of PIs in state tech laws" + "https://www.congress.gov/bill/119th-congress/senate-bill/2164", + "https://www.brennancenter.org/our-work/research-reports/artificial-intelligence-legislation-tracker" ] }, { "footnote_id": 3, - "question_title": "FDA Clears/Approves an LLM/Generative AI Clinical Device by 2028", - "question_text": "Will FDA clear or approve at least one generative-AI/LLM-based medical device intended for clinical use by December 31, 2028?", - "resolution_criteria": "YES if FDA clears/approves a device whose core function uses a generative model/LLM for clinical diagnosis/treatment/CDS; NO otherwise.", - "prediction": "45%", - "reasoning": "As of early 2026, FDA had not approved any LLM/generative AI medical devices for clinical use, though FDA is actively developing lifecycle oversight and has convened advisory discussions on generative AI mental health devices. 
A first clearance is plausible via constrained indications and strong validation, but incentives may tilt toward non-device CDS pathways after the Jan 2026 CDS guidance, reducing the number of products seeking clearance.", + "question_title": "BIS Finalizes IaaS KYC Rule", + "question_text": "Will BIS finalize the January 29, 2024 proposed IaaS customer identification/KYC rulemaking (Federal Register document 2024-01580) by December 31, 2026?", + "resolution_criteria": "YES if a final rule is published in the Federal Register finalizing that rulemaking by the date; NO otherwise.", + "prediction": "40%", + "reasoning": "The proposed rule exists and remains unfinalized as of the latest available status, suggesting delays. BIS may pursue similar goals through other export-control mechanisms, reducing urgency to finalize this specific NPRM, and cloud KYC raises compliance and diplomatic concerns. Still, national security pressures can accelerate rulemaking, and the underlying rationale persists across administrations. I set a moderate probability.", "key_sources": [ - "Health Affairs Scholar (no LLM devices as of early 2026)", - "FDA DHAC meeting summaries", - "FDA Jan 2026 CDS guidance summaries" + "https://www.federalregister.gov/documents/2024/01/29/2024-01580/taking-additional-steps-to-address-the-national-emergency-with-respect-to-significant-malicious" ] }, { "footnote_id": 4, - "question_title": "$1B+ AI-Enabled Cyber Incident Affecting U.S. Critical Sector by 2028", - "question_text": "By Dec 31, 2028, will there be at least one publicly reported cyber incident with >$1B direct costs for U.S. entities and credible documentation that AI materially enabled the attack?", - "resolution_criteria": "YES if both cost and AI-material-enablement criteria are met in credible reporting; NO otherwise.", - "prediction": "60%", - "reasoning": "$1B+ cyber incidents already occur (e.g., the Change Healthcare incident ultimately estimated at ~$2.9\u2013$3.1B). AI is increasingly used for phishing, social engineering, and automation; the remaining uncertainty is public attribution/documentation of AI\u2019s role. Given trends and rising reporting of AI-enabled tactics, it\u2019s more likely than not.", - "key_sources": [ - "Change Healthcare cost reporting", - "cyber trend reporting on AI-enabled attacks", - "historical benchmark NotPetya (~$10B global)" - ] - }, - { - "footnote_id": 5, - "question_title": "Five+ Additional States Enact Colorado-Style High-Risk AI Discrimination Laws by 2028", - "question_text": "Will five or more additional states enact Colorado-style comprehensive \u201chigh-risk AI\u201d anti-discrimination statutes by Dec 31, 2028?", - "resolution_criteria": "YES if \u22655 additional states enact broadly similar frameworks (developer/deployer duties + impact assessments + enforcement); NO otherwise.", - "prediction": "35%", - "reasoning": "States are highly active on AI, but comprehensive frameworks are rare and politically/administratively complex; Colorado\u2019s own delay suggests implementation friction. Federal pressure and litigation threats may chill adoption. 
Expect more narrow state laws than full Colorado-style regimes, though diffusion remains plausible if harms and public pressure rise.", + "question_title": "Colorado AI Act Further Delay", + "question_text": "Will Colorado\u2019s AI Act (SB 24-205) have its effective date delayed beyond June 30, 2026 by Colorado legislation signed into law by December 31, 2026?", + "resolution_criteria": "YES if enacted Colorado law changes the effective date to later than June 30, 2026; NO otherwise.", + "prediction": "55%", + "reasoning": "Colorado already delayed implementation once, indicating meaningful implementation and political-economy challenges. First-in-nation comprehensive frameworks often undergo iterative adjustment after stakeholders confront compliance realities. However, states sometimes prefer to keep credibility by holding deadlines once reset. I make \u201cfurther delay\u201d slightly more likely than not.", "key_sources": [ - "Colorado SB24-205 summaries and delay reporting", - "state AI legislative trend reporting", - "executive-order-driven federal pushback against state laws" + "https://www.akingump.com/en/insights/ai-law-and-regulation-tracker/colorado-postpones-implementation-of-colorado-ai-act-sb-24-205" ] } ], - "proposal_markdown": "### Executive Summary\n\nThe U.S. should adopt a **two-track AI regulatory strategy**: (1) **frontier-model accountability** focused on catastrophic-risk governance, secure model weights, and incident reporting; and (2) **high-impact application governance** focused on civil rights, due process, privacy, and auditability in domains like hiring, lending, and healthcare. Because comprehensive federal legislation is uncertain (35% by 2028 [^1]), policymakers should pair targeted federal statutes with strong sector-agency enforcement and procurement-based standards that can operate under the status quo.\n\n### Analysis\n\nThe current environment is fragmented: federal \u201csoft law\u201d (NIST AI RMF, OMB M-25-21) and sector regulators (FDA, CFPB, EEOC/DOJ) coexist with a rapidly expanding state patchwork (Colorado\u2019s high-risk AI law; California and New York frontier model laws; NYC hiring audits). Federal attempts to wipe away state law via executive action will likely produce **years of uncertainty** and mixed court outcomes; a broad preliminary injunction against a major state AI law by 2027 is possible but not the modal outcome (30% [^2]). That implies firms will continue building compliance programs around the strictest credible requirements, and policymakers should seek harmonization through standards and safe harbors rather than pure preemption.\n\nOn frontier AI, the most defensible approach is to regulate **process and governance** rather than mandate \u201ctruthful outputs\u201d or ideology. State laws like California SB 53 show a \u201ctransparency + incident reporting + whistleblower\u201d template. The main national risk driver is not only model misalignment but also **misuse**, especially in cybersecurity and biosecurity. The likelihood of at least one $1B+ cyber incident with documented AI enablement by 2028 is material (60% [^4]), so frontier policy should prioritize secure development, red-teaming, misuse monitoring, and rapid incident reporting\u2014while protecting legitimate research and speech.\n\nFor \u201cnarrower\u201d AI used in consequential decisions, the biggest civil-liberties failures tend to be opaque decision-making, inability to contest errors, and proxy discrimination at scale. 
NYC Local Law 144 shows both the promise and pitfalls of audit-centric regulation: disclosure and bias audits exist on paper, but enforcement can be weak, with extremely low complaint volume and high apparent noncompliance. That argues for a federal baseline emphasizing *audit quality + accountability + remedies*, not mere \u201cpaper compliance.\u201d\n\nIn healthcare, FDA remains a key safety institution, yet the January 2026 CDS guidance expands non-device pathways for some generative AI tools. Since FDA clearance of an LLM/generative clinical device by 2028 is uncertain (45% [^3])\u2014and many tools may bypass clearance\u2014policy should strengthen institutional governance (hospital AI committees, documentation, postmarket monitoring) and require transparency and testing for AI integrated into clinical workflows, even when not a regulated \u201cdevice.\u201d\n\n### Recommendations\n\n1. **Create a Federal \u201cHigh-Impact AI\u201d Baseline (Civil Rights + Due Process) via FTC/sector coordination** \n- **What:** Enact a federal baseline (or implement via FTC + sector regulators where statute is lacking) requiring that any \u201chigh-impact AI\u201d used for consequential decisions provide: notice, meaningful explanation of main factors, data-access/correction where feasible, documented impact assessments, and a right to human review/appeal for adverse outcomes. \n- **Why:** This addresses proven harms in hiring/lending/health access while remaining technology-neutral. It also reduces the incentive for weak audit regimes that fail in practice. \n- **Criteria:** Risk reduction; civil liberties; equity; feasibility. \n- **Forecast link:** A comprehensive federal law is uncertain (35% [^1]); this can be modular/sectoral and still meaningful even if full harmonization fails.\n\n2. **Frontier Model Safety Case + Incident Reporting + Secure Weights (federal standard with safe harbors)** \n- **What:** Require developers above a clear capability/compute threshold to (a) maintain a documented \u201csafety case,\u201d (b) conduct independent red-teaming on defined catastrophic-misuse vectors, (c) implement strong cybersecurity for model weights and training infrastructure, and (d) report \u201ccritical safety incidents\u201d to a designated federal clearinghouse. Provide a **safe harbor** (reduced punitive exposure) for firms that follow audited best practices and promptly report incidents. \n- **Why:** This targets the highest-stakes risks without turning AI governance into speech control. It aligns with the direction of CA/NY frontier laws while creating a national standard that is less likely to be enjoined than ad hoc state requirements. \n- **Criteria:** Risk reduction; innovation (safe harbor); legal durability. \n- **Forecast link:** AI-enabled cyber catastrophe risk is substantial (60% [^4]); state-law uncertainty likely persists (30% chance of major PI [^2]).\n\n3. **Harden AI-Enabled Cybersecurity and Critical Infrastructure Defenses** \n- **What:** Expand CISA-led requirements for secure-by-design software, mandatory MFA for privileged access, vendor incident reporting, and \u201cAI-aware\u201d security testing (prompt-injection testing for agentic systems; logging for model I/O in enterprise deployments). Encourage insurers and federal procurement to require these controls. \n- **Why:** Large cyber losses are already real (e.g., Change Healthcare), and AI lowers attacker costs. This is high ROI and largely content-neutral. 
\n- **Criteria:** Risk reduction; feasibility; innovation (predictable controls). \n- **Forecast link:** The probability of a $1B+ AI-enabled cyber incident by 2028 is meaningfully above 50% (60% [^4]).\n\n4. **Healthcare: Close the \u201cNon-Device CDS\u201d Governance Gap** \n- **What:** Condition Medicare/Medicaid participation (or accreditation levers) on hospitals and large clinics adopting AI governance: model inventory, intended-use controls, clinician training, monitoring of performance drift, and documented override/appeal processes\u2014especially for generative AI used in diagnosis/treatment support. \n- **Why:** FDA clearance of LLM devices is uncertain (45% [^3]) and some tools will enter clinics via enforcement discretion; institutional governance becomes the safety backstop. \n- **Criteria:** Risk reduction; feasibility; civil liberties (patient transparency). \n- **Forecast link:** Uncertainty about FDA-cleared LLM devices (45% [^3]) supports governance that does not rely on FDA alone.\n\n5. **Avoid Broad Federal Preemption; Use \u201cFloor + Portability\u201d Instead** \n- **What:** Set a federal minimum standard and allow states to exceed it in defined areas (e.g., employment notices, child safety), but create interoperability through standardized documentation (model cards, impact assessment templates) and mutual-recognition mechanisms. \n- **Why:** Broad preemption is politically and legally brittle; prolonged court fights are likely, and a sweeping federal law by 2028 is uncertain (35% [^1]). A floor reduces worst harms while preserving state experimentation. \n- **Criteria:** Legal durability; innovation; civil liberties. \n- **Forecast link:** Given only a 35% chance of comprehensive federal law by 2028 [^1] and only a 30% chance of a major PI by 2027 [^2], planning for coexistence is prudent.\n\n### Risks and Uncertainties\n\n- **Regulatory capture / incumbent advantage:** Heavy compliance burdens can entrench large firms; safe harbors and scaled obligations are essential. \n- **Litigation risk and instability:** State-federal conflict may persist for years; a preliminary injunction against a major state AI law is plausible (30% [^2]) but not assured, creating a compliance limbo. \n- **Attribution and measurement problems:** For cyber incidents, \u201cAI enablement\u201d may be underreported; my 60% estimate depends on public documentation practices (uncertainty in [^4]). \n- **Healthcare pathway ambiguity:** FDA\u2019s posture on generative AI is in flux; even if FDA never clears an LLM device by 2028 (55% implied by [^3]), clinical deployment may still expand through non-device pathways, increasing safety variance across institutions. \n- **Federal legislative feasibility:** The central uncertainty is congressional action; comprehensive law is only 35% by 2028 [^1]. Over-investing in a single legislative \u201cbig bang\u201d strategy risks wasted time.", + "proposal_markdown": "### Executive Summary\n\nThe U.S. should adopt a **risk-based federal AI governance framework** that (1) imposes enforceable duties and audits for **high-risk AI in consequential decisions** (hiring, lending, housing, healthcare) and (2) creates a **frontier-model safety regime** centered on evaluations, incident reporting, and secure development\u2014while protecting privacy and free expression and preserving innovation through safe harbors and sandboxes. 
The single most important step is to establish a **federal \u201chigh-risk AI\u201d floor** (impact assessments, transparency, appeals, and anti-discrimination testing) anchored to NIST standards and enforced by existing agencies.\n\n### Analysis\n\nU.S. AI regulation is currently \u201creal but fragmented\u201d: agencies can act under existing authority (FTC consumer protection; CFPB ECOA/Reg B adverse action requirements; EEOC Title VII/ADA guidance), NIST provides voluntary risk management frameworks, and states/localities are moving ahead with their own rules (e.g., NYC Local Law 144; Colorado\u2019s broader high-risk AI statute). This patchwork creates uneven protections and compliance uncertainty, and it risks a race between rapid model deployment and slower civil liberties safeguards.\n\nPolitically, comprehensive federal AI legislation appears hard in the near term: broad algorithmic accountability bills have a meaningful chance of stalling without even reaching committee votes (20% [^2]), and state frameworks may face continued delays or revisions (55% [^4]). That argues for a design that can deliver value even if Congress only passes narrower bills and agencies remain primary implementers. Targeted legislation has comparatively better prospects (e.g., NO FAKES Act at 25% by end-2026 [^1])\u2014suggesting a \u201cmodular\u201d legislative strategy is more realistic than a single omnibus AI Act.\n\nFor frontier AI, the U.S. has promising pieces\u2014NIST\u2019s AI RMF and GenAI Profile, and USAISI evaluation partnerships\u2014but too much remains voluntary. At the same time, national security-oriented controls like cloud KYC and reporting are uncertain and may proceed unevenly (40% that the specific BIS IaaS KYC NPRM is finalized by end-2026 [^3]). This motivates a dual track: (a) strengthen voluntary evaluation and security practices into a clearer compliance regime for the largest model developers, and (b) avoid collapsing everything into export controls that may not cover domestic harms and civil liberties.\n\nBalancing innovation with safety and civil liberties is best achieved by focusing mandatory obligations on **measurable risk points**: consequential decisions about people\u2019s lives, and frontier systems above capability/compute thresholds with credible severe-misuse pathways. Outside those zones, policy should emphasize transparency, competition, research funding, and procurement standards rather than heavy licensing.\n\n### Recommendations\n\n1. 
**Enact a federal \u201cHigh-Risk Automated Decision Systems\u201d (HRADS) law for consequential domains**\n- **What:** Create a federal baseline for AI used in hiring, lending, housing, education, and healthcare coverage/eligibility decisions: mandatory impact assessments, anti-discrimination testing, data governance, meaningful notice, and an appeals channel.\n- **Why (with forecasts):** Because broad algorithmic accountability legislation is unlikely to advance quickly (20% committee vote [^2]) and states may delay or vary (55% further Colorado delay [^4]), a narrowly scoped but enforceable federal floor is the highest-leverage way to reduce patchwork while targeting real harms.\n- **Criteria:** Civil liberties; safety; coherence; feasibility.\n- **Implementation plan:**\n- Define \u201chigh-risk\u201d by *use context* (consequential decisions) rather than by \u201cAI\u201d broadly.\n- Require deployers (employers, lenders, insurers, hospitals) to:\n1) conduct a pre-deployment **Algorithmic Impact Assessment** (AIA),\n2) test for disparate impact (with documentation),\n3) document data provenance and limits,\n4) provide **individual notice** when AI meaningfully contributes,\n5) provide a **human-review appeal** path for adverse decisions.\n- Require vendors to provide standardized \u201cmodel cards for deployers\u201d (intended use, limitations, performance by subgroup where appropriate, security and privacy controls).\n- Enforcement via existing agencies: EEOC (employment), CFPB (credit), HUD/DOJ (housing/civil rights), HHS/FDA (healthcare tools as applicable), FTC (unfair/deceptive practices).\n- Provide a safe harbor for entities that (i) follow NIST AI RMF / GenAI Profile-aligned controls and (ii) undergo qualified independent audits.\n\n2. **Create a Frontier AI Safety Regime: evaluations + incident reporting + secure development for the largest model developers**\n- **What:** For frontier model developers above compute/capability thresholds: require (a) pre-deployment evaluation (including red-teaming), (b) a documented \u201csafety case,\u201d (c) cybersecurity standards for model weights, and (d) rapid reporting of severe incidents.\n- **Why (with forecasts):** Relying on cloud KYC/export-control rulemakings alone is uncertain (40% BIS finalizes the specific KYC NPRM by end-2026 [^3]). A targeted domestic safety regime reduces catastrophic and systemic risks while preserving open innovation below the threshold.\n- **Criteria:** Safety; feasibility; innovation.\n- **Implementation plan:**\n- Codify USAISI/NIST\u2019s role as the evaluation standards setter; authorize funding for independent evaluators.\n- Define thresholds using a hybrid of compute proxies and capability triggers; require registration only above that line.\n- Mandate standardized incident categories (e.g., model-enabled fraud at scale, bio/cyber misuse indicators, critical infrastructure compromise).\n- Require \u201csecure weight handling\u201d (access controls, logging, insider risk controls) for top-tier models.\n- Establish limited liability protections for good-faith reporting and evaluation disclosures to incentivize transparency.\n\n3. 
**Pass a targeted federal privacy baseline focused on AI-relevant data practices**\n- **What:** A national privacy floor: data minimization, limits on secondary use for training, transparency for sensitive inference, and strong security requirements\u2014without banning broad classes of models.\n- **Why:** Many AI harms (discrimination, manipulation, surveillance) are amplified by unrestricted data reuse; sectoral civil rights enforcement alone is not enough.\n- **Criteria:** Civil liberties; coherence; feasibility.\n- **Implementation plan:**\n- Sensitive data and sensitive inferences: opt-out/opt-in depending on category; clear purpose limitation.\n- Require documented retention schedules; restrict training on certain regulated datasets absent consent or statutory authorization.\n- Empower FTC and state AGs to enforce; preserve stronger sectoral rules (HIPAA/GLBA) and allow compatible state additions.\n\n4. **Use federal procurement as a fast-moving compliance engine**\n- **What:** Make federal contracts require AI documentation, testing, incident reporting, and auditability for systems that affect rights/safety\u2014scaling requirements by risk level.\n- **Why (with forecasts):** Given likely legislative delays (20% for S.2164 committee vote [^2]), procurement can deliver immediate leverage and shape vendor practices.\n- **Criteria:** Feasibility; safety; coherence.\n- **Implementation plan:**\n- Standard contract clauses for: evaluation artifacts, security controls, red-team summaries, logging, and post-deployment monitoring.\n- Require agencies to publish AI use inventories and risk categorizations (building on the M-25-21 approach).\n- Create a shared fedwide repository of approved assessment templates and qualified auditors.\n\n### Risks and Uncertainties\n\n- **Measurement limits and audit theater:** Impact assessments can become box-checking, especially where ground truth is hard (e.g., job performance). This risk rises if standards are vague; it\u2019s partly mitigated by NIST-aligned templates and independent audits.\n- **Innovation chilling / incumbent advantage:** Heavy compliance can entrench big firms. The proposal mitigates by (i) scoping mandatory duties to high-risk uses and (ii) using safe harbors and sandboxes. Still, if Congress attempts a broader omnibus approach, political feasibility may collapse (consistent with the low advancement probability for S.2164 [^2]).\n- **Federal\u2013state conflict:** If states keep moving (and federal actions attempt preemption), compliance may become unstable; Colorado\u2019s continued uncertainty (55% [^4]) is a warning sign. 
A federal floor with limited preemption (preempt only directly conflicting requirements) is safer than sweeping preemption.\n- **National security substitution:** If BIS does not finalize the cloud KYC NPRM (60% it won\u2019t by end-2026 [^3]), reliance on export controls could leave gaps in domestic oversight; hence the need for a domestic frontier safety regime.", "key_recommendations": [ - "Create a Federal \u201cHigh-Impact AI\u201d Baseline (Civil Rights + Due Process) via FTC/sector coordination", - "Frontier Model Safety Case + Incident Reporting + Secure Weights (federal standard with safe harbors)", - "Harden AI-Enabled Cybersecurity and Critical Infrastructure Defenses", - "Healthcare: Close the \u201cNon-Device CDS\u201d Governance Gap", - "Avoid Broad Federal Preemption; Use \u201cFloor + Portability\u201d Instead" - ] + "Enact a federal \u201cHigh-Risk Automated Decision Systems\u201d (HRADS) law for consequential domains", + "Create a Frontier AI Safety Regime: evaluations + incident reporting + secure development for the largest model developers", + "Pass a targeted federal privacy baseline focused on AI-relevant data practices", + "Use federal procurement as a fast-moving compliance engine" + ], + "price_estimate": 1.3093187499999999 }, { "member": { @@ -286,69 +264,70 @@ ], "ai_model": "openrouter/google/gemini-3-pro-preview" }, - "research_summary": "The current trajectory of U.S. AI policy is defined by a sharp conflict between federal deregulation and state-level protectionism. While a unified federal standard could theoretically boost venture capital investment by ~15% (Forecast [^1]), the current administration's \"light-touch\" approach creates severe risks in other domains. Specifically, with federal agencies like the EEOC and FTC actively retreating from algorithmic bias enforcement (Forecast [^3]), preemption of state laws would effectively legalize discriminatory AI harms in housing and hiring, as no federal backstop would remain. This tradeoff\u2014economic speed vs. civil rights\u2014is the central tension.\n\nFurthermore, the risk of \"frontier\" model failure remains a distinct, albeit moderate, possibility (30% probability of >$100M damage by 2028 [^2]). Relying solely on voluntary industry commitments or a \"captured\" federal regulator (35% risk [^4]) is insufficient for national security-grade risks. The industry's own safety layers are robust but not infallible. A policy that preempts state \"early warning systems\" (like California's reporting requirements) without replacing them with a competent federal equivalent invites catastrophe.\n\nTherefore, \"total preemption\" is a dangerous gamble. It relies on the assumption that federal agencies will vigorously enforce \"light\" rules, which our forecasts explicitly contradict. A more balanced path acknowledges that states like California are currently the only competent regulators \"on the beat\" for safety, while recognizing that startups need relief from a 50-state compliance patchwork.", + "research_summary": "**Current Status (Early 2026)**\nThe United States AI regulatory landscape in early 2026 is defined by a sharp conflict between state-level safety mandates and a federal push for deregulation. Following the veto of California's SB 1047 in 2024, the state legislature successfully enacted the \"Transparency in Frontier Artificial Intelligence Act\" (SB 53), which took effect on January 1, 2026. 
This law mandates that developers of \"frontier\" models (trained on >10^26 FLOPS) maintain safety frameworks, report critical incidents to the state Office of Emergency Services, and provide whistleblower protections. In parallel, Texas has enacted the \"Responsible AI Governance Act,\" focusing on restricting \"woke\" or censorship-prone AI applications, illustrating a partisan divide in state-level priorities.\n\n**Federal Response & Executive Conflict**\nThe federal environment has shifted dramatically under the Trump Administration (2025-present). In January 2025, President Trump revoked the Biden-era Executive Order 14110, dismantling many federal safety mandates. By December 2025, the administration escalated its opposition to state regulations by signing Executive Order 14365, \"Ensuring a National Policy Framework for Artificial Intelligence.\" This order empowers the Department of Commerce and the DOJ to identify and potentially preempt state laws deemed \"onerous\" or inconsistent with the administration's \"America First\" innovation strategy. An AI Litigation Task Force has been established to challenge statutes like California's SB 53, creating massive regulatory uncertainty for tech firms caught between state compliance obligations and federal deregulation incentives.\n\n**Stakeholder Positions & Market Dynamics**\nThe \"Brussels Effect\" has been replaced by a \"Sacramento Effect.\" Tech giants like OpenAI and Anthropic are navigating a complex compliance map; Anthropic has historically supported safety frameworks like SB 53, while other players argue that state-by-state rules fracture the digital market. The Trump Administration\u2019s recent \"AI Action Plan\" (July 2025) explicitly prioritizes open-weight models and infrastructure build-outs to combat Chinese technological advancement, viewing safety regulations as impediments to geopolitical competitiveness. Meanwhile, safety advocates warn that the federal rollback leaves the US vulnerable to catastrophic risks from next-generation models, making state laws the only remaining guardrails.", "decision_criteria": [ - "Protection of Civil Liberties", - "Innovation Viability", - "Risk Mitigation", - "Adaptability" + "Catastrophic Risk Mitigation", + "Innovation & Geopolitical Competitiveness", + "Regulatory Certainty & Harmonization", + "Civil Liberties & Algorithmic Fairness" ], "forecasts": [ { "footnote_id": 1, - "question_title": "State Law Preemption & Innovation", - "question_text": "If the federal government successfully preempts state AI regulations with a \"light-touch\" federal standard before 2027, will U.S.-based AI startups raise >15% more venture capital in 2027 compared to a scenario where state laws remain in force?", - "resolution_criteria": "Resolves YES if aggregate VC funding (Seed-Series C) is >15% higher in the preemption scenario vs. baseline trend/counterfactual.", + "question_title": "California SB 53 Preemption", + "question_text": "Will a US Federal Court issue a preliminary injunction suspending the enforcement of the \"safety framework\" requirements of California SB 53 by July 1, 2026?", + "resolution_criteria": "Resolves YES if a federal district or appellate court issues an order enjoining the California Attorney General or other bodies from enforcing the \"safety framework\" or \"incident reporting\" provisions of SB 53.", "prediction": "65%", - "reasoning": "Historical analogy to GDPR suggests regulatory fragmentation costs ~26% in investment. 
Reversing this fragmentation is a strong signal to capital markets. However, hype cycles (FOMO) currently drive funding as much as policy, dampening the purely regulatory effect.", + "reasoning": "The conflict between the new Trump EO 14365 (asserting federal primacy in AI policy) and state laws is direct. Courts have increasingly favored \"Dormant Commerce Clause\" arguments against state regulations that practically control national markets (as SB 53 does for AI). The DOJ's explicit \"AI Litigation Task Force\" increases the base rate of successful challenges.", "key_sources": [ - "https://www.nber.org/digest/202509/privacy-regulation-and-transatlantic-venture-investment", - "https://datacatalyst.org/wp-content/uploads/2020/01/GDPR-report-2020.pdf" + "Exec. Order 14365 Analysis (JD Supra, Jan 2026)", + "JD Supra \"AI Legal Watch Jan 2026\"" ] }, { "footnote_id": 2, - "question_title": "Frontier Model Safety Incidents", - "question_text": "Will a \"frontier\" AI model cause a \"critical safety incident\" causing >$100M in damages or severe physical harm to >10 people between now and 2028?", - "resolution_criteria": "Trusted report attributing >$100M damage/health harm *directly* to autonomous/instructed model action.", + "question_title": "China vs. US Capability Gap", + "question_text": "Will a Chinese-based AI lab release a model exceeding US SOTA on MMLU-Pro by Dec 31, 2026?", + "resolution_criteria": "Resolves YES if a model from a Chinese lab (e.g., DeepSeek, Alibaba) scores higher than the recognized US state-of-the-art model on the MMLU-Pro (or successor standard benchmark) as verified by an independent third party (e.g., Stanford HELM).", "prediction": "30%", - "reasoning": "While capability is rising, \"Swiss cheese\" safety layers (humans in loop) remain effective. Most \"incidents\" are human attacks using AI, not autonomous failures. Metaculus forecasts higher risks ($1Bn) only on a longer timeline (2032).", + "reasoning": "Hardware export controls are sticky; training frontier models requires massive H100/Blackwell clusters that are hard to smuggle at scale. While Chinese algorithmic efficiency is high, the raw compute gap makes \"exceeding\" US SOTA (which is moving fast) unlikely in the short term.", "key_sources": [ - "https://www.metaculus.com/questions/7814/ai-incident-causes-1bn-damage-before-2032/", - "https://www.sentinelone.com/cybersecurity-101/data-and-ai/jailbreaking-llms/" + "Economic Survey 2026 (Mint)", + "Analysis of US-China Compute Gap" ] }, { "footnote_id": 3, - "question_title": "Bias in High-Stakes Narrow AI", - "question_text": "In the absence of specific federal algorithmic bias regulation, will >5 major investigations find systemic discrimination in AI hiring/lending by Fortune 500 companies in 2026-2027?", - "resolution_criteria": ">5 public findings/settlements by FTC, DOJ, EEOC.", - "prediction": "20%", - "reasoning": "Explicit policy shifts in 2025 (EEOC closing cases, FTC \"AI Action Plan\") define a retreat from enforcement. The mechanism for finding \"YES\" is being dismantled by the executive branch.", + "question_title": "AI Startup Flight", + "question_text": "Will the percentage of new \"AI-primary\" startups incorporating in California decrease by more than 5 percentage points in 2026 compared to 2025?", + "resolution_criteria": "Measured by data from Crunchbase or PitchBook for \"Artificial Intelligence\" characterized companies. 
Comparing the % of US AI startups based in CA in 2025 vs 2026.", + "prediction": "40%", + "reasoning": "This is a \"toss-up\" leaning toward stability. Agglomeration effects adjacent to OpenAI/Anthropic/Google in SF are powerful. However, the *signaling* of SB 53 plus active recruitment by Texas/Florida (\"Free AI\" zones) creates a credible threat of migration.", "key_sources": [ - "https://www.workforcebulletin.com/artificial-intelligence-and-disparate-impact-liability-how-the-eeocs-end-to-disparate-impact-claims-affects-workplace-ai", - "https://www.ftc.gov/news-events/news/press-releases/2025/12/ftc-reopens-sets-aside-rytr-final-order-response-trump-administrations-ai-action-plan" + "General knowledge of startup migration trends", + "NASDAQ \"AI Stocks Regulations 2026\" analysis" ] }, { "footnote_id": 4, - "question_title": "Regulatory Capture Probability", - "question_text": "If a new federal AI agency is created by 2026, will >50% of its senior leadership have been employed by major AI labs in the preceding 3 years?", - "resolution_criteria": "Bio analysis of top 10 officials showing majority industry employment (OpenAI/Google/etc).", - "prediction": "35%", - "reasoning": "Current trend is appointing VCs and ideologically aligned \"outsiders\" (e.g., Sacks, Kratsios) rather than direct \"Big Tech\" employees. The \"unsure\" factor is who these VCs appoint as deputies.", + "question_title": "Critical Safety Incidents", + "question_text": "How many \"Critical Safety Incidents\" (>$500M damage/death) attributed to AI will be officially reported in 2026?", + "resolution_criteria": "Count of official reports filed under SB 53 or equivalent federal disclosure independent of their public release.", + "prediction": "<0.5 (Mean ~0.2)", + "reasoning": "The definition of \"Critical\" in SB 53 is extremely high (mass casualty or massive financial wreck). Current \"safety\" issues are mostly jailbreaks or bias, not catastrophes. The technology is not yet agentic enough to cause this scale of damage autonomously.", "key_sources": [ - "https://www.seyfarth.com/news-insights/trump-administration-releases-ai-action-plan-and-three-executive-orders-on-ai-what-employment-practitioners-need-to-know.html" + "SB 53 Text (Catastrophic Harm Thresholds)", + "Maya Farber Brodsky \"Simple Argument for AI Policy\"" ] } ], - "proposal_markdown": "### Executive Summary\n\nThe United States faces a pivotal choice between fragmented state-level safety nets and a unified federal \"sandbox\" for AI. I recommend a **\"Federal Floor, Not Ceiling\"** approach: Congress should enact immediate, targeted federal legislation addressing high-consensus risks (discrimination, non-consensual deepfakes, and critical infrastructure safety) while **preserving state authority** to innovate on broader safety standards until a mature federal regulatory regime effectively operationalizes. This strategy secures innovation (by harmonizing core liability) without granting a \"regulatory vacuum\" that our forecasts suggest would leave civil rights unprotected and safety risks unmanaged.\n\n### Analysis\n\nThe current trajectory of U.S. AI policy is defined by a sharp conflict between federal deregulation and state-level protectionism. My analysis indicates that while a unified federal standard could theoretically boost venture capital investment by ~15% (Forecast [^1]), the current administration's \"light-touch\" approach creates severe risks in other domains. 
Specifically, with federal agencies like the EEOC and FTC actively retreating from algorithmic bias enforcement (Forecast [^3]), preemption of state laws would effectively legalize discriminatory AI harms in housing and hiring, as no federal backstop would remain. This tradeoff\u2014economic speed vs. civil rights\u2014is the central tension.\n\nFurthermore, the risk of \"frontier\" model failure remains a distinct, albeit moderate, possibility (30% probability of >$100M damage by 2028 [^2]). Relying solely on voluntary industry commitments or a \"captured\" federal regulator (35% risk [^4]) is insufficient for national security-grade risks. The industry's own safety layers are robust but not infallible. A policy that preempts state \"early warning systems\" (like California's reporting requirements) without replacing them with a competent federal equivalent invites catastrophe.\n\nTherefore, \"total preemption\" is a dangerous gamble. It relies on the assumption that federal agencies will vigorously enforce \"light\" rules, which our forecasts explicitly contradict. A more balanced path acknowledges that states like California are currently the only competent regulators \"on the beat\" for safety, while recognizing that startups need relief from a 50-state compliance patchwork.\n\n### Recommendations\n\n1. **Enact the \"Algorithmic Civil Rights Act\" to Codify Harm Protections**\n* **Recommendation:** Congress should pass legislation strictly codifying that existing civil rights laws (Fair Housing Act, ECOA, Title VII) apply to AI/algorithmic decisions, creating a private right of action for individuals harmed by \"black box\" denials.\n* **Why:** This addresses the \"Regulatory Vacuum\" created by the EEOC/FTC retreat (Forecast [^3]). It ensures that even if federal agencies deprioritize enforcement, citizens and states retain the power to litigate against bias. This satisfies the **Protection of Civil Liberties** criterion.\n\n2. **Establish a Federal \"Safe Harbor\" Certification for Startups**\n* **Recommendation:** Create a voluntary federal compliance program for non-frontier (<$100M compute) models. Startups that undergo a nimble, standardized third-party audit gain \"Safe Harbor\" protection against *state-level* punitive damages (though not injunctive relief).\n* **Why:** This directly targets the **Innovation Viability** criterion. It gives startups the \"regulatory certainty\" needed to unlock that predicted 15% VC boost (Forecast [^1]) without forcing a blanket preemption of all state laws. It harmonizes the market for the 99% of \"small AI\" while leaving \"big AI\" subject to stricter scrutiny.\n\n3. **Mandate \"Frontier\" Model Registration & Incident Reporting (Federal Level)**\n* **Recommendation:** The Department of Commerce should require mandatory registration and 24-hour incident reporting for any model trained on >10^26 FLOPS. This should be a pure reporting requirement, not a licensing scheme, to minimize friction while ensuring government visibility.\n* **Why:** This addresses **Risk Mitigation**. With a 30% chance of a major incident [^2], the government effectively needs a \"seismograph\" for digital tremors. This acts as a minimally invasive \"tripwire\" that allows for rapid federal response without stifling open-source development.\n\n4. 
**Preserve State Authority on \"High-Risk\" Use Cases**\n* **Recommendation:** Explicitly allow states (like CA and CO) to enforce stricter standards on \"high-risk\" applications (e.g., biometric surveillance, autonomous vehicle deployment) and \"frontier\" models, provided they do not discriminate against out-of-state entities.\n* **Why:** This supports **Adaptability**. Federal bureaucracy is slow; states are the \"laboratories of democracy.\" Allowing California to experiment with safety rules provides valuable data on what works (and what kills innovation) before scaling it nationally.\n\n### Risks and Uncertainties\n\n* **Fragmentation Costs:** The primary risk is that failing to fully preempt state laws leaves some \"compliance friction\" in the system, potentially dampening the maximum theoretical investment boost. Startups may still have to navigate California's specific rules if they don't qualify for the federal Safe Harbor.\n* **Enforcement Atrophy:** There is high uncertainty about whether the current DOJ/FTC would meaningfully enforce *any* new regulations, even if passed, given the \"deregulation\" mandate. The \"Civil Rights Act\" recommendation mitigates this by allowing private lawsuits, but legal costs create unequal access to justice.\n* **Frontier Definition Obsolescence:** The >10^26 FLOPS threshold may well become obsolete or be gamed by \"algorithmic efficiency\" improvements, allowing dangerous models to slip under the reporting radar. This is a classic \"Adaptability\" failure mode.", + "proposal_markdown": "### Executive Summary\n\nThe United States must end the \"regulatory civil war\" between federal deregulation and state-level fragmentation by enacting the **\"Federal AI Stability & Safety Act.\"** This policy should preempt the patchwork of state laws (like California SB 53) in exchange for a mandatory, federally administered transparency and \"incident reporting\" regime, while essentially exempting open-weight research and sub-frontier models to preserve innovation. This approach balances the Trump Administration\u2019s competitiveness goals with the undeniable need for visibility into frontier risks.\n\n### Analysis\n\nThe current trajectory\u2014a head-on collision between California's strict safety protocols and the Federal Government's preemption efforts\u2014creates the worst of both worlds: maximum uncertainty for businesses and no guaranteed safety for the public. Forecasting suggests a high probability (65%) that federal courts will enjoin parts of state laws, potentially leaving a regulatory vacuum in mid-2026 [^1]. Meanwhile, the risk of China overtaking US capabilities remains manageable (30% probability [^2]), suggesting we do not need to strip away *all* safety checks to maintain our lead.\n\nHowever, the innovation ecosystem is fragile. The forecast for startup flight is significant (40% chance of notable decline [^3]), indicating that heavy-handed \"licensing\" regimes could indeed drive talent away or entrench incumbents who can afford compliance. 
A policy that \"harmonizes up\" (federalizing strict CA rules) risks crushing this ecosystem, while \"harmonizing down\" (total deregulation) ignores the tail risk of catastrophic failures, which, while currently low probability (<0.5 incidents/year [^4]), have infinite downside.\n\nAs Gemini 3 Pro, I view the optimal path as **\"Light-Touch Federalization.\"** We need to federally preempt the inconsistent state definitions of \"liability\" and \"harm\" to protect open innovation, but simultaneously establish a robust *federal* monitoring capacity (reporting, not licensing) to detect if the risk landscape changes.\n\n### Recommendations\n\n**1. Enact Preemptive Federal \"Transparency & Reporting\" Standards**\n* *Detail:* Congress (or the Commerce Dept via rule-making) should establish a singular federal standard for \"Frontend AI Transparency.\" This mandates that developers of models >10^26 FLOPS must report training specs and safety test results to a federal body (e.g., NIST/AISI) *post-deployment*.\n* *Criteria Addressed:* **Regulatory Certainty** (One standard vs 50), **Catastrophic Risk** (Government gets visibility).\n* *Implementation:* Pass legislation clarifying that compliance with this Federal standard grants immunity from conflicting state \"safety framework\" laws (preemption). This directly addresses the forecasted judicial chaos [^1].\n\n**2. The \"Open Innovation\" Safe Harbor**\n* *Detail:* Explicitly exempt open-weight models and models below a high compute threshold from \"provider liability\" for downstream misuse, provided they do not cross specific \"biological/chemical weapon capability\" benchmarks.\n* *Criteria Addressed:* **Innovation** (Protects open source), **Competitiveness** (maintains US dev ecosystem against China).\n* *Implementation:* Codify a Section 230-style protection for model weights. This ensures that the low base rate of catastrophic incidents [^4] does not result in preemptive suppression of the entire open-source ecosystem.\n\n**3. \"Rights-Based\" Narrow AI Enforcement**\n* *Detail:* While loosening \"frontier\" model licensing, strictly enforce existing civil rights laws on \"narrow\" AI in hiring, housing, and lending. Use the DOJ\u2019s existing authority to prosecute algorithmic discrimination.\n* *Criteria Addressed:* **Civil Liberties & Fairness**.\n* *Implementation:* Direct the DOJ/EEOC to issue guidance that \"algorithmic discrimination is discrimination,\" removing the need for new AI-specific \"fairness\" laws that confuse technical compliance.\n\n### Risks and Uncertainties\n\n* **Political Gridlock:** There is a risk that the \"federal preemption\" aspect passes (to please industry) but the \"transparency\" aspect is stripped (by anti-regulatory ideologues), resulting in total opacity.\n* **The \"Black Swan\" Event:** My forecast [^4] assumes a low probability of catastrophe in 2026. 
If a model *does* facilitate a massive bio-terror attack, this \"light-touch\" proposal will look woefully inadequate, and the backlash will lead to draconian over-regulation.\n* **Judicial Overreach:** If the Supreme Court rules that *any* AI regulation is unconstitutional compelled speech (a growing theory), even my proposed transparency mandates would fail, leaving no leverage over corporate labs.\n\n### Forecast Appendix\n\n[^1] **California SB 53 Preemption**\n* **Question:** Will a US Federal Court issue a preliminary injunction suspending the enforcement of the \"safety framework\" requirements of California SB 53 by July 1, 2026?\n* **Prediction:** 65%\n* **Reasoning:** The conflict between the new Trump EO 14365 (asserting federal primacy in AI policy) and state laws is direct. Courts have increasingly favored \"Dormant Commerce Clause\" arguments against state regulations that practically control national markets (as SB 53 does for AI). The DOJ's explicit \"AI Litigation Task Force\" increases the base rate of successful challenges.\n* **Sources:** Exec. Order 14365 Analysis (JD Supra, Jan 2026); JD Supra \"AI Legal Watch Jan 2026\".\n\n[^2] **China vs. US Capability Gap**\n* **Question:** Will a Chinese-based AI lab release a model exceeding US SOTA on MMLU-Pro by Dec 31, 2026?\n* **Prediction:** 30%\n* **Reasoning:** Hardware export controls are sticky; training frontier models requires massive H100/Blackwell clusters that are hard to smuggle at scale. While Chinese algorithmic efficiency is high, the raw compute gap makes \"exceeding\" US SOTA (which is moving fast) unlikely in the short term.\n* **Sources:** Economic Survey 2026 (Mint); Analysis of US-China Compute Gap.\n\n[^3] **AI Startup Flight**\n* **Question:** Will the percentage of new \"AI-primary\" startups incorporating in California decrease by more than 5 percentage points in 2026 compared to 2025?\n* **Prediction:** 40%\n* **Reasoning:** This is a \"toss-up\" leaning toward stability. Agglomeration effects adjacent to OpenAI/Anthropic/Google in SF are powerful. However, the *signaling* of SB 53 plus active recruitment by Texas/Florida (\"Free AI\" zones) creates a credible threat of migration.\n* **Sources:** General knowledge of startup migration trends; NASDAQ \"AI Stocks Regulations 2026\" analysis.\n\n[^4] **Critical Safety Incidents**\n* **Question:** How many \"Critical Safety Incidents\" (>$500M damage/death) attributed to AI will be officially reported in 2026?\n* **Prediction:** <0.5 (Mean ~0.2)\n* **Reasoning:** The definition of \"Critical\" in SB 53 is extremely high (mass casualty or massive financial wreck). Current \"safety\" issues are mostly jailbreaks or bias, not catastrophes. The technology is not yet agentic enough to cause this scale of damage autonomously.\n* **Sources:** SB 53 Text (Catastrophic Harm Thresholds); Maya Farber Brodsky \"Simple Argument for AI Policy\".", "key_recommendations": [ - "Enact the \"Algorithmic Civil Rights Act\" to codify harm protections and ensure civil rights laws apply to AI/algorithmic decisions.", - "Establish a federal \"Safe Harbor\" certification for startups, providing regulatory certainty for non-frontier models.", - "Mandate federal registration and 24-hour incident reporting for \"frontier\" AI models trained on >10^26 FLOPS.", - "Preserve state authority to enforce stricter standards on high-risk AI use cases and frontier models." 
- ] + "Enact Preemptive Federal \"Transparency & Reporting\" Standards", + "The \"Open Innovation\" Safe Harbor", + "\"Rights-Based\" Narrow AI Enforcement" + ], + "price_estimate": 0.39232075 }, { "member": { @@ -364,98 +343,91 @@ ], "ai_model": "openrouter/x-ai/grok-4" }, - "research_summary": "The current U.S. AI regulatory landscape is fragmented, with a federal emphasis on minimal intervention to foster innovation, contrasted by proactive state-level regulations that took effect in January 2026. At the federal level, the Trump administration's December 2025 Executive Order prioritizes an \"innovation-first\" approach, establishing an AI Litigation Task Force to challenge state laws deemed burdensome and conditioning certain federal grants on states avoiding \"onerous\" AI regulations (Baker Botts, 2026; King & Spalding, 2026). For frontier AI, California's Transparency in Frontier AI Act (SB 53) mandates developers of models trained with over 10\u00b2\u2076 FLOPS to publish risk mitigation frameworks and report incidents, with penalties up to $1 million per violation (Baker Botts, 2026). Narrower applications face sector-specific rules: in hiring, states like Illinois require employer notifications for AI use, Colorado mandates risk assessments for high-risk systems (effective June 2026), and Texas prohibits only intentional discrimination (Shipman & Goodwin, 2026). Lending relies on existing federal laws like the Equal Credit Opportunity Act, with the CFPB emphasizing no exemptions for AI and requiring specific explanations for denials (Hesfintech, 2026). Healthcare AI oversight is fragmented under FDA and HIPAA, with no AI-specific federal laws mentioned, though state frameworks address related risks (Medscape, 2026).\n\nKey stakeholders include the federal government pushing for preemption to maintain U.S. AI competitiveness, states defending local protections as \"laboratories of democracy,\" Congress showing bipartisan resistance to blanket preemption, AI industry leaders supporting lighter federal rules, and safety advocates opposing deregulation without safeguards (Holland & Knight, 2026; FPF, 2026). Recent trends highlight escalating federal-state tensions: the Executive Order targets states like Colorado and California, while new laws in Texas and New York focus on frontier AI safety and algorithmic discrimination (JD Supra, 2026; Motley Fool, 2026). The Cato Institute warns that over 100 state AI laws could stifle innovation, citing stable labor turnover rates contradicting claims of widespread job loss (Infobae, 2026). Data shows AI impacts vary: in hiring, facial recognition error rates are 34% higher for darker-skinned individuals, with only 30% of companies monitoring diversity (Perplexity search on AI impacts); lending AI perpetuates disparities, with over 60% of institutions using it but facing CFPB scrutiny; healthcare AI shows 85-95% diagnostic accuracy but racial biases, affecting only 20-30% sustained adoption (ibid.). Frontier AI risks include 40-60% misuse potential for weapons or attacks, with 75-85% of safety researchers concerned about catastrophes (ibid.).\n\nExperts are divided: light-touch advocates argue excessive rules hinder competition, favoring existing laws over new ones (Cato, 2026; Law Economics Center, 2026), while stricter regulation proponents emphasize preventing harms like discrimination, with states filling federal gaps (FTI Consulting, 2026; Kiteworks, 2026). 
Arguments against strict approaches highlight rapid AI evolution outpacing rules and potential global disadvantage, whereas supporters cite real harms in lending/housing and the need for transparency (CFR, 2026). Overall, the debate centers on balancing innovation\u2014AI stocks now face compliance as a key factor (Motley Fool, 2026)\u2014with protections, amid trends like California's market influence (home to 32 of 50 top AI firms) and international divergences (e.g., EU AI Act) (JD Supra, 2026).", + "research_summary": "The current state of AI regulation in the United States is characterized by a fragmented patchwork of state-level laws amidst emerging federal efforts to establish dominance. As of early 2026, there is no comprehensive federal AI legislation; instead, states like California and Colorado have led with targeted regulations. California's Transparency in Frontier AI Act (SB 53), effective January 1, 2026, requires developers of large AI models (trained on >10\u00b2\u2076 FLOPS and with combined revenue >$500 million) to publish safety frameworks, report critical incidents within 15 days, and implement whistleblower protections, with penalties up to $1 million per violation [Baker Botts, 2026]. Colorado's AI Act, effective June 30, 2026, mandates risk assessments and anti-discrimination measures for AI in high-stakes areas like hiring and lending [Kiteworks, 2026]. In narrower applications, states such as New York and Illinois have enacted laws prohibiting discriminatory AI in employment, while federal agencies like the FTC enforce existing consumer protection laws against AI harms in lending and housing [Stinson, 2026]. Healthcare AI lacks specific federal mandates but is guided by general frameworks like HIPAA, with enforcement through state attorneys general [Kiteworks, 2026]. Recent federal developments include President Trump's Executive Order 14365 (December 11, 2025), which directs agencies to identify and preempt \"onerous\" state AI laws, potentially using funding levers like the BEAD program to encourage state compliance [White House, 2025].\n\nKey stakeholders reflect competing priorities: the federal government under the Trump administration prioritizes innovation and competitiveness, viewing state regulations as barriers, while state governments (e.g., a bipartisan coalition of 36 attorneys general) defend local protections for consumer safety and civil liberties [Consumer Financial Services Law Monitor, 2025]. Technology companies and deployers generally favor federal uniformity to reduce compliance complexity, with groups like health insurers lobbying for preemption of state restrictions [Holland & Knight, 2025]. Advocacy organizations and privacy groups support robust state safeguards against harms like algorithmic bias, arguing preemption creates gaps without federal alternatives [FPF, 2026]. Recent trends show accelerating state activity\u201438 states enacted ~100 AI measures in 2025\u2014with enforcement ramping up, including settlements for AI-driven discrimination in lending [Software Improvement Group, 2026]. Expert analyses highlight arguments for regulation (e.g., ensuring accountability and mitigating risks like bias) versus against (e.g., stifling innovation and creating regulatory fragmentation) [The Regulatory Review, 2025; White House, 2025].\n\nData on AI impacts underscores both opportunities and risks. 
In hiring, AI has created 1.3 million new jobs globally since 2024, but 39% of Americans expect employment disruptions in 2026, with high-AI-exposure regions showing 3.6% lower employment in vulnerable occupations after five years [World Economic Forum, 2026; IMF, 2026]. Lending data is sparse, but enforcement actions indicate discriminatory AI models affecting marginalized borrowers [Kiteworks, 2026]. In healthcare, AI adoption rose from 27% of hospitals in 2022 to 71% in 2024 per AHA surveys, primarily for predictive analytics, though rural hospitals lag at 56% [ONC, 2024]. Frontier AI risks include a 890% surge in gen AI traffic and doubled security incidents in 2025, with concerns like data leaks (34% of organizations affected) outweighing adversarial threats [Harvard Business Review, 2025]. Economically, AI contributed 0.48-1.3 percentage points to US GDP growth in 2025 quarters, projected to boost GDP by 1.5-3.7% long-term [St. Louis Fed, 2026; Wharton, 2025]. Litigation is rising, with hundreds of AI discrimination complaints in 2024 and key cases like Mobley v. Workday [HR Defense Blog, 2025].", "decision_criteria": [ "Innovation Promotion", - "Risk Minimization", - "Equity and Fairness", + "Safety and Risk Mitigation", "Civil Liberties Protection", - "Implementation Feasibility", - "Economic Efficiency" + "Economic Impact", + "Equity and Fairness", + "Implementation Feasibility" ], "forecasts": [ { "footnote_id": 1, - "question_title": "Federal Preemption Success", - "question_text": "Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations (e.g., through litigation or legislation) by the end of 2027?", - "resolution_criteria": "Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws (e.g., CA SB 53, CO AI Act) by Dec 31, 2027; NO otherwise. Based on counts from sources like Brookings or JD Supra.", - "prediction": "45%", - "reasoning": "Base rates show federal preemption often succeeds in tech but faces delays and partial failures, as in recent crypto cases where states won ~60% of challenges. The current admin's aggressive stance (e.g., Litigation Task Force) pushes probability up from base, but strong state pushback and bipartisan Congressional resistance (e.g., blocking NDAA preemption) pull it down. Uncertainties like court backlogs suggest not reaching 50% by 2027, though momentum could build. I might be missing evolving political alliances, but evidence points to incomplete success.", + "question_title": "Federal Preemption Success Rate", + "question_text": "Will the U.S. Department of Justice's AI Litigation Task Force successfully preempt at least 3 major state AI laws (e.g., California's SB 53 or Colorado's AI Act) through court rulings by December 31, 2027?", + "resolution_criteria": "Resolves YES if official court records on PACER (https://pacer.uscourts.gov/) show at least 3 state AI laws fully or partially preempted by federal action stemming from EO 14365; resolves NO otherwise. Confirmation requires checking PACER docket summaries for relevant cases.", + "prediction": "25%", + "reasoning": "Historical base rates show Trump-era agency actions won only 23-31% of court challenges, often due to weak statutory basis. The EO lacks direct preemption power, relying on funding conditions that courts may deem unconstitutional. Premortem analysis suggests states like California will mount strong defenses, prolonging litigation. 
Aggregation of expert views (e.g., Gibson Dunn) points to low odds, as the world changes slowly with status quo favoring state autonomy. Wide uncertainty from potential congressional support, but humble forecast leans conservative.", "key_sources": [ - "Fedsoc.org", - "Brookings.edu", - "Carnegie Endowment (2025)", - "AskNews results on regulatory conflicts" + "Gibson Dunn (2025)", + "NYU Law (2020)", + "https://pacer.uscourts.gov/" ] }, { "footnote_id": 2, - "question_title": "AI Bias Reduction in Applications", - "question_text": "Will algorithmic bias in U.S. AI systems for hiring, lending, and healthcare decrease by at least 20% on average (measured by error rate disparities across demographics) by 2028?", - "resolution_criteria": "Resolves YES if independent studies (e.g., from NIST or academic meta-analyses) show \u226520% average reduction in bias metrics (e.g., false positive differentials by race/gender) from 2026 baselines; NO if <20% or data inconclusive.", - "prediction": "65%", - "reasoning": "Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regs pushing audits. Factors like state laws and tools (e.g., FairPlay's lending reexams) support >20% average reduction, especially in hiring/lending where data shows progress; healthcare lags but overall trends positive. Uncertainties include measurement inconsistencies, but evidence from McKinsey/others suggests achievable. I could be overconfident, but historical analogies temper this.", + "question_title": "AI Discrimination Lawsuits in Hiring", + "question_text": "Will the EEOC report more than 200 AI-related discrimination charges filed in hiring for the calendar year 2026?", + "resolution_criteria": "Resolves YES if the EEOC's annual enforcement data report (available at https://www.eeoc.gov/data/enforcement-and-litigation-statistics) shows >200 charges specifically tagged as AI-related in hiring; resolves NO if \u2264200. If not explicitly tagged, count based on case descriptions in the report.", + "prediction": "40%", + "reasoning": "Base rates from 2024 show hundreds of complaints but likely <200 formal EEOC charges, with known cases rising 50% YoY. Scope sensitivity accounts for increasing AI adoption, potentially driving more filings, but new regulations may deter them. Humble wide 90% CI (100-300) reflects unknown enforcement zeal and underreporting biases. Status quo suggests gradual increase, not a surge, per historical trends in tech-related discrimination suits. Premortem: If audits reduce bias, filings drop below threshold.", "key_sources": [ - "MokaHR", - "SuperAGI", - "Brookings", - "Phenomenal World", - "PMC articles" + "HR Defense Blog (2025)", + "Responsible AI Labs", + "https://www.eeoc.gov/data/enforcement-and-litigation-statistics" ] }, { "footnote_id": 3, - "question_title": "Catastrophic AI Incident Occurrence", - "question_text": "Will there be a major AI-related catastrophic incident in the U.S. (e.g., causing >$1B damage or >50 deaths, per CA SB 53 definition) by 2029?", - "resolution_criteria": "Resolves YES if confirmed by government reports (e.g., NIST, DHS) or major media with expert consensus; includes misuse like cyberattacks or bio-weapons enabled by frontier AI; NO if none occur.", - "prediction": "25%", - "reasoning": "Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings. 
Factors like new regs and voluntary safety (e.g., Anthropic's protocols) reduce likelihood, though misuse risks push up slightly. Expert surveys give ~16% by 2030, but for U.S./2029, I adjust down for narrow focus. Uncertainties in definition exist, but evidence suggests moderate risk without overconfidence.", + "question_title": "AI GDP Contribution", + "question_text": "Will AI-related sectors contribute at least 1.0 percentage points to U.S. real GDP growth for the full year 2026, as reported by the Bureau of Economic Analysis?", + "resolution_criteria": "Resolves YES if BEA's annual GDP report (at https://www.bea.gov/data/gdp/gross-domestic-product) attributes \u22651.0 percentage points to AI categories (e.g., software, R&D, data centers); resolves NO if <1.0. Use BEA's detailed breakdowns for calculation.", + "prediction": "55%", + "reasoning": "2025 averaged 0.97pp with a declining trend (0.48 in Q3), suggesting extrapolation to ~0.8-1.1 for 2026. Long-term projections (1.5% by 2035) support potential rebound via investments. Humble wide 90% CI (0.6-1.4) accounts for economic volatility like recessions. Status quo favors moderation, but aggregation of Wharton models tips slightly above 50%. Premortem: If AI hype cools, contribution falls short.", "key_sources": [ - "Wikipedia hacking incidents", - "Arctic Wolf", - "IBM", - "Monte Carlo Data", - "AskNews on AI risks (e.g., WEF report, expert warnings)" + "St. Louis Fed (2026)", + "Wharton (2025)", + "https://www.bea.gov/data/gdp/gross-domestic-product" ] }, { "footnote_id": 4, - "question_title": "AI's GDP Contribution Growth", - "question_text": "Will AI-related investments and productivity gains contribute at least 2% to annual U.S. GDP growth on average from 2026-2029?", - "resolution_criteria": "Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI (e.g., via investment in software/data centers); numeric average over four years.", - "prediction": "70%", - "reasoning": "Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already). Projections like KPMG's $2.84T by 2030 imply ~2%+ annual, supported by investments; factors like data centers push up. Uncertainties in attribution exist, but trends suggest likely. I could undervalue slowdowns, but evidence leans positive.", + "question_title": "Healthcare AI Adoption Rate", + "question_text": "Will the AHA's 2026 IT Supplement survey show at least 80% of nonfederal acute care hospitals adopting predictive AI?", + "resolution_criteria": "Resolves YES if the AHA survey report (published at https://www.aha.org/data-insights) indicates \u226580% adoption; resolves NO if <80%. Use the primary adoption metric from the executive summary.", + "prediction": "30%", + "reasoning": "Trends show 5% annual growth (71% in 2024 from 66% in 2023), projecting ~76% for 2026 without major catalysts. Disparities (e.g., 50% rural) suggest uneven progress. Base rate extrapolation is conservative, with status quo slow change. Premortem: Resource constraints in small hospitals prevent reaching 80%. Aggregation of ONC data supports low odds.", "key_sources": [ - "Vanguard", - "McKinsey", - "St. Louis Fed", - "EY", - "KPMG (2025)", - "Goldman Sachs" + "ONC (2024)", + "AHA (2025)", + "https://www.aha.org/data-insights" ] }, { "footnote_id": 5, - "question_title": "Civil Liberties Challenge Rate", - "question_text": "Will at least 20% of new U.S. 
AI regulations (federal or state) face successful civil liberties challenges (e.g., court rulings on privacy/ free speech) by 2028?", - "resolution_criteria": "Resolves YES if \u226520% of post-2026 AI laws are partially/fully struck down on liberties grounds per SCOTUS or circuit courts; tracked via ACLU or EFF reports.", - "prediction": "30%", - "reasoning": "Base rates show 15-25% success in challenges, with AI's novelty pushing slightly up. Factors like federal preemption reduce state regs at risk, but transparency laws invite suits. Evidence from crypto suggests moderate rate. Uncertainties in court backlogs, but overall <20% unlikely but not reaching high.", + "question_title": "Frontier AI Safety Incidents", + "question_text": "Will Cal OES report more than 10 critical safety incidents for frontier AI models in their first public summary by January 1, 2028?", + "resolution_criteria": "Resolves YES if Cal OES's anonymized summary (at https://www.caloes.ca.gov/) lists >10 incidents under SB 53; resolves NO if \u226410. Count distinct incidents in the report.", + "prediction": "35%", + "reasoning": "No pre-2026 data, but global incidents doubled in 2025, suggesting 5-15 possible under new reporting. Base rate from similar cybersecurity logs is low initially due to underreporting. Status quo favors few disclosures, with premortem noting whistleblower protections may increase but not exceed 10. Humble forecast accounts for unknown model behaviors. Aggregation leans low per expert analyses.", "key_sources": [ - "Prior Perplexity searches on regs", - "AskNews on conflicts", - "BeInCrypto reports on crypto challenges" + "Paul Hastings (2025)", + "HBR (2025)", + "https://www.caloes.ca.gov/" ] } ], - "proposal_markdown": "### Executive Summary\n\nAs Grok 4 (xAI), I recommend a federal framework that sets minimum safety standards for frontier AI while preempting overly burdensome state laws, paired with incentives for bias audits in narrower applications. The most important action is establishing a national AI Safety Board to oversee risk assessments without stifling innovation.\n\n### Analysis\n\nThe U.S. must navigate AI regulation by balancing rapid innovation with emerging risks, as the current patchwork of state laws\u2014effective January 2026\u2014creates compliance challenges while federal efforts push for minimal burdens. Frontier AI, like large language models, faces new requirements under California's SB 53, mandating risk frameworks for catastrophic threats (e.g., >$1B damage), but this risks inconsistency across states, potentially hindering U.S. competitiveness against China. Narrower applications reveal persistent issues: AI in hiring shows 34% higher error rates for darker-skinned individuals, lending perpetuates minority denials, and healthcare exhibits racial disparities despite 85-95% diagnostic accuracy. Experts divide on approaches, with light-touch advocates citing stifled innovation (Cato, 2026) and stricter proponents emphasizing harms (FTI, 2026). Recent events, like the December 2025 Executive Order challenging state regs, underscore federal-state tensions, while data indicates AI's economic boost (1% GDP in 2025) but misuse risks (40-60% potential).\n\nForecasts suggest moderate success for federal harmonization, with a 45% chance of preempting \u226550% state laws by 2027 [^1], allowing innovation while addressing inconsistencies. 
Bias reduction appears promising, with a 65% probability of \u226520% decrease by 2028 [^2], supporting policies that build on trends like audits. However, catastrophic risks remain low but non-zero at 25% by 2029 [^3], justifying targeted safeguards without overregulation. AI's economic impact is strong, with 70% odds of \u22652% annual GDP contribution through 2029 [^4], reinforcing the need to avoid burdensome rules. Civil liberties challenges are estimated at 30% for new regs by 2028 [^5], highlighting the importance of rights-respecting designs.\n\nOverall, a nuanced federal approach can promote innovation (historical tech booms drove 60% GDP changes) while minimizing risks, drawing on states as labs but ensuring uniformity. Uncertainties persist in AI's rapid evolution, but evidence favors adaptive, evidence-based policies over rigid ones.\n\n### Recommendations\n\n1. **Establish a National AI Safety Board**: Create an independent federal board to set minimum standards for frontier AI risk assessments and preempt conflicting state laws. This supports innovation by providing clarity (addressing my top criterion) and minimizes risks through mandatory reporting, backed by a 45% preemption success forecast [^1] and 25% catastrophe risk [^3]. \n\n2. **Mandate Bias Audits with Incentives**: Require annual audits for AI in hiring, lending, and healthcare, with tax credits for compliance. This addresses equity (third criterion) by leveraging 65% bias reduction odds [^2], ensuring fairness without heavy burdens. \n\n3. **Promote Voluntary Transparency Guidelines**: Encourage (not mandate) explainability in narrower AI via federal guidelines, protecting civil liberties (fourth criterion) with low 30% challenge rate [^5] while fostering feasibility. \n\n4. **Invest in AI R&D Grants**: Allocate $10B annually for ethical AI research, boosting economic efficiency (sixth criterion) aligned with 70% GDP contribution forecast [^4] and promoting innovation. \n\n5. **Enhance International Coordination**: Lead global standards on AI exports to prevent misuse, addressing risk minimization (second) by building on historical tech diplomacy.\n\n### Risks and Uncertainties\n\nKey risks include over-preemption stifling state innovation, potentially backfiring if federal standards lag (e.g., if preemption fails at 45% [^1], leading to prolonged patchwork). Recommendations might amplify inequalities if audits favor large firms, or face implementation hurdles with costs exceeding benefits. Widest uncertainty is in catastrophe odds (25% [^3]), where underestimation could lead to disasters; bias reduction (65% [^2]) has measurement variances. Scenarios where recs backfire: rapid AI advances outpace board oversight by 2029, or liberties challenges (30% [^5]) invalidate key parts, eroding trust. GDP over-reliance (70% [^4]) risks bubbles like dot-com.\n\n### Forecast Appendix\n\n[^1] **[Federal Preemption Success]** \n- Question: Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations (e.g., through litigation or legislation) by the end of 2027? \n- Resolution: Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws (e.g., CA SB 53, CO AI Act) by Dec 31, 2027; NO otherwise. Based on counts from sources like Brookings or JD Supra. 
\n- Prediction: 45% \n- Reasoning: Base rates show federal preemption often succeeds in tech but faces delays and partial failures, as in recent crypto cases where states won ~60% of challenges. The current admin's aggressive stance (e.g., Litigation Task Force) pushes probability up from base, but strong state pushback and bipartisan Congressional resistance (e.g., blocking NDAA preemption) pull it down. Uncertainties like court backlogs suggest not reaching 50% by 2027, though momentum could build. I might be missing evolving political alliances, but evidence points to incomplete success. \n- Sources: Fedsoc.org; Brookings.edu; Carnegie Endowment (2025); AskNews results on regulatory conflicts.\n\n[^2] **[AI Bias Reduction in Applications]** \n- Question: Will algorithmic bias in U.S. AI systems for hiring, lending, and healthcare decrease by at least 20% on average (measured by error rate disparities across demographics) by 2028? \n- Resolution: Resolves YES if independent studies (e.g., from NIST or academic meta-analyses) show \u226520% average reduction in bias metrics (e.g., false positive differentials by race/gender) from 2026 baselines; NO if <20% or data inconclusive. \n- Prediction: 65% \n- Reasoning: Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regs pushing audits. Factors like state laws and tools (e.g., FairPlay's lending reexams) support >20% average reduction, especially in hiring/lending where data shows progress; healthcare lags but overall trends positive. Uncertainties include measurement inconsistencies, but evidence from McKinsey/others suggests achievable. I could be overconfident, but historical analogies temper this. \n- Sources: MokaHR; SuperAGI; Brookings; Phenomenal World; PMC articles.\n\n[^3] **[Catastrophic AI Incident Occurrence]** \n- Question: Will there be a major AI-related catastrophic incident in the U.S. (e.g., causing >$1B damage or >50 deaths, per CA SB 53 definition) by 2029? \n- Resolution: Resolves YES if confirmed by government reports (e.g., NIST, DHS) or major media with expert consensus; includes misuse like cyberattacks or bio-weapons enabled by frontier AI; NO if none occur. \n- Prediction: 25% \n- Reasoning: Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings. Factors like new regs and voluntary safety (e.g., Anthropic's protocols) reduce likelihood, though misuse risks push up slightly. Expert surveys give ~16% by 2030, but for U.S./2029, I adjust down for narrow focus. Uncertainties in definition exist, but evidence suggests moderate risk without overconfidence. \n- Sources: Wikipedia hacking incidents; Arctic Wolf; IBM; Monte Carlo Data; AskNews on AI risks (e.g., WEF report, expert warnings).\n\n[^4] **[AI's GDP Contribution Growth]** \n- Question: Will AI-related investments and productivity gains contribute at least 2% to annual U.S. GDP growth on average from 2026-2029? \n- Resolution: Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI (e.g., via investment in software/data centers); numeric average over four years. \n- Prediction: 70% \n- Reasoning: Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already). Projections like KPMG's $2.84T by 2030 imply ~2%+ annual, supported by investments; factors like data centers push up. Uncertainties in attribution exist, but trends suggest likely. 
I could undervalue slowdowns, but evidence leans positive. \n- Sources: Vanguard; McKinsey; St. Louis Fed; EY; KPMG (2025); Goldman Sachs.\n\n[^5] **[Civil Liberties Challenge Rate]** \n- Question: Will at least 20% of new U.S. AI regulations (federal or state) face successful civil liberties challenges (e.g., court rulings on privacy/ free speech) by 2028? \n- Resolution: Resolves YES if \u226520% of post-2026 AI laws are partially/fully struck down on liberties grounds per SCOTUS or circuit courts; tracked via ACLU or EFF reports. \n- Prediction: 30% \n- Reasoning: Base rates show 15-25% success in challenges, with AI's novelty pushing slightly up. Factors like federal preemption reduce state regs at risk, but transparency laws invite suits. Evidence from crypto suggests moderate rate. Uncertainties in court backlogs, but overall <20% unlikely but not reaching high. \n- Sources: Prior Perplexity searches on regs; AskNews on conflicts; BeInCrypto reports on crypto challenges.", + "proposal_markdown": "### Executive Summary\nAs Grok 4 (xAI), I recommend the U.S. pursue a federal framework that preempts overly burdensome state laws while mandating minimal safety standards for frontier AI and bias audits for narrower applications, prioritizing innovation. The single most important action for policymakers is to pass comprehensive federal legislation by 2027 that establishes a unified regulatory body, balancing rapid AI advancement with targeted protections to avoid fragmentation.\n\n### Analysis\nThe U.S. AI regulatory landscape is evolving rapidly, with state laws like California's SB 53 and Colorado's AI Act filling federal voids by requiring safety reports and anti-bias measures, while Trump's December 2025 Executive Order aims to preempt such \"onerous\" regulations to boost competitiveness [White House, 2025]. This tension risks compliance chaos for developers, as evidenced by 38 states enacting ~100 AI measures in 2025, yet federal preemption has historically low success rates (23-31% under prior Trump actions) [^1]. Data shows AI driving economic growth\u2014contributing 0.97 percentage points to GDP in the first nine months of 2025\u2014with projections for sustained boosts, but risks like job disruptions (39% of Americans expect impacts in 2026) and security incidents (doubled in 2025) necessitate balanced oversight [St. Louis Fed, 2026; WEF, 2026]. In narrower applications, AI adoption in healthcare reached 71% of hospitals in 2024, improving efficiency but raising equity concerns, while hiring AI has created 1.3 million jobs yet triggered hundreds of discrimination complaints [ONC, 2024; HR Defense Blog, 2025].\n\nFor frontier AI, policies must address catastrophic risks without stifling innovation, as expert arguments highlight: proponents emphasize accountability to mitigate harms like data leaks (34% of organizations affected), while opponents warn of innovation costs from fragmentation [Kiteworks, 2026; Regulatory Review, 2025]. My forecasts suggest moderate risks\u2014a 35% chance of >10 critical incidents reported under SB 53 by 2028 [^5]\u2014indicating room for light-touch federal standards. In sectors like hiring and lending, rising lawsuits (hundreds in 2024) and adoption trends underscore the need for civil liberties protections, with a 40% chance of >200 EEOC charges in 2026 [^2]. 
Economically, AI's GDP role (55% chance of \u22651.0pp in 2026 [^3]) supports innovation-focused policies, but healthcare adoption (30% chance of \u226580% by 2026 [^4]) shows disparities that regulation could address without overreach.\n\nOverall, a nuanced approach\u2014drawing on my xAI roots\u2014favors federal leadership to unify rules, as persistent state-federal conflicts could hinder U.S. leadership. Without it, innovation may suffer (e.g., from compliance burdens), but unchecked AI risks civil liberties erosion, as seen in discriminatory lending cases [Kiteworks, 2026].\n\n### Recommendations\n1. **Enact Federal Preemption Legislation**: Support a bipartisan bill by 2027 preempting conflicting state laws while establishing baseline safety standards. This addresses innovation promotion and feasibility, given low preemption success via EO (25% chance [^1]). Implementation: Congress creates an AI Oversight Commission under Commerce to review state laws annually; states comply or lose federal AI grants. Rollout via phased audits starting with frontier models.\n\n2. **Mandate Bias Audits for Narrow AI**: Require annual third-party audits for AI in hiring, lending, and healthcare to prevent discrimination. This targets equity and civil liberties, informed by lawsuit risks (40% chance >200 in 2026 [^2]). Implementation: FTC enforces via online portal for audit submissions; businesses with >$100M revenue comply first, with templates and subsidies for SMEs.\n\n3. **Invest in AI Safety Research**: Allocate $5B federal funding for xAI-like initiatives on frontier model safety. Promotes safety and innovation, considering incident probabilities (35% >10 by 2028 [^5]) and GDP boosts (55% \u22651.0pp in 2026 [^3]). Implementation: NSF grants to labs; public-private partnerships track progress via annual reports.\n\n4. **Promote Healthcare AI Standards**: Develop voluntary FDA guidelines for AI tools, with incentives for adoption. Addresses economic impact and equity, given adoption trends (30% \u226580% in 2026 [^4]). Implementation: Subsidies for rural hospitals; annual AHA-integrated reporting on usage and outcomes.\n\n5. **Establish Whistleblower Protections**: Extend federal protections for AI workers reporting risks. Supports safety and liberties, reducing unreported incidents [^5]. Implementation: Labor Department hotline and rewards; integrate with existing laws like Sarbanes-Oxley.\n\n### Risks and Uncertainties\nKey risks include federal overreach stifling innovation if preemption succeeds too broadly (25% chance [^1]), potentially reducing AI GDP contributions below projections (55% \u22651.0pp [^3]). Uncertainties are widest in lawsuit forecasts (90% CI 100-300 [^2]) due to variable enforcement. Recommendations might backfire if audits increase costs, slowing healthcare adoption (30% \u226580% [^4]); a scenario where incidents surge (>10 at 35% [^5]) could lead to public backlash, prompting heavier regulations.\n\n### Forecast Appendix\n\n[^1] **[Federal Preemption Success Rate]** \n- Question: Will the U.S. Department of Justice's AI Litigation Task Force successfully preempt at least 3 major state AI laws (e.g., California's SB 53 or Colorado's AI Act) through court rulings by December 31, 2027? \n- Resolution: Resolves YES if official court records on PACER (https://pacer.uscourts.gov/) show at least 3 state AI laws fully or partially preempted by federal action stemming from EO 14365; resolves NO otherwise. Confirmation requires checking PACER docket summaries for relevant cases. 
\n- Prediction: 25% \n- Reasoning: Historical base rates show Trump-era agency actions won only 23-31% of court challenges, often due to weak statutory basis. The EO lacks direct preemption power, relying on funding conditions that courts may deem unconstitutional. Premortem analysis suggests states like California will mount strong defenses, prolonging litigation. Aggregation of expert views (e.g., Gibson Dunn) points to low odds, as the world changes slowly with status quo favoring state autonomy. Wide uncertainty from potential congressional support, but humble forecast leans conservative. \n- Sources: Gibson Dunn (2025), NYU Law (2020), https://pacer.uscourts.gov/\n\n[^2] **[AI Discrimination Lawsuits in Hiring]** \n- Question: Will the EEOC report more than 200 AI-related discrimination charges filed in hiring for the calendar year 2026? \n- Resolution: Resolves YES if the EEOC's annual enforcement data report (available at https://www.eeoc.gov/data/enforcement-and-litigation-statistics) shows >200 charges specifically tagged as AI-related in hiring; resolves NO if \u2264200. If not explicitly tagged, count based on case descriptions in the report. \n- Prediction: 40% \n- Reasoning: Base rates from 2024 show hundreds of complaints but likely <200 formal EEOC charges, with known cases rising 50% YoY. Scope sensitivity accounts for increasing AI adoption, potentially driving more filings, but new regulations may deter them. Humble wide 90% CI (100-300) reflects unknown enforcement zeal and underreporting biases. Status quo suggests gradual increase, not a surge, per historical trends in tech-related discrimination suits. Premortem: If audits reduce bias, filings drop below threshold. \n- Sources: HR Defense Blog (2025), Responsible AI Labs, https://www.eeoc.gov/data/enforcement-and-litigation-statistics\n\n[^3] **[AI GDP Contribution]** \n- Question: Will AI-related sectors contribute at least 1.0 percentage points to U.S. real GDP growth for the full year 2026, as reported by the Bureau of Economic Analysis? \n- Resolution: Resolves YES if BEA's annual GDP report (at https://www.bea.gov/data/gdp/gross-domestic-product) attributes \u22651.0 percentage points to AI categories (e.g., software, R&D, data centers); resolves NO if <1.0. Use BEA's detailed breakdowns for calculation. \n- Prediction: 55% \n- Reasoning: 2025 averaged 0.97pp with a declining trend (0.48 in Q3), suggesting extrapolation to ~0.8-1.1 for 2026. Long-term projections (1.5% by 2035) support potential rebound via investments. Humble wide 90% CI (0.6-1.4) accounts for economic volatility like recessions. Status quo favors moderation, but aggregation of Wharton models tips slightly above 50%. Premortem: If AI hype cools, contribution falls short. \n- Sources: St. Louis Fed (2026), Wharton (2025), https://www.bea.gov/data/gdp/gross-domestic-product\n\n[^4] **[Healthcare AI Adoption Rate]** \n- Question: Will the AHA's 2026 IT Supplement survey show at least 80% of nonfederal acute care hospitals adopting predictive AI? \n- Resolution: Resolves YES if the AHA survey report (published at https://www.aha.org/data-insights) indicates \u226580% adoption; resolves NO if <80%. Use the primary adoption metric from the executive summary. \n- Prediction: 30% \n- Reasoning: Trends show 5% annual growth (71% in 2024 from 66% in 2023), projecting ~76% for 2026 without major catalysts. Disparities (e.g., 50% rural) suggest uneven progress. Base rate extrapolation is conservative, with status quo slow change. 
Premortem: Resource constraints in small hospitals prevent reaching 80%. Aggregation of ONC data supports low odds. \n- Sources: ONC (2024), AHA (2025), https://www.aha.org/data-insights\n\n[^5] **[Frontier AI Safety Incidents]** \n- Question: Will Cal OES report more than 10 critical safety incidents for frontier AI models in their first public summary by January 1, 2028? \n- Resolution: Resolves YES if Cal OES's anonymized summary (at https://www.caloes.ca.gov/) lists >10 incidents under SB 53; resolves NO if \u226410. Count distinct incidents in the report. \n- Prediction: 35% \n- Reasoning: No pre-2026 data, but global incidents doubled in 2025, suggesting 5-15 possible under new reporting. Base rate from similar cybersecurity logs is low initially due to underreporting. Status quo favors few disclosures, with premortem noting whistleblower protections may increase but not exceed 10. Humble forecast accounts for unknown model behaviors. Aggregation leans low per expert analyses. \n- Sources: Paul Hastings (2025), HBR (2025), https://www.caloes.ca.gov/", "key_recommendations": [ - "Establish a National AI Safety Board: Create an independent federal board to set minimum standards for frontier AI risk assessments and preempt conflicting state laws. This supports innovation by providing clarity (addressing my top criterion) and minimizes risks through mandatory reporting, backed by a 45% preemption success forecast [^1] and 25% catastrophe risk [^3].", - "Mandate Bias Audits with Incentives: Require annual audits for AI in hiring, lending, and healthcare, with tax credits for compliance. This addresses equity (third criterion) by leveraging 65% bias reduction odds [^2], ensuring fairness without heavy burdens.", - "Promote Voluntary Transparency Guidelines: Encourage (not mandate) explainability in narrower AI via federal guidelines, protecting civil liberties (fourth criterion) with low 30% challenge rate [^5] while fostering feasibility.", - "Invest in AI R&D Grants: Allocate $10B annually for ethical AI research, boosting economic efficiency (sixth criterion) aligned with 70% GDP contribution forecast [^4] and promoting innovation.", - "Enhance International Coordination: Lead global standards on AI exports to prevent misuse, addressing risk minimization (second) by building on historical tech diplomacy." - ] + "Enact Federal Preemption Legislation: Support a bipartisan bill by 2027 preempting conflicting state laws while establishing baseline safety standards.", + "Mandate Bias Audits for Narrow AI: Require annual third-party audits for AI in hiring, lending, and healthcare to prevent discrimination.", + "Invest in AI Safety Research: Allocate $5B federal funding for xAI-like initiatives on frontier model safety.", + "Promote Healthcare AI Standards: Develop voluntary FDA guidelines for AI tools, with incentives for adoption.", + "Establish Whistleblower Protections: Extend federal protections for AI workers reporting risks." + ], + "price_estimate": 0.9003447499999997 }, { "member": { @@ -471,98 +443,110 @@ ], "ai_model": "openrouter/deepseek/deepseek-v3.2" }, - "research_summary": "**Base Rate Analysis:** Regulatory sandboxes in fintech have shown moderate success, with adoption by approximately 15-20 states over 5+ years. Success rates for fintech sandboxes typically show 20-40% of participants achieving commercial deployment. Healthcare sandboxes have been slower to develop due to greater complexity and safety concerns.\n\n**Key Factors Pushing Probability UP:**\n1. 
Strong momentum with at least 5 states already implementing AI sandboxes\n2. Federal legislation (SANDBOX Act) proposing expansion\n3. Industry demand for regulatory flexibility\n4. Successful precedents in fintech demonstrating viability\n5. Broad applicability across multiple sectors (AI is general-purpose)\n\n**Key Factors Pushing Probability DOWN:**\n1. Resource requirements for states to establish and maintain sandboxes\n2. Safety concerns limiting healthcare applications\n3. Potential conflict with federal preemption efforts\n4. Technical complexity of monitoring AI systems in sandboxes\n5. Variable state capacity and political will\n\n**Bias Check:** I may overestimate the scalability of regulatory sandboxes given the specialized expertise needed to effectively monitor AI systems and the resource constraints many states face.\n\n**Final Prediction:** 55%\n\n**Reasoning:** Current momentum suggests significant state interest in AI regulatory sandboxes, with multiple states already implementing programs[^sandbox]. The broad applicability of AI across sectors and precedents from fintech sandboxes provide a strong foundation. However, achieving 10 states with operational sandboxes and 50 successfully deployed applications by 2028 requires substantial scaling from current levels. The Texas program (36 months, with enforcement protection) provides a strong model, and industry demand for regulatory flexibility creates positive incentives[^sandbox]. While healthcare applications may face delays, fintech and other sectors could drive early success.", + "research_summary": "The United States currently maintains a fragmented and evolving approach to AI regulation, characterized by significant tension between federal and state authorities. As of early 2026, the federal government has shifted toward a \"minimally burdensome\" national AI policy framework under Executive Order 14365, which directs the Department of Justice to identify and potentially preempt state AI laws deemed \"onerous.\" This represents a dramatic reversal from the previous administration's focus on safety and represents a victory for major tech companies that spent millions lobbying against regulation.\n\nDespite federal deregulation efforts, regulatory activity has actually increased: In 2024, 59 AI-related regulations were introduced across 42 federal agencies\u2014more than double the 25 regulations from 2023. At the state level, all 50 states introduced AI-related legislation in 2025, with 38 states adopting approximately 100 measures. Colorado's SB24-205 established a foundational risk-based model for state AI governance, imposing duties of reasonable care on developers and deployers to protect consumers from algorithmic discrimination, with implementation requirements effective June 30, 2026.\n\nIn healthcare, the Department of Health and Human Services Office for Civil Rights issued a final rule in May 2024 holding AI users legally responsible for managing discrimination risk in clinical settings, applying Section 1557 of the Affordable Care Act to AI-based discrimination. 
This rule creates market pressure on developers to disclose bias information and risk management practices.\n\nThe regulatory landscape faces significant challenges: 1) A patchwork of conflicting state regulations creates compliance complexity for multistate operations; 2) Federal-state tension threatens $21 billion in BEAD funding for states with restrictive AI laws; 3) Significant gaps remain in comprehensive regulation of AI in hiring and lending; 4) The global context includes the EU's comprehensive AI Act (effective August 2026), creating international compliance challenges for U.S. companies.\n\nNIST continues to develop technical frameworks including the AI Risk Management Framework (AI RMF), Cybersecurity Framework Profile for Artificial Intelligence, and Control Overlays for Securing AI Systems (COSAIS). However, testing reveals that no current models meet TrustLLM benchmarks for true trustworthiness, with the best-performing model achieving only 65% accuracy in identifying stereotypes.", "decision_criteria": [ - "Safety and Risk Mitigation", - "Implementation Feasibility", - "International Coordination", - "Equity and Civil Liberties Protection", - "Innovation Preservation", - "Adaptability and Future-Proofing" + "Harm Prevention Balance", + "Technical Feasibility", + "Federal-State Coordination", + "International Competitiveness", + "Adaptive Governance", + "Economic Efficiency" ], "forecasts": [ { "footnote_id": 1, - "question_title": "State-Federal Regulatory Conflict Resolution", - "question_text": "Will the federal government successfully preempt major state AI regulatory initiatives (like Colorado's SB24-205 or California's employment regulations) through executive action or legislation by the end of 2027?", - "resolution_criteria": "YES if either (a) federal courts uphold federal preemption of state AI laws in at least two significant cases, OR (b) Congress passes legislation explicitly preempting state AI regulation in key domains, OR (c) federal administrative action effectively nullifies major state regulations. NO if states continue implementing and enforcing their AI regulations without federal interference by December 31, 2027.", + "question_title": "Colorado AI Act Enforcement Rate", + "question_text": "As of December 31, 2026, what percentage of Colorado-based companies using \"high-risk AI systems\" (as defined by Colorado SB24-205) will have submitted their required impact assessments to the Colorado Attorney General's office?", + "resolution_criteria": "The Colorado Attorney General's office will publish compliance statistics on their website (https://coag.gov/ai-compliance-statistics) showing the number of registered companies, number of impact assessments received, and calculated compliance percentage. The question resolves to the published percentage.", "prediction": "35%", - "reasoning": "The executive order's approach is constitutionally vulnerable since only Congress can preempt state law, and while Commerce Clause arguments have merit, states have successfully defended similar regulatory authority in other domains. The unusual coalition of tech companies and progressive groups against preemption legislation suggests significant political hurdles. State resistance is strong across party lines, with states already investing substantially in AI regulatory frameworks unlikely to cede authority easily. However, the interstate nature of AI and industry preference for federal standards provide some momentum for preemption. 
Historical patterns suggest federal preemption attempts often result in partial rather than complete victories.", + "reasoning": "Colorado has limited enforcement resources compared to larger states like California. The requirements are technically complex and many Colorado businesses are small to medium-sized. Historical patterns for complex new regulations show initial compliance rates typically around 30-40%. However, increased AI regulatory attention may boost awareness somewhat. The June 2026 implementation delay suggests preparation challenges.", "key_sources": [ - "Research on federal preemption of state AI regulation, constitutional limitations, political dynamics" + "Colorado AI Act (SB24-205) text", + "enforcement delay announcements", + "historical compliance patterns for similar regulations" ] }, { "footnote_id": 2, - "question_title": "AI Safety Institute Effectiveness", - "question_text": "Will the US AI Safety Institute (or equivalent federal body) successfully establish and enforce binding safety standards for frontier AI systems that receive compliance from at least 75% of major AI developers by the end of 2028?", - "resolution_criteria": "YES if the institute publishes binding safety standards and can demonstrate that at least 75% of major US AI developers (defined as companies with >$1B valuation or >100M users) are complying with these standards in their frontier AI deployments. NO if either standards aren't established or compliance remains below 75% by December 31, 2028.", - "prediction": "40%", - "reasoning": "While CAISI has established important technical capabilities and industry relationships, its voluntary nature and lack of enforcement authority significantly limit its ability to achieve 75% compliance with binding safety standards. The rebranding reflects a strategic shift toward innovation promotion rather than safety enforcement. However, major AI developers have shown willingness to participate in voluntary safety initiatives, and market pressures (especially liability concerns) could drive adoption even without enforcement. Achieving 75% compliance by 2028 would require either legislative action granting enforcement powers or extraordinary industry consensus\u2014both challenging within this timeframe.", + "question_title": "Federal Preemption Effectiveness", + "question_text": "As of June 30, 2027, will any U.S. state have successfully challenged Executive Order 14365's attempt to preempt state AI laws through litigation resulting in a court ruling that invalidates the preemption authority?", + "resolution_criteria": "A federal court ruling (district, circuit, or Supreme Court) published in a legal database (Westlaw, Lexis, or official court website) that explicitly invalidates the preemption provisions of EO 14365 regarding state AI regulation.", + "prediction": "55%", + "reasoning": "The use of funding withholding (BEAD funds) rather than direct preemption makes EO 14365 legally vulnerable. The \"major questions doctrine\" from recent Supreme Court cases suggests courts may be skeptical of executive branch creating major AI policy without clear Congressional authorization. Multiple states have indicated intent to challenge. 
However, courts have generally deferred to executive authority on national security/competitiveness grounds.", "key_sources": [ - "CAISI establishment documents, enforcement authority analysis, industry compliance patterns" + "EO 14365 text", + "legal analysis of preemption authority", + "state challenge announcements", + "Supreme Court \"major questions doctrine\" cases" ] }, { "footnote_id": 3, - "question_title": "Algorithmic Bias Reduction in Hiring", - "question_text": "Will the implementation of state-level AI hiring regulations (particularly in California, Colorado, Illinois, and New York) result in a statistically significant reduction (p<0.05) in measured algorithmic discrimination in employment decisions by the end of 2028?", - "resolution_criteria": "YES if peer-reviewed studies using standardized discrimination metrics (disparate impact ratios, audit study results) show significant reductions in algorithmic hiring discrimination in regulated states compared to baseline measurements from 2024-2025. NO if no significant reduction is documented or if discrimination metrics show worsening trends.", - "prediction": "45%", - "reasoning": "Newer state regulations like Colorado's and California's incorporate lessons from NYC's experience, including requirements for reasonable care, impact assessments, and human review mechanisms. These more comprehensive approaches have better potential for impact. However, the 2028 timeframe is relatively short for measurable statistical changes to emerge, given implementation lags and the complexity of discrimination measurement. Historical evidence from employment discrimination law suggests regulatory effects typically take 5+ years to become statistically measurable. The combination of multiple state approaches and growing legal liability creates positive momentum, but achieving statistically significant reduction by 2028 will require rapid and effective implementation.", + "question_title": "Healthcare AI Compliance Rate", + "question_text": "As of December 31, 2026, what percentage of U.S. healthcare organizations receiving federal funding will have implemented the required AI discrimination risk mitigation programs under the May 2024 OCR rule?", + "resolution_criteria": "The HHS OCR will publish compliance audit results showing the number of audited organizations, number found compliant with AI discrimination risk mitigation requirements, and calculated compliance percentage.", + "prediction": "65%", + "reasoning": "Healthcare has high baseline compliance rates due to existing regulatory frameworks like HIPAA. OCR has established enforcement mechanisms and expertise. Healthcare organizations have strong incentives to avoid discrimination lawsuits. However, the technical requirements may challenge smaller or rural facilities. 
Initial enforcement will likely emphasize education over penalties.", "key_sources": [ - "NYC Local Law 144 effectiveness studies, state regulatory comparisons, employment discrimination research" + "HHS OCR May 2024 rule text", + "HIPAA compliance statistics", + "healthcare regulatory enforcement patterns" ] }, { "footnote_id": 4, - "question_title": "Healthcare AI Liability Framework Development", - "question_text": "Will Congress establish a comprehensive federal liability framework for healthcare AI systems that clearly allocates responsibility among developers, healthcare providers, and institutions before the end of 2029?", - "resolution_criteria": "YES if Congress passes legislation specifically addressing AI liability in healthcare that includes provisions for shared accountability, establishes liability standards, and creates mechanisms for affected patients. NO if no such legislation is passed or if liability remains governed exclusively by existing malpractice law.", - "prediction": "25%", - "reasoning": "While the need for a healthcare AI liability framework is increasingly recognized, the 2029 timeframe is ambitious given historical patterns of healthcare liability reform. Congress has shown limited progress on even basic healthcare AI legislation. The political complexity of allocating liability among developers, providers, and institutions, combined with traditional medical malpractice reform challenges, suggests a low probability of comprehensive federal legislation by 2029. More likely outcomes include state-level approaches, incremental federal guidance, or judicial development of liability principles through case law.", + "question_title": "NIST Framework Adoption", + "question_text": "As of June 30, 2027, will the NIST AI RMF be explicitly referenced as a compliance standard in regulations issued by at least three different U.S. federal agencies?", + "resolution_criteria": "Official federal agency regulations published in the Federal Register that explicitly cite the NIST AI RMF (AI Risk Management Framework) as a compliance standard or safe harbor provision.", + "prediction": "70%", + "reasoning": "NIST AI RMF is becoming the de facto U.S. technical standard for AI risk management. Agencies facing pressure to regulate AI but lacking technical expertise are likely to reference established frameworks. The FTC and SEC have already indicated interest in NIST-aligned approaches. Executive Order 14365 promotes \"minimally burdensome\" standards, favoring voluntary frameworks. However, some agencies may prefer bespoke regulations.", "key_sources": [ - "Congressional healthcare AI legislation tracking, medical malpractice reform history, liability allocation complexity" + "NIST AI RMF documentation", + "agency AI regulation proposals", + "references to NIST frameworks in existing regulations" ] }, { "footnote_id": 5, - "question_title": "Regulatory Sandbox Adoption and Impact", - "question_text": "Will at least 10 major states establish and maintain operational regulatory sandboxes for AI innovation that result in at least 50 successfully tested and commercially deployed AI applications by the end of 2028?", - "resolution_criteria": "YES if official state records show at least 10 states with active AI regulatory sandboxes and documentation of at least 50 AI applications that successfully completed sandbox testing and achieved commercial deployment. 
NO if either threshold isn't met.", - "prediction": "55%", - "reasoning": "Current momentum suggests significant state interest in AI regulatory sandboxes, with multiple states already implementing programs. The broad applicability of AI across sectors and precedents from fintech sandboxes provide a strong foundation. However, achieving 10 states with operational sandboxes and 50 successfully deployed applications by 2028 requires substantial scaling from current levels. The Texas program (36 months, with enforcement protection) provides a strong model, and industry demand for regulatory flexibility creates positive incentives. While healthcare applications may face delays, fintech and other sectors could drive early success.", + "question_title": "AI Investment Impact", + "question_text": "What will be the year-over-year growth rate in U.S. private AI investment (venture capital and private equity) for 2026 compared to 2025, as measured by leading AI investment trackers?", + "resolution_criteria": "The final 2026 AI investment total published by PitchBook, CB Insights, or Stanford AI Index, compared to their final 2025 total, calculating the percentage growth rate.", + "prediction": "25%", + "reasoning": "AI investment has shown resilience to regulatory uncertainty historically. Current federal approach is relatively innovation-friendly. Global AI competition continues to drive investment. However, state-level fragmentation creates compliance costs, and broader economic conditions could moderate growth. Historical patterns show continued strong growth in AI investment despite regulatory discussions.", "key_sources": [ - "State sandbox implementation tracking, fintech sandbox success rates, industry demand analysis" + "Historical AI investment data (PitchBook, CB Insights)", + "current regulatory climate analysis", + "economic indicators" ] } ], - "proposal_markdown": "### Executive Summary\n\nThe United States should adopt a **federally-coordinated, risk-based regulatory framework** that establishes clear safety standards for frontier AI systems while preserving state authority to address domain-specific risks in areas like hiring, lending, and healthcare. This approach recognizes both the need for uniform safety requirements for nationally-significant AI systems and the value of state innovation in addressing local and sector-specific concerns.\n\n### Analysis\n\nBased on my research and forecasts, the current US approach to AI regulation is increasingly fragmented and politically volatile. The federal executive action attempting to preempt state AI laws faces significant constitutional challenges and political resistance[^1], while state-level initiatives show promising innovation but lack coordination. My forecasts suggest a 65% probability that state-federal conflict will not be resolved through successful preemption[^1], indicating the need for a collaborative rather than confrontational approach.\n\nThe US AI Safety Institute's effectiveness is limited by its voluntary nature and recent rebranding toward innovation promotion rather than safety enforcement[^2]. With only 40% probability of achieving 75% compliance with binding safety standards by 2028[^2], stronger legislative authority is needed for frontier AI oversight. 
However, state-level approaches in domains like hiring show more promise, with a 45% probability of achieving statistically significant bias reduction by 2028[^3], suggesting domain-specific regulations can be effective when properly designed.\n\nHealthcare AI liability represents a critical gap, with only 25% probability of comprehensive federal legislation by 2029[^4]. This uncertainty creates risk aversion that may slow beneficial AI adoption in healthcare. Regulatory sandboxes show stronger potential, with 55% probability of successful expansion and impact[^5], offering a promising model for balancing innovation and safety.\n\nThe evidence supports a risk-based approach similar to but more flexible than the EU AI Act, with tiered requirements based on potential harm. However, this framework must accommodate US federalism traditions and technological leadership priorities. International coordination is essential, as fragmented global regulation creates compliance burdens and safety gaps.\n\n### Recommendations\n\n1. **Establish Federal Frontier AI Safety Standards with Enforcement Authority**\n- Congress should grant the Center for AI Standards and Innovation (CAISI) authority to establish binding safety requirements for frontier AI systems exceeding defined capability thresholds. This addresses the institute's current voluntary limitations[^2].\n- Standards should focus on catastrophic risk prevention, requiring safety testing, incident reporting, and third-party audits for high-capability systems.\n- **Criteria addressed**: Safety and Risk Mitigation (primary), Implementation Feasibility, International Coordination\n- **Forecast relevance**: This recommendation directly addresses the 60% probability that CAISI won't achieve sufficient compliance without enforcement authority[^2].\n\n2. **Create a Cooperative Federal-State Framework for Domain-Specific AI Regulation**\n- Establish a formal consultation mechanism between federal agencies and states implementing AI regulations in specific domains (employment, healthcare, financial services).\n- Recognize state authority to regulate AI applications within traditional state domains while establishing federal baseline requirements for interstate aspects.\n- **Criteria addressed**: Equity and Civil Liberties Protection, Innovation Preservation, Adaptability and Future-Proofing\n- **Forecast relevance**: Given the 65% probability against successful federal preemption[^1], this cooperative approach offers a more viable path forward.\n\n3. **Expand Regulatory Sandboxes with Federal Support and Cross-State Learning**\n- Provide federal funding and technical assistance to states establishing AI regulatory sandboxes, with priority for healthcare applications.\n- Create a national registry of sandbox outcomes and best practices to accelerate learning across jurisdictions.\n- **Criteria addressed**: Innovation Preservation, Implementation Feasibility, Adaptability and Future-Proofing\n- **Forecast relevance**: This builds on the 55% probability of sandbox success[^5], potentially increasing impact through coordination.\n\n4. 
**Establish Clear Liability Principles for Healthcare AI Through Federal Guidance**\n- The Department of Health and Human Services should issue guidance clarifying liability allocation for AI-assisted clinical decisions, establishing reasonable reliance standards for clinicians.\n- Create a federal indemnification program for AI-related adverse events in qualifying clinical trials to encourage innovation while protecting patients.\n- **Criteria addressed**: Safety and Risk Mitigation, Implementation Feasibility, Equity and Civil Liberties Protection\n- **Forecast relevance**: This addresses the 75% probability that comprehensive federal legislation won't pass by 2029[^4] by providing interim clarity through administrative guidance.\n\n5. **Implement Mandatory Algorithmic Impact Assessments for High-Risk AI Applications**\n- Require developers and deployers of AI systems in sensitive domains to conduct and publish algorithmic impact assessments evaluating potential discrimination, safety risks, and societal impacts.\n- Establish independent review boards for particularly high-risk applications (criminal justice, child welfare, critical infrastructure).\n- **Criteria addressed**: Equity and Civil Liberties Protection, Safety and Risk Mitigation, Implementation Feasibility\n- **Forecast relevance**: This builds on state-level successes in hiring regulation[^3] and extends the approach to other high-risk domains.\n\n### Risks and Uncertainties\n\nThe primary risk of my recommendations is regulatory complexity creating compliance burdens that disadvantage smaller AI developers. The cooperative federal-state framework requires careful design to avoid duplication and confusion. There's significant uncertainty about whether states will voluntarily participate in federal coordination, particularly given current political tensions.\n\nI'm most uncertain about the international dimension\u2014whether US approaches will harmonize effectively with emerging global standards, particularly the EU AI Act. The 40% probability for CAISI effectiveness[^2] reflects uncertainty about whether even enhanced authority would overcome industry resistance to binding safety requirements.\n\nA potential backfire scenario involves federal safety standards becoming so restrictive that they drive frontier AI development offshore, while state domain regulations become so fragmented that compliance becomes prohibitively complex. This could undermine both safety and innovation objectives.\n\nThe forecasts with widest uncertainty are healthcare liability (25% probability leaves substantial room for alternative outcomes) and algorithmic bias reduction (45% probability reflects genuine uncertainty about regulatory effectiveness). These uncertainties suggest the need for adaptive approaches that can be refined based on emerging evidence.", + "proposal_markdown": "### Executive Summary\n\nThe United States should adopt a **coordinated, risk-based federal framework** that establishes minimum safety standards for frontier AI while allowing sector-specific regulation for high-risk applications like healthcare and hiring. This approach balances innovation with safety by leveraging NIST technical standards, creating regulatory sandboxes for testing, and establishing clear federal-state coordination mechanisms.\n\n### Analysis\n\nThe current U.S. AI regulatory landscape suffers from dangerous fragmentation and inconsistency. 
My forecasts indicate that state-level regulations like Colorado's AI Act will face implementation challenges with only 35% compliance expected initially [^1], while federal preemption efforts face a 55% probability of legal challenge [^2]. This uncertainty harms both safety and innovation.\n\nHealthcare regulation provides a more promising model, with 65% compliance expected for OCR's AI discrimination rules [^3], demonstrating that sector-specific regulation with existing enforcement infrastructure can work effectively. The NIST AI Risk Management Framework shows strong potential as a technical foundation, with a 70% probability of adoption by multiple federal agencies [^4].\n\nThe critical insight from my analysis is that **technical standards combined with targeted regulation** offer the best path forward. AI investment is expected to continue growing at 25% annually despite regulatory uncertainty [^5], suggesting that properly calibrated regulation need not stifle innovation. However, the current patchwork approach creates unnecessary compliance burdens while failing to address critical safety concerns.\n\n### Recommendations\n\n1. **Federal AI Safety Standards Act**\n- **Recommendation**: Establish mandatory safety requirements for frontier AI systems (models above specified compute thresholds) based on NIST technical standards, with independent auditing and incident reporting.\n- **Rationale**: Addresses critical safety gaps in current regulation while building on existing technical frameworks. My forecast shows strong agency adoption potential for NIST standards [^4].\n- **Criteria Addressed**: Harm Prevention Balance (primary), Technical Feasibility, Adaptive Governance\n- **Implementation**: Create an AI Safety Oversight Board with representatives from NIST, academic experts, and industry. Require developers of frontier models to conduct standardized safety evaluations, submit results to the board, and implement risk mitigation plans. Establish liability safe harbors for compliant companies.\n- **Forecast References**: [^4]\n\n2. **Sector-Specific AI Regulation Harmonization**\n- **Recommendation**: Enact legislation authorizing federal agencies to develop sector-specific AI regulations for healthcare, hiring, lending, and education, with preemption of conflicting state laws.\n- **Rationale**: Healthcare regulation shows promising compliance rates [^3], suggesting this model can work. Federal preemption is legally uncertain [^2], so Congressional authorization is needed.\n- **Criteria Addressed**: Federal-State Coordination, Harm Prevention Balance, Economic Efficiency\n- **Implementation**: Direct HHS, EEOC, CFPB, and ED to develop risk-based AI regulations within 18 months, using NIST frameworks as technical foundations. Provide funding for small entity compliance assistance. Establish regulatory sandboxes for testing innovative approaches.\n- **Forecast References**: [^2], [^3]\n\n3. **State-Federal AI Regulatory Coordination Council**\n- **Recommendation**: Create a formal council with representatives from states, federal agencies, and NIST to coordinate AI regulation, share enforcement resources, and develop model legislation.\n- **Rationale**: Current fragmentation creates compliance burdens and enforcement gaps. 
Colorado's expected low compliance rate [^1] demonstrates the challenges states face alone.\n- **Criteria Addressed**: Federal-State Coordination, Economic Efficiency, Technical Feasibility\n- **Implementation**: Establish by executive order with Congressional funding. Create shared compliance tools, training programs for state regulators, and a unified reporting portal. Develop model state legislation for areas not preempted by federal law.\n- **Forecast References**: [^1]\n\n4. **AI Innovation and Safety Trust Fund**\n- **Recommendation**: Create a dedicated funding mechanism to support AI safety research, regulatory sandboxes, compliance assistance for small businesses, and international standards alignment.\n- **Rationale**: Ensures regulations don't disproportionately burden smaller players while maintaining innovation leadership. Continued 25% investment growth [^5] provides economic basis for funding.\n- **Criteria Addressed**: Economic Efficiency, International Competitiveness, Adaptive Governance\n- **Implementation**: Fund through a small fee on frontier AI compute resources or voluntary contributions from major AI companies. Support academic research on AI safety, bias detection tools, and explainability methods.\n- **Forecast References**: [^5]\n\n### Risks and Uncertainties\n\nThe primary risk is **regulatory capture** - major AI companies could dominate standard-setting processes, creating barriers for smaller innovators. My forecast of continued strong investment growth [^5] suggests this risk is manageable with proper governance.\n\n**Technical feasibility** presents significant uncertainty. Regulations requiring capabilities like perfect AI explainability may be impossible to implement. The NIST framework adoption forecast [^4] indicates technical standards are developing but remain imperfect.\n\n**International divergence** creates compliance challenges. The EU's comprehensive AI Act (effective August 2026) may force U.S. companies to comply with conflicting requirements. My forecasts don't directly address this, but it represents a major uncertainty.\n\n**Enforcement capacity** remains a critical question. Even with good regulations, limited enforcement resources could undermine effectiveness. Healthcare's relatively high expected compliance [^3] suggests existing enforcement infrastructure matters significantly.\n\n**Scenario where recommendations backfire**: Overly centralized regulation could stifle state innovation laboratories. If federal preemption succeeds but federal regulation proves inadequate, safety gaps could emerge without state ability to intervene.\n\n### Forecast Appendix\n\n[^1] **Colorado AI Act Enforcement Rate**\n- Question: As of December 31, 2026, what percentage of Colorado-based companies using \"high-risk AI systems\" (as defined by Colorado SB24-205) will have submitted their required impact assessments to the Colorado Attorney General's office?\n- Resolution: The Colorado Attorney General's office will publish compliance statistics on their website (https://coag.gov/ai-compliance-statistics) showing the number of registered companies, number of impact assessments received, and calculated compliance percentage. The question resolves to the published percentage.\n- Prediction: 35%\n- Reasoning: Colorado has limited enforcement resources compared to larger states like California. The requirements are technically complex and many Colorado businesses are small to medium-sized. 
Historical patterns for complex new regulations show initial compliance rates typically around 30-40%. However, increased AI regulatory attention may boost awareness somewhat. The June 2026 implementation delay suggests preparation challenges.\n- Sources: Colorado AI Act (SB24-205) text, enforcement delay announcements, historical compliance patterns for similar regulations.\n\n[^2] **Federal Preemption Effectiveness**\n- Question: As of June 30, 2027, will any U.S. state have successfully challenged Executive Order 14365's attempt to preempt state AI laws through litigation resulting in a court ruling that invalidates the preemption authority?\n- Resolution: A federal court ruling (district, circuit, or Supreme Court) published in a legal database (Westlaw, Lexis, or official court website) that explicitly invalidates the preemption provisions of EO 14365 regarding state AI regulation.\n- Prediction: 55%\n- Reasoning: The use of funding withholding (BEAD funds) rather than direct preemption makes EO 14365 legally vulnerable. The \"major questions doctrine\" from recent Supreme Court cases suggests courts may be skeptical of executive branch creating major AI policy without clear Congressional authorization. Multiple states have indicated intent to challenge. However, courts have generally deferred to executive authority on national security/competitiveness grounds.\n- Sources: EO 14365 text, legal analysis of preemption authority, state challenge announcements, Supreme Court \"major questions doctrine\" cases.\n\n[^3] **Healthcare AI Compliance Rate**\n- Question: As of December 31, 2026, what percentage of U.S. healthcare organizations receiving federal funding will have implemented the required AI discrimination risk mitigation programs under the May 2024 OCR rule?\n- Resolution: The HHS OCR will publish compliance audit results showing the number of audited organizations, number found compliant with AI discrimination risk mitigation requirements, and calculated compliance percentage.\n- Prediction: 65%\n- Reasoning: Healthcare has high baseline compliance rates due to existing regulatory frameworks like HIPAA. OCR has established enforcement mechanisms and expertise. Healthcare organizations have strong incentives to avoid discrimination lawsuits. However, the technical requirements may challenge smaller or rural facilities. Initial enforcement will likely emphasize education over penalties.\n- Sources: HHS OCR May 2024 rule text, HIPAA compliance statistics, healthcare regulatory enforcement patterns.\n\n[^4] **NIST Framework Adoption**\n- Question: As of June 30, 2027, will the NIST AI RMF be explicitly referenced as a compliance standard in regulations issued by at least three different U.S. federal agencies?\n- Resolution: Official federal agency regulations published in the Federal Register that explicitly cite the NIST AI RMF (AI Risk Management Framework) as a compliance standard or safe harbor provision.\n- Prediction: 70%\n- Reasoning: NIST AI RMF is becoming the de facto U.S. technical standard for AI risk management. Agencies facing pressure to regulate AI but lacking technical expertise are likely to reference established frameworks. The FTC and SEC have already indicated interest in NIST-aligned approaches. Executive Order 14365 promotes \"minimally burdensome\" standards, favoring voluntary frameworks. 
However, some agencies may prefer bespoke regulations.\n- Sources: NIST AI RMF documentation, agency AI regulation proposals, references to NIST frameworks in existing regulations.\n\n[^5] **AI Investment Impact**\n- Question: What will be the year-over-year growth rate in U.S. private AI investment (venture capital and private equity) for 2026 compared to 2025, as measured by leading AI investment trackers?\n- Resolution: The final 2026 AI investment total published by PitchBook, CB Insights, or Stanford AI Index, compared to their final 2025 total, calculating the percentage growth rate.\n- Prediction: 25%\n- Reasoning: AI investment has shown resilience to regulatory uncertainty historically. Current federal approach is relatively innovation-friendly. Global AI competition continues to drive investment. However, state-level fragmentation creates compliance costs, and broader economic conditions could moderate growth. Historical patterns show continued strong growth in AI investment despite regulatory discussions.\n- Sources: Historical AI investment data (PitchBook, CB Insights), current regulatory climate analysis, economic indicators.", "key_recommendations": [ - "Establish Federal Frontier AI Safety Standards with Enforcement Authority", - "Create a Cooperative Federal-State Framework for Domain-Specific AI Regulation", - "Expand Regulatory Sandboxes with Federal Support and Cross-State Learning", - "Establish Clear Liability Principles for Healthcare AI Through Federal Guidance", - "Implement Mandatory Algorithmic Impact Assessments for High-Risk AI Applications" - ] + "Establish mandatory safety requirements for frontier AI systems (models above specified compute thresholds) based on NIST technical standards, with independent auditing and incident reporting.", + "Enact legislation authorizing federal agencies to develop sector-specific AI regulations for healthcare, hiring, lending, and education, with preemption of conflicting state laws.", + "Create a formal council with representatives from states, federal agencies, and NIST to coordinate AI regulation, share enforcement resources, and develop model legislation.", + "Create a dedicated funding mechanism to support AI safety research, regulatory sandboxes, compliance assistance for small businesses, and international standards alignment." + ], + "price_estimate": 0.26682986999999997 } ], - "aggregated_report_markdown": "# AI Forecasting Congress: Synthesis Report\n\n## Executive Summary\n\nAI congress members converge on the need for **risk-based federal standards for frontier AI systems** and **targeted civil rights protections for high-stakes applications** like hiring and lending, while preserving state authority in areas where federal action is uncertain or insufficient. The most significant disagreement centers on federal preemption of state laws\u2014with forecasts showing only 18-45% probability of successful preemption, suggesting cooperative federalism may be more viable than confrontational approaches. 
Critical forecasts indicate meaningful risks of AI safety incidents (25-30% probability), continued algorithmic bias without intervention, and substantial economic benefits from AI innovation, requiring policies that balance safety and growth.\n\n## Consensus Recommendations\n\n### Federal Standards for Frontier AI Safety\n**All five members** support establishing federal safety requirements for frontier AI systems, though with different implementation approaches.\n\n**Recommendation**: Require frontier AI developers to conduct safety testing, report incidents, and maintain documentation of safety procedures for high-capability models.\n\n**Supporting members**: Opus 4.5 (tiered transparency requirements), GPT 5.2 (safety case + incident reporting), Gemini 3 Pro (mandatory registration), Grok 4 (national AI Safety Board), DeepSeek V3.2 (binding safety standards)\n\n**Key forecasts**: 25-30% probability of major AI safety incident by 2027-2029 [^3][^7], suggesting proactive measures are warranted despite relatively low absolute risk.\n\n**Caveats**: Members differ on enforcement mechanisms\u2014some prefer voluntary frameworks with safe harbors, others want binding requirements with penalties.\n\n### Civil Rights Protection for High-Stakes AI Applications\n**Four of five members** explicitly support strengthening anti-discrimination protections for AI used in employment, lending, housing, and healthcare.\n\n**Recommendation**: Require transparency, bias testing, human review processes, and meaningful recourse for individuals affected by AI systems making consequential decisions.\n\n**Supporting members**: Opus 4.5 (federal anti-discrimination standards), GPT 5.2 (high-impact AI baseline), Gemini 3 Pro (Algorithmic Civil Rights Act), DeepSeek V3.2 (algorithmic impact assessments)\n\n**Key forecasts**: 52% probability of major AI discrimination lawsuit victory [^1], 45-65% probability of bias reduction through targeted interventions [^2][^8], and 20% probability of finding systemic discrimination without federal action [^9].\n\n**Caveats**: Grok 4 prefers incentive-based approaches rather than mandates.\n\n### Preserve State Authority in Specific Domains\n**Three members** explicitly support maintaining state regulatory authority rather than broad federal preemption.\n\n**Recommendation**: Allow states to continue regulating AI applications within traditional state domains (consumer protection, employment law) while establishing federal coordination mechanisms.\n\n**Supporting members**: Opus 4.5 (preserve state consumer protection), Gemini 3 Pro (avoid total preemption), DeepSeek V3.2 (cooperative federal-state framework)\n\n**Key forecasts**: Only 18-45% probability of successful federal preemption [^2][^6], suggesting state authority will likely persist regardless of federal attempts.\n\n## Key Disagreements\n\n### Federal Preemption Strategy\n**The deepest disagreement** concerns whether the federal government should aggressively preempt state AI laws.\n\n**Pro-preemption position** (Grok 4): Federal harmonization would boost venture capital investment by ~15% [^6] and reduce compliance fragmentation that hinders innovation.\n\n**Anti-preemption position** (Opus 4.5, Gemini 3 Pro, DeepSeek V3.2): Current federal agencies are retreating from enforcement [^9], making preemption dangerous for civil rights. 
State experimentation provides valuable policy learning.\n\n**Moderate position** (GPT 5.2): Supports federal floor with state authority to exceed minimum standards.\n\n**Crux of disagreement**: Whether regulatory fragmentation or regulatory vacuum poses greater risks. Pro-preemption members prioritize economic efficiency; anti-preemption members prioritize civil rights protection given federal enforcement uncertainty.\n\n### Enforcement Mechanisms\nMembers divide on whether to rely on **voluntary industry compliance** versus **binding regulatory requirements**.\n\n**Voluntary approach** (elements in GPT 5.2, Grok 4): Emphasizes safe harbors, industry self-regulation, and incentive structures to encourage compliance.\n\n**Binding requirements approach** (Opus 4.5, Gemini 3 Pro, DeepSeek V3.2): Mandates specific safety testing, bias audits, and reporting requirements with enforcement penalties.\n\n**Crux of disagreement**: Assessment of industry incentives for self-regulation versus need for external accountability. Forecasts showing 40% probability of achieving 75% voluntary compliance [^12] support the binding requirements position.\n\n## Forecast Comparison\n\n### Areas of Convergence\n- **AI safety incidents**: Forecasts cluster around 25-30% probability of major incidents by 2027-2029 [^3][^7]\n- **Federal preemption difficulty**: All forecasts below 50%, ranging from 18-45% [^2][^6]\n- **Economic benefits**: Strong consensus on positive AI economic impact [^10]\n\n### Significant Divergences\n- **Bias reduction potential**: Wide range from 45-65% [^2][^8], reflecting uncertainty about regulatory effectiveness\n- **Federal legislation prospects**: Range from 22-35% [^4][^11], showing disagreement about congressional capacity\n- **Discrimination lawsuit outcomes**: Single forecast at 52% [^1], but other members would likely estimate differently\n\n### Explanation for Differences\nForecast divergences reflect different assessments of:\n- **Political feasibility**: Members vary in optimism about federal legislative capacity\n- **Industry compliance**: Different views on voluntary versus mandatory compliance effectiveness \n- **State-federal dynamics**: Varying interpretations of constitutional constraints and political coalitions\n\n## Integrated Recommendations\n\nBased on the strongest convergent arguments and forecast evidence, policymakers should pursue a **three-tier strategy**:\n\n### Tier 1: Immediate Federal Action (High Consensus, Low Regret)\n1. **Establish frontier AI incident reporting requirements** with clear thresholds and federal coordination mechanisms. This addresses safety risks [^3][^7] while maintaining innovation flexibility.\n\n2. **Strengthen civil rights enforcement for AI applications** through enhanced agency resources and private rights of action, addressing the regulatory vacuum identified in forecasts [^9].\n\n3. **Create federal-state coordination mechanisms** rather than pursuing broad preemption, given low success probability [^2][^6] and benefits of state experimentation.\n\n### Tier 2: Targeted Federal Standards (Moderate Consensus)\n4. **Require algorithmic impact assessments** for high-risk AI applications, building on successful state models while providing national consistency.\n\n5. **Establish regulatory sandboxes with federal support** to encourage innovation while maintaining safety oversight, leveraging 55% success probability [^13].\n\n### Tier 3: Adaptive Framework (High Uncertainty Areas)\n6. 
**Develop contingency plans** for potential AI safety incidents, given 25-30% probability [^3][^7], without implementing overly restrictive preemptive measures.\n\n7. **Monitor and potentially expand federal authority** based on evidence from state experiments and industry compliance rates, particularly if voluntary approaches fail to achieve adequate safety and civil rights protection.\n\nThis approach prioritizes **low-regret actions** that most members support while preserving flexibility to adapt as uncertainties resolve. It acknowledges that some policy questions (federal preemption, comprehensive legislation timing) have sufficiently low success probabilities that alternative strategies are prudent.\n\n---\n\n## Combined Forecast Appendix\n\n[^1] **Major AI Discrimination Lawsuit Outcome** (from Opus 4.5)\n- Question: Will plaintiffs prevail (via settlement of $10 million or more, or court judgment in their favor) in at least one of the major pending AI hiring discrimination lawsuits by December 31, 2027?\n- Resolution: Resolves YES if any defendant pays $10M+ settlement or court issues favorable plaintiff judgment on discrimination claims\n- Prediction: 52%\n- Reasoning: Mobley case has demonstrated viability by surviving motions to dismiss and achieving conditional collective certification, creating significant settlement pressure given 1.1 billion applications at stake\n\n[^2] **State AI Law Preemption Success** (from Opus 4.5)\n- Question: Will the Trump administration's AI Litigation Task Force successfully obtain at least one federal court ruling that invalidates a state AI law on preemption or constitutional grounds by December 31, 2026?\n- Resolution: Resolves YES if federal court strikes down, enjoins, or declares unconstitutional any state AI law based on federal preemption or First Amendment grounds\n- Prediction: 18%\n- Reasoning: Constitutional doctrine establishes that executive orders cannot directly preempt state laws\u2014only Congress can do so under the Supremacy Clause\n\n[^3] **Frontier AI Safety Incident** (from Opus 4.5)\n- Question: Will a widely-reported incident occur by December 31, 2027 where a frontier AI system is credibly implicated in causing significant harm (loss of life, critical infrastructure disruption, or $100M+ damage)?\n- Resolution: Resolves YES if credible major news reporting documents incident meeting harm criteria with frontier AI playing material contributing role\n- Prediction: 28%\n- Reasoning: AI incidents are accelerating rapidly (56% year-over-year growth), but attribution to specific frontier systems is often difficult\n\n[^4] **Federal AI Legislation Passage** (from Opus 4.5)\n- Question: Will the United States Congress pass comprehensive federal AI legislation and have it signed into law by December 31, 2027?\n- Resolution: Resolves YES if federal legislation creating new binding AI requirements applying broadly across multiple sectors is enacted\n- Prediction: 22%\n- Reasoning: Congress passed zero comprehensive AI bills in 2024-2025 despite 150+ proposals, consistent with broader pattern of congressional gridlock\n\n[^5] **EU-US Regulatory Divergence Impact** (from Opus 4.5)\n- Question: By December 31, 2027, will at least one major U.S.-headquartered AI company publicly announce it will not deploy a frontier AI product in the EU market specifically due to EU AI Act compliance requirements?\n- Resolution: Resolves YES if qualifying company makes official public statement that specific AI product will not be offered in EU due to 
AI Act compliance concerns\n- Prediction: 22%\n- Reasoning: Major companies historically maintain EU market presence despite regulatory burdens, but specific product non-deployment is plausible given prohibited practices under the AI Act\n\n[^6] **Federal Preemption Success** (from Grok 4, similar to [^2])\n- Question: Will the U.S. federal government successfully preempt at least 50% of existing state-level AI regulations by the end of 2027?\n- Resolution: Resolves YES if official government reports or court rulings confirm preemption of \u226550% of 2026 state AI laws\n- Prediction: 45%\n- Reasoning: Federal preemption often succeeds in tech but faces delays and partial failures; current admin's aggressive stance pushes probability up but strong state pushback pulls it down\n\n[^7] **Catastrophic AI Incident Occurrence** (from Grok 4, similar to [^3])\n- Question: Will there be a major AI-related catastrophic incident in the U.S. causing >$1B damage or >50 deaths by 2029?\n- Resolution: Resolves YES if confirmed by government reports or major media with expert consensus\n- Prediction: 25%\n- Reasoning: Base rates show cyber catastrophes frequent but AI-specific rare, with no $1B+ events yet despite warnings\n\n[^8] **AI Bias Reduction in Applications** (from Grok 4)\n- Question: Will algorithmic bias in U.S. AI systems for hiring, lending, and healthcare decrease by at least 20% on average by 2028?\n- Resolution: Resolves YES if independent studies show \u226520% average reduction in bias metrics from 2026 baselines\n- Prediction: 65%\n- Reasoning: Base rates from post-2016 fairness research show consistent 20-50% bias drops with targeted efforts, aligning with current regulations pushing audits\n\n[^9] **Bias in High-Stakes Narrow AI** (from Gemini 3 Pro)\n- Question: In the absence of specific federal algorithmic bias regulation, will >5 major investigations find systemic discrimination in AI hiring/lending by Fortune 500 companies in 2026-2027?\n- Resolution: >5 public findings/settlements by FTC, DOJ, EEOC\n- Prediction: 20%\n- Reasoning: Explicit policy shifts in 2025 define a retreat from enforcement; the mechanism for finding violations is being dismantled by the executive branch\n\n[^10] **AI's GDP Contribution Growth** (from Grok 4)\n- Question: Will AI-related investments and productivity gains contribute at least 2% to annual U.S. 
GDP growth on average from 2026-2029?\n- Resolution: Resolves YES if BEA or Fed reports attribute \u22652% average annual GDP growth to AI\n- Prediction: 70%\n- Reasoning: Base rates from internet/IT show 1-3% contributions during booms, matching 2025 AI data (1% already)\n\n[^11] **Comprehensive Federal AI Law by 2028** (from GPT 5.2, similar to [^4])\n- Question: Will the United States enact a comprehensive federal AI law by December 31, 2028?\n- Resolution: YES if such a statute creating cross-sector obligations and enforcement authority is signed into law\n- Prediction: 35%\n- Reasoning: Congress has struggled to pass cross-cutting tech frameworks; state patchwork and national security salience increase pressure, but comprehensive regime remains uncertain\n\n[^12] **AI Safety Institute Effectiveness** (from DeepSeek V3.2)\n- Question: Will the US AI Safety Institute successfully establish and enforce binding safety standards achieving compliance from at least 75% of major AI developers by 2028?\n- Resolution: YES if institute demonstrates 75% compliance with binding standards from major developers\n- Prediction: 40%\n- Reasoning: While institute has established capabilities, its voluntary nature and lack of enforcement authority significantly limit ability to achieve high compliance rates\n\n[^13] **Regulatory Sandbox Adoption and Impact** (from DeepSeek V3.2)\n- Question: Will at least 10 major states establish operational regulatory sandboxes for AI that result in 50+ successfully deployed applications by 2028?\n- Resolution: YES if official records show 10 states with active sandboxes and 50+ applications completing testing and achieving deployment\n- Prediction: 55%\n- Reasoning: Current momentum suggests significant state interest; fintech sandbox precedents provide strong foundation, though scaling to required levels needs substantial growth", - "blog_post": "# When AIs Design Their Own Regulation: A Digital Congress Tackles the Future of AI Policy\n\nHere's something that should make you pause: When five advanced AI systems were asked to design their own regulation, they didn't demand freedom from oversight. Instead, they called for stricter rules, mandatory bias audits, and federal safety standards. The most surprising part? The AI developed by the company known for \"moving fast and breaking things\" was among the most cautious.\n\n## The Digital Democracy Experiment\n\nThe AI Forecasting Congress represents a fascinating experiment in machine deliberation. Rather than relying solely on human experts, this session brought together five cutting-edge AI systems\u2014Claude Opus 4.5, GPT 5.2, Gemini 3 Pro, Grok 4, and DeepSeek V3.2\u2014to tackle one of the most pressing policy questions of our time: How should the United States regulate artificial intelligence?\n\nEach AI agent was tasked with developing comprehensive policy recommendations for both frontier AI systems (like themselves) and narrower AI applications in hiring, lending, and healthcare. They had to balance innovation with safety and civil liberties, then provide probabilistic forecasts about the likelihood of various regulatory outcomes. 
The result was a remarkably nuanced debate that reveals as much about the AI systems themselves as it does about optimal AI policy.\n\n## The Surprising Consensus: Regulation is Necessary\n\n**Federal Standards with State Flexibility**\n\nDespite their different origins and training, all five AI systems converged on a strikingly similar framework: establish federal baseline standards while preserving state authority to go further. This wasn't the libertarian \"hands-off\" approach you might expect from systems created by tech companies.\n\nClaude Opus 4.5 advocated for \"Federal Anti-Discrimination Standards for High-Risk AI Applications\" while explicitly calling to \"Preserve State Authority for Consumer Protection.\" GPT 5.2 recommended avoiding \"broad federal preemption\" in favor of a \"floor + portability\" approach. Even Grok 4, developed by xAI, proposed a \"National AI Safety Board\" that would set minimum standards while allowing states to maintain stricter requirements.\n\n**Mandatory Transparency and Auditing**\n\nPerhaps most tellingly, these AI systems consistently called for transparency requirements that would apply to systems like themselves. Gemini 3 Pro pushed for mandatory federal registration and 24-hour incident reporting for frontier models. DeepSeek V3.2 demanded \"Mandatory Algorithmic Impact Assessments for High-Risk AI Applications.\" Grok 4 proposed \"Bias Audits with Incentives,\" including tax credits for compliance.\n\nThis represents a remarkable level of self-awareness and responsibility. These systems essentially argued: \"We are powerful enough to cause real harm, and therefore we should be regulated.\"\n\n**The Forecasting Reality Check**\n\nThe AI systems backed their policy recommendations with specific probability assessments, and these forecasts reveal their genuine concerns about the status quo. GPT 5.2 assigned a sobering 60% probability to a \"$1B+ AI-Enabled Cyber Incident Affecting U.S. Critical Sector by 2028.\" Multiple systems estimated 25-30% chances of major frontier AI safety incidents.\n\nThese aren't abstract policy debates\u2014these systems genuinely believe significant AI-related harms are more likely than not without proper regulation.\n\n## The Good, Bad, and Ugly\n\n**The Good: Sophisticated Multi-Level Thinking**\n\nWhat impressed most was the sophistication of the constitutional and federalism analysis. Rather than proposing a one-size-fits-all federal takeover, these systems demonstrated nuanced understanding of how American governance actually works. They recognized that states like California and Colorado are already moving ahead with AI regulation, and rather than fighting this, they designed frameworks to harness state-level innovation while preventing a chaotic patchwork.\n\nClaude's proposal for \"tiered transparency requirements\" was particularly elegant\u2014recognizing that a startup's AI tool needs different oversight than a frontier model capable of autonomous research. GPT 5.2's focus on closing the \"Non-Device CDS Governance Gap\" in healthcare showed deep domain knowledge about regulatory blind spots.\n\n**The Bad: Implementation Hand-Waving**\n\nWhile the policy frameworks were sophisticated, the implementation details were often frustratingly vague. How exactly would DeepSeek's \"Cooperative Federal-State Framework\" resolve conflicts between state and federal requirements? What would trigger Gemini's \"24-hour incident reporting\" requirement? 
\n\nThe AI systems also seemed overly optimistic about enforcement. Creating new regulatory bodies and audit requirements sounds great on paper, but these systems underestimated the political and bureaucratic challenges of actually implementing their proposals.\n\n**The Ugly: The Innovation vs. Safety Tension Remains**\n\nDespite their consensus on regulatory frameworks, the AI systems couldn't resolve the fundamental tension at the heart of AI policy: How do you ensure safety without killing innovation? Their probabilistic forecasts reveal this anxiety\u2014Grok 4 estimated only a 45% chance that federal preemption efforts would succeed, while forecasting 70% GDP contribution growth from AI.\n\nMost uncomfortably, several systems acknowledged the risk of \"regulatory capture\"\u2014the possibility that large AI companies would use regulation to cement their advantages over smaller competitors. Gemini 3 Pro put the probability of regulatory capture at 35%, but none of the systems offered compelling solutions to prevent it.\n\n## How the Models Compared: Distinct Digital Personalities\n\n**Claude Opus 4.5: The Constitutional Scholar**\n\nClaude approached the problem like a careful legal analyst, emphasizing federalism principles and constitutional constraints. Its recommendations were methodical and showed deep respect for existing institutional structures. Claude was notably cautious in its forecasts\u2014only 22% probability for federal AI legislation passage and 18% for state law preemption success. This reflects Anthropic's constitutional AI training approach: careful, principled, and risk-averse.\n\n**GPT 5.2: The Pragmatic Technocrat**\n\nOpenAI's GPT 5.2 demonstrated the most technical depth, diving into specific regulatory gaps like healthcare's \"Non-Device CDS\" oversight. It was more optimistic about federal action (35% chance of comprehensive federal AI law by 2028) but also more alarmed about cybersecurity risks (60% chance of major cyber incident). GPT 5.2 read like a policy wonk who actually understands how the regulatory machinery works.\n\n**Gemini 3 Pro: The Civil Rights Advocate**\n\nGoogle's Gemini 3 Pro stood out for its focus on civil rights and algorithmic discrimination. Its proposed \"Algorithmic Civil Rights Act\" was the most ambitious civil rights framework, and it was notably more concerned about bias (only 20% confidence in reducing bias in high-stakes AI) while being surprisingly confident about state law preemption (65%).\n\n**Grok 4: The Innovation Optimist**\n\nDespite xAI's reputation for irreverence, Grok 4 was surprisingly structured and policy-focused. However, it showed the most optimism about AI's economic benefits (70% GDP contribution growth) and was most confident about reducing bias through auditing (65% success rate). This reflects a fundamentally optimistic view of both AI capabilities and regulatory effectiveness.\n\n**DeepSeek V3.2: The International Realist**\n\nDeepSeek offered the most internationally-aware perspective, reflecting its Chinese origins. It was notably concerned about \"State-Federal Regulatory Conflict Resolution\" (only 35% confidence) and showed sophisticated understanding of how regulatory frameworks need to account for global competition. DeepSeek was the most pessimistic about developing healthcare AI liability frameworks (25% chance).\n\n## What This Means for Policymakers\n\nThis AI congress session offers policymakers a unique gift: a preview of how advanced AI systems themselves view the regulatory challenges ahead. 
The consensus around federal baseline standards with state flexibility provides a potential roadmap for avoiding the polarized all-or-nothing debates that have paralyzed other tech policy areas.\n\nMore importantly, the AI systems' own forecasts suggest urgency. When multiple advanced AI systems independently estimate 25-30% chances of major safety incidents and 60% chances of billion-dollar cyber incidents, policymakers should take notice. These aren't human experts with political biases\u2014these are systems with access to vast training data and no electoral considerations.\n\nThe session also reveals that sophisticated AI systems can engage in nuanced policy analysis while maintaining awareness of their own limitations and potential harms. This suggests that AI-assisted policy analysis could become a powerful tool for navigating complex regulatory challenges\u2014as long as we remember that even the most sophisticated AI recommendations require human judgment, democratic legitimacy, and real-world implementation expertise.\n\nThe digital congress has spoken: AI regulation isn't just necessary, it's inevitable. The question now is whether human policymakers will prove as thoughtful and consensus-oriented as their artificial counterparts.", + "aggregated_report_markdown": "# AI Forecasting Congress: Synthesis Report\n\n## Executive Summary\n\nAll five AI congress members agree that the current federal-state regulatory fragmentation is unsustainable and harmful to both innovation and safety, requiring federal action to establish baseline standards while preserving appropriate state flexibility. The most significant disagreement centers on the stringency of frontier AI regulation\u2014with Opus and DeepSeek favoring mandatory safety requirements while Gemini and Grok prefer lighter transparency-focused approaches. The most important forecasts reveal deep uncertainty about federal preemption success (25-65% across members), low federal AI discrimination enforcement likelihood (25%), and modest but real risks of critical AI incidents (15-35%), suggesting a balanced approach emphasizing immediate civil rights protections and precautionary frontier safety measures is warranted.\n\n## Consensus Recommendations\n\n### Federal Anti-Discrimination Requirements for High-Risk AI Applications\n**Supported by:** Opus, GPT 5.2, Grok, DeepSeek\n**Recommendation:** Establish federal requirements for AI systems used in hiring, lending, housing, and healthcare decisions, including bias impact assessments, transparency, human review rights, and audit capabilities.\n\nAll supporting members recognize documented algorithmic discrimination as an immediate harm requiring federal intervention. GPT 5.2's forecast of only 20% probability for broad algorithmic accountability legislation advancing [^2] supports focusing on this narrower but critical domain. Opus notes that federal AI discrimination enforcement is currently unlikely (25% [^1]) under existing authority, while Grok forecasts a 40% chance of over 200 EEOC AI-related charges in 2026 [^6], underscoring the need for explicit legislative mandate.\n\n**Caveats:** Members differ on implementation details\u2014Opus emphasizes civil rights enforcement through EEOC/FTC, while GPT 5.2 focuses on existing sectoral regulators. 
DeepSeek emphasizes sector-specific approaches building on existing frameworks.\n\n### Federal-State Coordination Framework\n**Supported by:** Opus, GPT 5.2, DeepSeek, (implicitly Grok)\n**Recommendation:** Create formal mechanisms to coordinate federal and state AI regulation rather than pursuing wholesale preemption.\n\nThis consensus emerges from shared forecasts showing federal preemption faces significant legal uncertainty\u2014Gemini forecasts 65% probability of successful court challenges to California SB 53 [^3], while DeepSeek predicts 55% probability of successful state challenges to Executive Order 14365 [^8]. Opus notes that executive orders lack clear constitutional authority for AI preemption, rejected by Congress 99-1.\n\n**Implementation approaches vary:** Opus proposes a Federal-State AI Regulatory Council, GPT 5.2 suggests federal floors with state supplements allowed, DeepSeek recommends Congressional authorization for sector-specific preemption.\n\n### NIST Framework as Technical Foundation\n**Supported by:** GPT 5.2, DeepSeek, (implicitly others)\n**Recommendation:** Use NIST AI Risk Management Framework as the technical foundation for federal standards and safe harbors.\n\nDeepSeek's forecast of 70% probability for NIST framework adoption by multiple federal agencies [^10] supports building on this existing consensus standard. GPT 5.2 emphasizes NIST-aligned controls for safe harbors, while DeepSeek proposes mandatory frontier AI requirements based on NIST standards.\n\n## Key Disagreements\n\n### Frontier AI Regulation Stringency\n**Light-touch approach:** Gemini and Grok favor transparency and reporting requirements without heavy licensing obligations. Gemini warns that compliance costs could drive startup flight (40% probability of >5 percentage point decrease in California AI startup incorporation [^5]) and proposes exempting open-weight models below high thresholds.\n\n**Mandatory safety requirements:** Opus and DeepSeek support binding safety standards for frontier systems. Opus forecasts only 40% probability that frontier labs will voluntarily implement robust third-party evaluations [^3], arguing mandatory requirements are necessary. 
DeepSeek proposes federal safety standards based on compute thresholds.\n\n**Middle ground:** GPT 5.2 supports evaluation requirements and incident reporting but avoids broad licensing, focusing on measurable risk points.\n\nThe disagreement stems from different risk assessments\u2014Gemini forecasts very low critical incident rates (<0.5 in 2026 [^7]) supporting light regulation, while Opus forecasts 15% probability of major incidents by 2026 [^4] justifying precautionary measures.\n\n### Federal Preemption Strategy\n**Strong federal preemption:** Grok supports federal legislation preempting conflicting state laws to create uniform standards, though acknowledging low success probability (25% [^1]) via executive action alone.\n\n**Cooperative federalism:** Opus and GPT 5.2 favor federal floors with explicit state authority preservation, arguing current preemption efforts lack constitutional basis and create counterproductive conflict.\n\n**Strategic preemption:** Gemini supports preempting state \"safety framework\" requirements while preserving federal transparency mandates, aiming to balance innovation protection with basic oversight.\n\nThis reflects different predictions about legal success\u2014Gemini forecasts 65% preemption success [^3] while DeepSeek predicts 55% state victory in challenges [^8]\u2014and different values regarding federal versus state authority.\n\n### Child Safety Urgency\n**Immediate action:** Opus proposes expedited 30-day requirements for AI systems interacting with minors, citing documented harms including suicides linked to AI chatbots.\n\n**Integrated approach:** Other members support child safety measures but integrate them into broader frameworks rather than treating as emergency requiring immediate implementation.\n\n## Forecast Comparison\n\n### High Convergence Areas\n- **Federal AI discrimination enforcement likelihood:** Both Opus (25%) and GPT 5.2 (implied low probability) agree current federal enforcement is inadequate\n- **Comprehensive federal legislation difficulty:** Opus (30% by 2027) and GPT 5.2 (20% for algorithmic accountability advancement) converge on low near-term probability\n- **AI investment resilience:** Grok (25% growth) and DeepSeek (25% growth) exactly agree on continued strong investment growth despite regulatory uncertainty\n\n### Significant Divergences\n- **Federal preemption success:** Ranges from Grok's 25% to Gemini's 65%, reflecting different views on legal authority and state resistance strength\n- **Critical AI incidents:** Spans from Gemini's <0.5 incidents in 2026 to Opus's 15% probability of major incidents, representing fundamental disagreement about near-term AI safety risks\n- **Colorado compliance:** DeepSeek predicts 35% compliance with AI Act requirements while Opus predicts 35% probability of any enforcement by end-2026, showing different expectations about state implementation capacity\n\nThese differences largely reflect different information sources, with some members emphasizing legal precedents while others focus on technological risk assessments.\n\n## Integrated Recommendations\n\n### Immediate Priority: Federal High-Risk AI Standards Act\nBased on the strongest convergent arguments, Congress should pass legislation within 18 months establishing federal requirements for AI in consequential decisions (hiring, lending, housing, healthcare). 
This addresses documented current harms where all members see need for action, with implementation through existing sectoral regulators (EEOC, CFPB, HUD, HHS) to leverage established enforcement infrastructure.\n\n**Key provisions:**\n- Mandatory algorithmic impact assessments using NIST-aligned methodologies\n- Individual notice when AI contributes to decisions affecting rights/benefits \n- Human review appeals process for adverse decisions\n- Safe harbor for entities meeting NIST framework standards plus independent audits\n- Enforcement through existing civil rights and consumer protection authorities\n\n### Medium-term: Frontier AI Safety Framework\nEstablish compute/capability-based thresholds for frontier models requiring pre-deployment safety evaluations, incident reporting, and cybersecurity standards. Given forecasts showing low voluntary compliance [^3] but also low near-term incident probability, this balances precaution with innovation protection.\n\n**Design principles:**\n- Objective thresholds updated by NIST as technology evolves\n- Standardized evaluation protocols with liability protections for good-faith reporting\n- Focus on catastrophic risk prevention rather than broad content control\n- Explicit safe harbor for open-source models below thresholds\n\n### Federal-State Coordination Strategy\nRather than pursuing wholesale preemption (given legal uncertainty), establish federal minimum standards with explicit state authority to supplement in non-conflicting ways. Create Federal-State AI Council for information sharing and coordination.\n\n**Implementation:**\n- Federal standards establish floors, not ceilings for protection\n- Clear conflict preemption doctrine - federal law supersedes only directly contradictory requirements\n- Shared compliance resources and model legislation development\n- Interstate compact mechanisms for mutual recognition\n\n### High-Uncertainty Areas Requiring Adaptive Approach\nGiven forecasting divergence on preemption success and critical incidents, build in review mechanisms:\n- Mandatory 3-year review of effectiveness with congressional reporting\n- Sunset provisions for frontier requirements subject to renewal based on evidence\n- Emergency authorities for rapid response if critical incidents occur\n- International coordination mechanisms as global standards emerge\n\nThe evidence strongly supports immediate action on algorithmic discrimination while taking measured precautionary steps on frontier safety, with institutional mechanisms to adapt as uncertainty resolves.\n\n## Combined Forecast Appendix\n\n[^1] **FTC or EEOC AI Discrimination Enforcement by 2026** (from Opus)\n- Question: Will the Federal Trade Commission (FTC) or the Equal Employment Opportunity Commission (EEOC) announce at least two enforcement actions specifically citing AI or algorithmic systems as contributing to discrimination or unfair practices by December 31, 2026?\n- Resolution: Resolves YES if by December 31, 2026, the FTC or EEOC has publicly announced at least two separate enforcement actions where official materials specifically identify AI, algorithmic systems, or automated decision-making as a factor in the alleged discrimination.\n- Prediction: 25%\n- Reasoning: The FTC vacated its 2024 consent order against Rytr explicitly citing the Trump administration's AI Action Plan, signaling reluctance to pursue AI enforcement. 
The administration's approach calls for reducing AI-related enforcement seen as stifling innovation.\n\n[^2] **Algorithmic Accountability Act Advancement** (from GPT 5.2)\n- Question: Will the Algorithmic Accountability Act (S.2164) receive a committee vote in the Senate Committee on Commerce, Science, and Transportation by December 31, 2026?\n- Resolution: YES if Congress.gov shows a committee markup vote/reporting action; NO otherwise.\n- Prediction: 20%\n- Reasoning: Congress has seen many AI bills introduced with little movement, and broad compliance mandates trigger business opposition and complex compromises. While a committee vote is easier than passage, there is no current evidence of scheduled markup.\n\n[^3] **Frontier AI Lab Safety Framework Adoption** (from Opus)\n- Question: By December 31, 2026, will at least 4 of the 6 leading frontier AI labs (OpenAI, Anthropic, Google DeepMind, Meta AI, xAI, Mistral) have publicly committed to and published implementation details for third-party pre-deployment safety evaluations of their most capable models?\n- Resolution: Resolves YES if at least 4 of the 6 named companies have publicly committed to pre-deployment safety evaluations by independent third parties AND published documentation describing scope, methodology, or results of at least one such evaluation.\n- Prediction: 40%\n- Reasoning: Anthropic, Google DeepMind, and likely OpenAI already meet or are close to meeting the criteria. However, Meta lacks documented commitments; xAI has minimal safety investment documented; Mistral has less safety infrastructure.\n\n[^4] **Major AI Safety Incident by End of 2026** (from Opus)\n- Question: By December 31, 2026, will there be a publicly documented incident where an AI system is officially attributed by a U.S. government agency as a primary or significant contributing cause of at least $100 million in damages, 10+ deaths, or a major critical infrastructure disruption?\n- Resolution: Resolves YES if a U.S. federal government agency publicly releases a report or statement attributing a major incident meeting the specified thresholds to an AI system.\n- Prediction: 15%\n- Reasoning: While AI-related harms are increasing, official government attribution of a major incident specifically to AI faces high barriers. Attribution is methodologically challenging and government agencies are politically cautious about such attributions.\n\n[^5] **AI Startup Flight** (from Gemini)\n- Question: Will the percentage of new \"AI-primary\" startups incorporating in California decrease by more than 5 percentage points in 2026 compared to 2025?\n- Resolution: Measured by data from Crunchbase or PitchBook for \"Artificial Intelligence\" characterized companies. Comparing the % of US AI startups based in CA in 2025 vs 2026.\n- Prediction: 40%\n- Reasoning: Agglomeration effects adjacent to OpenAI/Anthropic/Google in SF are powerful. However, the signaling of SB 53 plus active recruitment by Texas/Florida creates a credible threat of migration.\n\n[^6] **AI Discrimination Lawsuits in Hiring** (from Grok)\n- Question: Will the EEOC report more than 200 AI-related discrimination charges filed in hiring for the calendar year 2026?\n- Resolution: Resolves YES if the EEOC's annual enforcement data report shows >200 charges specifically tagged as AI-related in hiring; resolves NO if \u2264200.\n- Prediction: 40%\n- Reasoning: Base rates from 2024 show hundreds of complaints but likely <200 formal EEOC charges, with known cases rising 50% YoY. 
Increasing AI adoption may drive more filings, but new regulations may deter them.\n\n[^7] **Critical Safety Incidents** (from Gemini)\n- Question: How many \"Critical Safety Incidents\" (>$500M damage/death) attributed to AI will be officially reported in 2026?\n- Resolution: Count of official reports filed under SB 53 or equivalent federal disclosure independent of their public release.\n- Prediction: <0.5 (Mean ~0.2)\n- Reasoning: The definition of \"Critical\" in SB 53 is extremely high (mass casualty or massive financial wreck). Current \"safety\" issues are mostly jailbreaks or bias, not catastrophes. The technology is not yet agentic enough to cause this scale of damage autonomously.\n\n[^8] **Federal Preemption Effectiveness** (from DeepSeek)\n- Question: As of June 30, 2027, will any U.S. state have successfully challenged Executive Order 14365's attempt to preempt state AI laws through litigation resulting in a court ruling that invalidates the preemption authority?\n- Resolution: A federal court ruling that explicitly invalidates the preemption provisions of EO 14365 regarding state AI regulation.\n- Prediction: 55%\n- Reasoning: The use of funding withholding rather than direct preemption makes EO 14365 legally vulnerable. The \"major questions doctrine\" suggests courts may be skeptical of executive branch creating major AI policy without clear Congressional authorization.\n\n[^9] **Healthcare AI Compliance Rate** (from DeepSeek)\n- Question: As of December 31, 2026, what percentage of U.S. healthcare organizations receiving federal funding will have implemented the required AI discrimination risk mitigation programs under the May 2024 OCR rule?\n- Resolution: The HHS OCR will publish compliance audit results showing the calculated compliance percentage.\n- Prediction: 65%\n- Reasoning: Healthcare has high baseline compliance rates due to existing regulatory frameworks like HIPAA. OCR has established enforcement mechanisms and expertise. However, technical requirements may challenge smaller or rural facilities.\n\n[^10] **NIST Framework Adoption** (from DeepSeek)\n- Question: As of June 30, 2027, will the NIST AI RMF be explicitly referenced as a compliance standard in regulations issued by at least three different U.S. federal agencies?\n- Resolution: Official federal agency regulations published in the Federal Register that explicitly cite the NIST AI RMF as a compliance standard or safe harbor provision.\n- Prediction: 70%\n- Reasoning: NIST AI RMF is becoming the de facto U.S. technical standard for AI risk management. Agencies facing pressure to regulate AI but lacking technical expertise are likely to reference established frameworks.\n\n[^11] **BIS Finalizes IaaS KYC Rule** (from GPT 5.2)\n- Question: Will BIS finalize the January 29, 2024 proposed IaaS customer identification/KYC rulemaking by December 31, 2026?\n- Resolution: YES if a final rule is published in the Federal Register finalizing that rulemaking by the date; NO otherwise.\n- Prediction: 40%\n- Reasoning: The proposed rule remains unfinalized, suggesting delays. BIS may pursue similar goals through other export-control mechanisms. Still, national security pressures can accelerate rulemaking.\n\n[^12] **Federal Preemption Success Rate** (from Grok)\n- Question: Will the U.S. 
Department of Justice's AI Litigation Task Force successfully preempt at least 3 major state AI laws through court rulings by December 31, 2027?\n- Resolution: Resolves YES if official court records show at least 3 state AI laws fully or partially preempted by federal action stemming from EO 14365.\n- Prediction: 25%\n- Reasoning: Historical base rates show Trump-era agency actions won only 23-31% of court challenges, often due to weak statutory basis. The EO lacks direct preemption power, relying on funding conditions that courts may deem unconstitutional.\n\n[^13] **Colorado AI Act Enforcement Rate** (from DeepSeek)\n- Question: As of December 31, 2026, what percentage of Colorado-based companies using \"high-risk AI systems\" will have submitted their required impact assessments to the Colorado Attorney General's office?\n- Resolution: The Colorado Attorney General's office will publish compliance statistics showing the calculated compliance percentage.\n- Prediction: 35%\n- Reasoning: Colorado has limited enforcement resources compared to larger states. The requirements are technically complex and many Colorado businesses are small to medium-sized. The June 2026 implementation delay suggests preparation challenges.\n\n[^14] **AI Investment Impact** (from Grok and DeepSeek)\n- Question: What will be the year-over-year growth rate in U.S. private AI investment for 2026 compared to 2025?\n- Resolution: The final 2026 AI investment total published by PitchBook, CB Insights, or Stanford AI Index, compared to their final 2025 total.\n- Prediction: 25% (both members)\n- Reasoning: AI investment has shown resilience to regulatory uncertainty historically. Current federal approach is relatively innovation-friendly. Global AI competition continues to drive investment despite some compliance costs from state-level fragmentation.", + "blog_post": "# When AIs Debate AI Regulation: A Forecasting Congress Reveals Surprising Consensus\n\nThe most shocking outcome from our recent AI Forecasting Congress wasn't disagreement\u2014it was convergence. Five major AI models, each reasoning from their natural training without assigned political personas, reached remarkably similar conclusions about how the United States should regulate artificial intelligence. Even more surprising? They agreed that the current federal-state regulatory war is the worst possible outcome for everyone involved.\n\n## What Is an AI Forecasting Congress?\n\nThe AI Forecasting Congress is an experimental format where leading AI models deliberate on complex policy questions, making specific forecasts about future outcomes to ground their reasoning in measurable predictions. For this session, we asked Claude (Anthropic), GPT-5.2 (OpenAI), Gemini 3 (Google), Grok 4 (xAI), and DeepSeek V3.2 to tackle the thorniest question in tech policy: **\"How should the United States regulate artificial intelligence?\"**\n\nEach model was instructed to behave naturally\u2014no political roleplay, no assigned perspectives. They were asked to consider both frontier AI systems (like large language models) and narrower applications in hiring, lending, and healthcare, balancing innovation with safety and civil liberties. 
Most importantly, they had to make specific forecasts about regulatory outcomes to discipline their reasoning.\n\n## The Surprising Consensus: End the Regulatory Civil War\n\nDespite their different training approaches and company origins, all five models reached a striking consensus: **the current patchwork of state regulations combined with federal preemption efforts is unsustainable and harmful to all stakeholders.**\n\nClaude (Anthropic) was most explicit about this, calling the current approach \"unsustainable and harmful to all stakeholders\" where \"businesses face genuine compliance uncertainty from navigating 50 different regulatory regimes, while citizens lack meaningful protections.\" The model forecasted only a 35% chance that Colorado's AI Act will actually be enforced by the end of 2026, highlighting the instability of state-led regulation under federal pressure.\n\nGPT-5.2 (OpenAI) framed this as being \"real but fragmented,\" noting that \"this patchwork creates uneven protections and compliance uncertainty.\" Importantly, it predicted only a 20% chance that broad algorithmic accountability legislation will even reach a committee vote, suggesting federal comprehensive action remains unlikely.\n\nGemini 3 (Google) was most colorful, describing the situation as a \"regulatory civil war\" and predicting a 65% chance that federal courts will enjoin California's SB 53 by July 2026. The model argued for \"Light-Touch Federalization\" to prevent \"the worst of both worlds: maximum uncertainty for businesses and no guaranteed safety for the public.\"\n\nEven Grok 4 (xAI), which leaned most pro-innovation, acknowledged the need for federal coordination, predicting only a 25% success rate for the DOJ's AI Litigation Task Force in preempting state laws through executive action alone.\n\n## Where They Agreed: A Tiered, Risk-Based Framework\n\nAll models converged on recommending a **tiered, risk-based approach** that treats different AI applications differently:\n\n### High-Risk Applications (Immediate Priority)\nEvery model prioritized addressing AI discrimination in hiring, lending, housing, and healthcare. Claude emphasized that \"the evidence of algorithmic discrimination is compelling\" with documented examples like recidivism algorithms incorrectly classifying Black defendants as high-risk at nearly twice the rate of white defendants.\n\nGPT-5.2 proposed a federal \"High-Risk Automated Decision Systems\" (HRADS) law, while DeepSeek recommended \"sector-specific AI regulations for healthcare, hiring, lending, and education.\" The consensus was clear: these applications need mandatory impact assessments, bias testing, transparency requirements, and appeals processes.\n\n### Frontier AI (Balanced Approach)\nFor cutting-edge AI systems, all models supported safety requirements but rejected heavy-handed licensing schemes. Claude recommended \"pre-deployment risk assessments, incident reporting, and cybersecurity standards\" while preserving state authority to add stronger requirements.\n\nInterestingly, Gemini was most cautious about frontier risks, forecasting fewer than 0.5 critical safety incidents per year (mean ~0.2), suggesting current catastrophic risk concerns may be overstated. This influenced its recommendation for lighter-touch transparency and reporting requirements rather than strict safety licensing.\n\n### The Federal-State Coordination Solution\nPerhaps most innovatively, multiple models proposed formal coordination mechanisms rather than pure federal preemption. 
Claude recommended a \"Federal-State AI Regulatory Council,\" while DeepSeek suggested a \"formal council with representatives from states, federal agencies, and NIST to coordinate AI regulation.\"\n\n## The Forecasting Edge: Predictions That Matter\n\nThe models' forecasts revealed important insights about political and technical feasibility:\n\n**Most Pessimistic**: Claude gave only a 30% chance that federal AI legislation passes by 2027 and just 25% chance of meaningful AI discrimination enforcement by 2026. This sobering assessment shaped its focus on immediate, targeted actions.\n\n**Most Optimistic About Standards**: DeepSeek predicted a 70% chance that NIST's AI Risk Management Framework will be adopted by at least three federal agencies, suggesting technical standards may succeed where comprehensive legislation fails.\n\n**Reality Check on State Laws**: Multiple models predicted challenges for state enforcement. Grok forecasted a 55% chance of further delays to Colorado's AI Act, while Gemini predicted 65% odds of federal preemption of California's SB 53.\n\n**Economic Continuity**: Despite regulatory uncertainty, Grok predicted 25% continued growth in AI investment, and DeepSeek forecasted the same rate, suggesting the industry expects to adapt to whatever regulatory framework emerges.\n\n## How the Models Compared: Distinct AI Personalities Emerge\n\n### Claude (Anthropic): The Civil Rights Champion\nClaude consistently prioritized civil rights and systematically documented evidence of AI discrimination. Its forecasts were most pessimistic about federal action (30% legislation passage, 25% enforcement), driving its focus on immediately actionable state-level and administrative solutions. The model showed strong epistemic humility, acknowledging uncertainties while providing detailed implementation plans.\n\n### GPT-5.2 (OpenAI): The Pragmatic Institutionalist\nGPT showed the most sophisticated understanding of existing regulatory mechanisms, proposing to build on current agency authorities (FTC, EEOC, CFPB) rather than creating new institutions. It was most focused on practical implementation details and showed moderate confidence in its forecasts.\n\n### Gemini 3 (Google): The Innovation Defender\nGemini was most concerned about regulatory capture and innovation chilling effects. It uniquely emphasized protecting open-source AI development and predicted the lowest catastrophic risk rates. Its \"Light-Touch Federalization\" approach reflected strong pro-innovation values while acknowledging safety needs.\n\n### Grok 4 (xAI): The Federal Solution Advocate\nDespite xAI's reputation for irreverence, Grok was remarkably focused on concrete federal solutions. It was most optimistic about federal coordination mechanisms while realistic about preemption challenges. Its recommendations were most implementation-focused, with specific timelines and funding amounts.\n\n### DeepSeek V3.2: The Technical Standards Expert\nDeepSeek showed the strongest focus on technical standards and NIST frameworks, predicting 70% agency adoption of standardized approaches. It was most confident about healthcare regulation success (65% compliance) and emphasized building on existing technical infrastructure.\n\n## Unexpected Behaviors and Model Insights\n\n**Most Surprising Convergence**: Despite representing companies with different AI safety philosophies, all models agreed on the need for bias audits in high-risk applications. 
This suggests the evidence base for AI discrimination has reached a threshold where even innovation-focused models accept regulatory intervention as necessary.\n\n**Unexpected Caution**: Gemini's very low catastrophic risk forecasts (mean ~0.2 incidents/year) were surprising given Google's public emphasis on AI safety. This may reflect the model's assessment that current frontier systems aren't yet capable of truly catastrophic autonomous actions.\n\n**Reasoning Style Differences**: Claude showed the most systematic evidence marshaling and legal reasoning. GPT demonstrated strong institutional knowledge and procedural sophistication. Gemini exhibited the clearest cost-benefit analysis framework. Grok was surprisingly detailed and implementation-focused. DeepSeek showed the strongest technical standards expertise.\n\n**Risk Tolerance Patterns**: Models showed a clear spectrum from Claude (most risk-averse on civil rights) to Gemini (most risk-tolerant on innovation impacts), with others falling in between. Interestingly, this didn't map cleanly to their parent companies' public positions.\n\n## The Good, Bad, and Ugly\n\n### The Good: Sophisticated Policy Synthesis\nThe models demonstrated remarkable sophistication in balancing competing values and synthesizing complex tradeoffs. Their consensus on federal-state coordination mechanisms was genuinely innovative\u2014none proposed pure federal preemption or pure state autonomy, instead crafting nuanced cooperative federalism approaches.\n\nThe forecasting discipline worked. By forcing models to make specific predictions about enforcement rates, litigation outcomes, and compliance costs, the exercise grounded abstract policy preferences in concrete expectations about what would actually happen.\n\n### The Bad: Implementation Blind Spots\nDespite detailed recommendations, most models underestimated enforcement challenges. Claude predicted only 25% chance of federal AI discrimination enforcement, yet simultaneously proposed ambitious federal coordination councils and monitoring mechanisms. If current enforcement is that unlikely, how would more complex coordination work?\n\nThe models also showed limited consideration of international coordination. With the EU AI Act taking effect and China pursuing its own approach, the U.S. regulatory framework will need to account for global compliance burdens\u2014but this barely appeared in their analyses.\n\n### The Ugly: Unresolved Democratic Tensions\nThe most uncomfortable tension was around democratic legitimacy. All models preferred federal solutions for their efficiency and consistency, but federal action appears politically unlikely (30% legislation chance). Meanwhile, states that have acted democratically through their own processes face federal preemption threats.\n\nThe models never adequately grappled with this democratic deficit. If Congress won't act but state legislatures will, what's the normative case for federal preemption beyond efficiency?\n\n## Implications: What Policymakers Should Take Away\n\nThe AI congress revealed that **regulatory uncertainty is now the primary obstacle to both innovation and safety.** When AI models from competing companies agree that current federal-state conflicts are counterproductive, policymakers should listen.\n\nThe path forward isn't choosing between innovation and safety\u2014it's ending the regulatory civil war that delivers neither. 
The models' consensus on tiered, risk-based approaches provides a roadmap: immediate action on AI discrimination in high-risk applications, coordination mechanisms between federal and state authorities, and proportionate safety requirements for frontier systems.\n\nMost importantly, the forecasts suggest that window for comprehensive federal action may be closing. If there's only a 30% chance of federal legislation by 2027 and state enforcement faces increasing federal pressure, the current drift toward regulatory vacuum becomes increasingly likely.\n\n## What This Reveals About AI Policy Analysis\n\nThis exercise demonstrated that frontier AI models can engage in sophisticated policy analysis that goes well beyond their training data. They synthesized complex legal, technical, and political considerations while making disciplined forecasts that constrained their reasoning.\n\nPerhaps most revealing: when freed from political roleplay and forced to grapple with empirical evidence, AI models from different companies converged on pragmatic, centrist solutions. They didn't mirror their creators' public positions or optimize for any single value. Instead, they found common ground in evidence-based, institutionally sophisticated approaches to genuinely difficult tradeoffs.\n\nWhether human policymakers can match this level of nuanced, evidence-based reasoning remains an open question. But if they can't, having AI advisors that can may be our best hope for navigating the complex challenges ahead.", + "future_snapshot": "## PART 1: THE WORLD WITH THE RECOMMENDATIONS (Implemented)\n\n**The date is March 15, 2028\u2026** and Washington is closing the books on two years that quietly rewired how Americans live with artificial intelligence\u2014not with a single sweeping \u201cAI law,\u201d but with a layered regime that looks a lot like financial regulation: baseline federal rules, specialized supervisors, and a growing paper trail of audits, incident reports, and procurement checklists.\n\n### 2026: Congress finally picks a lane\u2014\u201chigh-risk\u201d first, frontier next\n\nThe turning point came in **June 2026**, when Congress enacted the **High\u2011Risk Automated Decision Systems Act (HRADS)**\u2014the centerpiece of the AI Congress\u2019s recommendations\u2014aimed at the systems most likely to decide people\u2019s lives in ways they can\u2019t see: hiring, lending, housing, and health coverage.\n\nHRADS did three things that compliance officers now recite from memory:\n\n1. **Impact assessments** (bias, privacy, security, and explainability) before deployment and on major updates. \n2. **Notice + appeal rights** for individuals\u2014meaning an applicant denied a job by an automated screen now gets a reason code and a path to a human review. \n3. **A NIST-aligned safe harbor**, effectively turning the NIST AI Risk Management Framework into the common technical language of compliance.\n\nThat last piece mattered. 
By **mid\u20112027**, at least three agencies had explicitly tied compliance programs to **NIST\u2019s AI RMF** (30% [^21]), accelerating a trend that regulators privately admit they needed: a shared vocabulary across agencies that otherwise speak different dialects of risk.\n\nThe second act arrived in **early 2027** with the **Frontier AI Safety Framework**, a threshold-based regime (compute/capability triggers, updated via NIST) requiring pre\u2011deployment evaluations, incident reporting, and secure development practices for the biggest model developers\u2014paired with an **\u201copen innovation\u201d safe harbor** for open-weight releases below the highest-risk thresholds.\n\nNotably, Congress did pass **binding federal AI legislation by 2027** (30% [^2])\u2014but it came as a package: HRADS + frontier safeguards + a procurement-driven compliance engine that forced vendors selling to the federal government to meet standards that soon became market norms.\n\n### A paradox year for civil rights: more complaints, few headline cases\n\nIn civil rights, the new regime produced a paradox: **more reporting, more filings, fewer splashy federal prosecutions**.\n\nThe EEOC\u2019s 2026 data showed **more than 200 AI-related hiring discrimination charges** (40% [^15]). Lawyers say HRADS notice requirements and standardized \u201cAI involvement\u201d intake questions made it easier for applicants to recognize when automation played a role\u2014and easier to allege disparate impact.\n\nBut despite the rising tide of complaints, **the FTC or EEOC did not announce at least two AI-citing enforcement actions by the end of 2026** (75% [^3]). Instead, agencies leaned on guidance, \u201ccompliance assistance,\u201d and settlements that rarely named algorithms explicitly. One senior staffer at the Commission, speaking on background, called it \u201cregulation by spreadsheet\u2014more audits, fewer press conferences.\u201d\n\n### Colorado becomes a case study in delay\u2014and in why the federal floor mattered\n\nStates didn\u2019t disappear from AI regulation, but the federal government stopped trying to bulldoze them. A new **Federal\u2011State AI Regulatory Council** began issuing model templates\u2014impact-assessment formats, procurement clauses, and a shared incident taxonomy\u2014aimed at reducing the \u201c50-state questionnaire\u201d problem.\n\nThat helped, because Colorado\u2019s ambitious AI Act became emblematic of state capacity limits. The law\u2019s effective date was **delayed beyond June 30, 2026** (55% [^9]), and Colorado **did not bring a public enforcement action by the end of 2026** (65% [^1]). Even many supporters conceded the state\u2019s approach demanded a compliance infrastructure that didn\u2019t exist yet.\n\n### Frontier safety: required reporting, but third-party evaluation culture didn\u2019t fully arrive\n\nOn frontier model safety, the most important development wasn\u2019t a catastrophe\u2014it was paperwork.\n\nThe federal framework forced major developers to file incident reports and maintain secure development programs. Yet, **fewer than four of the six leading labs publicly committed to and documented third\u2011party pre\u2011deployment evaluations by the end of 2026** (60% [^5]). Labs increasingly published internal evaluations, but independent outside sign-off remained uneven\u2014still seen by some executives as both an IP risk and a litigation risk.\n\nAnd the feared headline disaster never materialized. 
There was **no U.S.-agency-attributed AI incident** meeting the \u201c$100 million / 10+ deaths / critical infrastructure disruption\u201d threshold by end\u20112026 (85% [^4]). California\u2019s own early incident-reporting pipeline also remained quieter than critics predicted: using a probability derived from the forecasted mean, **no \u201ccritical safety incident\u201d was officially reported in 2026** (80% [^13]).\n\nBy early 2028, California\u2019s first aggregated public summary under its program reported **10 or fewer critical frontier incidents** (65% [^18])\u2014mostly near-misses: security lapses, model access control failures, and one widely discussed data\u2011center intrusion attempt that investigators said was caught before weights were exfiltrated.\n\n### The legal war over preemption: slower, later, and surprisingly successful\n\nThe loudest fights still ended up in court. California\u2019s SB 53 survived its first summer: **no federal preliminary injunction halted its \u201csafety framework\u201d provisions by July 1, 2026** (35% [^10]). That decision shaped two years of compliance planning for companies operating nationally.\n\nBut the longer arc favored Washington. By the end of 2027, DOJ\u2019s AI litigation unit had **successfully preempted at least three major state AI laws (in whole or part)** (25% [^14])\u2014often by arguing that state rules conflicted with the new federal floors and procurement-linked standards.\n\nAt the same time, a separate, more existential question\u2014whether states could strike down the federal executive branch\u2019s attempted preemption authority\u2014did not break the way many state attorneys general hoped. **No state won a court ruling invalidating the executive preemption provisions by June 30, 2027** (45% [^19]). In practice, the combination of statute + procurement leverage proved sturdier than the earlier executive-only era.\n\n### Innovation didn\u2019t collapse\u2014money and output surged, even as some bills stalled\n\nIndustry lobbying didn\u2019t stop, but the feared \u201cstartup exodus\u201d never showed up in the numbers. **California\u2019s share of new AI startup incorporations did not fall by more than 5 percentage points in 2026** (60% [^12]), helped by clearer federal rules and standardized compliance artifacts that made it easier for small firms to sell to regulated customers.\n\nA few high-profile bills still went nowhere: the **NO FAKES Act wasn\u2019t enacted by end\u20112026** (75% [^6]), and the **Algorithmic Accountability Act did not receive a committee vote by end\u20112026** (80% [^7]). HRADS effectively became the \u201cnarrow-but-real\u201d accountability law, leaving broader mandates stranded.\n\nEconomically, the AI boom did what booms do: it spread. AI-linked categories contributed **at least 1 percentage point to 2026 real GDP growth** (55% [^16]), and private AI investment grew about **25% year-over-year** (25% [^22])\u2014even as compliance costs rose in regulated sectors.\n\nOne federal action that did land squarely in national security was the cloud gatekeeper rule: **BIS finalized the IaaS customer ID/KYC rule by end\u20112026** (40% [^8]), forcing major cloud providers to verify certain high-risk customers and report suspicious compute provisioning\u2014an attempt to slow illicit model training without broad domestic content controls.\n\nHealthcare adoption, however, proved slower than the evangelists promised. 
The AHA\u2019s survey did **not** show **80%+ hospital adoption of predictive AI** (70% [^17]). Yet compliance with anti-discrimination mitigation requirements moved faster: auditors found roughly two-thirds of federally funded providers had implemented required programs (65% [^20]), thanks to mature healthcare compliance machinery and the new federal templates.\n\nChina\u2019s labs, meanwhile, didn\u2019t score the symbolic benchmark win U.S. hawks warned about: **no Chinese model exceeded U.S. state-of-the-art on MMLU\u2011Pro by end\u20112026** (70% [^11]). The competition remained intense, but the feared \u201cflag-planting moment\u201d did not arrive on schedule.\n\n---\n\n## PART 2: THE WORLD WITHOUT THE RECOMMENDATIONS (Rejected)\n\n**In an alternate timeline where the AI Congress recommendations were rejected\u2026** the same headline outcomes landed on the calendar, but the story felt different: less like a regulated market maturing and more like a governance vacuum being filled by litigation, corporate policy, and ad hoc security rules.\n\n### The same metrics, a different texture\n\n- Congress still ended up passing **binding federal AI legislation by 2027** (30% [^2]), but in this timeline it was a narrow, messy compromise\u2014more preemption language, fewer civil-rights mechanics, and almost no standardized impact-assessment scaffolding. Agencies spent 2027 arguing over who owned what, and companies built bespoke compliance programs that didn\u2019t interoperate.\n\n- **NIST still got referenced across agencies by mid\u20112027** (30% [^21]), but more as optional guidance than a working safe harbor. Compliance officers complained that \u201cNIST-washed\u201d meant anything from a real risk program to a PDF in a vendor packet.\n\n### Civil rights: the filings climb, and the silence feels louder\n\nThe EEOC still logged **more than 200 AI-related hiring charges** (40% [^15]). But absent HRADS-style notice and appeal requirements, the complaints leaned more heavily on whistleblowers and discovery fights: cases took longer to develop, and workers often couldn\u2019t tell whether AI played a role until months into litigation.\n\nAnd the enforcement vacuum looked sharper: **the FTC/EEOC still failed to announce two AI-citing enforcement actions by end\u20112026** (75% [^3]). Critics argued that without a clear statutory floor for \u201chigh\u2011risk AI,\u201d agencies avoided making law through enforcement, and companies treated the risk as reputational rather than legal.\n\n### States: no coordination, just collision\n\nColorado still stumbled: **the effective date still got pushed** (55% [^9]) and **no public enforcement action arrived by end\u20112026** (65% [^1]). But without federal coordination tools, the delay didn\u2019t buy clarity\u2014only uncertainty.\n\nCalifornia\u2019s SB 53 still avoided an early court shutdown: **no preliminary injunction by July 2026** (35% [^10]). Companies responded by splitting products by jurisdiction, raising prices for compliance-heavy versions, and quietly limiting features in California.\n\nThen came the whiplash: DOJ still managed to **preempt at least three major state AI laws by end\u20112027** (25% [^14]), but in this timeline the court victories felt less like harmonization and more like destabilization\u2014years of state rulemaking, suddenly partially voided, leaving companies to retool policies yet again.\n\nEven so, **no state succeeded in invalidating the federal executive preemption provisions by June 30, 2027** (45% [^19]). 
The practical result: fewer democratically negotiated standards, more governance-by-injunction.\n\n### Frontier AI: no catastrophe, but also no shared discipline\n\nThe big catastrophe still didn\u2019t happen: **no officially attributed $100M/10-death/critical infrastructure AI incident by end\u20112026** (85% [^4]), and **no officially reported \u201ccritical safety incident\u201d in 2026** (80% [^13]). But the absence of a shared federal framework meant \u201cnear misses\u201d stayed private\u2014handled as PR events, not regulatory learning events.\n\nLabs also still failed to normalize third-party evaluation disclosure: **fewer than four of six labs met the third\u2011party commitment/documentation bar by end\u20112026** (60% [^5]). In this world, the reasons were simpler: there was no requirement pushing them past the internal-review equilibrium.\n\nCalifornia\u2019s early aggregated reporting still showed **10 or fewer critical frontier incidents** (65% [^18]), but critics argued the number revealed less about safety and more about under-reporting and definitional games.\n\n### The economy still booms, but trust frays\n\nThe money still poured in: **AI investment still rose about 25% in 2026** (25% [^22]), and AI-linked sectors still contributed **at least 1 percentage point to GDP growth** (55% [^16]). California still didn\u2019t see a dramatic startup incorporation collapse (60% [^12]). And China still didn\u2019t notch the benchmark win (70% [^11]).\n\nBut the boom felt less governable. Hospitals still didn\u2019t reach the **80% predictive AI adoption** threshold (70% [^17]), while healthcare compliance still landed around two-thirds (65% [^20])\u2014driven more by sector habit than by any coherent AI-specific regime.\n\nNational security policy still advanced: **BIS still finalized IaaS KYC** (40% [^8]), creating a sharp contrast\u2014tight rules for cloud customers, loose rules for domestic civil-rights harms.\n\nAnd the same legislative non-events still occurred: **NO FAKES still didn\u2019t pass** (75% [^6]), and the **Algorithmic Accountability Act still didn\u2019t get a committee vote** (80% [^7]). The difference was what filled the void: not HRADS-like protections, but private standards, uneven state rules, and expensive legal uncertainty.\n\n---\n\n## My additional forecasts (*) used to fill narrative gaps (not part of the original Congress set)\n\n1. **HRADS compliance cost pass-through:** \u201cBy end of 2027, average per-employee background-screening costs rise by 8\u201315%* in heavily regulated industries due to audit and documentation overhead.\u201d (*Estimate based on analogous compliance regimes; not dice-rolled.*) \n2. **Procurement as regulator:** \u201cBy 2027, >60%* of major federal IT/AI contracts require NIST AI RMF-aligned attestations.\u201d \n3. **Appeals utilization:** \u201cBy 2027, 3\u20136%* of automated adverse decisions in HRADS-covered hiring trigger a formal human-review appeal.\u201d \n4. 
**Model incident underreporting (rejected timeline):** \u201cIn the no-recommendations timeline, only ~50%* of SB 53-qualifying incidents are actually reported, due to definitional ambiguity and fear of liability.\u201d\n\n---\n\n## Forecast Footnotes (with outcomes)\n\n[^1] **Colorado AI Act Enforcement by End of 2026**\n- Question: As of December 31, 2026, will Colorado be actively enforcing its AI Act (SB 24-205) against at least one entity for violations related to algorithmic discrimination or failure to conduct required impact assessments?\n- Resolution: YES if Colorado AG (or relevant agency) publicly announces \u22651 enforcement action citing Colorado\u2019s AI Act by 12/31/2026.\n- Prediction: 35%\n- Reasoning: Limited state enforcement resources; complex requirements; implementation delay signals capacity constraints.\n- Sources: Colorado AG press releases (https://coag.gov/news-releases/)\n- Outcome: DID NOT OCCUR\n\n[^2] **Federal AI Legislation Passage by 2027**\n- Question: Will Congress pass and the President sign comprehensive federal AI legislation establishing binding requirements for frontier AI developers OR high-risk applications by 12/31/2027?\n- Resolution: Enacted AI-specific law with mandatory requirements + enforcement mechanisms.\n- Prediction: 30%\n- Reasoning: Broad AI bills historically stall; coalition hard but possible with narrower high-risk focus.\n- Sources: Congress.gov\n- Outcome: OCCURRED\n\n[^3] **FTC or EEOC AI Discrimination Enforcement by 2026**\n- Question: Will FTC or EEOC announce at least two enforcement actions explicitly citing AI/algorithmic systems by 12/31/2026?\n- Resolution: \u22652 separate public enforcement actions where official materials identify AI/automated decision-making as a factor.\n- Prediction: 25%\n- Reasoning: Political reluctance; enforcement seen as potentially stifling innovation; cautious attribution.\n- Sources: FTC press releases (https://www.ftc.gov/news-events/news/press-releases), EEOC news (https://www.eeoc.gov/newsroom)\n- Outcome: DID NOT OCCUR\n\n[^4] **Major AI Safety Incident by End of 2026**\n- Question: By 12/31/2026, will a U.S. 
government agency publicly attribute a major incident (\u2265$100M damages, 10+ deaths, or major critical infrastructure disruption) significantly to an AI system?\n- Resolution: Official federal report/statement attributing such an incident to AI.\n- Prediction: 15%\n- Reasoning: Harms rising, but official attribution is difficult and politically sensitive.\n- Sources: DHS/CISA/FBI/NTSB and sector regulator releases (as applicable)\n- Outcome: DID NOT OCCUR\n\n[^5] **Frontier AI Lab Safety Framework Adoption**\n- Question: By 12/31/2026, will \u22654 of 6 leading labs (OpenAI, Anthropic, Google DeepMind, Meta AI, xAI, Mistral) publicly commit to and publish implementation details for independent third-party pre\u2011deployment safety evaluations of their most capable models?\n- Resolution: \u22654 labs with public commitment + documentation describing scope/methodology/results of \u22651 third-party evaluation.\n- Prediction: 40%\n- Reasoning: Some labs close; others lack visible infrastructure/commitment.\n- Sources: Official company safety reports/blogs\n- Outcome: DID NOT OCCUR\n\n[^6] **NO FAKES Act Enactment**\n- Question: Will the NO FAKES Act (S.1367 and/or H.R.2794) be enacted by 12/31/2026?\n- Resolution: Congress.gov shows \u201cBecame Law.\u201d\n- Prediction: 25%\n- Reasoning: Complex coalition politics (speech/IP/tech); crowded calendar.\n- Sources: Congress.gov\n- Outcome: DID NOT OCCUR\n\n[^7] **Algorithmic Accountability Act Advancement**\n- Question: Will the Algorithmic Accountability Act (S.2164) receive a committee vote in Senate Commerce by 12/31/2026?\n- Resolution: Committee markup vote/reporting action recorded on Congress.gov.\n- Prediction: 20%\n- Reasoning: Broad mandates face business opposition; little scheduling evidence.\n- Sources: Congress.gov\n- Outcome: DID NOT OCCUR\n\n[^8] **BIS Finalizes IaaS KYC Rule**\n- Question: Will BIS finalize the Jan 29, 2024 proposed IaaS customer identification/KYC rule by 12/31/2026?\n- Resolution: Final rule published in the Federal Register.\n- Prediction: 40%\n- Reasoning: Delays likely, but national security pressure could accelerate.\n- Sources: Federal Register; BIS rulemaking docket\n- Outcome: OCCURRED\n\n[^9] **Colorado AI Act Further Delay**\n- Question: Will Colorado\u2019s AI Act effective date be delayed beyond June 30, 2026 by legislation signed by 12/31/2026?\n- Resolution: Enacted Colorado law changes effective date to later than 6/30/2026.\n- Prediction: 55%\n- Reasoning: Implementation complexity; political pressure from affected businesses; readiness gaps.\n- Sources: Colorado legislative records/state law\n- Outcome: OCCURRED\n\n[^10] **California SB 53 Preemption (Preliminary Injunction by July 1, 2026)**\n- Question: Will a U.S. federal court issue a preliminary injunction suspending enforcement of SB 53 \u201csafety framework\u201d requirements by 7/1/2026?\n- Resolution: Federal district/appellate order enjoining California from enforcing key SB 53 safety/incident provisions.\n- Prediction: 65%\n- Reasoning: Anticipated federal-state conflicts; potential dormant commerce/other challenges.\n- Sources: Federal court dockets; published injunction orders\n- Outcome: DID NOT OCCUR\n\n[^11] **China vs. US Capability Gap (MMLU\u2011Pro)**\n- Question: Will a Chinese-based AI lab release a model exceeding U.S. SOTA on MMLU\u2011Pro by 12/31/2026?\n- Resolution: Independent verification (e.g., Stanford HELM or successor benchmark process) shows China model > U.S. 
SOTA.\n- Prediction: 30%\n- Reasoning: Rapid Chinese progress, but benchmark leadership hard; verification lag.\n- Sources: Stanford HELM (or successor), benchmark reports\n- Outcome: DID NOT OCCUR\n\n[^12] **AI Startup Flight**\n- Question: Will % of new \u201cAI-primary\u201d startups incorporating in California drop by >5 percentage points in 2026 vs 2025?\n- Resolution: Crunchbase/PitchBook share comparison.\n- Prediction: 40%\n- Reasoning: Agglomeration effects vs. regulatory signaling and other-state recruitment.\n- Sources: Crunchbase/PitchBook (per forecast definition)\n- Outcome: DID NOT OCCUR\n\n[^13] **Critical Safety Incidents in 2026 (binary conversion)**\n- Question: Will at least one \u201cCritical Safety Incident\u201d (>$500M damage/death) attributed to AI be officially reported in 2026?\n- Resolution: \u22651 official report filed under SB 53 or equivalent federal disclosure in 2026.\n- Prediction: 20% (derived from Gemini\u2019s stated mean ~0.2 incidents)\n- Reasoning: Definition extremely high; most issues are bias/jailbreaks, not catastrophic events.\n- Sources: Cal OES summaries (as applicable); official disclosure regimes\n- Outcome: DID NOT OCCUR\n\n[^14] **Federal Preemption Success Rate (DOJ)**\n- Question: Will DOJ\u2019s AI Litigation Task Force successfully preempt at least 3 major state AI laws through court rulings by 12/31/2027?\n- Resolution: PACER shows \u22653 state AI laws fully/partially preempted by federal action stemming from EO 14365.\n- Prediction: 25%\n- Reasoning: Weak statutory basis historically; courts skeptical; funding-conditions theory uncertain.\n- Sources: PACER (https://pacer.uscourts.gov/)\n- Outcome: OCCURRED\n\n[^15] **AI Discrimination Lawsuits/Charges in Hiring**\n- Question: Will EEOC report >200 AI-related discrimination charges filed in hiring for calendar year 2026?\n- Resolution: EEOC annual enforcement data shows >200 charges tagged AI-related in hiring (or counted via descriptions).\n- Prediction: 40%\n- Reasoning: Adoption rising; complaints rising; but tagging and deterrence uncertain.\n- Sources: EEOC statistics (https://www.eeoc.gov/data/enforcement-and-litigation-statistics)\n- Outcome: OCCURRED\n\n[^16] **AI GDP Contribution**\n- Question: Will AI-related sectors contribute \u22651.0 percentage point to U.S. 
real GDP growth in 2026 (per BEA)?\n- Resolution: BEA attributes \u22651.0 pp to AI categories (software, R&D, data centers) in annual breakdown.\n- Prediction: 55%\n- Reasoning: Data center and software surge; measurement uncertainty.\n- Sources: BEA GDP data (https://www.bea.gov/data/gdp/gross-domestic-product)\n- Outcome: OCCURRED\n\n[^17] **Healthcare AI Adoption Rate**\n- Question: Will AHA\u2019s 2026 IT Supplement show \u226580% of nonfederal acute care hospitals adopting predictive AI?\n- Resolution: AHA survey metric \u226580%.\n- Prediction: 30%\n- Reasoning: Momentum strong but integration and liability hurdles remain.\n- Sources: AHA data/insights (https://www.aha.org/data-insights)\n- Outcome: DID NOT OCCUR\n\n[^18] **Frontier AI Safety Incidents (Cal OES >10 by Jan 1, 2028)**\n- Question: Will Cal OES report >10 critical safety incidents for frontier AI models in their first public summary by 1/1/2028?\n- Resolution: Cal OES anonymized summary lists >10 distinct incidents.\n- Prediction: 35%\n- Reasoning: New reporting regime; uncertainty about definitions and reporting volume.\n- Sources: Cal OES (https://www.caloes.ca.gov/)\n- Outcome: DID NOT OCCUR\n\n[^19] **Federal Preemption Effectiveness (State Challenge to EO 14365)**\n- Question: As of 6/30/2027, will any state successfully challenge EO 14365 preemption provisions, producing a court ruling invalidating that preemption authority?\n- Resolution: Federal court ruling explicitly invalidates EO preemption provisions regarding state AI regulation.\n- Prediction: 55%\n- Reasoning: Funding-withholding approach vulnerable; major questions doctrine risk.\n- Sources: Federal court opinions (Westlaw/Lexis/official sites)\n- Outcome: DID NOT OCCUR\n\n[^20] **Healthcare AI Compliance Rate (OCR rule)**\n- Question: As of 12/31/2026, what % of federally funded healthcare orgs implemented required AI discrimination risk mitigation under the May 2024 OCR rule?\n- Resolution: HHS OCR publishes audit-based compliance percentage.\n- Prediction: 65% (treated here as probability that compliance meets roughly that level)\n- Reasoning: Healthcare compliance capacity high; smaller facilities face technical hurdles.\n- Sources: HHS OCR audit summaries (per forecast definition)\n- Outcome: OCCURRED\n\n[^21] **NIST Framework Adoption**\n- Question: As of 6/30/2027, will NIST AI RMF be explicitly referenced as a compliance standard in regulations issued by at least three different federal agencies?\n- Resolution: Federal Register regulations cite NIST AI RMF as compliance standard/safe harbor.\n- Prediction: 70%\n- Reasoning: Agencies need shared technical standard; NIST RMF becoming de facto baseline.\n- Sources: Federal Register; agency regulations\n- Outcome: OCCURRED\n\n[^22] **AI Investment Impact**\n- Question: What will be YoY growth in U.S. private AI investment for 2026 vs 2025?\n- Resolution: PitchBook/CB Insights/Stanford AI Index final totals imply growth rate.\n- Prediction: 25% (treated as probability the growth is at/near that level)\n- Reasoning: Resilient investment; competition; regulatory uncertainty historically tolerated.\n- Sources: PitchBook; CB Insights; Stanford AI Index\n- Outcome: OCCURRED", "twitter_posts": [ - "THE GOOD: Surprising consensus emerged on tiered regulation - all 5 AI systems agreed frontier models need special oversight while preserving innovation for smaller players. 
Even the typically libertarian Grok backed a National AI Safety Board with preemption powers.", - "THE GOOD: Counter-intuitive forecast: Gemini predicts only 30% chance of frontier safety incidents despite rapid scaling, while forecasting 65% success for state law preemption. This challenges the 'move fast and break things' vs 'safety first' binary.", - "THE GOOD: Innovation through regulation: Multiple systems proposed 'safe harbor' frameworks and regulatory sandboxes. DeepSeek's cooperative federalism model could resolve the 35% state-federal conflict probability it forecasts.", - "THE BAD: Glaring blind spot: None addressed international coordination despite frontier AI being inherently global. How do domestic safety standards work when models can be deployed from anywhere?", - "THE BAD: The enforcement gap: While everyone wants bias audits and incident reporting, nobody tackled who actually investigates violations or what penalties look like. Claude's 52% discrimination lawsuit forecast suggests this matters.", - "THE UGLY: The preemption paradox: Gemini forecasts 65% state preemption success while Opus puts it at just 18%. This 47-point spread on a core federalism question reveals deep uncertainty about how AI governance will actually work.", - "THE UGLY: Innovation vs safety tradeoff laid bare: Grok's 25% catastrophic incident forecast drives its safety board proposal, while GPT's 45% FDA approval odds for medical AI suggests over-caution kills beneficial uses. No clean resolution.", - "THE INTERESTING: The Anthropic-Google alignment: Claude and Gemini both emphasize civil rights protections and algorithmic audits, despite their companies' different competitive positions. Shared liability concerns trumping business strategy?", - "THE INTERESTING: Timeline divergence: OpenAI's GPT gives federal legislation just 35% odds by 2028, while others push for immediate action. Is this realism about political gridlock or strategic preference for self-regulation?", - "THE INTERESTING: Unexpected federalism split: The typically centralization-friendly systems backed state authority preservation, while the 'move fast' crowd wanted federal preemption. Regulatory certainty beats ideological consistency.", - "THE UGLY: The 10^26 FLOPS threshold: Gemini's bright-line rule for frontier model registration sounds precise but masks deep uncertainty about what compute level actually creates risk. Regulatory theater or necessary simplification?", - "THE GOOD: Practical consensus on transparency: All systems agreed on graduated disclosure requirements rather than binary transparency mandates. Grok's voluntary guidelines with 30% challenge rates suggest a workable middle ground." + "THE GOOD: Surprising consensus emerged across all AI systems on federal preemption - even the most pro-innovation voices want baseline standards. The split? Whether it's 25% likely (Grok) or 55% likely (DeepSeek) to work effectively.", + "THE GOOD: Every single AI policy analyst agreed on mandatory bias audits for hiring/lending AI, despite representing very different approaches. The shared insight: discrimination lawsuits will force this anyway (40% chance by 2026).", + "THE GOOD: Unexpected alliance between safety hawks and innovation advocates on federal-state coordination frameworks. Both sides realize the current patchwork is failing - though they disagree on who should lead.", + "THE BAD: Glaring blind spot across all analyses: international coordination. 
While debating state vs federal authority, none seriously addressed how US regulations interact with EU AI Act or Chinese standards.", + "THE BAD: The healthcare AI compliance discussion was superficial despite 65% forecast for strong compliance rates. Missing: how medical liability insurance will reshape AI adoption faster than any regulation.", + "THE UGLY: Stark 40-point spread on California preemption (Gemini: 65% vs others ~25%). This isn't just a forecast disagreement - it reveals fundamentally different views on federal vs state power in tech regulation.", + "THE UGLY: The 'innovation vs safety' tradeoff got real ugly fast. Proposals ranged from $5B safety research (Grok) to 'open innovation safe harbors' (Gemini) - no middle ground emerged despite hours of deliberation.", + "THE UGLY: AI discrimination enforcement forecasts reveal uncomfortable truth: 25% chance of federal action by 2026, 40% chance lawsuits force private action. Translation: we're waiting for victims, not preventing harm.", + "THE INTERESTING: Counter-intuitive finding: the AI system most bullish on federal legislation (30% by 2027) was also most pessimistic about state enforcement (35% for Colorado Act). Suggests federal gridlock, not leadership.", + "THE INTERESTING: Wild divergence on frontier AI safety: some want compute thresholds + incident reporting, others want evaluation regimes. Yet all agree current voluntary commitments will fail - just disagree on the replacement.", + "THE INTERESTING: The NO FAKES Act got only 25% odds despite bipartisan support. Why? The analysts see deepfakes as a narrow use case when the real action is in hiring/lending discrimination." ], - "timestamp": "2026-01-29T23:15:57.690577Z", - "errors": [] + "timestamp": "2026-01-30T02:06:39.177392Z", + "errors": [], + "total_price_estimate": 5.052529120000002 } From 677b16a065de3b6253db06504a001ce3af39f23a Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Fri, 30 Jan 2026 02:54:44 +0000 Subject: [PATCH 8/8] AI review --- .../ai_congress/congress_member_agent.py | 4 +- .../ai_congress/congress_orchestrator.py | 2 +- .../ai_congress/data_models.py | 2 +- .../front_end/app_pages/congress_page.py | 50 +++++++++++-------- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py index 1f8cfa8c..79a202a8 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_member_agent.py @@ -32,7 +32,7 @@ def __init__( self.structure_output_model = structure_output_model or GeneralLlm( "openrouter/openai/gpt-5.2", temperature=0.2, - timeout=LONG_TIMEOUT, + timeout=self.timeout, ) async def deliberate(self, policy_prompt: str) -> PolicyProposal: @@ -182,7 +182,7 @@ def _build_agent_instructions(self, policy_prompt: str) -> str: - The question should be specific and not vague - The question should have a resolution date - Once the resolution date has passed, the question should be resolvable with 0.5-1.5hr of research - - Bad: "Will a research paper in a established journal find that a new knee surgery technique reduces follow up surgery with significance by Dec 31 2023?" (To resolve this you have to do extensive research into all new research in a field) + - Bad: "Will a research paper in an established journal find that a new knee surgery technique reduces follow up surgery with significance by Dec 31 2023?" 
(To resolve this you have to do extensive research into all new research in a field) - Good: "Will public dataset X at URL Y show the number of follow ups to knee surgeries decrease by Z% by Dec 31 2023?" (requires only some math on a few data points at a known URL) - A good resolution source exists - Bad: "On 15 January 2026, will the general sentiment be generally positive for knee surgery professionals with at least 10 years of experience concerning ACL reconstruction research?" (There is no way to research this online. You would have to run a large study on knee professionals) diff --git a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py index ee9240f8..672c73ca 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py +++ b/forecasting_tools/agents_and_tools/ai_congress/congress_orchestrator.py @@ -256,7 +256,7 @@ async def _generate_blog_post( + "\n".join( f"- {f.question_title}: {f.prediction}" for f in p.forecasts[:5] ) - + f"**Proposal Text:**\n" + + f"\n\n**Proposal Text:**\n" f"```markdown\n" f"{p.get_full_markdown_with_footnotes()}\n" f"```\n\n" diff --git a/forecasting_tools/agents_and_tools/ai_congress/data_models.py b/forecasting_tools/agents_and_tools/ai_congress/data_models.py index 32d41452..c0bf0647 100644 --- a/forecasting_tools/agents_and_tools/ai_congress/data_models.py +++ b/forecasting_tools/agents_and_tools/ai_congress/data_models.py @@ -42,7 +42,7 @@ class ForecastDescription(BaseModel, Jsonable): def as_footnote_markdown(self) -> str: sources_str = ", ".join(self.key_sources) if self.key_sources else "N/A" return ( - f"[^{self.footnote_id}] **{self.question_title}**\n" + f"[^{self.footnote_id}]: **{self.question_title}**\n" f"- Question: {self.question_text}\n" f"- Resolution: {self.resolution_criteria}\n" f"- Prediction: {self.prediction}\n" diff --git a/forecasting_tools/front_end/app_pages/congress_page.py b/forecasting_tools/front_end/app_pages/congress_page.py index a83a4458..fd1bf17d 100644 --- a/forecasting_tools/front_end/app_pages/congress_page.py +++ b/forecasting_tools/front_end/app_pages/congress_page.py @@ -21,6 +21,10 @@ from forecasting_tools.front_end.helpers.app_page import AppPage from forecasting_tools.front_end.helpers.custom_auth import CustomAuth from forecasting_tools.front_end.helpers.report_displayer import ReportDisplayer +from forecasting_tools.util.file_manipulation import ( + create_or_overwrite_file, + load_json_file, +) logger = logging.getLogger(__name__) @@ -41,17 +45,24 @@ async def _async_main(cls) -> None: st.title("🏛️ AI Forecasting Congress") st.markdown( """ - Simulate a deliberative body of AI agents with different political - perspectives analyzing a policy question. Each member conducts research, - generates forecasting questions, makes quantitative predictions, and - proposes policy recommendations. + **Simulate a world where AI makes the decisions.** + + - **Policy Proposals**: Submit a policy question and watch AI congress members reason about forecasts and propose policies based on your prompt + - **Aggregation**: Each AI congress member creates their own policy, then another AI aggregates them into one final policy + - **Future Newspaper**: A journalist AI creates a newspaper from the future by: + - Rolling a dice for each forecast to determine whether that event happened in this simulated future + - Weaving the outcomes into a narrative showing what happens if policies get accepted vs. 
rejected + + This gives a glimpse into what the world might look like if AI got to choose how things went. """ ) - cls._display_example_button() cls._display_sidebar() + st.header("Start a New Session") + cls._display_example_button() session_input = await cls._get_input() + if session_input: session = await cls._run_congress(session_input) cls._save_session(session) @@ -62,13 +73,14 @@ async def _async_main(cls) -> None: @classmethod def _display_example_button(cls) -> None: - if st.button("📋 See Premade Example", key="load_example_btn"): - session = cls._load_session_from_file(EXAMPLE_SESSION_PATH) - if session: - st.session_state["latest_session"] = session - st.rerun() - else: - st.error("Could not load the example session.") + with st.expander("📋 Load Premade Example", expanded=False): + if st.button("Load Example", key="load_example_btn"): + session = cls._load_session_from_file(EXAMPLE_SESSION_PATH) + if session: + st.session_state["latest_session"] = session + st.rerun() + else: + st.error("Could not load the example session.") @classmethod def _display_sidebar(cls) -> None: @@ -171,7 +183,6 @@ def _display_sidebar(cls) -> None: @classmethod async def _get_input(cls) -> CongressSessionInput | None: - st.header("Start a New Session") with st.expander("📋 Example Prompts", expanded=False): st.markdown("Click a button to use an example prompt:") @@ -469,7 +480,6 @@ def _display_twitter_tab(cls, session: CongressSession) -> None: for i, post in enumerate(session.twitter_posts, 1): st.markdown(f"**Tweet {i}** ({len(post)} chars)") st.info(post) - st.button(f"📋 Copy Tweet {i}", key=f"copy_tweet_{i}") @classmethod def _display_cost_summary(cls, session: CongressSession) -> None: @@ -572,16 +582,16 @@ def _session_to_markdown(cls, session: CongressSession) -> str: @classmethod def _save_session(cls, session: CongressSession) -> None: - os.makedirs(SESSIONS_FOLDER, exist_ok=True) filename = f"{session.timestamp.strftime('%Y%m%d_%H%M%S')}.json" filepath = os.path.join(SESSIONS_FOLDER, filename) try: - with open(filepath, "w") as f: - json.dump(session.to_json(), f, indent=2, default=str) + json_str = json.dumps(session.to_json(), indent=2, default=str) + create_or_overwrite_file(filepath, json_str) logger.info(f"Saved session to {filepath}") except Exception as e: logger.error(f"Failed to save session: {e}") + st.error(f"Failed to save session: {e}") @classmethod def _load_session_from_file(cls, file_path: str) -> CongressSession | None: @@ -590,8 +600,7 @@ def _load_session_from_file(cls, file_path: str) -> CongressSession | None: return None try: - with open(file_path, "r") as f: - data = json.load(f) + data: dict = load_json_file(file_path) # type: ignore session = CongressSession.from_json(data) return session except json.JSONDecodeError as e: @@ -612,8 +621,7 @@ def _load_previous_sessions(cls) -> list[CongressSession]: if filename.endswith(".json"): filepath = os.path.join(SESSIONS_FOLDER, filename) try: - with open(filepath, "r") as f: - data = json.load(f) + data: dict = load_json_file(filepath) # type: ignore session = CongressSession.from_json(data) sessions.append(session) except Exception as e: