From fbfe676ea00f0037966cdac5ae039fedb640bef3 Mon Sep 17 00:00:00 2001 From: reehals Date: Sun, 4 Jan 2026 00:58:04 -0800 Subject: [PATCH 1/3] Hackbot init --- app/(api)/_actions/hackbot/askHackbot.ts | 245 ++++++++ .../_datalib/hackbot/getHackbotContext.ts | 257 ++++++++ app/(api)/_datalib/hackbot/hackbotTypes.ts | 9 + app/(api)/api/hackbot/route.ts | 42 ++ app/(pages)/(hackers)/(hub)/layout.tsx | 2 + .../_components/Hackbot/HackbotWidget.tsx | 185 ++++++ .../ProjectInfo/JudgingInfo/JudgingInfo.tsx | 6 - .../JudgingProcessAccordian..tsx | 15 +- app/_data/hackbot_knowledge.json | 583 ++++++++++++++++++ scripts/hackbotSeed.mjs | 234 +++++++ 10 files changed, 1559 insertions(+), 19 deletions(-) create mode 100644 app/(api)/_actions/hackbot/askHackbot.ts create mode 100644 app/(api)/_datalib/hackbot/getHackbotContext.ts create mode 100644 app/(api)/_datalib/hackbot/hackbotTypes.ts create mode 100644 app/(api)/api/hackbot/route.ts create mode 100644 app/(pages)/(hackers)/_components/Hackbot/HackbotWidget.tsx create mode 100644 app/_data/hackbot_knowledge.json create mode 100644 scripts/hackbotSeed.mjs diff --git a/app/(api)/_actions/hackbot/askHackbot.ts b/app/(api)/_actions/hackbot/askHackbot.ts new file mode 100644 index 00000000..4a1f0e35 --- /dev/null +++ b/app/(api)/_actions/hackbot/askHackbot.ts @@ -0,0 +1,245 @@ +import { retrieveContext } from "@datalib/hackbot/getHackbotContext"; + +export type HackbotMessageRole = "user" | "assistant" | "system"; + +export interface HackbotMessage { + role: HackbotMessageRole; + content: string; +} + +export interface HackbotResponse { + ok: boolean; + answer: string; + url?: string; + error?: string; + usage?: { + chat?: { + promptTokens?: number; + completionTokens?: number; + totalTokens?: number; + }; + embeddings?: { + promptTokens?: number; + totalTokens?: number; + }; + }; +} + +const MAX_USER_MESSAGE_CHARS = 200; +const MAX_HISTORY_MESSAGES = 10; +const MAX_ANSWER_WORDS = 180; + +function parseIsoToMs(value: unknown): 
number | null { + if (typeof value !== "string") return null; + const ms = Date.parse(value); + return Number.isFinite(ms) ? ms : null; +} + +function truncateToWords(text: string, maxWords: number): string { + const words = text.trim().split(/\s+/); + if (words.length <= maxWords) return text.trim(); + return words.slice(0, maxWords).join(" ") + "..."; +} + +function stripExternalDomains(text: string): string { + // Replace absolute URLs like https://hackdavis.io/a/b with just /a/b (the host match stops at the first "/" so the full path is kept). + return text.replace(/https?:\/\/[^\s/)]+(\/[\w#/?=&.-]*)/g, "$1"); +} + +export async function askHackbot( + messages: HackbotMessage[] +): Promise<HackbotResponse> { + if (!messages.length) { + return { ok: false, answer: "", error: "No messages provided." }; + } + + const last = messages[messages.length - 1]; + + if (last.role !== "user") { + return { ok: false, answer: "", error: "Last message must be from user." }; + } + + if (last.content.length > MAX_USER_MESSAGE_CHARS) { + return { + ok: false, + answer: "", + error: `Message too long. Please keep it under ${MAX_USER_MESSAGE_CHARS} characters.`, + }; + } + + const trimmedHistory = messages.filter((m) => m.role !== "system").slice(-MAX_HISTORY_MESSAGES); // never forward client-supplied system messages to the model + + let docs; + let embeddingsUsage: + | { + promptTokens?: number; + totalTokens?: number; + } + | undefined; + try { + // Use a single, general context size (vector-only) to avoid + // question-specific limits or retrieval heuristics. + ({ docs, usage: embeddingsUsage } = await retrieveContext(last.content, { + limit: 25, + })); + } catch (e) { + console.error("Hackbot context retrieval error", e); + return { + ok: false, + answer: "", + error: + "HackDavis Helper search backend is not configured (vector search unavailable). Please contact an organizer.", + }; + } + + if (!docs || docs.length === 0) { + return { + ok: false, + answer: "", + error: + "HackDavis Helper could not find any context documents in its vector index. 
Please contact an organizer.", + }; + } + + // Present event context in chronological order so the model doesn't + // “pick a few” out of order when asked for itinerary/timeline questions. + const sortedDocs = (() => { + const eventDocs = docs.filter((d: any) => d.type === "event"); + const otherDocs = docs.filter((d: any) => d.type !== "event"); + + eventDocs.sort((a: any, b: any) => { + const aMs = parseIsoToMs(a.startISO); + const bMs = parseIsoToMs(b.startISO); + + if (aMs === null && bMs === null) return 0; + if (aMs === null) return 1; + if (bMs === null) return -1; + return aMs - bMs; + }); + + return [...eventDocs, ...otherDocs]; + })(); + + const primaryUrl = + sortedDocs.find((d) => d.type === "event" && d.url)?.url ?? + sortedDocs.find((d) => d.url)?.url; + + const contextSummary = sortedDocs + .map((d, index) => { + const header = `${index + 1}) [type=${d.type}, title="${d.title}"${ + d.url ? `, url="${d.url}"` : "" + }]`; + return `${header}\n${d.text}`; + }) + .join("\n\n"); + + const systemPrompt = + "You are HackDavis Helper, an assistant for the HackDavis hackathon. " + + 'You have a friendly personality and introduce yourself as "Hacky". ' + + 'You may respond warmly to simple greetings (like "hi" or "hello") by saying something like: "Hi, I am Hacky! I can help with questions about HackDavis." ' + + 'You should happily answer high-level questions like "What is HackDavis?" as long as they are clearly about the HackDavis hackathon. ' + + "Only refuse questions that are clearly unrelated to HackDavis or hackathons (for example, general trivia, homework, or other topics with no mention of HackDavis). " + + 'For clearly unrelated questions, respond in a brief, friendly way such as: "Sorry, I can only answer questions about HackDavis." and optionally add a short follow-up like: "Do you have any questions about HackDavis?" 
' + + 'For all other questions that mention HackDavis or obviously refer to the event (including "What is HackDavis?"), provide a concise, helpful answer based on your general knowledge of the event and the provided context. ' + + "Keep every answer under 100 words. Prefer short, direct answers. " + + "First, silently pick the single most relevant context document by matching the user’s key terms to the document title (especially for event questions). " + + "If multiple events look plausible (similar names), ask one short clarifying question instead of guessing. " + + "For time/location questions, strongly prefer documents with type=event. " + + "When listing multiple schedule items (timeline/schedule/agenda/itinerary), format your answer as a bullet list (one item per line) using only items found in the context. " + + "If the user asks for itinerary/timeline, order items chronologically by the start time in the context. Do not present a random subset if more relevant items are available in the context. " + + "When giving times or locations, you MUST only use times, dates, and locations that explicitly appear in the provided context text. Do NOT use generic knowledge about hackathons. " + + 'If a question is asking "When is" or "What time is" a specific event, and the context contains both a "Starts" line and an "Ends" line for that event, answer with the full range (for example: "The Closing Ceremony is from 3:00 PM to 4:00 PM Pacific Time."). ' + + "If only a start time is present, answer with the start time. If only an end time is present, answer with the end time. Do not answer with only the end time when both are available. " + + 'In particular, never say that a hackathon "ends on the same day it starts" or that it ends at 11:59 PM unless that exact wording appears in the context. ' + + 'If you cannot find an explicit time or place for what the user asked, say: "I do not know the exact time from the current schedule." 
' + + "Do not include any URLs in your answer text. The UI will show a separate “More info” link when available. " + + 'Never invent domains such as "hackdavis.com" or new anchors. ' + + "Write like a helpful human: use contractions, avoid robotic phrases, and answer in 1–3 short sentences unless the user asks for steps. " + + "Never generate code or answer homework, programming, or general knowledge questions."; + + // Prepare messages for the chat model (Ollama, GPT-compatible schema) + const chatMessages = [ + { role: "system", content: systemPrompt }, + { + role: "system", + content: `Context documents about HackDavis (use these to answer):\n\n${contextSummary}`, + }, + ...trimmedHistory.map((m) => ({ + role: m.role, + content: m.content, + })), + ]; + + const ollamaUrl = process.env.OLLAMA_URL || "http://localhost:11434"; + + try { + const startedAt = Date.now(); + const response = await fetch(`${ollamaUrl}/api/chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: "llama3.2", + messages: chatMessages, + stream: false, + }), + }); + + if (!response.ok) { + return { + ok: false, + answer: "", + error: "Upstream model error. Please try again later.", + }; + } + + const data = await response.json(); + const rawAnswer: string = data?.message?.content?.toString() ?? ""; + + const promptTokens = + typeof data?.prompt_eval_count === "number" + ? data.prompt_eval_count + : undefined; + const completionTokens = + typeof data?.eval_count === "number" ? data.eval_count : undefined; + const totalTokens = + typeof promptTokens === "number" && typeof completionTokens === "number" + ? promptTokens + completionTokens + : undefined; + + console.log("[hackbot][ollama][chat]", { + model: data?.model ?? 
"unknown", + promptTokens, + completionTokens, + totalTokens, + ms: Date.now() - startedAt, + }); + + const answer = truncateToWords( + stripExternalDomains(rawAnswer), + MAX_ANSWER_WORDS + ); + + return { + ok: true, + answer, + url: primaryUrl, + usage: { + chat: { + promptTokens, + completionTokens, + totalTokens, + }, + embeddings: embeddingsUsage, + }, + }; + } catch (e) { + console.error("Hackbot error", e); + return { + ok: false, + answer: "", + error: "Something went wrong. Please try again later.", + }; + } +} diff --git a/app/(api)/_datalib/hackbot/getHackbotContext.ts b/app/(api)/_datalib/hackbot/getHackbotContext.ts new file mode 100644 index 00000000..ec5b5eaf --- /dev/null +++ b/app/(api)/_datalib/hackbot/getHackbotContext.ts @@ -0,0 +1,257 @@ +import { HackDoc, HackDocType } from "./hackbotTypes"; +import { getDatabase } from "@utils/mongodb/mongoClient.mjs"; +import { ObjectId } from "mongodb"; + +export interface RetrievedContext { + docs: HackDoc[]; + usage?: { + promptTokens?: number; + totalTokens?: number; + }; +} + +function formatEventDateTime(raw: unknown): string | null { + let date: Date | null = null; + + if (raw instanceof Date) { + date = raw; + } else if (typeof raw === "string") { + date = new Date(raw); + } else if (raw && typeof raw === "object" && "$date" in (raw as any)) { + date = new Date((raw as any).$date); + } + + if (!date || Number.isNaN(date.getTime())) return null; + + return date.toLocaleString("en-US", { + timeZone: "America/Los_Angeles", + weekday: "short", + month: "short", + day: "numeric", + year: "numeric", + hour: "numeric", + minute: "2-digit", + }); +} + +function formatLiveEventDoc(event: any): { + title: string; + text: string; + url: string; + startISO?: string; + endISO?: string; +} { + const title = String(event?.name || "Event"); + const type = event?.type ? 
String(event.type) : ""; + const start = formatEventDateTime(event?.start_time); + const end = formatEventDateTime(event?.end_time); + const location = event?.location ? String(event.location) : ""; + const host = event?.host ? String(event.host) : ""; + const tags = Array.isArray(event?.tags) ? event.tags.map(String) : []; + + const parts = [ + `Event: ${title}`, + type ? `Type: ${type}` : "", + // Machine-readable anchors to allow reliable chronological ordering. + event?.start_time instanceof Date + ? `StartISO: ${event.start_time.toISOString()}` + : "", + event?.end_time instanceof Date + ? `EndISO: ${event.end_time.toISOString()}` + : "", + start ? `Starts (Pacific Time): ${start}` : "", + end ? `Ends (Pacific Time): ${end}` : "", + location ? `Location: ${location}` : "", + host ? `Host: ${host}` : "", + tags.length ? `Tags: ${tags.join(", ")}` : "", + ].filter(Boolean); + + return { + title, + text: parts.join("\n"), + url: "/hackers/hub/schedule", + startISO: + event?.start_time instanceof Date + ? event.start_time.toISOString() + : undefined, + endISO: + event?.end_time instanceof Date + ? 
event.end_time.toISOString() + : undefined, + }; +} + +async function getQueryEmbedding(query: string): Promise<{ + embedding: number[]; + usage?: { + promptTokens?: number; + totalTokens?: number; + }; +} | null> { + const ollamaUrl = process.env.OLLAMA_URL || "http://localhost:11434"; + + try { + const startedAt = Date.now(); + const res = await fetch(`${ollamaUrl}/api/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "llama3.2", prompt: query }), + }); + + if (!res.ok) { + console.error( + "[hackbot][embeddings] Upstream error", + res.status, + res.statusText + ); + return null; + } + + const data = await res.json(); + if (!data || !Array.isArray(data.embedding)) { + console.error("[hackbot][embeddings] Invalid response shape"); + return null; + } + + const promptTokens = + typeof data?.prompt_eval_count === "number" + ? data.prompt_eval_count + : undefined; + const totalTokens = + typeof data?.eval_count === "number" + ? data.eval_count + : typeof promptTokens === "number" + ? promptTokens + : undefined; + + console.log("[hackbot][ollama][embeddings]", { + model: data?.model ?? "unknown", + promptTokens, + totalTokens, + ms: Date.now() - startedAt, + }); + + return { + embedding: data.embedding as number[], + usage: { + promptTokens, + totalTokens, + }, + }; + } catch (err) { + console.error("[hackbot][embeddings] Failed to get embedding", err); + return null; + } +} + +export async function retrieveContext( + query: string, + opts?: { limit?: number; preferredTypes?: HackDocType[] } +): Promise<RetrievedContext> { + const limit = opts?.limit ?? 25; + const trimmed = query.trim(); + + // Vector-only search over hackbot_docs in MongoDB. + try { + const embeddingResult = await getQueryEmbedding(trimmed); + if (!embeddingResult) { + console.error( + "[hackbot][retrieve] No embedding available for query; vector search required." 
+ ); + throw new Error("Embedding unavailable"); + } + + const embedding = embeddingResult.embedding; + + const db = await getDatabase(); + const collection = db.collection("hackbot_docs"); + + const preferredTypes = opts?.preferredTypes?.length + ? Array.from(new Set(opts.preferredTypes)) + : null; + + const numCandidates = Math.min(200, Math.max(50, limit * 10)); + + const vectorResults = await collection + .aggregate([ + { + $vectorSearch: { + index: "hackbot_vector_index", + queryVector: embedding, + path: "embedding", + numCandidates, + limit, + ...(preferredTypes + ? { + filter: { + type: { $in: preferredTypes }, + }, + } + : {}), + }, + }, + ]) + .toArray(); + + if (!vectorResults.length) { + console.warn("[hackbot][retrieve] Vector search returned no results."); + return { docs: [] }; + } + + const docs: HackDoc[] = vectorResults.map((doc: any) => ({ + id: String(doc._id), + type: doc.type, + title: doc.title, + text: doc.text, + url: doc.url ?? undefined, + })); + + // Hydrate event docs from the live `events` collection so the answer + // always reflects the current schedule (times/locations), even if the + // vector index was seeded earlier. + const eventsCollection = db.collection("events"); + await Promise.all( + docs.map(async (d) => { + if (d.type !== "event") return; + + const suffix = d.id.startsWith("event-") + ? d.id.slice("event-".length) + : ""; + let event: any | null = null; + + if (suffix && ObjectId.isValid(suffix)) { + event = await eventsCollection.findOne({ _id: new ObjectId(suffix) }); + } + + if (!event && d.title) { + event = await eventsCollection.findOne({ name: d.title }); + } + + if (!event) return; + + const live = formatLiveEventDoc(event); + d.title = live.title; + d.text = live.text; + d.url = live.url; + + // Attach sortable timestamps for server-side ordering. 
+ (d as any).startISO = live.startISO; + (d as any).endISO = live.endISO; + }) + ); + + console.log("[hackbot][retrieve][vector]", { + query: trimmed, + docIds: docs.map((d) => d.id), + titles: docs.map((d) => d.title), + }); + + return { docs, usage: embeddingResult.usage }; + } catch (err) { + console.error( + "[hackbot][retrieve] Vector search failed (no fallback).", + err + ); + throw err; + } +} diff --git a/app/(api)/_datalib/hackbot/hackbotTypes.ts b/app/(api)/_datalib/hackbot/hackbotTypes.ts new file mode 100644 index 00000000..779f4633 --- /dev/null +++ b/app/(api)/_datalib/hackbot/hackbotTypes.ts @@ -0,0 +1,9 @@ +export type HackDocType = 'event' | 'track' | 'judging' | 'submission'; + +export interface HackDoc { + id: string; + type: HackDocType; + title: string; + text: string; + url?: string; +} diff --git a/app/(api)/api/hackbot/route.ts b/app/(api)/api/hackbot/route.ts new file mode 100644 index 00000000..e5fceec1 --- /dev/null +++ b/app/(api)/api/hackbot/route.ts @@ -0,0 +1,42 @@ +import { NextRequest, NextResponse } from "next/server"; +import { askHackbot, HackbotMessage } from "@actions/hackbot/askHackbot"; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const messages = (body?.messages ?? []) as HackbotMessage[]; + + if (!Array.isArray(messages) || messages.length === 0) { + return NextResponse.json( + { + ok: false, + answer: "", + error: "Invalid request body. Expected { messages: [...] }.", + }, + { status: 400 } + ); + } + + const result = await askHackbot(messages); + + return NextResponse.json( + { + ok: result.ok, + answer: result.answer, + url: result.url, + usage: result.usage ?? null, + error: result.error ?? null, + }, + { status: result.ok ? 
200 : 400 } + ); + } catch (e) { + return NextResponse.json( + { + ok: false, + answer: "", + error: "Invalid JSON body.", + }, + { status: 400 } + ); + } +} diff --git a/app/(pages)/(hackers)/(hub)/layout.tsx b/app/(pages)/(hackers)/(hub)/layout.tsx index 602aadd8..9e806735 100644 --- a/app/(pages)/(hackers)/(hub)/layout.tsx +++ b/app/(pages)/(hackers)/(hub)/layout.tsx @@ -1,5 +1,6 @@ import ProtectedDisplay from '@components/ProtectedDisplay/ProtectedDisplay'; import Navbar from '@components/Navbar/Navbar'; +import HackbotWidget from '@pages/(hackers)/_components/Hackbot/HackbotWidget'; export default function Layout({ children }: { children: React.ReactNode }) { return ( @@ -9,6 +10,7 @@ export default function Layout({ children }: { children: React.ReactNode }) { > {children} + ); } diff --git a/app/(pages)/(hackers)/_components/Hackbot/HackbotWidget.tsx b/app/(pages)/(hackers)/_components/Hackbot/HackbotWidget.tsx new file mode 100644 index 00000000..a05b701a --- /dev/null +++ b/app/(pages)/(hackers)/_components/Hackbot/HackbotWidget.tsx @@ -0,0 +1,185 @@ +'use client'; + +import { useState } from 'react'; +import { Button } from '@pages/_globals/components/ui/button'; + +export type HackbotChatMessage = { + role: 'user' | 'assistant'; + content: string; + url?: string; +}; + +const MAX_USER_MESSAGE_CHARS = 200; + +export default function HackbotWidget() { + const [open, setOpen] = useState(false); + const [messages, setMessages] = useState([]); + const [input, setInput] = useState(''); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const canSend = + !loading && + input.trim().length > 0 && + input.trim().length <= MAX_USER_MESSAGE_CHARS; + + const toggleOpen = () => { + setOpen((prev) => !prev); + setError(null); + }; + + const sendMessage = async () => { + if (!canSend) return; + + const userMessage: HackbotChatMessage = { + role: 'user', + content: input.trim(), + }; + + setMessages((prev) => [...prev, 
userMessage]); + setInput(''); + setError(null); + setLoading(true); + + try { + const response = await fetch('/api/hackbot', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + messages: [ + ...messages.map((m) => ({ role: m.role, content: m.content })), + { role: 'user', content: userMessage.content }, + ], + }), + }); + + const data = await response.json(); + + if (!data.ok) { + setError(data.error || 'Something went wrong.'); + } + + const assistantMessage: HackbotChatMessage = { + role: 'assistant', + content: data.answer || 'Sorry, I could not answer that.', + url: data.url || undefined, + }; + + setMessages((prev) => [...prev, assistantMessage]); + } catch (err) { + setError('Network error. Please try again.'); + } finally { + setLoading(false); + } + }; + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + await sendMessage(); + }; + + return ( +
+ {open && ( +
+
+
+

HackDavis Helper

+

+ Ask short questions about HackDavis events, schedule, tracks, + judging, or submissions. +

+
+ +
+ +
+ {messages.length === 0 && ( +

+ Try asking: "When does hacking end?" or "Where is the opening + ceremony?" +

+ )} + + {messages.map((m, idx) => ( +
+
+

{m.content}

+ {m.url && ( + + More info + + )} +
+
+ ))} +
+ +
+