-
Notifications
You must be signed in to change notification settings - Fork 4
Arxiv connection #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Arxiv connection #18
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,83 @@ | ||||||
| import type { MantisConnection, injectUIType, onMessageType, registerListenersType, setProgressType, establishLogSocketType } from "../types"; | ||||||
| import { GenerationProgress } from "../types"; | ||||||
| import ArxivIcon from "data-base64:../../../assets/arxiv.png"; | ||||||
| import { getSpacePortal, registerAuthCookies, reqSpaceCreation } from "../../driver"; | ||||||
|
|
||||||
/**
 * Decides whether this connection applies to the given page URL.
 *
 * The check is made against the URL's hostname rather than the raw string, so
 * a page that merely mentions "arxiv.org" in its path or query string (or a
 * look-alike domain such as "notarxiv.org") does not activate the connection.
 *
 * @param url - page address; a scheme-less value such as "arxiv.org/abs/1234"
 *              is also accepted.
 * @returns true when the host is arxiv.org or one of its subdomains.
 */
const trigger = (url: string) => {
    // Try the value as given, then with a scheme prepended, so bare
    // "arxiv.org/..." strings keep matching as they did before.
    for (const candidate of [url, `https://${url}`]) {
        let host = "";
        try {
            host = new URL(candidate).hostname.toLowerCase();
        } catch {
            continue; // not parseable in this form; try the next candidate
        }
        if (host === "") continue;
        return host === "arxiv.org" || host.endsWith(".arxiv.org");
    }
    return false;
}
|
|
||||||
/**
 * Creates a space from the arXiv search that is open in the current page.
 *
 * Reads the `query` search parameter from `window.location.href`, pages
 * through the arXiv export API collecting the title, id link and summary of
 * every entry, hands the rows to `reqSpaceCreation`, and finally calls
 * `injectUI` with the resulting space id. Progress is reported through
 * `setProgress` before each phase.
 *
 * @param injectUI - called with the new space id, `onMessage` and `registerListeners`
 * @param setProgress - receives a `GenerationProgress` value at each phase
 * @param onMessage - forwarded unchanged to `injectUI`
 * @param registerListeners - forwarded unchanged to `injectUI`
 * @param establishLogSocket - forwarded unchanged to `reqSpaceCreation`
 * @returns the created space id together with the value `injectUI` resolved to
 * @throws Error when an arXiv API request answers with a non-OK status
 */
const createSpace = async (injectUI: injectUIType, setProgress: setProgressType, onMessage: onMessageType, registerListeners: registerListenersType, establishLogSocket: establishLogSocketType) => {
    // Results requested per API call; the paging offset advances by the same
    // amount so consecutive pages never overlap.
    const PAGE_SIZE = 50;
    // Upper bound on the number of results collected for one space.
    const MAX_RESULTS = 200;
    // arXiv asks API clients to leave about three seconds between consecutive requests.
    const REQUEST_DELAY_MS = 3000;

    setProgress(GenerationProgress.GATHERING_DATA);

    const url = new URL(window.location.href);
    const query = url.searchParams.get("query") || "";
    const spaceTitle = `Arxiv results for ${query}`;

    // Text of the first matching child element, or the fallback when it is missing or empty.
    const textOf = (entry: Element, tag: string, fallback: string): string =>
        entry.querySelector(tag)?.textContent || fallback;

    const extractedData: { idx: number; title: string; link: string; snippet: string }[] = [];
    const parser = new DOMParser();
    let idx = 1;

    for (let start = 0; start < MAX_RESULTS; start += PAGE_SIZE) {
        // Pause before every follow-up request (never after the last one).
        if (start > 0) {
            await new Promise(r => setTimeout(r, REQUEST_DELAY_MS));
        }

        const apiUrl = `https://export.arxiv.org/api/query?search_query=all:${encodeURIComponent(query)}&start=${start}&max_results=${PAGE_SIZE}`;
        const apiResponse = await fetch(apiUrl);
        if (!apiResponse.ok) {
            throw new Error(`Failed to fetch search results: ${await apiResponse.text()}`);
        }

        const data = await apiResponse.text();
        const xml = parser.parseFromString(data, "application/xml");
        const entries = Array.from(xml.getElementsByTagName("entry"));

        // An empty page means the result set is exhausted.
        if (entries.length === 0) break;

        for (const entry of entries) {
            extractedData.push({
                idx: idx++,
                title: textOf(entry, "title", "No title"),
                link: textOf(entry, "id", "No link"),
                snippet: textOf(entry, "summary", "No summary")
            });
        }
    }

    setProgress(GenerationProgress.CREATING_SPACE);

    const spaceData = await reqSpaceCreation(extractedData, {
        "idx": "numeric",
        "title": "title",
        "link": "links",
        "snippet": "semantic"
    }, establishLogSocket, spaceTitle);

    setProgress(GenerationProgress.INJECTING_UI);

    const spaceId = spaceData.space_id;
    const createdWidget = await injectUI(spaceId, onMessage, registerListeners);

    setProgress(GenerationProgress.COMPLETED);

    return { spaceId, createdWidget }
}
|
|
||||||
/**
 * Mounts the portal for a space into the arXiv page.
 *
 * Awaits `registerAuthCookies()`, obtains the portal from `getSpacePortal`,
 * and prepends it to the first element matching `.search-title`.
 *
 * @param space_id - id of the space to display
 * @param onMessage - forwarded unchanged to `getSpacePortal`
 * @param registerListeners - forwarded unchanged to `getSpacePortal`
 * @returns the value resolved by `getSpacePortal`
 */
const injectUI = async (space_id: string, onMessage: onMessageType, registerListeners: registerListenersType) => {
    await registerAuthCookies();

    const portal = await getSpacePortal(space_id, onMessage, registerListeners);

    // NOTE(review): the mount point is located by the `.search-title` class.
    // When no such element exists, nothing is attached and the portal is
    // still returned to the caller.
    const anchor = document.querySelector(".search-title");
    if (anchor) {
        anchor.prepend(portal);
    }

    return portal;
}
|
|
||||||
/**
 * Connection descriptor for arXiv: bundles the display metadata (name,
 * description, icon) with the `trigger`, `createSpace` and `injectUI`
 * functions defined in this module.
 */
export const ArxivConnection: MantisConnection = {
    name: 'Arxiv',
    description: 'creates spaces based on the searches within the Arxiv database',
    icon: ArxivIcon,
    trigger,
    createSpace,
    injectUI,
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The pagination logic in this loop is incorrect. You are fetching 50 results per API call (`max_results=50`), but the loop's `start` offset is only incremented by 20. This will cause overlapping results to be fetched in subsequent API calls, which is inefficient and will not produce the expected set of unique results.
The loop increment should match the `max_results` value to fetch distinct pages of results. Additionally, it's good practice to define magic numbers like `200` and `50` as named constants for better code readability and maintainability.