From 583a5c2eb1879cf875272c57de65a925ed0977fd Mon Sep 17 00:00:00 2001
From: Anish Kachinthaya
Date: Fri, 18 Oct 2024 10:46:45 -0700
Subject: [PATCH 1/3] add llm embeddings for semantic chunking

---
 lib/index.ts            |  9 ++++++++-
 lib/llm/LLMClient.ts    |  6 ++++++
 lib/llm/OpenAIClient.ts | 22 ++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/lib/index.ts b/lib/index.ts
index 77c183ae..39cc572e 100644
--- a/lib/index.ts
+++ b/lib/index.ts
@@ -9,7 +9,7 @@ const merge = require("deepmerge");
 import path from "path";
 import Browserbase from "./browserbase";
 import { ScreenshotService } from "./vision";
-import { modelsWithVision } from "./llm/LLMClient";
+import { LLMClient, modelsWithVision } from "./llm/LLMClient";
 
 require("dotenv").config({ path: ".env" });
 
@@ -235,6 +235,10 @@ export class Stagehand {
     });
   }
 
+  getLLMClient(modelName: string): LLMClient {
+    return this.llmProvider.getClient(modelName);
+  }
+
   async waitForSettledDom() {
     try {
       await this.page.waitForSelector("body");
@@ -276,11 +280,13 @@ export class Stagehand {
       console.log("Error in startDomDebug:", e);
     }
   }
+
   async cleanupDomDebug() {
     if (this.debugDom) {
       await this.page.evaluate(() => window.cleanupDebug());
     }
   }
+
   getId(operation: string) {
     return crypto.createHash("sha256").update(operation).digest("hex");
   }
@@ -312,6 +318,7 @@ export class Stagehand {
       (chunksSeen?: number[]) => window.processDom(chunksSeen ?? []),
       chunksSeen,
     );
+
     this.log({
       category: "extraction",
       message: `Received output from processDom. Chunk: ${chunk}, Chunks left: ${chunks.length - chunksSeen.length}`,
diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts
index 5109772c..cce29875 100644
--- a/lib/llm/LLMClient.ts
+++ b/lib/llm/LLMClient.ts
@@ -40,8 +40,14 @@ export interface ExtractionOptions extends ChatCompletionOptions {
   }; // Schema for the structured output
 }
 
+export interface EmbeddingOptions {
+  model: string;
+  input: string;
+}
+
 export interface LLMClient {
   createChatCompletion(options: ChatCompletionOptions): Promise<any>;
   createExtraction(options: ExtractionOptions): Promise<any>;
+  createEmbedding(options: EmbeddingOptions): Promise<any>;
   logger: (message: { category?: string; message: string }) => void;
 }
diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts
index 737c01eb..b5853c14 100644
--- a/lib/llm/OpenAIClient.ts
+++ b/lib/llm/OpenAIClient.ts
@@ -4,6 +4,7 @@ import {
   LLMClient,
   ChatCompletionOptions,
   ExtractionOptions,
+  EmbeddingOptions,
 } from "./LLMClient";
 
 export class OpenAIClient implements LLMClient {
@@ -85,4 +86,25 @@ export class OpenAIClient implements LLMClient {
 
     return response;
   }
+
+  async createEmbedding(options: EmbeddingOptions) {
+    this.logger({
+      category: "OpenAI",
+      message: "Creating embedding with options: " + JSON.stringify(options),
+      level: 2
+    });
+
+    const response = await this.client.embeddings.create({
+      model: options.model,
+      input: options.input,
+    });
+
+    this.logger({
+      category: "OpenAI",
+      message: "Embedding response: " + JSON.stringify(response),
+      level: 2
+    });
+
+    return response.data[0].embedding;
+  }
 }
\ No newline at end of file

From 7256fe9286bcf1ef24eb7e327cd45cd31930ca69 Mon Sep 17 00:00:00 2001
From: Anish Kachinthaya
Date: Fri, 18 Oct 2024 15:24:16 -0700
Subject: [PATCH 2/3] started semantic chunking

---
 lib/dom/process.ts | 194 ++++++++++++++++++++++++++++++++++++++++-----
 lib/dom/types.ts   |  13 ++-
 lib/index.ts       | 148 +++++++++++++++++++++++++++++++++-
 package.json       |   1 +
 4 files changed, 334 insertions(+), 22 deletions(-)

diff --git a/lib/dom/process.ts b/lib/dom/process.ts
index 3ebcd8e1..40f0a3a4 100644
--- a/lib/dom/process.ts
+++ b/lib/dom/process.ts
@@ -1,5 +1,7 @@
-async function processDom(chunksSeen: Array<number>) {
-  const { chunk, chunksArray } = await pickChunk(chunksSeen);
+import { PageElementMap } from "./types";
+
+async function processDom(chunksSeen: Array<number>, chunkPriorities?: Array<number>) {
+  const { chunk, chunksArray } = await pickChunk(chunksSeen, chunkPriorities);
   const { outputString, selectorMap } = await processElements(chunk);
 
   return {
@@ -163,9 +165,158 @@ export async function processElements(chunk: number) {
   };
 }
 
+export async function getPageElementMap() {
+  const viewportHeight = window.innerHeight;
+  const documentHeight = document.documentElement.scrollHeight;
+  const chunks = Math.ceil(documentHeight / viewportHeight);
+  const chunksArray = Array.from({ length: chunks }, (_, i) => i);
+
+  let outputMap: PageElementMap = {};
+
+  for (const chunk of chunksArray) {
+    const chunkHeight = viewportHeight * chunk;
+
+    // Calculate the maximum scrollable offset
+    const maxScrollTop =
+      document.documentElement.scrollHeight - window.innerHeight;
+
+    // Adjust the offsetTop to not exceed the maximum scrollable offset
+    const offsetTop = Math.min(chunkHeight, maxScrollTop);
+
+    await scrollToHeight(offsetTop);
+
+    const domString = window.document.body.outerHTML;
+    if (!domString) {
+      throw new Error("error selecting DOM that doesn't exist");
+    }
+
+    const candidateElements: Array<ChildNode> = [];
+    const DOMQueue: Array<ChildNode> = [...document.body.childNodes];
+    while (DOMQueue.length > 0) {
+      const element = DOMQueue.pop();
+
+      let shouldAddElement = false;
+
+      if (element && isElementNode(element)) {
+        const childrenCount = element.childNodes.length;
+
+        // Always traverse child nodes
+        for (let i = childrenCount - 1; i >= 0; i--) {
+          const child = element.childNodes[i];
+          DOMQueue.push(child as ChildNode);
+        }
+
+        // Check if element is interactive
+        if (isInteractiveElement(element)) {
+          if ((await isActive(element)) && isVisible(element)) {
+            shouldAddElement = true;
+          }
+        }
+
+        if (isLeafElement(element)) {
+          if ((await isActive(element)) && isVisible(element)) {
+            shouldAddElement = true;
+          }
+        }
+      }
+
+      if (element && isTextNode(element) && isTextVisible(element)) {
+        shouldAddElement = true;
+      }
+
+      if (shouldAddElement) {
+        candidateElements.push(element);
+      }
+    }
+
+    candidateElements.forEach((element, index) => {
+      const xpath = generateXPath(element);
+      if (isTextNode(element)) {
+        outputMap[xpath] = { string: element.textContent, chunk: chunk, embedding: [] };
+      } else if (isElementNode(element)) {
+        const tagName = element.tagName.toLowerCase();
+
+        // Collect essential attributes
+        const attributes: string[] = [];
+        if (element.id) {
+          attributes.push(`id="${element.id}"`);
+        }
+        if (element.className) {
+          attributes.push(`class="${element.className}"`);
+        }
+        if (element.getAttribute("href")) {
+          attributes.push(`href="${element.getAttribute("href")}"`);
+        }
+        if (element.getAttribute("src")) {
+          attributes.push(`src="${element.getAttribute("src")}"`);
+        }
+        if (element.getAttribute("aria-label")) {
+          attributes.push(`aria-label="${element.getAttribute("aria-label")}"`);
+        }
+        if (element.getAttribute("aria-name")) {
+          attributes.push(`aria-name="${element.getAttribute("aria-name")}"`);
+        }
+        if (element.getAttribute("aria-role")) {
+          attributes.push(`aria-role="${element.getAttribute("aria-role")}"`);
+        }
+        if (element.getAttribute("aria-description")) {
+          attributes.push(
+            `aria-description="${element.getAttribute("aria-description")}"`,
+          );
+        }
+        if (element.getAttribute("aria-expanded")) {
+          attributes.push(
+            `aria-expanded="${element.getAttribute("aria-expanded")}"`,
+          );
+        }
+        if (element.getAttribute("aria-haspopup")) {
+          attributes.push(
+            `aria-haspopup="${element.getAttribute("aria-haspopup")}"`,
+          );
+        }
+
+        for (const attr of element.attributes) {
+          if (attr.name.startsWith("data-")) {
+            attributes.push(`${attr.name}="${attr.value}"`);
+          }
+        }
+
+        // Build the simplified element string
+        const openingTag = `<${tagName}${
+          attributes.length > 0 ? " " + attributes.join(" ") : ""
+        }>`;
+        const closingTag = `</${tagName}>`;
+        const textContent = element.textContent.trim();
+
+        outputMap[xpath] = { string: `${openingTag}${textContent}${closingTag}`, chunk: chunk, embedding: [] };
+      }
+    });
+  }
+
+  await scrollToHeight(0);
+
+  return outputMap;
+}
+
+export async function getPageChunkMap() {
+  const viewportHeight = window.innerHeight;
+  const documentHeight = document.documentElement.scrollHeight;
+  const chunks = Math.ceil(documentHeight / viewportHeight);
+  const chunksArray = Array.from({ length: chunks }, (_, i) => i);
+
+  let outputMap: PageElementMap = {};
+  for (const chunk of chunksArray) {
+    const { outputString, selectorMap } = await processElements(chunk);
+    outputMap[chunk] = { string: outputString, chunk: chunk, embedding: [] };
+  }
+  return outputMap;
+}
+
+window.getPageElementMap = getPageElementMap;
 window.processDom = processDom;
 window.processElements = processElements;
 window.scrollToHeight = scrollToHeight;
+window.getPageChunkMap = getPageChunkMap;
 
 function generateXPath(element: ChildNode): string {
   if (isElementNode(element) && element.id) {
@@ -385,7 +536,7 @@ const isLeafElement = (element: Element) => {
   return false;
 };
 
-async function pickChunk(chunksSeen: Array<number>) {
+async function pickChunk(chunksSeen: Array<number>, chunkPriorities?: Array<number>) {
   const viewportHeight = window.innerHeight;
   const documentHeight = document.documentElement.scrollHeight;
 
@@ -396,22 +547,27 @@ async function pickChunk(chunksSeen: Array<number>) {
     return !chunksSeen.includes(chunk);
   });
 
-  const currentScrollPosition = window.scrollY;
-  const closestChunk = chunksRemaining.reduce((closest, current) => {
-    const currentChunkTop = viewportHeight * current;
-    const closestChunkTop = viewportHeight * closest;
-    return Math.abs(currentScrollPosition - currentChunkTop) <
-      Math.abs(currentScrollPosition - closestChunkTop)
-      ? current
-      : closest;
-  }, chunksRemaining[0]);
-  const chunk = closestChunk;
-
-  if (chunk === undefined) {
+  if (chunksRemaining.length === 0) {
     throw new Error(`no chunks remaining to check ${chunksRemaining}, `);
+  } else if (chunkPriorities.length > 0) {
+    const sortedChunks = chunksRemaining.sort((a, b) => chunkPriorities.indexOf(a) - chunkPriorities.indexOf(b));
+    return {
+      chunk: sortedChunks[0],
+      chunksArray,
+    };
+  } else {
+    const currentScrollPosition = window.scrollY;
+    const closestChunk = chunksRemaining.reduce((closest, current) => {
+      const currentChunkTop = viewportHeight * current;
+      const closestChunkTop = viewportHeight * closest;
+      return Math.abs(currentScrollPosition - currentChunkTop) <
+        Math.abs(currentScrollPosition - closestChunkTop)
+        ? current
+        : closest;
+    }, chunksRemaining[0]);
+    return {
+      chunk: closestChunk,
+      chunksArray,
+    };
   }
-  return {
-    chunk,
-    chunksArray,
-  };
 }
diff --git a/lib/dom/types.ts b/lib/dom/types.ts
index 4858cbf2..a064c894 100644
--- a/lib/dom/types.ts
+++ b/lib/dom/types.ts
@@ -1,8 +1,17 @@
 export {};
+
+export interface PageElementMap {
+  [key: string]: {
+    string: string;
+    chunk: number;
+    embedding: number[];
+  };
+}
+
 declare global {
   interface Window {
     chunkNumber: number;
-    processDom: (chunksSeen: Array<number>) => Promise<{
+    processDom: (chunksSeen: Array<number>, chunkPriorities?: Array<number>) => Promise<{
       outputString: string;
       selectorMap: Record<number, string>;
       chunk: number;
@@ -15,5 +24,7 @@ declare global {
     debugDom: () => Promise<void>;
     cleanupDebug: () => void;
     scrollToHeight: (height: number) => Promise<void>;
+    getPageElementMap: () => Promise<PageElementMap>;
+    getPageChunkMap: () => Promise<PageElementMap>;
   }
 }
diff --git a/lib/index.ts b/lib/index.ts
index 39cc572e..6e433842 100644
--- a/lib/index.ts
+++ b/lib/index.ts
@@ -10,6 +10,8 @@ import path from "path";
 import Browserbase from "./browserbase";
 import { ScreenshotService } from "./vision";
 import { LLMClient, modelsWithVision } from "./llm/LLMClient";
+import { dot, norm } from 'mathjs';
+import { PageElementMap } from "./dom/types";
 
 require("dotenv").config({ path: ".env" });
 
@@ -155,6 +157,7 @@ export class Stagehand {
   public defaultModelName: string;
   public headless: boolean;
   private logger: (message: { category?: string; message: string }) => void;
+  private pageElementMap: PageElementMap;
 
   constructor(
     {
@@ -235,6 +238,13 @@ export class Stagehand {
     });
   }
 
+  async goto(url: string) {
+    await this.page.goto(url);
+    await this.waitForSettledDom();
+    this.pageElementMap = {};
+    this.computePageElementMap();
+  }
+
   getLLMClient(modelName: string): LLMClient {
     return this.llmProvider.getClient(modelName);
   }
@@ -291,6 +301,123 @@ export class Stagehand {
     return crypto.createHash("sha256").update(operation).digest("hex");
   }
 
+  // Add this utility function to calculate cosine similarity
+  private cosineSimilarity(embedding1: number[], embedding2: number[]): number {
+    return dot(embedding1, embedding2) / (norm(embedding1) * norm(embedding2));
+  }
+
+  // Add this method to find the most similar embedding
+  async findMostSimilarEmbedding(queryEmbedding: number[], allOutputMap: PageElementMap) {
+    const llmClient = this.llmProvider.getClient(this.defaultModelName);
+
+    let mostSimilarKey = '';
+    let highestSimilarity = -Infinity;
+
+    for (const key in allOutputMap) {
+      let embedding: number[];
+      if (!allOutputMap[key].embedding) {
+        embedding = await llmClient.createEmbedding({
+          model: "text-embedding-3-small",
+          input: allOutputMap[key].string,
+        }).then(res => res.data[0].embedding);
+        allOutputMap[key].embedding = embedding;
+      } else {
+        embedding = allOutputMap[key].embedding;
+      }
+
+      const similarity = this.cosineSimilarity(queryEmbedding, embedding);
+      if (similarity > highestSimilarity) {
+        highestSimilarity = similarity;
+        mostSimilarKey = key;
+      }
+    }
+
+    return { key: mostSimilarKey, similarity: highestSimilarity };
+  }
+
+  async findSimilarChunks(queryEmbedding: number[], allOutputMap: PageElementMap) {
+    const llmClient = this.llmProvider.getClient(this.defaultModelName);
+
+    let similarities: { key: string; similarity: number; chunk: number }[] = [];
+
+    for (const key in allOutputMap) {
+      let embedding: number[];
+      if (!allOutputMap[key].embedding) {
+        embedding = await llmClient.createEmbedding({
+          model: "text-embedding-3-small",
+          input: allOutputMap[key].string,
+        }).then(res => res.data[0].embedding);
+        allOutputMap[key].embedding = embedding;
+      } else {
+        embedding = allOutputMap[key].embedding;
+      }
+
+      const similarity = this.cosineSimilarity(queryEmbedding, embedding);
+      similarities.push({
+        key,
+        similarity,
+        chunk: allOutputMap[key].chunk
+      });
+    }
+
+    // Sort similarities in descending order
+    similarities.sort((a, b) => b.similarity - a.similarity);
+
+    // Group by chunk and keep the highest similarity for each chunk
+    const chunkMap = new Map();
+    for (const item of similarities) {
+      if (!chunkMap.has(item.chunk) || item.similarity > chunkMap.get(item.chunk)!.similarity) {
+        chunkMap.set(item.chunk, { similarity: item.similarity, elements: [item.key] });
+      } else if (item.similarity === chunkMap.get(item.chunk)!.similarity) {
+        chunkMap.get(item.chunk)!.elements.push(item.key);
+      }
+    }
+
+    // Convert map to array and sort by similarity
+    const orderedChunks = Array.from(chunkMap.entries())
+      .sort((a, b) => b[1].similarity - a[1].similarity)
+      .map(([chunk, { similarity }]) => ({ chunk, highestSimilarityElement: similarity }));
+
+    return orderedChunks;
+  }
+
+  async computePageElementMap() {
+    const llmClient = this.llmProvider.getClient(this.defaultModelName);
+    const allOutputMap = await this.page.evaluate(async () => {
+      return window.getPageElementMap();
+    });
+    const totalKeys = Object.keys(allOutputMap).length;
+    let processedKeys = 0;
+
+    const embeddingPromises = Object.entries(allOutputMap).map(async ([key, value]) => {
+      const embedding = await llmClient.createEmbedding({
+        model: "text-embedding-3-small",
+        input: value.string,
+      });
+      return { key, embedding };
+    });
+
+    const embeddings = await Promise.all(embeddingPromises);
+
+    embeddings.forEach(({ key, embedding }) => {
+      this.pageElementMap[key] = {
+        string: allOutputMap[key].string,
+        chunk: allOutputMap[key].chunk,
+        embedding: embedding,
+      };
+
+      processedKeys++;
+      const progress = (processedKeys / totalKeys) * 100;
+      // console.log(`Progress: ${progress.toFixed(2)}%`);
+    });
+    this.log({
+      category: "dom",
+      message: `Computed page element map with ${processedKeys} keys`,
+      level: 1,
+    });
+  }
+
   async extract({
     instruction,
     schema,
@@ -312,12 +439,29 @@ export class Stagehand {
       level: 1,
     });
 
+    let chunkPriorities: Array<number> = [];
+
+    if (this.pageElementMap) {
+      const llmClient = this.getLLMClient(modelName || this.defaultModelName);
+      const targetEmbedding = await llmClient.createEmbedding({
+        model: "text-embedding-3-small",
+        input: instruction,
+      });
+
+      const sortedChunks = await this.findSimilarChunks(targetEmbedding, this.pageElementMap);
+      chunkPriorities = sortedChunks.map(chunk => chunk.chunk);
+      // console.log("chunkPriorities", chunkPriorities);
+    }
+
     await this.waitForSettledDom();
     await this.startDomDebug();
     const { outputString, chunk, chunks } = await this.page.evaluate(
-      (chunksSeen?: number[]) => window.processDom(chunksSeen ?? []),
-      chunksSeen,
+      (args: { chunksSeen: number[], chunkPriorities?: number[] }) => window.processDom(args.chunksSeen, args.chunkPriorities),
+      { chunksSeen, chunkPriorities }
     );
+    // console.log("chosen chunk", chunk);
+    // console.log("chunksSeen", chunksSeen);
+    // console.log("all chunks", chunks);
 
     this.log({
       category: "extraction",
diff --git a/package.json b/package.json
index 6191fca5..d520c01b 100644
--- a/package.json
+++ b/package.json
@@ -52,6 +52,7 @@
     "@anthropic-ai/sdk": "^0.27.3",
     "anthropic": "^0.0.0",
     "anthropic-ai": "^0.0.10",
+    "mathjs": "^13.2.0",
     "sharp": "^0.33.5",
     "zod-to-json-schema": "^3.23.3"
   }
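
[Editorial note, not part of the patch series] The core step PATCH 2 introduces is a ranking: embed each chunk's element strings, embed the query, order the chunks by cosine similarity, and feed that ordering into pickChunk as chunkPriorities. The sketch below is a minimal standalone TypeScript illustration of that ranking under stated assumptions: the embedding vectors are assumed to be already computed, plain Math is used instead of mathjs, and the names rankChunks and ChunkEmbedding are illustrative only and do not exist in the patches.

```ts
// Editorial sketch only: rank chunks by cosine similarity to a query embedding.
type ChunkEmbedding = { chunk: number; embedding: number[] };

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Highest-similarity chunk first; the result plays the role of chunkPriorities.
function rankChunks(queryEmbedding: number[], chunks: ChunkEmbedding[]): number[] {
  return chunks
    .map(({ chunk, embedding }) => ({
      chunk,
      score: cosineSimilarity(queryEmbedding, embedding),
    }))
    .sort((a, b) => b.score - a.score)
    .map(({ chunk }) => chunk);
}

// Toy example with 2-dimensional "embeddings".
const priorities = rankChunks(
  [0.1, 0.9],
  [
    { chunk: 0, embedding: [0.9, 0.1] },
    { chunk: 1, embedding: [0.5, 0.5] },
    { chunk: 2, embedding: [0.12, 0.88] },
  ],
);
console.log(priorities); // [2, 1, 0]
```

With the toy vectors above this prints [2, 1, 0]: chunk 2's embedding is closest to the query, so it would be the first chunk handed to processDom, while the scroll-position heuristic remains the fallback when no priorities are supplied.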

From 77c0f3c631d851ba681242314543932d5e2c45c8 Mon Sep 17 00:00:00 2001
From: Anish Kachinthaya
Date: Tue, 22 Oct 2024 16:46:54 -0700
Subject: [PATCH 3/3] use in act instead of extract

---
 lib/dom/process.ts |   2 +-
 lib/index.ts       | 119 +++++++++++++++++++++++++++++++++------------
 2 files changed, 90 insertions(+), 31 deletions(-)

diff --git a/lib/dom/process.ts b/lib/dom/process.ts
index 40f0a3a4..9c85bc6e 100644
--- a/lib/dom/process.ts
+++ b/lib/dom/process.ts
@@ -549,7 +549,7 @@ async function pickChunk(chunksSeen: Array<number>, chunkPriorities?: Array<number>) {
     throw new Error(`no chunks remaining to check ${chunksRemaining}, `);
-  } else if (chunkPriorities.length > 0) {
+  } else if (chunkPriorities && chunkPriorities.length > 0) {
     const sortedChunks = chunksRemaining.sort((a, b) => chunkPriorities.indexOf(a) - chunkPriorities.indexOf(b));
     return {
       chunk: sortedChunks[0],
       chunksArray,
     };
diff --git a/lib/index.ts b/lib/index.ts
index 6e433842..582dc26a 100644
--- a/lib/index.ts
+++ b/lib/index.ts
@@ -155,6 +155,7 @@ export class Stagehand {
   public verbose: 0 | 1 | 2;
   public debugDom: boolean;
   public defaultModelName: string;
+  public defaultEmbeddingModel: string;
   public headless: boolean;
   private logger: (message: { category?: string; message: string }) => void;
   private pageElementMap: PageElementMap;
@@ -212,11 +213,14 @@ export class Stagehand {
     await download.delete();
   }
 
-  async init({ modelName = "gpt-4o" }: { modelName?: string } = {}) {
+  async init({ modelName = "gpt-4o", embeddingModel = "text-embedding-3-small" }: { modelName?: string, embeddingModel?: string } = {}) {
     const { context } = await getBrowser(this.env, this.headless);
     this.context = context;
     this.page = context.pages()[0];
     this.defaultModelName = modelName;
+    this.defaultEmbeddingModel = embeddingModel;
+
+    await this.setupParseSemanticDomOnLoad();
 
     // Set the browser to headless mode if specified
     if (this.headless) {
@@ -238,11 +242,12 @@ export class Stagehand {
     });
   }
 
-  async goto(url: string) {
-    await this.page.goto(url);
-    await this.waitForSettledDom();
-    this.pageElementMap = {};
-    this.computePageElementMap();
+  private async setupParseSemanticDomOnLoad() {
+    this.page.on('load', async () => {
+      console.log('Page loaded. Parsing semantic DOM...');
+      this.pageElementMap = {};
+      this.computePageElementMap();
+    });
   }
 
   getLLMClient(modelName: string): LLMClient {
@@ -344,7 +349,7 @@ export class Stagehand {
       let embedding: number[];
       if (!allOutputMap[key].embedding) {
         embedding = await llmClient.createEmbedding({
-          model: "text-embedding-3-small",
+          model: this.defaultEmbeddingModel,
           input: allOutputMap[key].string,
         }).then(res => res.data[0].embedding);
         allOutputMap[key].embedding = embedding;
@@ -391,12 +396,18 @@ export class Stagehand {
 
     const embeddingPromises = Object.entries(allOutputMap).map(async ([key, value]) => {
       const embedding = await llmClient.createEmbedding({
-        model: "text-embedding-3-small",
+        model: this.defaultEmbeddingModel,
         input: value.string,
       });
       return { key, embedding };
     });
 
+    this.log({
+      category: "dom",
+      message: `Computing page element map with ${totalKeys} keys`,
+      level: 1,
+    });
+
     const embeddings = await Promise.all(embeddingPromises);
 
     embeddings.forEach(({ key, embedding }) => {
@@ -405,9 +416,8 @@ export class Stagehand {
         chunk: allOutputMap[key].chunk,
         embedding: embedding,
       };
-
       processedKeys++;
-      const progress = (processedKeys / totalKeys) * 100;
+      // const progress = (processedKeys / totalKeys) * 100;
       // console.log(`Progress: ${progress.toFixed(2)}%`);
     });
     this.log({
@@ -417,6 +427,58 @@ export class Stagehand {
     });
   }
 
+  async generateTargetElement(instruction: string): Promise<string> {
+    const llmClient = this.getLLMClient(this.defaultModelName);
+    const schema = z.object({
+      targetElementDescription: z.string().describe("A concise phrase describing the target HTML element to look for based on the given action")
+    });
+
+    const result = await llmClient.createExtraction({
+      instruction: `Based on the following action, generate a concise phrase describing the target HTML element to look for:
+
+      Action: ${instruction}`,
+      schema,
+      response_model: {
+        name: "targetElementDescription",
+        schema: schema
+      },
+      model: this.defaultModelName,
+      messages: [
+        {
+          role: "user",
+          content: `Based on the following action, generate a concise phrase describing the target HTML element to look for:
+
+          Action: ${instruction}`
+        }
+      ]
+    });
+
+    return result.targetElementDescription;
+  }
+
+  async computeChunkPriorities(targetElement: string, modelName?: string) {
+    let chunkPriorities: Array<number> = [];
+
+    this.log({
+      category: "dom",
+      message: `Target element: ${targetElement}`,
+      level: 1,
+    });
+
+    if (this.pageElementMap) {
+      const llmClient = this.getLLMClient(modelName || this.defaultModelName);
+      const targetEmbedding = await llmClient.createEmbedding({
+        model: this.defaultEmbeddingModel,
+        input: targetElement,
+      });
+
+      const sortedChunks = await this.findSimilarChunks(targetEmbedding, this.pageElementMap);
+      chunkPriorities = sortedChunks.map(chunk => chunk.chunk);
+      console.log("chunkPriorities", chunkPriorities);
+    }
+
+    return chunkPriorities;
+  }
+
   async extract({
     instruction,
     schema,
@@ -439,19 +501,8 @@ export class Stagehand {
       level: 1,
     });
 
-    let chunkPriorities: Array<number> = [];
-
-    if (this.pageElementMap) {
-      const llmClient = this.getLLMClient(modelName || this.defaultModelName);
-      const targetEmbedding = await llmClient.createEmbedding({
-        model: "text-embedding-3-small",
-        input: instruction,
-      });
-
-      const sortedChunks = await this.findSimilarChunks(targetEmbedding, this.pageElementMap);
-      chunkPriorities = sortedChunks.map(chunk => chunk.chunk);
-      // console.log("chunkPriorities", chunkPriorities);
-    }
+    // const chunkPriorities = await this.computeChunkPriorities(instruction, modelName);
+    const chunkPriorities: Array<number> = [];
 
     await this.waitForSettledDom();
     await this.startDomDebug();
@@ -459,9 +510,9 @@ export class Stagehand {
       (args: { chunksSeen: number[], chunkPriorities?: number[] }) => window.processDom(args.chunksSeen, args.chunkPriorities),
       { chunksSeen, chunkPriorities }
     );
-    // console.log("chosen chunk", chunk);
-    // console.log("chunksSeen", chunksSeen);
-    // console.log("all chunks", chunks);
+    console.log("chosen chunk", chunk);
+    console.log("chunksSeen", chunksSeen);
+    console.log("all chunks", chunks);
 
     this.log({
       category: "extraction",
@@ -579,6 +630,7 @@ export class Stagehand {
 
     return observationId;
   }
+
   async ask(question: string, modelName?: string): Promise<any> {
     return ask({
       question,
@@ -640,11 +692,18 @@ export class Stagehand {
 
     await this.startDomDebug();
 
+    const targetElement = await this.generateTargetElement(action);
+    const chunkPriorities = await this.computeChunkPriorities(targetElement, modelName);
+
     const { outputString, selectorMap, chunk, chunks } =
-      await this.page.evaluate((chunksSeen) => {
-        return window.processDom(chunksSeen);
-      }, chunksSeen);
+      await this.page.evaluate((args: { chunksSeen: number[], chunkPriorities?: number[] }) => {
+        return window.processDom(args.chunksSeen, args.chunkPriorities);
+      }, { chunksSeen, chunkPriorities });
+
+    console.log("chosen chunk", chunk);
+    console.log("chunksSeen", chunksSeen);
+    console.log("all chunks", chunks);
 
     // New code to add bounding boxes and element numbers
     let annotatedScreenshot: Buffer | undefined = undefined;
     if (useVision === true) {