opencode/packages/opencode/src/tool/webfetch.ts at dev · stackblitz/opencode

This repository was archived by the owner on Feb 19, 2026. It is now read-only.

History

206 lines (182 loc) · 5.99 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

import z from "zod"

import { Tool } from "./tool"

import TurndownService from "turndown"

import DESCRIPTION from "./webfetch.txt"

import { abortAfterAny } from "../util/abort"

/** Largest response body accepted, in bytes (5 MiB). */
const MAX_RESPONSE_SIZE = 5 * 1024 ** 2

/** Timeout used when the caller does not supply one, in milliseconds (30 seconds). */
const DEFAULT_TIMEOUT = 1000 * 30

/** Upper bound for any requested timeout, in milliseconds (2 minutes). */
const MAX_TIMEOUT = 2 * 60 * 1000

/** Error text used whenever a response exceeds MAX_RESPONSE_SIZE. */
const RESPONSE_TOO_LARGE = "Response too large (exceeds 5MB limit)"

/**
 * Tool that downloads an http(s) URL and returns its content as text,
 * markdown, or raw HTML.
 *
 * Before any network traffic the `webfetch` permission is requested through
 * `ctx.ask`. Responses whose MIME type is an image (other than
 * `image/svg+xml` and `image/vnd.fastbidsheet`) are returned as a base64
 * `data:` URL attachment instead of decoded text.
 *
 * @throws Error when the URL does not start with `http://` or `https://`,
 *   when the response status is not OK, or when the body exceeds
 *   MAX_RESPONSE_SIZE.
 */
export const WebFetchTool = Tool.define("webfetch", {
  description: DESCRIPTION,
  parameters: z.object({
    url: z.string().describe("The URL to fetch content from"),
    format: z
      .enum(["text", "markdown", "html"])
      .default("markdown")
      .describe("The format to return the content in (text, markdown, or html). Defaults to markdown."),
    timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
  }),
  async execute(params, ctx) {
    // Validate URL
    if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
      throw new Error("URL must start with http:// or https://")
    }

    await ctx.ask({
      permission: "webfetch",
      patterns: [params.url],
      always: ["*"],
      metadata: {
        url: params.url,
        format: params.format,
        timeout: params.timeout,
      },
    })

    // params.timeout is in seconds; everything below works in milliseconds.
    const timeout = Math.min((params.timeout ?? DEFAULT_TIMEOUT / 1000) * 1000, MAX_TIMEOUT)
    const { signal, clearTimeout } = abortAfterAny(timeout, ctx.abort)

    // Build Accept header based on requested format with q parameters for fallbacks
    let acceptHeader = "*/*"
    switch (params.format) {
      case "markdown":
        acceptHeader = "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1"
        break
      case "text":
        acceptHeader = "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1"
        break
      case "html":
        acceptHeader = "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1"
        break
      default:
        acceptHeader =
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
    }

    const headers = {
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
      Accept: acceptHeader,
      "Accept-Language": "en-US,en;q=0.9",
    }

    let response: Response
    try {
      const initial = await fetch(params.url, { signal, headers })

      // Retry with honest UA if blocked by Cloudflare bot detection (TLS fingerprint mismatch)
      response =
        initial.status === 403 && initial.headers.get("cf-mitigated") === "challenge"
          ? await fetch(params.url, { signal, headers: { ...headers, "User-Agent": "opencode" } })
          : initial
    } finally {
      // Always release the timeout, including when fetch rejects (network
      // error, abort); otherwise the timer would stay pending until it fires.
      // NOTE(review): presumably this only cancels the timer created by
      // abortAfterAny — confirm against ../util/abort.
      clearTimeout()
    }

    if (!response.ok) {
      throw new Error(`Request failed with status code: ${response.status}`)
    }

    // Fast rejection when the server announces an oversized body up front.
    const contentLength = response.headers.get("content-length")
    if (contentLength && Number.parseInt(contentLength, 10) > MAX_RESPONSE_SIZE) {
      throw new Error(RESPONSE_TOO_LARGE)
    }

    // Content-Length may be absent or wrong, so the limit is enforced again
    // while streaming instead of buffering an arbitrarily large body first.
    const bytes = await readBodyWithLimit(response, MAX_RESPONSE_SIZE)

    const contentType = response.headers.get("content-type") || ""
    const mime = contentType.split(";")[0]?.trim().toLowerCase() || ""
    const title = `${params.url} (${contentType})`

    // Check if response is an image
    const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
    if (isImage) {
      const base64Content = Buffer.from(bytes).toString("base64")
      return {
        title,
        output: "Image fetched successfully",
        metadata: {},
        attachments: [
          {
            type: "file",
            mime,
            url: `data:${mime};base64,${base64Content}`,
          },
        ],
      }
    }

    const content = new TextDecoder().decode(bytes)

    // Media types are case-insensitive, so compare against a lower-cased header.
    const isHtml = contentType.toLowerCase().includes("text/html")

    // Only HTML needs converting; every other combination of requested format
    // and actual content type is returned verbatim.
    let output = content
    if (isHtml && params.format === "markdown") {
      output = convertHTMLToMarkdown(content)
    } else if (isHtml && params.format === "text") {
      output = await extractTextFromHTML(content)
    }

    return {
      output,
      title,
      metadata: {},
    }
  },
})

/**
 * Reads a response body into memory, failing as soon as more than `limit`
 * bytes have been received.
 *
 * @param response Response whose body has not been consumed yet.
 * @param limit Maximum number of bytes to accept.
 * @returns The complete body as a single byte array.
 * @throws Error when the body is larger than `limit`.
 */
async function readBodyWithLimit(response: Response, limit: number): Promise<Uint8Array> {
  if (response.body === null) {
    // No stream available; fall back to buffering and check afterwards.
    const whole = new Uint8Array(await response.arrayBuffer())
    if (whole.byteLength > limit) {
      throw new Error(RESPONSE_TOO_LARGE)
    }
    return whole
  }

  const reader = response.body.getReader()
  const chunks: Uint8Array[] = []
  let received = 0

  for (;;) {
    const result = await reader.read()
    if (result.done) break
    const chunk: Uint8Array = result.value
    received += chunk.byteLength
    if (received > limit) {
      // Stop the download; a failure while cancelling must not mask the size error.
      void reader.cancel().catch(() => {})
      throw new Error(RESPONSE_TOO_LARGE)
    }
    chunks.push(chunk)
  }

  const merged = new Uint8Array(received)
  let offset = 0
  for (const chunk of chunks) {
    merged.set(chunk, offset)
    offset += chunk.byteLength
  }
  return merged
}

/**
 * Extracts the text content of an HTML document, leaving out everything
 * inside `script`, `style`, `noscript`, `iframe`, `object` and `embed`
 * elements (including their descendants).
 *
 * @param html HTML markup to process.
 * @returns The concatenated text of all remaining elements, trimmed.
 */
async function extractTextFromHTML(html: string) {
  let text = ""

  // Number of skipped elements that are currently open. A counter (rather
  // than a single flag) keeps nested content skipped and resumes collection
  // exactly when the outermost skipped element closes, so text that follows
  // e.g. a <script> inside the same parent is no longer lost.
  let skipDepth = 0

  const transformed = new HTMLRewriter()
    .on("script, style, noscript, iframe, object, embed", {
      element(element) {
        // Void or self-closing elements (e.g. <embed>) have no content and
        // no end tag that would bring the counter back down.
        if (!element.canHaveContent) return
        skipDepth++
        element.onEndTag(() => {
          skipDepth--
        })
      },
    })
    .on("*", {
      text(input) {
        if (skipDepth === 0) {
          text += input.text
        }
      },
    })
    .transform(new Response(html))

  // Consuming the transformed body is what drives the handlers above.
  await transformed.text()

  return text.trim()
}

/**
 * Converts an HTML string to markdown using Turndown.
 *
 * Output uses ATX headings, `---` rules, `-` bullets, fenced code blocks and
 * `*` emphasis; `script`, `style`, `meta` and `link` elements are removed.
 *
 * @param html HTML markup to convert.
 * @returns The markdown produced by Turndown.
 */
function convertHTMLToMarkdown(html: string): string {
  const converter = new TurndownService({
    headingStyle: "atx",
    hr: "---",
    bulletListMarker: "-",
    codeBlockStyle: "fenced",
    emDelimiter: "*",
  })

  // Register the elements to drop before running the conversion.
  converter.remove(["script", "style", "meta", "link"])

  const markdown = converter.turndown(html)
  return markdown
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

webfetch.ts

Latest commit

History

webfetch.ts

File metadata and controls