22
33With Playwright/Selenium, you'd need custom selectors for every
44site, and they'd break whenever the site redesigns. With Lightcone, Northstar
5- just * reads the screen* like a human would. One script works on any site .
5+ just reads the screen like a human would.
66
77Demonstrates the two-phase pattern:
88 Phase 1 (explore): CUA loop WITH tools — Northstar scrolls, dismisses popups, etc.
2626 "environment" : "browser" ,
2727}
2828
29- # These are totally different sites with different layouts — doesn't matter.
29+ # Sites with different layouts
3030COMPANIES = [
3131 {"name" : "Stripe" , "url" : "https://stripe.com/pricing" },
3232 {"name" : "Square" , "url" : "https://squareup.com/us/en/pricing" },
4545Reply with ONLY the JSON, no other text."""
4646
4747
48+ def _px (coord , dim ):
49+ """Convert a 0–1000 model coordinate to a pixel coordinate."""
50+ return int (coord / 1000 * dim )
51+
52+
4853def execute_action (computer , action ):
4954 """Execute a single model action on the computer session."""
55+ w , h = TOOL ["display_width" ], TOOL ["display_height" ]
5056 t = action .type
5157 if t == "click" :
52- computer .click (action .x , action .y )
58+ computer .click (_px ( action .x , w ), _px ( action .y , h ) )
5359 elif t == "double_click" :
54- computer .double_click (action .x , action .y )
60+ computer .double_click (_px ( action .x , w ), _px ( action .y , h ) )
5561 elif t == "type" :
5662 computer .type (action .text )
5763 elif t in ("key" , "keypress" ):
@@ -60,8 +66,8 @@ def execute_action(computer, action):
6066 computer .scroll (
6167 dx = action .scroll_x or 0 ,
6268 dy = action .scroll_y or 0 ,
63- x = action .x or 0 ,
64- y = action .y or 0 ,
69+ x = _px ( action .x or 0 , w ) ,
70+ y = _px ( action .y or 0 , h ) ,
6571 )
6672 elif t == "navigate" :
6773 computer .navigate (action .url )
@@ -96,16 +102,18 @@ def research_page(computer, url, prompt, max_explore_steps=10):
96102 response = client .responses .create (
97103 model = "tzafon.northstar-cua-fast" ,
98104 tools = [TOOL ],
99- input = [{
100- "role" : "user" ,
101- "content" : [
102- {
103- "type" : "input_text" ,
104- "text" : "Scroll down slowly. Dismiss any popups or cookie banners. Stop when you can see transaction fees or per-payment pricing." ,
105- },
106- {"type" : "input_image" , "image_url" : screenshot_url , "detail" : "auto" },
107- ],
108- }],
105+ input = [
106+ {
107+ "role" : "user" ,
108+ "content" : [
109+ {
110+ "type" : "input_text" ,
111+ "text" : "Scroll down slowly. Dismiss any popups or cookie banners. Stop when you can see transaction fees or per-payment pricing." ,
112+ },
113+ {"type" : "input_image" , "image_url" : screenshot_url , "detail" : "auto" },
114+ ],
115+ }
116+ ],
109117 )
110118
111119 for step in range (max_explore_steps ):
@@ -136,25 +144,33 @@ def research_page(computer, url, prompt, max_explore_steps=10):
136144 model = "tzafon.northstar-cua-fast" ,
137145 previous_response_id = response .id ,
138146 tools = [TOOL ],
139- input = [{
140- "type" : "computer_call_output" ,
141- "call_id" : computer_call .call_id ,
142- "output" : {"type" : "input_image" , "image_url" : screenshot_url , "detail" : "auto" },
143- }],
147+ input = [
148+ {
149+ "type" : "computer_call_output" ,
150+ "call_id" : computer_call .call_id ,
151+ "output" : {
152+ "type" : "input_image" ,
153+ "image_url" : screenshot_url ,
154+ "detail" : "auto" ,
155+ },
156+ }
157+ ],
144158 )
145159
146160 # --- Phase 2: Extract structured data (no tools = text response) ---
147161 print (" [extract]" )
148162 screenshot_url = computer .get_screenshot_url (computer .screenshot ())
149163 extraction = client .responses .create (
150164 model = "tzafon.northstar-cua-fast" ,
151- input = [{
152- "role" : "user" ,
153- "content" : [
154- {"type" : "input_text" , "text" : prompt },
155- {"type" : "input_image" , "image_url" : screenshot_url , "detail" : "auto" },
156- ],
157- }],
165+ input = [
166+ {
167+ "role" : "user" ,
168+ "content" : [
169+ {"type" : "input_text" , "text" : prompt },
170+ {"type" : "input_image" , "image_url" : screenshot_url , "detail" : "auto" },
171+ ],
172+ }
173+ ],
158174 # No tools — forces a text response instead of actions.
159175 )
160176
@@ -174,17 +190,24 @@ def main():
174190 print (f"\n --- Researching { company ['name' ]} ---" )
175191 print (f" URL: { company ['url' ]} " )
176192
177- raw = research_page (computer , company ["url" ], EXTRACT_PROMPT )
193+ raw = research_page (
194+ computer ,
195+ company ["url" ],
196+ EXTRACT_PROMPT ,
197+ max_explore_steps = int (os .getenv ("LIGHTCONE_MAX_EXPLORE_STEPS" , "10" )),
198+ )
178199
179200 if raw :
180201 # Try to parse as JSON; if the model wrapped it in markdown, strip that.
181202 cleaned = raw .strip ().removeprefix ("```json" ).removesuffix ("```" ).strip ()
182203 try :
183204 data = json .loads (cleaned )
184205 results .append (data )
185- print (f" -> { data .get ('main_product' , '?' )} : "
186- f"{ data .get ('transaction_fee_percent' , '?' )} + "
187- f"{ data .get ('per_transaction_fee' , '?' )} /txn" )
206+ print (
207+ f" -> { data .get ('main_product' , '?' )} : "
208+ f"{ data .get ('transaction_fee_percent' , '?' )} + "
209+ f"{ data .get ('per_transaction_fee' , '?' )} /txn"
210+ )
188211 except json .JSONDecodeError :
189212 print (f" -> Raw response: { raw [:200 ]} " )
190213 results .append ({"company" : company ["name" ], "raw" : raw })
0 commit comments