Skip to content

Commit 4764bce

Browse files
committed
fix: update examples
1 parent 3dc57e9 commit 4764bce

18 files changed

Lines changed: 661 additions & 462 deletions

README.md

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
<img src="northstar.svg" alt="Northstar" width="160">
88

9-
**Northstar CUA Fast** — 4B parameters, trained with GUI reinforcement learning
9+
**Northstar CUA Fast** — trained with GUI reinforcement learning
1010

1111
[Docs](https://docs.lightcone.ai) | [API Reference](https://docs.lightcone.ai/api) | [Model](https://huggingface.co/Tzafon/Northstar-CUA-Fast) | [Pricing](https://docs.tzafon.ai/pricing) | [X (Twitter)](https://x.com/tzafon_company)
1212

@@ -29,13 +29,12 @@ It recovers from mistakes, generalizes across desktop environments, and outperfo
2929

3030
| | |
3131
|---|---|
32-
| **Parameters** | 4B |
3332
| **Context** | 64K tokens |
3433
| **Training** | GUI reinforcement learning |
3534
| **Input** | Text + screenshot |
3635
| **Output** | GUI actions — `click`, `type`, `scroll`, `key`, `drag`, ... |
37-
| **Coordinates** | 0–999 normalized (model) · pixel-scaled (Responses API) |
38-
| **Pricing** | < $1/M tokens ([details](https://docs.tzafon.ai/pricing)) |
36+
| **Coordinates** | 0–999 normalized — denormalize to pixels in your code |
37+
| **Pricing** | $1/M input · $5/M output ([details](https://docs.tzafon.ai/pricing)) |
3938

4039
---
4140

@@ -210,7 +209,7 @@ Via the **Responses API** (`/v1/responses`), coordinates are scaled to viewport
210209

211210
Evaluated on [OSWorld](https://os-world.github.io/) — 369 real-world desktop tasks.
212211

213-
| Domain | UI-TARS 2 | Qwen3 Flash | **Northstar CUA Fast (4B)** |
212+
| Domain | UI-TARS 2 | Qwen3 Flash | **Northstar CUA Fast** |
214213
|---|---|---|---|
215214
| Chrome | 62.96% | 56.43% | **55.30%** |
216215
| Thunderbird | 73.33% | 66.67% | **62.40%** |
@@ -219,7 +218,7 @@ Evaluated on [OSWorld](https://os-world.github.io/) — 369 real-world desktop t
219218
| VLC | 49.94% | 34.41% | **43.87%** |
220219
| **Overall** | **53.1%** | 41.6% | 37.01% |
221220

222-
> At 4B parameters, Northstar CUA Fast is competitive with open-source models at twice its size on single-app tasks. Using the EVOCUA agent harness: EVOCUA-8B averages 32.5% vs Northstar CUA Fast (RL) at 37.0%. See our [research blog](https://www.tzafon.ai/blog/training-vlm-for-cua) for training details.
221+
> Northstar CUA Fast is competitive with open-source models on single-app tasks. Using the EVOCUA agent harness: EVOCUA-8B averages 32.5% vs Northstar CUA Fast (RL) at 37.0%. See our [research blog](https://www.tzafon.ai/blog/training-vlm-for-cua) for training details.
223222
224223
---
225224

examples/competitor_research.py

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
With Playwright/Selenium, you'd need custom selectors for every
44
site, and they'd break whenever the site redesigns. With Lightcone, Northstar
5-
just *reads the screen* like a human would. One script works on any site.
5+
just reads the screen like a human would.
66
77
Demonstrates the two-phase pattern:
88
Phase 1 (explore): CUA loop WITH tools — Northstar scrolls, dismisses popups, etc.
@@ -26,7 +26,7 @@
2626
"environment": "browser",
2727
}
2828

29-
# These are totally different sites with different layouts — doesn't matter.
29+
# Sites with different layouts
3030
COMPANIES = [
3131
{"name": "Stripe", "url": "https://stripe.com/pricing"},
3232
{"name": "Square", "url": "https://squareup.com/us/en/pricing"},
@@ -45,13 +45,19 @@
4545
Reply with ONLY the JSON, no other text."""
4646

4747

48+
def _px(coord, dim):
49+
"""Convert a 0–999 normalized model coordinate to a pixel coordinate."""
50+
return int(coord / 1000 * dim)
51+
52+
4853
def execute_action(computer, action):
4954
"""Execute a single model action on the computer session."""
55+
w, h = TOOL["display_width"], TOOL["display_height"]
5056
t = action.type
5157
if t == "click":
52-
computer.click(action.x, action.y)
58+
computer.click(_px(action.x, w), _px(action.y, h))
5359
elif t == "double_click":
54-
computer.double_click(action.x, action.y)
60+
computer.double_click(_px(action.x, w), _px(action.y, h))
5561
elif t == "type":
5662
computer.type(action.text)
5763
elif t in ("key", "keypress"):
@@ -60,8 +66,8 @@ def execute_action(computer, action):
6066
computer.scroll(
6167
dx=action.scroll_x or 0,
6268
dy=action.scroll_y or 0,
63-
x=action.x or 0,
64-
y=action.y or 0,
69+
x=_px(action.x or 0, w),
70+
y=_px(action.y or 0, h),
6571
)
6672
elif t == "navigate":
6773
computer.navigate(action.url)
@@ -96,16 +102,18 @@ def research_page(computer, url, prompt, max_explore_steps=10):
96102
response = client.responses.create(
97103
model="tzafon.northstar-cua-fast",
98104
tools=[TOOL],
99-
input=[{
100-
"role": "user",
101-
"content": [
102-
{
103-
"type": "input_text",
104-
"text": "Scroll down slowly. Dismiss any popups or cookie banners. Stop when you can see transaction fees or per-payment pricing.",
105-
},
106-
{"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
107-
],
108-
}],
105+
input=[
106+
{
107+
"role": "user",
108+
"content": [
109+
{
110+
"type": "input_text",
111+
"text": "Scroll down slowly. Dismiss any popups or cookie banners. Stop when you can see transaction fees or per-payment pricing.",
112+
},
113+
{"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
114+
],
115+
}
116+
],
109117
)
110118

111119
for step in range(max_explore_steps):
@@ -136,25 +144,33 @@ def research_page(computer, url, prompt, max_explore_steps=10):
136144
model="tzafon.northstar-cua-fast",
137145
previous_response_id=response.id,
138146
tools=[TOOL],
139-
input=[{
140-
"type": "computer_call_output",
141-
"call_id": computer_call.call_id,
142-
"output": {"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
143-
}],
147+
input=[
148+
{
149+
"type": "computer_call_output",
150+
"call_id": computer_call.call_id,
151+
"output": {
152+
"type": "input_image",
153+
"image_url": screenshot_url,
154+
"detail": "auto",
155+
},
156+
}
157+
],
144158
)
145159

146160
# --- Phase 2: Extract structured data (no tools = text response) ---
147161
print(" [extract]")
148162
screenshot_url = computer.get_screenshot_url(computer.screenshot())
149163
extraction = client.responses.create(
150164
model="tzafon.northstar-cua-fast",
151-
input=[{
152-
"role": "user",
153-
"content": [
154-
{"type": "input_text", "text": prompt},
155-
{"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
156-
],
157-
}],
165+
input=[
166+
{
167+
"role": "user",
168+
"content": [
169+
{"type": "input_text", "text": prompt},
170+
{"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
171+
],
172+
}
173+
],
158174
# No tools — forces a text response instead of actions.
159175
)
160176

@@ -174,17 +190,24 @@ def main():
174190
print(f"\n--- Researching {company['name']} ---")
175191
print(f" URL: {company['url']}")
176192

177-
raw = research_page(computer, company["url"], EXTRACT_PROMPT)
193+
raw = research_page(
194+
computer,
195+
company["url"],
196+
EXTRACT_PROMPT,
197+
max_explore_steps=int(os.getenv("LIGHTCONE_MAX_EXPLORE_STEPS", "10")),
198+
)
178199

179200
if raw:
180201
# Try to parse as JSON; if the model wrapped it in markdown, strip that.
181202
cleaned = raw.strip().removeprefix("```json").removesuffix("```").strip()
182203
try:
183204
data = json.loads(cleaned)
184205
results.append(data)
185-
print(f" -> {data.get('main_product', '?')}: "
186-
f"{data.get('transaction_fee_percent', '?')} + "
187-
f"{data.get('per_transaction_fee', '?')}/txn")
206+
print(
207+
f" -> {data.get('main_product', '?')}: "
208+
f"{data.get('transaction_fee_percent', '?')} + "
209+
f"{data.get('per_transaction_fee', '?')}/txn"
210+
)
188211
except json.JSONDecodeError:
189212
print(f" -> Raw response: {raw[:200]}")
190213
results.append({"company": company["name"], "raw": raw})

0 commit comments

Comments
 (0)