Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
BROWSERBASE_API_KEY=bb_live_your_api_key_here
BROWSERBASE_PROJECT_ID=your-project-uuid-here
MODEL_API_KEY=sk-proj-your-llm-api-key-here
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
.prism.log
.ruby-lsp/
.yardoc/
.env
bin/tapioca
Brewfile.lock.json
doc/
Expand Down
131 changes: 125 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,134 @@ gem "stagehand", "~> 0.6.0"
require "bundler/setup"
require "stagehand"

stagehand = Stagehand::Client.new(
browserbase_api_key: ENV["BROWSERBASE_API_KEY"], # This is the default and can be omitted
browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], # This is the default and can be omitted
model_api_key: ENV["MODEL_API_KEY"] # This is the default and can be omitted
# Create a new Stagehand client with your credentials
client = Stagehand::Client.new(
browserbase_api_key: ENV["BROWSERBASE_API_KEY"], # defaults to ENV["BROWSERBASE_API_KEY"]
browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], # defaults to ENV["BROWSERBASE_PROJECT_ID"]
model_api_key: ENV["MODEL_API_KEY"] # defaults to ENV["MODEL_API_KEY"]
)

# x_language and x_sdk_version headers are required for the v3 API, so they are
# defined once here and splatted into every request below.
sdk_headers = {x_language: :typescript, x_sdk_version: "3.0.6"}.freeze

# Start a new browser session
start_response = client.sessions.start(model_name: "openai/gpt-5-nano", **sdk_headers)
session_id = start_response.data.session_id
puts "Session started: #{session_id}"

# Everything that uses the session runs inside begin/ensure so the session is
# always ended, even when a step raises or the script exits early.
begin
  # Navigate to a webpage
  # frame_id is required - use empty string for the main frame
  client.sessions.navigate(
    session_id,
    url: "https://news.ycombinator.com",
    frame_id: "",
    **sdk_headers
  )
  puts "Navigated to Hacker News"

  # Use Observe to find possible actions on the page
  observe_response = client.sessions.observe(
    session_id,
    instruction: "find the link to view comments for the top post",
    **sdk_headers
  )

  actions = observe_response.data.result
  puts "Found #{actions.length} possible actions"

  # Observe may return nothing; stop here instead of calling methods on nil below.
  if actions.empty?
    puts "No actions found"
    exit 1
  end

  # Take the first action returned by Observe
  action = actions.first
  puts "Acting on: #{action.description}"

  # Pass the structured action to Act
  # Convert the observe result to a hash and ensure method is set to "click"
  act_response = client.sessions.act(
    session_id,
    input: action.to_h.merge(method: "click"),
    **sdk_headers
  )
  puts "Act completed: #{act_response.data.result[:message]}"

  # Extract data from the page
  # We're now on the comments page, so extract the top comment text
  extract_response = client.sessions.extract(
    session_id,
    instruction: "extract the text of the top comment on this page",
    schema: {
      type: "object",
      properties: {
        comment_text: {
          type: "string",
          description: "The text content of the top comment"
        },
        author: {
          type: "string",
          description: "The username of the comment author"
        }
      },
      required: ["comment_text"]
    },
    **sdk_headers
  )
  puts "Extracted data: #{extract_response.data.result}"

  # Get the author from the extracted data.
  # The schema only requires comment_text, so author may be missing; without it
  # the agent instruction below would ask about an empty username.
  extracted_data = extract_response.data.result
  author = extracted_data[:author]
  if author.to_s.strip.empty?
    puts "No author found in extracted data"
    exit 1
  end
  puts "Looking up profile for author: #{author}"

  # Use the Agent to find the author's profile
  # Execute runs an autonomous agent that can navigate and interact with pages
  execute_response = client.sessions.execute(
    session_id,
    execute_options: {
      instruction: "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '#{author}'. " \
                   "Click on their username to go to their profile page and look for any links they have shared.",
      max_steps: 15
    },
    agent_config: {
      model: Stagehand::ModelConfig::ModelConfigObject.new(
        model_name: "openai/gpt-5-nano",
        api_key: ENV["MODEL_API_KEY"]
      ),
      cua: false
    },
    **sdk_headers
  )
  agent_result = execute_response.data.result
  puts "Agent completed: #{agent_result[:message]}"
  puts "Agent success: #{agent_result[:success]}"
  puts "Agent actions taken: #{agent_result[:actions]&.length || 0}"
ensure
  # End the session to clean up browser resources.
  # `exit` raises SystemExit, so this also runs on the early exits above.
  client.sessions.end_(session_id, **sdk_headers)
  puts "Session ended"
end
```

### Running the Example

Set the required environment variables and run the example script:

response = stagehand.sessions.act("00000000-your-session-id-000000000000", input: "click the first link on the page")
```bash
# Set your credentials
export BROWSERBASE_API_KEY="your-browserbase-api-key"
export BROWSERBASE_PROJECT_ID="your-browserbase-project-id"
export MODEL_API_KEY="your-openai-api-key"

puts(response.data)
# Install dependencies and run
bundle install
bundle exec ruby examples/basic.rb
```

### Streaming
Expand Down
129 changes: 129 additions & 0 deletions examples/basic.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/env ruby

Check failure on line 1 in examples/basic.rb

View workflow job for this annotation

GitHub Actions / lint

Lint/ScriptPermission: Script file basic.rb doesn't have execute permission.
# frozen_string_literal: true

require "bundler/setup"
require "stagehand"

# Set these environment variables before running this script:
# BROWSERBASE_API_KEY - Your Browserbase API key
# BROWSERBASE_PROJECT_ID - Your Browserbase project ID
# MODEL_API_KEY - Your AI model API key (e.g., OpenAI)

# Create a new Stagehand client with your credentials
client = Stagehand::Client.new(
  browserbase_api_key: ENV["BROWSERBASE_API_KEY"],
  browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"],
  model_api_key: ENV["MODEL_API_KEY"]
)

# x_language and x_sdk_version headers are required for the v3 API, so they are
# defined once here and splatted into every request below.
sdk_headers = {x_language: :typescript, x_sdk_version: "3.0.6"}.freeze

# Start a new browser session
start_response = client.sessions.start(model_name: "openai/gpt-5-nano", **sdk_headers)
session_id = start_response.data.session_id
puts "Session started: #{session_id}"

# Everything that uses the session runs inside begin/ensure so the session is
# always ended, even when a step raises or the script exits early.
begin
  # Navigate to a webpage
  # frame_id is required - use empty string for the main frame
  client.sessions.navigate(
    session_id,
    url: "https://news.ycombinator.com",
    frame_id: "",
    **sdk_headers
  )
  puts "Navigated to Hacker News"

  # Use Observe to find possible actions on the page
  observe_response = client.sessions.observe(
    session_id,
    instruction: "find the link to view comments for the top post",
    **sdk_headers
  )

  actions = observe_response.data.result
  puts "Found #{actions.length} possible actions"

  # Observe may return nothing; stop here instead of calling methods on nil below.
  if actions.empty?
    puts "No actions found"
    exit 1
  end

  # Take the first action returned by Observe
  action = actions.first
  puts "Acting on: #{action.description}"

  # Pass the structured action to Act
  # Convert the observe result to a hash and ensure method is set to "click"
  act_response = client.sessions.act(
    session_id,
    input: action.to_h.merge(method: "click"),
    **sdk_headers
  )
  puts "Act completed: #{act_response.data.result[:message]}"

  # Extract data from the page
  # We're now on the comments page, so extract the top comment text
  extract_response = client.sessions.extract(
    session_id,
    instruction: "extract the text of the top comment on this page",
    schema: {
      type: "object",
      properties: {
        comment_text: {
          type: "string",
          description: "The text content of the top comment"
        },
        author: {
          type: "string",
          description: "The username of the comment author"
        }
      },
      required: ["comment_text"]
    },
    **sdk_headers
  )
  puts "Extracted data: #{extract_response.data.result}"

  # Get the author from the extracted data.
  # The schema only requires comment_text, so author may be missing; without it
  # the agent instruction below would ask about an empty username.
  extracted_data = extract_response.data.result
  author = extracted_data[:author]
  if author.to_s.strip.empty?
    puts "No author found in extracted data"
    exit 1
  end
  puts "Looking up profile for author: #{author}"

  # Use the Agent to find the author's profile
  # Execute runs an autonomous agent that can navigate and interact with pages
  execute_response = client.sessions.execute(
    session_id,
    execute_options: {
      instruction: "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '#{author}'. " \
                   "Click on their username to go to their profile page and look for any links they have shared.",
      max_steps: 15
    },
    agent_config: {
      model: Stagehand::ModelConfig::ModelConfigObject.new(
        model_name: "openai/gpt-5-nano",
        api_key: ENV["MODEL_API_KEY"]
      ),
      cua: false
    },
    **sdk_headers
  )
  agent_result = execute_response.data.result
  puts "Agent completed: #{agent_result[:message]}"
  puts "Agent success: #{agent_result[:success]}"
  puts "Agent actions taken: #{agent_result[:actions]&.length || 0}"
ensure
  # End the session to clean up browser resources.
  # `exit` raises SystemExit, so this also runs on the early exits above.
  client.sessions.end_(session_id, **sdk_headers)
  puts "Session ended"
end
7 changes: 6 additions & 1 deletion lib/stagehand/internal/util.rb
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,12 @@ class << self
#
# @return [Hash{String=>Array<String>}]
def decode_query(query)
  # Coerce before the emptiness check so nil and non-String inputs follow the
  # same path as strings (nil.to_s is "").
  raw = query.to_s
  return {} if raw.empty?

  # URI.decode_www_form is used instead of CGI.parse so this helper does not
  # depend on the `cgi` library (original note: Ruby 3.2+ and 4.0+ compatibility).
  # It yields [key, value] pairs; values for a repeated key are collected into
  # one array, in the order they appear in the query string.
  URI.decode_www_form(raw).each_with_object({}) do |(key, value), params|
    (params[key] ||= []) << value
  end
end

# @api private
Expand Down
1 change: 1 addition & 0 deletions lib/stagehand/resources/sessions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def end_(id, params = {})
x_sent_at: "x-sent-at",
x_stream_response: "x-stream-response"
),
body: {},
model: Stagehand::Models::SessionEndResponse,
options: options
)
Expand Down
Loading