diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..8092ff3 --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +BROWSERBASE_API_KEY=bb_live_your_api_key_here +BROWSERBASE_PROJECT_ID=your-project-uuid-here +MODEL_API_KEY=sk-proj-your-llm-api-key-here diff --git a/.gitignore b/.gitignore index 3d26cee..a87eee7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .prism.log .ruby-lsp/ .yardoc/ +.env bin/tapioca Brewfile.lock.json doc/ diff --git a/README.md b/README.md index 6c505ab..78760f4 100644 --- a/README.md +++ b/README.md @@ -28,15 +28,141 @@ gem "stagehand", "~> 0.6.0" require "bundler/setup" require "stagehand" -stagehand = Stagehand::Client.new( - browserbase_api_key: ENV["BROWSERBASE_API_KEY"], # This is the default and can be omitted - browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], # This is the default and can be omitted - model_api_key: ENV["MODEL_API_KEY"] # This is the default and can be omitted +# Create a new Stagehand client with your credentials +client = Stagehand::Client.new( + browserbase_api_key: ENV["BROWSERBASE_API_KEY"], # defaults to ENV["BROWSERBASE_API_KEY"] + browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], # defaults to ENV["BROWSERBASE_PROJECT_ID"] + model_api_key: ENV["MODEL_API_KEY"] # defaults to ENV["MODEL_API_KEY"] +) + +# Start a new browser session +# x_language and x_sdk_version headers are required for the v3 API +start_response = client.sessions.start( + model_name: "openai/gpt-5-nano", + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Session started: #{start_response.data.session_id}" + +session_id = start_response.data.session_id + +# Navigate to a webpage +# frame_id is required - use empty string for the main frame +client.sessions.navigate( + session_id, + url: "https://news.ycombinator.com", + frame_id: "", + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Navigated to Hacker News" + +# Use Observe to find possible actions on the page +observe_response = 
client.sessions.observe( + session_id, + instruction: "find the link to view comments for the top post", + x_language: :typescript, + x_sdk_version: "3.0.6" +) + +actions = observe_response.data.result +puts "Found #{actions.length} possible actions" + +# Observe can return no matches; release the session before bailing out +if actions.empty? + puts "No actions found" + client.sessions.end_(session_id, x_language: :typescript, x_sdk_version: "3.0.6") + exit 1 +end + +# Take the first action returned by Observe +action = actions.first +puts "Acting on: #{action.description}" + +# Pass the structured action to Act +# Convert the observe result to a hash and ensure method is set to "click" +act_response = client.sessions.act( + session_id, + input: action.to_h.merge(method: "click"), + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Act completed: #{act_response.data.result[:message]}" + +# Extract data from the page +# We're now on the comments page, so extract the top comment text +extract_response = client.sessions.extract( + session_id, + instruction: "extract the text of the top comment on this page", + schema: { + type: "object", + properties: { + comment_text: { + type: "string", + description: "The text content of the top comment" + }, + author: { + type: "string", + description: "The username of the comment author" + } + }, + required: ["comment_text"] + }, + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Extracted data: #{extract_response.data.result}" + +# Get the author from the extracted data +extracted_data = extract_response.data.result +author = extracted_data[:author] +puts "Looking up profile for author: #{author}" + +# Use the Agent to find the author's profile +# Execute runs an autonomous agent that can navigate and interact with pages +execute_response = client.sessions.execute( + session_id, + execute_options: { + instruction: "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '#{author}'. 
" \ + "Click on their username to go to their profile page and look for any links they have shared.", + max_steps: 15 + }, + agent_config: { + model: Stagehand::ModelConfig::ModelConfigObject.new( + model_name: "openai/gpt-5-nano", + api_key: ENV["MODEL_API_KEY"] + ), + cua: false + }, + x_language: :typescript, + x_sdk_version: "3.0.6" ) +puts "Agent completed: #{execute_response.data.result[:message]}" +puts "Agent success: #{execute_response.data.result[:success]}" +puts "Agent actions taken: #{execute_response.data.result[:actions]&.length || 0}" + +# End the session to clean up browser resources +client.sessions.end_( + session_id, + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Session ended" +``` + +### Running the Example + +Set the required environment variables and run the example script: -response = stagehand.sessions.act("00000000-your-session-id-000000000000", input: "click the first link on the page") +```bash +# Set your credentials +export BROWSERBASE_API_KEY="your-browserbase-api-key" +export BROWSERBASE_PROJECT_ID="your-browserbase-project-id" +export MODEL_API_KEY="your-openai-api-key" -puts(response.data) +# Install dependencies and run +bundle install +bundle exec ruby examples/basic.rb ``` ### Streaming diff --git a/examples/basic.rb b/examples/basic.rb new file mode 100644 index 0000000..b26a07e --- /dev/null +++ b/examples/basic.rb @@ -0,0 +1,131 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "stagehand" + +# Set these environment variables before running this script: +# BROWSERBASE_API_KEY - Your Browserbase API key +# BROWSERBASE_PROJECT_ID - Your Browserbase project ID +# MODEL_API_KEY - Your AI model API key (e.g., OpenAI) + +# Create a new Stagehand client with your credentials +client = Stagehand::Client.new( + browserbase_api_key: ENV["BROWSERBASE_API_KEY"], + browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], + model_api_key: ENV["MODEL_API_KEY"] +) + +# Start a new browser 
session +# x_language and x_sdk_version headers are required for the v3 API +start_response = client.sessions.start( + model_name: "openai/gpt-5-nano", + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Session started: #{start_response.data.session_id}" + +session_id = start_response.data.session_id + +# Navigate to a webpage +# frame_id is required - use empty string for the main frame +client.sessions.navigate( + session_id, + url: "https://news.ycombinator.com", + frame_id: "", + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Navigated to Hacker News" + +# Use Observe to find possible actions on the page +observe_response = client.sessions.observe( + session_id, + instruction: "find the link to view comments for the top post", + x_language: :typescript, + x_sdk_version: "3.0.6" +) + +actions = observe_response.data.result +puts "Found #{actions.length} possible actions" + +# Observe can return no matches; release the session before bailing out +if actions.empty? + puts "No actions found" + client.sessions.end_(session_id, x_language: :typescript, x_sdk_version: "3.0.6") + exit 1 +end + +# Take the first action returned by Observe +action = actions.first +puts "Acting on: #{action.description}" + +# Pass the structured action to Act +# Convert the observe result to a hash and ensure method is set to "click" +act_response = client.sessions.act( + session_id, + input: action.to_h.merge(method: "click"), + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Act completed: #{act_response.data.result[:message]}" + +# Extract data from the page +# We're now on the comments page, so extract the top comment text +extract_response = client.sessions.extract( + session_id, + instruction: "extract the text of the top comment on this page", + schema: { + type: "object", + properties: { + comment_text: { + type: "string", + description: "The text content of the top comment" + }, + author: { + type: "string", + description: "The username of the comment author" + } + }, + required: ["comment_text"] + }, + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Extracted data: 
#{extract_response.data.result}" + +# Get the author from the extracted data +extracted_data = extract_response.data.result +author = extracted_data[:author] +puts "Looking up profile for author: #{author}" + +# Use the Agent to find the author's profile +# Execute runs an autonomous agent that can navigate and interact with pages +execute_response = client.sessions.execute( + session_id, + execute_options: { + instruction: "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '#{author}'. " \ + "Click on their username to go to their profile page and look for any links they have shared.", + max_steps: 15 + }, + agent_config: { + model: Stagehand::ModelConfig::ModelConfigObject.new( + model_name: "openai/gpt-5-nano", + api_key: ENV["MODEL_API_KEY"] + ), + cua: false + }, + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Agent completed: #{execute_response.data.result[:message]}" +puts "Agent success: #{execute_response.data.result[:success]}" +puts "Agent actions taken: #{execute_response.data.result[:actions]&.length || 0}" + +# End the session to clean up browser resources +client.sessions.end_( + session_id, + x_language: :typescript, + x_sdk_version: "3.0.6" +) +puts "Session ended" diff --git a/lib/stagehand/internal/util.rb b/lib/stagehand/internal/util.rb index 1698a6f..f1c7b1e 100644 --- a/lib/stagehand/internal/util.rb +++ b/lib/stagehand/internal/util.rb @@ -272,7 +272,12 @@ class << self # # @return [Hash{String=>Array}] def decode_query(query) - CGI.parse(query.to_s) + return {} if query.to_s.empty? 
+ + # CGI.parse is no longer in Ruby 4.0's default gems (only cgi/escape remains), so parse with URI.decode_www_form + URI.decode_www_form(query.to_s).each_with_object({}) do |(key, value), hash| + (hash[key] ||= []) << value + end end # @api private diff --git a/lib/stagehand/resources/sessions.rb b/lib/stagehand/resources/sessions.rb index 1ce3d6d..3970742 100644 --- a/lib/stagehand/resources/sessions.rb +++ b/lib/stagehand/resources/sessions.rb @@ -140,6 +140,7 @@ def end_(id, params = {}) x_sent_at: "x-sent-at", x_stream_response: "x-stream-response" ), + body: {}, model: Stagehand::Models::SessionEndResponse, options: options )