-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathmain.py
More file actions
96 lines (77 loc) · 3.64 KB
/
main.py
File metadata and controls
96 lines (77 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Stagehand + Browserbase: Data Extraction with Structured Schemas - See README.md for full documentation
import os
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from stagehand import Stagehand
# Load environment variables
load_dotenv()
# License verification variables
variables = {"input1": "02237476"}  # "input1": the DRE license ID typed into the search form
# Define schema using Pydantic
class LicenseData(BaseModel):
    # Record layout requested from the extraction step. Every field is an
    # optional string that defaults to None.
    # NOTE: documented with comments rather than a class docstring on purpose;
    # pydantic copies a model docstring into the generated JSON schema's
    # "description", which would alter what model_json_schema() returns.

    # --- License holder ---
    license_type: str | None = Field(
        default=None,
        description="Type of real estate license",
    )
    name: str | None = Field(
        default=None,
        description="License holder's full name",
    )
    mailing_address: str | None = Field(
        default=None,
        description="Current mailing address",
    )
    license_id: str | None = Field(
        default=None,
        description="Unique license identifier",
    )
    expiration_date: str | None = Field(
        default=None,
        description="License expiration date",
    )
    license_status: str | None = Field(
        default=None,
        description="Current status (active, expired, etc.)",
    )
    salesperson_license_issued: str | None = Field(
        default=None,
        description="Date salesperson license was issued",
    )
    former_names: str | None = Field(
        default=None,
        description="Any previous names used",
    )

    # --- Associated broker ---
    responsible_broker: str | None = Field(
        default=None,
        description="Associated broker name",
    )
    broker_license_id: str | None = Field(
        default=None,
        description="Broker's license ID",
    )
    broker_address: str | None = Field(
        default=None,
        description="Broker's business address",
    )

    # --- Remarks ---
    disciplinary_action: str | None = Field(
        default=None,
        description="Any disciplinary actions taken",
    )
    other_comments: str | None = Field(
        default=None,
        description="Additional relevant information",
    )
def main():
    """Look up one DRE license record and print the extracted fields.

    Starts a Stagehand session, opens the California DRE public license
    lookup page, types the license ID taken from ``variables["input1"]``
    into the search form, clicks Find, and requests an extraction shaped by
    the ``LicenseData`` JSON schema. Progress and the extracted result are
    printed to stdout.

    Raises:
        KeyError: if ``variables`` has no ``"input1"`` entry (raised before
            any session is started).
        Exception: any error raised by a Stagehand call propagates to the
            caller after the session has been ended; reporting is left to
            the caller (the ``__main__`` guard prints it) so that a failure
            is not logged twice.
    """
    lookup_url = "https://www2.dre.ca.gov/publicasp/pplinfo.asp"

    # Read the license ID once so the search step and the extraction
    # instruction can never refer to different licenses.
    license_id = variables["input1"]
    type_instruction = f"type {license_id} into the License ID input field"
    extract_instruction = (
        f"extract all the license verification details for DRE#{license_id}"
    )

    # Initialize Stagehand with Browserbase for cloud-based browser automation
    client = Stagehand(
        browserbase_api_key=os.environ.get("BROWSERBASE_API_KEY"),
    )

    # Start a new session
    start_response = client.sessions.start(
        model_name="openai/gpt-4.1",
    )
    session_id = start_response.data.session_id

    try:
        print("Stagehand Session Started")
        print(f"Watch live: https://browserbase.com/sessions/{session_id}")

        # Navigate to the license verification page.
        print(f"Navigating to: {lookup_url}")
        client.sessions.navigate(id=session_id, url=lookup_url)

        # Fill in the license ID to search for.
        print(f"Performing action: {type_instruction}")
        client.sessions.act(
            id=session_id,
            input=type_instruction,
        )

        # Submit the search form.
        print("Performing action: click the Find button")
        client.sessions.act(
            id=session_id,
            input="click the Find button",
        )

        # Extract the result using the JSON schema generated from LicenseData.
        print(f"Extracting: {extract_instruction}")
        extract_response = client.sessions.extract(
            id=session_id,
            instruction=extract_instruction,
            schema=LicenseData.model_json_schema(),
        )
        extracted_data = extract_response.data.result
        print(f"Extracted: {extracted_data}")
    finally:
        # Always end the remote session, whether or not a step failed.
        client.sessions.end(id=session_id)
        print("Session closed successfully")
if __name__ == "__main__":
    # Script entry point: run the lookup and turn any failure into a
    # non-zero exit status after printing the error.
    try:
        main()
    except Exception as err:
        print(f"Error: {err}")
        # Raise SystemExit directly instead of calling exit(): the exit()
        # helper is injected by the site module for interactive use and is
        # not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1)