1717 TotalProductionList ,
1818)
1919from electricitymap .contrib .lib .types import ZoneKey
20+ from electricitymap .contrib .parsers .lib .config import use_proxy
21+ from electricitymap .contrib .parsers .lib .exceptions import ParserException
2022
2123tz_bo = ZoneInfo ("America/La_Paz" )
2224
2830INDEX_URL = "https://www.cndc.bo/gene/index.php"
2931DATA_URL = "https://www.cndc.bo/gene/dat/gene.php?fechag={0}"
3032SOURCE = "cndc.bo"
33+ # User-Agent to avoid being blocked as a bot
34+ USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
35+ REQUEST_TIMEOUT = 30 # seconds
3136
3237
3338def extract_xsrf_token (html ):
@@ -45,27 +50,84 @@ def get_datetime(query_date: datetime, hour: int) -> datetime:
4550 )
4651
4752
53+ def _check_response (response , context : str = "" ):
54+ """Check HTTP response and raise appropriate ParserException if needed."""
55+ if response .status_code == 403 :
56+ raise ParserException (
57+ "CNDC.py" ,
58+ f"Access forbidden (403){ context } . The server may be blocking requests." ,
59+ "BO" ,
60+ )
61+ elif response .status_code == 429 :
62+ raise ParserException ("CNDC.py" , f"Rate limit exceeded (429){ context } ." , "BO" )
63+ elif response .status_code >= 500 :
64+ raise ParserException (
65+ "CNDC.py" ,
66+ f"Server error ({ response .status_code } ){ context } . The CNDC server may be down." ,
67+ "BO" ,
68+ )
69+ elif not response .ok :
70+ raise ParserException ("CNDC.py" , f"HTTP { response .status_code } { context } " , "BO" )
71+
72+
def fetch_data(
    session: Session | None = None, target_datetime: datetime | None = None
) -> tuple[list[dict], datetime]:
    """Download the hourly rows published for one day.

    Two requests are made on the same session: one to ``INDEX_URL``, whose
    HTML is handed to ``extract_xsrf_token``, and one to ``DATA_URL`` for the
    requested day with that token sent in the ``x-csrf-token`` header.

    Args:
        session: Session used for both requests; a new ``Session`` is created
            when omitted.
        target_datetime: Moment whose date (after conversion to ``tz_bo``)
            selects the day to fetch; defaults to the current time.

    Returns:
        A tuple ``(hour_rows, target_datetime)`` where ``hour_rows`` is the
        ``"data"`` entry of the decoded JSON payload and ``target_datetime``
        is expressed in ``tz_bo``.

    Raises:
        ParserException: On an HTTP error status, a token extraction failure,
            an unparsable payload, or any other exception raised while
            fetching (the original exception is chained as the cause).
    """
    if session is None:
        session = Session()

    # Ask for the current time directly in the target zone rather than
    # creating a naive value and converting it via the system local zone.
    if target_datetime is None:
        target_datetime = datetime.now(tz=tz_bo)
    else:
        target_datetime = target_datetime.astimezone(tz_bo)
    formatted_dt = target_datetime.strftime("%Y-%m-%d")

    # Headers to mimic a browser and avoid being blocked
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
    }

    try:
        # Get XSRF token from index page
        index_response = session.get(
            INDEX_URL, headers=headers, timeout=REQUEST_TIMEOUT
        )
        _check_response(index_response)

        try:
            xsrf_token = extract_xsrf_token(index_response.text)
        except (AttributeError, IndexError) as e:
            raise ParserException(
                "CNDC.py",
                "Failed to extract XSRF token. Website structure may have changed.",
                "BO",
            ) from e

        # Fetch data with XSRF token
        data_response = session.get(
            DATA_URL.format(formatted_dt),
            headers={**headers, "x-csrf-token": xsrf_token},
            timeout=REQUEST_TIMEOUT,
        )
        _check_response(data_response, " when fetching data")

        # Parse JSON response
        try:
            # Strip any byte-order mark, which json.loads rejects. It is
            # written as an escape so the character stays visible in source.
            payload = json.loads(data_response.text.replace("\ufeff", ""))
            hour_rows = payload["data"]
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            # TypeError covers a payload that is valid JSON but not an object.
            raise ParserException(
                "CNDC.py",
                f"Failed to parse JSON response. API format may have changed: {e}",
                "BO",
            ) from e

        return hour_rows, target_datetime

    except ParserException:
        raise
    except Exception as e:
        # Boundary handler: surface network and other failures uniformly.
        raise ParserException(
            "CNDC.py",
            f"Unexpected error: {type(e).__name__}: {e}",
            "BO",
        ) from e
69131
70132
71133def parse_generation_forecast (
@@ -105,6 +167,9 @@ def parser_production_breakdown(
105167 if total is None or None in modes_extracted :
106168 continue
107169
170+ unknown_value = round (total - thermo - hydro - solar - wind - bagasse , 3 )
171+ unknown_value = None if abs (unknown_value ) < 0.05 else unknown_value
172+
108173 result .append (
109174 zoneKey = zone_key ,
110175 datetime = timestamp ,
@@ -115,14 +180,15 @@ def parser_production_breakdown(
115180 biomass = bagasse ,
116181 gas = round (thermo * gas_oil_ratio , 3 ),
117182 oil = round (thermo * (1 - gas_oil_ratio ), 3 ),
118- unknown = round ( total - thermo - hydro - solar - wind - bagasse , 3 ) ,
183+ unknown = unknown_value ,
119184 ),
120185 source = SOURCE ,
121186 )
122187
123188 return result
124189
125190
191+ @use_proxy (country_code = "BO" )
126192def fetch_production (
127193 zone_key : ZoneKey = ZoneKey ("BO" ),
128194 session : Session | None = None ,
@@ -152,8 +218,8 @@ def fetch_generation_forecast(
152218
if __name__ == "__main__":
    # Main method, never used by the Electricity Map backend, but handy for testing.
    # Print the label before the result so the output reads in order.
    print("fetch_production() ->")
    print(fetch_production())

    # print("fetch_generation_forecast() ->")
    # print(fetch_generation_forecast())
0 commit comments