4646import shutil
4747import subprocess
4848import sys
49+ from datetime import datetime , timezone
50+ from pathlib import Path
4951
5052import numpy as np
5153import pandas as pd
@@ -173,7 +175,8 @@ def _run_simulation(self): # noqa: C901
173175 pass
174176 else :
175177 # pythonEXE = os.path.join(localAppDir,'applications','python','python.exe')
176- pythonEXE = '"' + sys .executable + '"' # noqa: N806
178+ # pythonEXE = '"' + sys.executable + '"'
179+ pythonEXE = sys .executable # noqa: N806
177180 else :
178181 # for remote run and macOS, let's use system python
179182 pass
@@ -187,6 +190,7 @@ def _run_simulation(self): # noqa: C901
187190 if bldg_id is not None :
188191 os .chdir (bldg_id )
189192 os .chdir ('templatedir' )
193+ print (f'Current working directory: { os .getcwd ()} ' ) # noqa: PTH109, T201
190194
191195 # dakota script path
192196 dakotaScript = os .path .join ( # noqa: PTH118, N806
@@ -216,41 +220,131 @@ def _run_simulation(self): # noqa: C901
216220 # command line
217221 # KZ modified 0331
218222 # command_line = f'{pythonEXE} {dakotaScript} --workflowInput sc_dakota_plom.json --driverFile {os.path.splitext(self.workflow_driver)[0]} --workflowOutput EDP.json --runType {runType}'
219- command_line = f'{ pythonEXE } { dakotaScript } --workflowInput sc_dakota_plom.json --driverFile { os .path .splitext (self .workflow_driver )[0 ]} --workflowOutput EDP.json --runType runningLocal' # noqa: PTH122
220- print (command_line ) # noqa: T201
221- # run command
223+ # command_line = f'{pythonEXE} {dakotaScript} --workflowInput sc_dakota_plom.json --driverFile {os.path.splitext(self.workflow_driver)[0]} --workflowOutput EDP.json --runType runningLocal'
224+ # print(command_line)
225+
226+ # Build the command as a list (no shell interpretation)
227+ command_line = [
228+ pythonEXE ,
229+ dakotaScript ,
230+ '--workflowInput' ,
231+ 'sc_dakota_plom.json' ,
232+ '--driverFile' ,
233+ os .path .splitext (self .workflow_driver )[0 ], # noqa: PTH122
234+ '--workflowOutput' ,
235+ 'EDP.json' ,
236+ '--runType' ,
237+ 'runningLocal' ,
238+ ]
239+
240+ print ( # noqa: T201
241+ 'Command to run:' , ' ' .join (command_line )
242+ ) # for debugging
243+
222244 dakotaTabPath = os .path .join (self .work_dir , 'dakotaTab.out' ) # noqa: PTH118, N806
223245 print (dakotaTabPath ) # noqa: T201
224246
247+ # try:
248+ # os.system(command_line)
249+ # except:
250+ # print(
251+ # 'runPLoM._run_simulation: error in running dakota to generate the initial sample.'
252+ # )
253+ # print(
254+ # 'runPLoM._run_simulation: please check if the dakota is installed correctly on the system.'
255+ # )
256+
257+ # if not os.path.exists(dakotaTabPath):
258+ # try:
259+ # subprocess.call(command_line)
260+ # except:
261+ # print(
262+ # 'runPLoM._run_simulation: error in running dakota to generate the initial sample.'
263+ # )
264+ # print(
265+ # 'runPLoM._run_simulation: please check if the dakota is installed correctly on the system.'
266+ # )
267+
268+ # if not os.path.exists(dakotaTabPath):
269+ # msg = 'Dakota preprocessor did not run successfully'
270+ # self.errlog.exit(msg)
271+
272+ # decide if we're on HPC/remote
273+ is_remote = job_config .get ('runType' , 'runningLocal' ) == 'runningRemote'
274+
275+ # build a sanitized env ONLY for the child process running Dakota on remote
276+ env = os .environ .copy ()
277+ removed = []
278+ if is_remote :
279+ # Strip common MPI/PMI launch-context vars so Dakota won't try MPI_Init via PMI
280+ prefixes = (
281+ 'PMI_' ,
282+ 'OMPI_' ,
283+ 'MPI_' ,
284+ 'I_MPI_' ,
285+ 'HYDRA_' ,
286+ 'SLURM_' ,
287+ 'PMIX_' ,
288+ 'MPICH_' ,
289+ 'UCX_' ,
290+ 'FI_' ,
291+ 'PALS_' ,
292+ )
293+ for k in list (env ):
294+ if k .startswith (prefixes ):
295+ removed .append (k )
296+ env .pop (k , None )
297+ if removed :
298+ print ( # noqa: T201
299+ f'sanitized env for dakota run (removed { len (removed )} keys): { removed [:6 ]} ...'
300+ )
301+
302+ completed = None
225303 try :
226- os .system (command_line ) # noqa: S605
227- except : # noqa: E722
304+ # run DakotaUQ with the chosen env
305+ completed = subprocess .run ( # noqa: S603
306+ command_line ,
307+ check = True , # raise if exit code != 0
308+ capture_output = True , # capture BOTH stdout and stderr
309+ text = True , # return str, not bytes
310+ env = env if is_remote else None , # use sanitized env on remote
311+ )
312+ except subprocess .CalledProcessError as e :
228313 print ( # noqa: T201
229314 'runPLoM._run_simulation: error in running dakota to generate the initial sample.'
230315 )
231316 print ( # noqa: T201
232317 'runPLoM._run_simulation: please check if the dakota is installed correctly on the system.'
233318 )
234-
235- if not os .path .exists (dakotaTabPath ): # noqa: PTH110
236- try :
237- subprocess .call (command_line ) # noqa: S603
238- except : # noqa: E722
239- print ( # noqa: T201
240- 'runPLoM._run_simulation: error in running dakota to generate the initial sample.'
241- )
242- print ( # noqa: T201
243- 'runPLoM._run_simulation: please check if the dakota is installed correctly on the system.'
244- )
245-
319+ print (f'Command: { " " .join (e .cmd )} ' ) # noqa: T201
320+ print (f'Return code: { e .returncode } ' ) # noqa: T201
321+ if e .stdout :
322+ print ('---- STDOUT ----' ) # noqa: T201
323+ print (e .stdout ) # noqa: T201
324+ if e .stderr :
325+ print ('---- STDERR ----' ) # noqa: T201
326+ print (e .stderr ) # noqa: T201
327+ self .errlog .exit ('Dakota did not run successfully' )
328+
329+ # Verify expected output exists
246330 if not os .path .exists (dakotaTabPath ): # noqa: PTH110
247- msg = 'Dakota preprocessor did not run successfully'
248- self .errlog .exit (msg )
331+ print ( # noqa: T201
332+ 'runPLoM._run_simulation: Dakota finished without creating the expected output file.'
333+ )
334+ print (f'Expected file missing: { dakotaTabPath } ' ) # noqa: T201
335+ if completed is not None :
336+ if completed .stdout :
337+ print ('---- STDOUT ----' ) # noqa: T201
338+ print (completed .stdout ) # noqa: T201
339+ if completed .stderr :
340+ print ('---- STDERR ----' ) # noqa: T201
341+ print (completed .stderr ) # noqa: T201
342+ self .errlog .exit ('Dakota did not run successfully' )
249343
250344 # remove the new dakota.json
251345 # os.remove('sc_dakota_plom.json')
252346
253- if runType in ['run' , 'runningLocal' ]:
347+ if runType in ['run' , 'runningLocal' , 'runningRemote' ]:
254348 # create the response.csv file from the dakotaTab.out file
255349 os .chdir (run_dir )
256350 if bldg_id is not None :
@@ -273,7 +367,7 @@ def _run_simulation(self): # noqa: C901
273367 )
274368 self .job_config = job_config
275369
276- elif self .run_type in ['set_up' , 'runningRemote' ]:
370+ elif self .run_type in ['set_up' ]:
277371 pass
278372
279373 def _prepare_training_data (self , run_dir ): # noqa: C901
@@ -681,7 +775,7 @@ def _load_hyperparameter(self):
681775 if (
682776 self .constraintsFlag
683777 ): # sy - added because quoFEM/EE-UQ example failed 09/10/2024
684- constr_file = Path (self .constraintsFile ).resolve () # noqa: F405
778+ constr_file = Path (self .constraintsFile ).resolve ()
685779 sys .path .insert (0 , str (constr_file .parent ) + '/' )
686780 constr_script = importlib .__import__ ( # noqa: F405
687781 constr_file .name [:- 3 ], globals (), locals (), [], 0
@@ -690,7 +784,7 @@ def _load_hyperparameter(self):
690784 print ('beta_c = ' , self .beta_c ) # noqa: T201
691785 # if smootherKDE
692786 if self .smootherKDE_Customize :
693- kde_file = Path (self .smootherKDE_file ).resolve () # noqa: F405
787+ kde_file = Path (self .smootherKDE_file ).resolve ()
694788 sys .path .insert (0 , str (kde_file .parent ) + '/' )
695789 kde_script = importlib .__import__ ( # noqa: F405
696790 kde_file .name [:- 3 ], globals (), locals (), [], 0
@@ -701,7 +795,7 @@ def _load_hyperparameter(self):
701795 print ('epsilon_k = ' , self .smootherKDE ) # noqa: T201
702796 # if tolKDE
703797 if self .kdeTolerance_Customize :
704- beta_file = Path (self .kdeTolerance_file ).resolve () # noqa: F405
798+ beta_file = Path (self .kdeTolerance_file ).resolve ()
705799 sys .path .insert (0 , str (beta_file .parent ) + '/' )
706800 beta_script = importlib .__import__ ( # noqa: F405
707801 beta_file .name [:- 3 ], globals (), locals (), [], 0
@@ -966,13 +1060,19 @@ def read_txt(text_dir, errlog): # noqa: D103
9661060
class errorLog:
    """Fatal-error reporter that appends messages to ``<work_dir>/dakota.err``.

    The log file is not opened at construction time. Each call to
    :meth:`exit` opens it in append mode, so any entries already present in
    the file are preserved and the new entry is added after them.
    """

    def __init__(self, work_dir):
        """Record the log location and make sure its directory exists.

        Args:
            work_dir: Directory (``str`` or path-like) that holds ``dakota.err``.

        Raises:
            OSError: If the directory cannot be created.
        """
        self.path = Path(work_dir) / 'dakota.err'
        # Create the directory up front so a later exit() can always open the log.
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def exit(self, msg):
        """Report a fatal error and terminate the process.

        The message is echoed to stderr and appended to the log file under a
        UTC timestamp header. Failure to write the log is reported on stderr
        but never prevents termination.

        Args:
            msg: The error message. Any object is accepted; it is converted
                with ``str()`` before being printed and logged.

        Raises:
            SystemExit: Always, with exit code ``-1``.
        """
        # Coerce once, up front: msg is not guaranteed to be a str, and calling
        # str methods on e.g. an exception object would otherwise fail inside
        # the logging block and be misreported as a file-write problem.
        text = str(msg)
        print(text, file=sys.stderr)  # noqa: T201

        ts = datetime.now(tz=timezone.utc).isoformat(timespec='seconds')
        try:
            with self.path.open('a', encoding='utf-8', errors='replace') as f:
                f.write(f'\n\n---- PLoM error @ {ts} ----\n')
                f.write(text.rstrip() + '\n')
        except OSError as e:
            # Best effort only: an unwritable log must not mask the real error.
            print(f'[WARN] Could not write to {self.path}: {e}', file=sys.stderr)  # noqa: T201

        sys.exit(-1)  # nonzero exit status signals failure to the caller
9761076
9771077
9781078def build_surrogate (work_dir , os_type , run_type , input_file , workflow_driver ):
0 commit comments