From 3c26199d32bbc2718d3522b19c0f177addef75ed Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 30 Oct 2025 12:08:09 +0100 Subject: [PATCH 01/70] starting import launcher handling --- .../src/geos/trame/app/io/simulation.py | 202 ++++++++++++++++++ .../geos/trame/app/ui/simulationStatusView.py | 78 +++++++ 2 files changed, 280 insertions(+) create mode 100644 geos-trame/src/geos/trame/app/io/simulation.py create mode 100644 geos-trame/src/geos/trame/app/ui/simulationStatusView.py diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py new file mode 100644 index 000000000..e62ce2ede --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -0,0 +1,202 @@ + +from abc import ABC, abstractmethod +from pathlib import Path +from dataclasses import dataclass, field, fields +from enum import Enum, unique +from geos.trame.app.ui.simulationStatusView import SimulationStatus +from typing import Callable, Optional +import datetime +from trame_server.core import Server +from trame_server.state import State + +#TODO move outside +@dataclass(frozen=True) +class SimulationConstant: + SIMULATION_GEOS_PATH = "/some/path/" + SIMULATION_MACHINE_NAME = "p4log01" # Only run on P4 machine + + +@unique +class SlurmJobStatus(Enum): + PENDING = "PD" + RUNNING = "R" + COMPLETING = "CG" + COMPLETED = "CD" + SUSPENDED = "S" + UNKNOWN = "UNKNOWN" + + @classmethod + def from_string(cls, job_str) -> "SlurmJobStatus": + try: + return cls(job_str) + except ValueError: + return cls.UNKNOWN + +# TODO: dataclass_json +# @dataclass_json +@dataclass +class SimulationInformation: + pass + + def get_simulation_status( + self, + get_running_user_jobs_f: Callable[[], list[tuple[str, SlurmJobStatus]]], + ) -> SimulationStatus: + """ + Returns the simulation status given the current Jobs running for the current user. + Only runs the callback if the timeseries file is not already present in the done directory. + """ + if not self.geos_job_id: + return SimulationStatus.NOT_RUN + + done_sim_path = self.get_simulation_dir(SimulationStatus.DONE) + if self.get_timeseries_path(done_sim_path).exists(): + return SimulationStatus.DONE + + user_jobs = get_running_user_jobs_f() + if (self.geos_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.RUNNING + + if (self.geos_job_id, SlurmJobStatus.COMPLETING) in user_jobs: + return SimulationStatus.COMPLETING + + if (self.copy_back_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.COPY_BACK + + if (self.copy_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.SCHEDULED + + return SimulationStatus.UNKNOWN + +@dataclass +class LauncherParams: + simulation_files_path: Optional[str] = None + simulation_cmd_filename: Optional[str] = None + simulation_job_name: Optional[str] = None + simulation_nb_process: int = 1 + + @classmethod + def from_server_state(cls, server_state: State) -> "LauncherParams": + state = cls() + for f in fields(cls): + setattr(state, f.name, server_state[f.name]) + return state + + def is_complete(self) -> bool: + return None not in [getattr(self, f.name) for f in fields(self)] + + def assert_is_complete(self) -> None: + if not self.is_complete(): + raise RuntimeError(f"Incomplete simulation launch parameters : {self}.") + + +def get_timestamp() -> str: + return datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S.%f")[:-3] + + +def get_simulation_output_file_name(timestamp: str, user_name: str = "user_name"): + return f"{user_name}_{timestamp}.json" + + +def parse_launcher_output(output: str) -> SimulationInformation: + split_output = output.split("\n") + + information = SimulationInformation() + information_dict = information.to_dict() # type: ignore + + content_to_parse = [ + ("Working directory: ", "working_directory"), + ("1. copy job id: ", "copy_job_id"), + ("2. geos job id: ", "geos_job_id"), + ("3. copy back job id: ", "copy_back_job_id"), + ("Run directory: ", "run_directory"), + ] + + for line in split_output: + for info_tuple in content_to_parse: + if info_tuple[0] in line: + split_line = line.split(info_tuple[0]) + if len(split_line) < 2: + continue + information_dict[info_tuple[1]] = split_line[-1] + + information_dict["timestamp"] = get_timestamp() + return SimulationInformation.from_dict(information_dict) # type: ignore + + +# def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: +# return write_file( +# sim_info_path.as_posix(), +# get_simulation_output_file_name(info.timestamp, info.user_igg), +# json.dumps(info.to_dict()), # type: ignore +# ) + + +##TODO yay slurm +def get_launcher_command(launcher_params: LauncherParams) -> str: + launcher_cmd_args = ( + f"{SimulationConstant.SIMULATION_GEOS_PATH} " + f"--nprocs {launcher_params.simulation_nb_process} " + f"--fname {launcher_params.simulation_cmd_filename} " + f"--job_name {launcher_params.simulation_job_name}" + ) + + # state.simulation_nb_process is supposed to be an integer, but the UI present a VTextField, + # so if user changes it, then it can be defined as a str + if int(launcher_params.simulation_nb_process) > 1: + launcher_cmd_args += " --partition" + return launcher_cmd_args + + +# def get_simulation_screenshot_timestep(filename: str) -> int: +# """ +# From a given file name returns the time step. +# Filename is defined as: RenderView0_000000.png with 000000 the time step to parse and return +# """ +# if not filename: +# print("Simulation filename is not defined") +# return -1 + +# pattern = re.compile(r"RenderView[0-9]_[0-9]{6}\.png", re.IGNORECASE) +# if pattern.match(filename) is None: +# print("Simulation filename does not match the pattern: RenderView0_000000.png") +# return -1 + +# timestep = os.path.splitext(filename)[0].split("_")[-1] +# return int(timestep) if timestep else -1 + + +# def get_most_recent_file_from_list(files_list: list[str]) -> Optional[str]: +# if not files_list: +# return None +# return max(files_list, key=get_simulation_screenshot_timestep) + + +# def get_most_recent_simulation_screenshot(folder_path: Path) -> Optional[str]: +# return get_most_recent_file_from_list(os.listdir(folder_path)) if folder_path.exists() else None + + +class ISimRunner(ABC): + """ + Abstract interface for sim runner. + Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. + """ + + @abstractmethod + def launch_simulation(self, launcher_params: LauncherParams) -> tuple[Path, SimulationInformation]: + pass + + @abstractmethod + def get_user_igg(self) -> str: + pass + + @abstractmethod + def get_running_user_jobs(self) -> list[tuple[str, SlurmJobStatus]]: + pass + + +class SimRunner(ISimRunner): + """ + Runs sim on HPC + """ + pass \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py new file mode 100644 index 000000000..84fc4d4b3 --- /dev/null +++ b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py @@ -0,0 +1,78 @@ +from enum import Enum, auto, unique + +from trame_client.widgets.html import H3, Div +from trame_server import Server +from trame_vuetify.widgets.vuetify3 import VCard + +@unique +class SimulationStatus(Enum): + SCHEDULED = auto() + RUNNING = auto() + COMPLETING = auto() + COPY_BACK = auto() + DONE = auto() + NOT_RUN = auto() + UNKNOWN = auto() + + +class SimulationStatusView: + """ + Simple component containing simulation status in a VCard with some coloring depending on the status. + """ + + def __init__(self, server: Server): + def state_name(state_str): + return f"{type(self).__name__}_{state_str}_{id(self)}" + + self._text_state = state_name("text") + self._date_state = state_name("date") + self._time_state = state_name("time") + self._color_state = state_name("color") + self._state = server.state + + for s in [self._text_state, self._date_state, self._time_state, self._color_state]: + self._state.client_only(s) + + with VCard( + classes="p-8", + style=(f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`",), + ) as self.ui: + H3(f"{{{{{self._text_state}}}}}", style="text-align:center;") + Div(f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;") + + self.set_status(SimulationStatus.NOT_RUN) + self.set_time_stamp("") + + def set_status(self, status: SimulationStatus): + self._state[self._text_state] = status.name + self._state[self._color_state] = self.status_color(status) + self._state.flush() + + def set_time_stamp(self, time_stamp: str): + date, time = self.split_time_stamp(time_stamp) + self._state[self._time_state] = time + self._state[self._date_state] = date + self._state.flush() + + @staticmethod + def split_time_stamp(time_stamp: str) -> tuple[str, str]: + default_time_stamp = "", "" + if not time_stamp: + return default_time_stamp + + time_stamp = time_stamp.split("_") + if len(time_stamp) < 2: + return default_time_stamp + + return time_stamp[0].replace("-", "/"), time_stamp[1].split(".")[0].replace("-", ":") + + @staticmethod + def status_color(status: SimulationStatus) -> str: + return { + SimulationStatus.DONE: "#4CAF50", + SimulationStatus.RUNNING: "#3F51B5", + SimulationStatus.SCHEDULED: "#FFC107", + SimulationStatus.COMPLETING: "#C5E1A5", + SimulationStatus.COPY_BACK: "#C5E1A5", + SimulationStatus.UNKNOWN: "#E53935", + }.get(status, "#607D8B") \ No newline at end of file From abfd58f5ff38b977364101339ab3cdda835c511c Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 30 Oct 2025 12:08:09 +0100 Subject: [PATCH 02/70] starting import launcher handling --- .../src/geos/trame/app/io/simulation.py | 319 ++++++++++++++++++ .../geos/trame/app/ui/simulationStatusView.py | 78 +++++ 2 files changed, 397 insertions(+) create mode 100644 geos-trame/src/geos/trame/app/io/simulation.py create mode 100644 geos-trame/src/geos/trame/app/ui/simulationStatusView.py diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py new file mode 100644 index 000000000..77178bdb1 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -0,0 +1,319 @@ + +from abc import ABC, abstractmethod +from pathlib import Path +from dataclasses import dataclass, field, fields +from enum import Enum, unique +from geos.trame.app.ui.simulationStatusView import SimulationStatus +from typing import Callable, Optional +import datetime +from trame_server.core import Server +from trame_server.state import State + +#TODO move outside +#TODO use Jinja on real launcher + +@dataclass(frozen=True) +class SimulationConstant: + SIMULATION_GEOS_PATH = "/some/path/" + SIMULATION_MACHINE_NAME = "p4log01" # Only run on P4 machine + +@unique +class SlurmJobStatus(Enum): + PENDING = "PD" + RUNNING = "R" + COMPLETING = "CG" + COMPLETED = "CD" + SUSPENDED = "S" + UNKNOWN = "UNKNOWN" + + @classmethod + def from_string(cls, job_str) -> "SlurmJobStatus": + try: + return cls(job_str) + except ValueError: + return cls.UNKNOWN + +# TODO: dataclass_json +# @dataclass_json +@dataclass +class SimulationInformation: + pass + + def get_simulation_status( + self, + get_running_user_jobs_f: Callable[[], list[tuple[str, SlurmJobStatus]]], + ) -> SimulationStatus: + """ + Returns the simulation status given the current Jobs running for the current user. + Only runs the callback if the timeseries file is not already present in the done directory. + """ + if not self.geos_job_id: + return SimulationStatus.NOT_RUN + + done_sim_path = self.get_simulation_dir(SimulationStatus.DONE) + if self.get_timeseries_path(done_sim_path).exists(): + return SimulationStatus.DONE + + user_jobs = get_running_user_jobs_f() + if (self.geos_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.RUNNING + + if (self.geos_job_id, SlurmJobStatus.COMPLETING) in user_jobs: + return SimulationStatus.COMPLETING + + if (self.copy_back_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.COPY_BACK + + if (self.copy_job_id, SlurmJobStatus.RUNNING) in user_jobs: + return SimulationStatus.SCHEDULED + + return SimulationStatus.UNKNOWN + +@dataclass +class LauncherParams: + simulation_files_path: Optional[str] = None + simulation_cmd_filename: Optional[str] = None + simulation_job_name: Optional[str] = None + simulation_nb_process: int = 1 + + @classmethod + def from_server_state(cls, server_state: State) -> "LauncherParams": + state = cls() + for f in fields(cls): + setattr(state, f.name, server_state[f.name]) + return state + + def is_complete(self) -> bool: + return None not in [getattr(self, f.name) for f in fields(self)] + + def assert_is_complete(self) -> None: + if not self.is_complete(): + raise RuntimeError(f"Incomplete simulation launch parameters : {self}.") + + +def get_timestamp() -> str: + return datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S.%f")[:-3] + + +def get_simulation_output_file_name(timestamp: str, user_name: str = "user_name"): + return f"{user_name}_{timestamp}.json" + + +def parse_launcher_output(output: str) -> SimulationInformation: + split_output = output.split("\n") + + information = SimulationInformation() + information_dict = information.to_dict() # type: ignore + + content_to_parse = [ + ("Working directory: ", "working_directory"), + ("1. copy job id: ", "copy_job_id"), + ("2. geos job id: ", "geos_job_id"), + ("3. copy back job id: ", "copy_back_job_id"), + ("Run directory: ", "run_directory"), + ] + + for line in split_output: + for info_tuple in content_to_parse: + if info_tuple[0] in line: + split_line = line.split(info_tuple[0]) + if len(split_line) < 2: + continue + information_dict[info_tuple[1]] = split_line[-1] + + information_dict["timestamp"] = get_timestamp() + return SimulationInformation.from_dict(information_dict) # type: ignore + + +# def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: +# return write_file( +# sim_info_path.as_posix(), +# get_simulation_output_file_name(info.timestamp, info.user_igg), +# json.dumps(info.to_dict()), # type: ignore +# ) + + +##TODO yay slurm +def get_launcher_command(launcher_params: LauncherParams) -> str: + launcher_cmd_args = ( + f"{SimulationConstant.SIMULATION_GEOS_PATH} " + f"--nprocs {launcher_params.simulation_nb_process} " + f"--fname {launcher_params.simulation_cmd_filename} " + f"--job_name {launcher_params.simulation_job_name}" + ) + + # state.simulation_nb_process is supposed to be an integer, but the UI present a VTextField, + # so if user changes it, then it can be defined as a str + if int(launcher_params.simulation_nb_process) > 1: + launcher_cmd_args += " --partition" + return launcher_cmd_args + + +# def get_simulation_screenshot_timestep(filename: str) -> int: +# """ +# From a given file name returns the time step. +# Filename is defined as: RenderView0_000000.png with 000000 the time step to parse and return +# """ +# if not filename: +# print("Simulation filename is not defined") +# return -1 + +# pattern = re.compile(r"RenderView[0-9]_[0-9]{6}\.png", re.IGNORECASE) +# if pattern.match(filename) is None: +# print("Simulation filename does not match the pattern: RenderView0_000000.png") +# return -1 + +# timestep = os.path.splitext(filename)[0].split("_")[-1] +# return int(timestep) if timestep else -1 + + +# def get_most_recent_file_from_list(files_list: list[str]) -> Optional[str]: +# if not files_list: +# return None +# return max(files_list, key=get_simulation_screenshot_timestep) + + +# def get_most_recent_simulation_screenshot(folder_path: Path) -> Optional[str]: +# return get_most_recent_file_from_list(os.listdir(folder_path)) if folder_path.exists() else None + + +class ISimRunner(ABC): + """ + Abstract interface for sim runner. + Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. + """ + + @abstractmethod + def launch_simulation(self, launcher_params: LauncherParams) -> tuple[Path, SimulationInformation]: + pass + + @abstractmethod + def get_user_igg(self) -> str: + pass + + @abstractmethod + def get_running_user_jobs(self) -> list[tuple[str, SlurmJobStatus]]: + pass + + +class SimRunner(ISimRunner): + """ + Runs sim on HPC + """ + pass + +class Simulation: + """ + Simulation component. + Fills the UI with the screenshot as read from the simulation outputs folder and a graph with the time series + from the simulation. + + Requires a simulation runner providing information on the output path of the simulation to monitor and ways to + trigger the simulation. + """ + + def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: + self._server = server + self._sim_runner = sim_runner + self._sim_info_dir = sim_info_dir or SIMULATIONS_INFORMATION_FOLDER_PATH + + self._job_status_watcher: Optional[AsyncPeriodicRunner] = None + self._job_status_watcher_period_ms = 2000 + + self.start_result_streams() + + def __del__(self): + self.stop_result_streams() + + def set_status_watcher_period_ms(self, period_ms): + self._job_status_watcher_period_ms = period_ms + if self._job_status_watcher: + self._job_status_watcher.set_period_ms(period_ms) + + def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: + newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) + if not newer_file: + return + + f_name = Path(newer_file).name + if not f_name: + return + + self._server.state.active_screenshot_folder_path = str(screenshots_folder_path) + self._server.state.dirty("active_screenshot_folder_path") + self._server.state.active_screenshot_relative_path = f_name + self._server.state.dirty("active_screenshot_relative_path") + self._server.state.flush() + + def _update_job_status(self) -> None: + sim_info = self.get_last_user_simulation_info() + job_status = sim_info.get_simulation_status(self._sim_runner.get_running_user_jobs) + sim_path = sim_info.get_simulation_dir(job_status) + + self._server.controller.set_simulation_status(job_status) + self._server.controller.set_simulation_time_stamp(sim_info.timestamp) + + self._update_screenshot_display(sim_info.get_screenshot_path(sim_path)) + self._update_plots(sim_info.get_timeseries_path(sim_path)) + + # Stop results stream if job is done + if job_status == SimulationStatus.DONE: + self.stop_result_streams() + + def get_last_user_simulation_info(self) -> SimulationInformation: + last_sim_information = self.get_last_information_path() + return SimulationInformation.from_file(last_sim_information) + + def get_last_information_path(self) -> Optional[Path]: + user_igg = self._sim_runner.get_user_igg() + + user_files = list(reversed(sorted(self._sim_info_dir.glob(f"{user_igg}*.json")))) + if not user_files: + return None + + return user_files[0] + + def stop_result_streams(self): + if self._job_status_watcher is not None: + self._job_status_watcher.stop() + + def start_result_streams(self) -> None: + self.stop_result_streams() + + self._job_status_watcher = AsyncPeriodicRunner( + self._update_job_status, period_ms=self._job_status_watcher_period_ms + ) + + def start_simulation(self) -> None: + state = self._server.state + script_path = None + try: + launcher_params = LauncherParams.from_server_state(self._server.state) + launcher_params.assert_is_complete() + + script_path, sim_info = self._sim_runner.launch_simulation(launcher_params) + self._write_sim_info(launcher_params, sim_info) + self.start_result_streams() + state.simulation_error = "" + except Exception as e: + print("Error occurred: ", e) + state.simulation_error = str(e) + finally: + state.avoid_rewriting = False + if isinstance(script_path, Path) and script_path.is_file(): + os.remove(script_path) + + def _write_sim_info(self, launcher_params: LauncherParams, sim_info: Optional[SimulationInformation]) -> None: + if sim_info is None: + raise RuntimeError("Error parsing simulation launcher output.") + + # Make sure to save the absolute path to the working directory used by the launcher in case parsed information + # is a relative Path + if not Path(sim_info.working_directory).is_absolute(): + sim_info.working_directory = path_to_string( + launcher_params.simulation_files_path + "/" + sim_info.working_directory + ) + print("simulation information", sim_info) + + sim_info.user_igg = self._sim_runner.get_user_igg() + write_simulation_information_to_repo(sim_info, self._sim_info_dir) diff --git a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py new file mode 100644 index 000000000..84fc4d4b3 --- /dev/null +++ b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py @@ -0,0 +1,78 @@ +from enum import Enum, auto, unique + +from trame_client.widgets.html import H3, Div +from trame_server import Server +from trame_vuetify.widgets.vuetify3 import VCard + +@unique +class SimulationStatus(Enum): + SCHEDULED = auto() + RUNNING = auto() + COMPLETING = auto() + COPY_BACK = auto() + DONE = auto() + NOT_RUN = auto() + UNKNOWN = auto() + + +class SimulationStatusView: + """ + Simple component containing simulation status in a VCard with some coloring depending on the status. + """ + + def __init__(self, server: Server): + def state_name(state_str): + return f"{type(self).__name__}_{state_str}_{id(self)}" + + self._text_state = state_name("text") + self._date_state = state_name("date") + self._time_state = state_name("time") + self._color_state = state_name("color") + self._state = server.state + + for s in [self._text_state, self._date_state, self._time_state, self._color_state]: + self._state.client_only(s) + + with VCard( + classes="p-8", + style=(f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`",), + ) as self.ui: + H3(f"{{{{{self._text_state}}}}}", style="text-align:center;") + Div(f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;") + + self.set_status(SimulationStatus.NOT_RUN) + self.set_time_stamp("") + + def set_status(self, status: SimulationStatus): + self._state[self._text_state] = status.name + self._state[self._color_state] = self.status_color(status) + self._state.flush() + + def set_time_stamp(self, time_stamp: str): + date, time = self.split_time_stamp(time_stamp) + self._state[self._time_state] = time + self._state[self._date_state] = date + self._state.flush() + + @staticmethod + def split_time_stamp(time_stamp: str) -> tuple[str, str]: + default_time_stamp = "", "" + if not time_stamp: + return default_time_stamp + + time_stamp = time_stamp.split("_") + if len(time_stamp) < 2: + return default_time_stamp + + return time_stamp[0].replace("-", "/"), time_stamp[1].split(".")[0].replace("-", ":") + + @staticmethod + def status_color(status: SimulationStatus) -> str: + return { + SimulationStatus.DONE: "#4CAF50", + SimulationStatus.RUNNING: "#3F51B5", + SimulationStatus.SCHEDULED: "#FFC107", + SimulationStatus.COMPLETING: "#C5E1A5", + SimulationStatus.COPY_BACK: "#C5E1A5", + SimulationStatus.UNKNOWN: "#E53935", + }.get(status, "#607D8B") \ No newline at end of file From 2d1c395322164d93695192ffb69d9eda9da5643c Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 17 Nov 2025 16:34:26 +0100 Subject: [PATCH 03/70] some more imports --- geos-trame/src/geos/trame/app/core.py | 6 + .../src/geos/trame/app/io/simulation.py | 51 +++++--- ...tatusView.py => simulation_status_view.py} | 0 .../trame/app/utils/async_file_watcher.py | 113 ++++++++++++++++++ 4 files changed, 156 insertions(+), 14 deletions(-) rename geos-trame/src/geos/trame/app/ui/{simulationStatusView.py => simulation_status_view.py} (100%) create mode 100644 geos-trame/src/geos/trame/app/utils/async_file_watcher.py diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index 0a8f40973..7020bbfe8 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -23,6 +23,7 @@ from geos.trame.app.ui.timeline import TimelineEditor from geos.trame.app.ui.viewer.viewer import DeckViewer from geos.trame.app.components.alertHandler import AlertHandler +from geos.trame.app.io.simulation import Simulation, SimRunner import sys @@ -38,6 +39,7 @@ def __init__( self, server: Server, file_name: str ) -> None: self.deckEditor: DeckEditor | None = None self.timelineEditor: TimelineEditor | None = None self.deckInspector: DeckInspector | None = None + self.simulationLauncher : Simulation | None = None self.server = server server.enable_module( module ) @@ -67,6 +69,10 @@ def __init__( self, server: Server, file_name: str ) -> None: self.region_viewer = RegionViewer() self.well_viewer = WellViewer( 5, 5 ) + # Simulation runner + self.sim_runner : SimRunner = SimRunner(self.state.user_id) + self.simulationLauncher = Simulation(self.sim_runner, server=server) + # Data loader self.data_loader = DataLoader( self.tree, self.region_viewer, self.well_viewer, trame_server=server ) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 77178bdb1..01b09d6d6 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -3,11 +3,12 @@ from pathlib import Path from dataclasses import dataclass, field, fields from enum import Enum, unique -from geos.trame.app.ui.simulationStatusView import SimulationStatus -from typing import Callable, Optional +from geos.trame.app.ui.simulation_status_view import SimulationStatus +from typing import Callable, Optional, Union import datetime from trame_server.core import Server from trame_server.state import State +from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner #TODO move outside #TODO use Jinja on real launcher @@ -230,20 +231,20 @@ def set_status_watcher_period_ms(self, period_ms): if self._job_status_watcher: self._job_status_watcher.set_period_ms(period_ms) - def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: - newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) - if not newer_file: - return + # def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: + # newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) + # if not newer_file: + # return - f_name = Path(newer_file).name - if not f_name: - return + # f_name = Path(newer_file).name + # if not f_name: + # return - self._server.state.active_screenshot_folder_path = str(screenshots_folder_path) - self._server.state.dirty("active_screenshot_folder_path") - self._server.state.active_screenshot_relative_path = f_name - self._server.state.dirty("active_screenshot_relative_path") - self._server.state.flush() + # self._server.state.active_screenshot_folder_path = str(screenshots_folder_path) + # self._server.state.dirty("active_screenshot_folder_path") + # self._server.state.active_screenshot_relative_path = f_name + # self._server.state.dirty("active_screenshot_relative_path") + # self._server.state.flush() def _update_job_status(self) -> None: sim_info = self.get_last_user_simulation_info() @@ -317,3 +318,25 @@ def _write_sim_info(self, launcher_params: LauncherParams, sim_info: Optional[Si sim_info.user_igg = self._sim_runner.get_user_igg() write_simulation_information_to_repo(sim_info, self._sim_info_dir) + + +def path_to_string(p: Union[str, Path]) -> str: + return Path(p).as_posix() + +def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: + return write_file( + sim_info_path.as_posix(), + get_simulation_output_file_name(info.timestamp, info.user_igg), + json.dumps(info.to_dict()), # type: ignore + ) + +def write_file(folder_path: str, filename: str, file_content: str) -> Optional[Path]: + try: + Path(folder_path).mkdir(exist_ok=True) + file_path = Path(f"{folder_path}/{filename}") + with open(file_path, "w") as f: + f.write(file_content) + return file_path.absolute() + except Exception as e: + print("error occurred when copying file to", folder_path, e) + return None \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py b/geos-trame/src/geos/trame/app/ui/simulation_status_view.py similarity index 100% rename from geos-trame/src/geos/trame/app/ui/simulationStatusView.py rename to geos-trame/src/geos/trame/app/ui/simulation_status_view.py diff --git a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py new file mode 100644 index 000000000..d5ad532f4 --- /dev/null +++ b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py @@ -0,0 +1,113 @@ +import asyncio +import os +from asyncio import CancelledError, ensure_future +from io import TextIOWrapper +from pathlib import Path +from typing import Callable, Optional, Union + +from trame_server.utils import asynchronous + + +class AsyncPeriodicRunner: + """ + While started, runs given callback at given period. + """ + + def __init__(self, callback: Callable, period_ms=100): + self.last_m_time = None + self.callback = callback + self.period_ms = period_ms + self.task = None + self.start() + + def __del__(self): + self.stop() + + def set_period_ms(self, period_ms): + self.period_ms = period_ms + + def start(self): + self.stop() + self.task = asynchronous.create_task(self._runner()) + + def stop(self): + if not self.task: + return + + ensure_future(self._wait_for_cancel()) + + async def _wait_for_cancel(self): + """ + Cancel and await cancel error for the task. + If cancel is done outside async, it may raise warnings as cancelled exception may be triggered outside async + loop. + """ + if not self.task or self.task.done() or self.task.cancelled(): + self.task = None + return + + try: + self.task.cancel() + await self.task + except CancelledError: + self.task = None + + async def _runner(self): + while True: + self.callback() + await asyncio.sleep(self.period_ms / 1000.0) + + +class AsyncFileWatcher(AsyncPeriodicRunner): + def __init__(self, path_to_watch: Path, on_modified_callback: Callable, check_time_out_ms=100): + super().__init__(self._check_modified, check_time_out_ms) + self.path_to_watch = Path(path_to_watch) + self.last_m_time = None + self.on_modified_callback = on_modified_callback + + def get_m_time(self): + if not self.path_to_watch.exists(): + return None + return os.stat(self.path_to_watch).st_mtime + + def _check_modified(self): + if self.get_m_time() != self.last_m_time: + self.last_m_time = self.get_m_time() + self.on_modified_callback() + + +class AsyncSubprocess: + def __init__( + self, + args, + timeout: Union[float, None] = None, + ) -> None: + self.args = args + self.timeout = timeout + self._writer: Optional[TextIOWrapper] = None + + self.stdout: Optional[bytes] = None + self.stderr: Optional[bytes] = None + self.process: Optional[asyncio.subprocess.Process] = None + self.exception: Optional[RuntimeError] = None + + async def run(self) -> None: + cmd = " ".join(map(str, self.args)) + self.process = await self._init_subprocess(cmd) + + try: + self.stdout, self.stderr = await asyncio.wait_for(self.process.communicate(), timeout=self.timeout) + except asyncio.exceptions.TimeoutError: + self.process.kill() + self.stdout, self.stderr = await self.process.communicate() + self.exception = RuntimeError("Process timed out") + finally: + if self.process.returncode != 0: + self.exception = RuntimeError(f"Process exited with code {self.process.returncode}") + + async def _init_subprocess(self, cmd: str) -> asyncio.subprocess.Process: + return await asyncio.create_subprocess_shell( + cmd=cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) From 46899b4483a2b736ca28952607d64ba2ca2f244a Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 18 Nov 2025 15:18:31 +0100 Subject: [PATCH 04/70] wip --- geos-trame/src/geos/trame/app/core.py | 57 +++-- .../src/geos/trame/app/io/simulation.py | 221 ++++++++++++++++-- geos-trame/src/geos/trame/app/main.py | 3 + .../src/geos/trame/app/ui/simulation_view.py | 141 +++++++++++ geos-trame/src/geos/trame/app/ui/timeline.py | 26 +-- geos-trame/src/geos/trame/assets/cluster.json | 24 ++ 6 files changed, 421 insertions(+), 51 deletions(-) create mode 100644 geos-trame/src/geos/trame/app/ui/simulation_view.py create mode 100644 geos-trame/src/geos/trame/assets/cluster.json diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index 7020bbfe8..17fdd41a3 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -23,7 +23,12 @@ from geos.trame.app.ui.timeline import TimelineEditor from geos.trame.app.ui.viewer.viewer import DeckViewer from geos.trame.app.components.alertHandler import AlertHandler + + from geos.trame.app.io.simulation import Simulation, SimRunner +from geos.trame.app.ui.simulation_view import define_simulation_view + + import sys @@ -44,6 +49,7 @@ def __init__( self, server: Server, file_name: str ) -> None: server.enable_module( module ) self.state.input_file = file_name + self.state.user_id = None # TODO handle hot_reload @@ -69,9 +75,9 @@ def __init__( self, server: Server, file_name: str ) -> None: self.region_viewer = RegionViewer() self.well_viewer = WellViewer( 5, 5 ) - # Simulation runner + ######## Simulation runner self.sim_runner : SimRunner = SimRunner(self.state.user_id) - self.simulationLauncher = Simulation(self.sim_runner, server=server) + self.simulation = Simulation(self.sim_runner, server=server) # Data loader self.data_loader = DataLoader( self.tree, self.region_viewer, self.well_viewer, trame_server=server ) @@ -183,23 +189,23 @@ def build_ui( self ) -> None: ): vuetify.VIcon( "mdi-content-save-outline" ) - with html.Div( - style= - "height: 100%; width: 300px; display: flex; align-items: center; justify-content: space-between;", - v_if=( "tab_idx == 1", ), - ): - vuetify.VBtn( - "Run", - style="z-index: 1;", - ) - vuetify.VBtn( - "Kill", - style="z-index: 1;", - ) - vuetify.VBtn( - "Clear", - style="z-index: 1;", - ) + # with html.Div( + # style= + # "height: 100%; width: 300px; display: flex; align-items: center; justify-content: space-between;", + # v_if=( "tab_idx == 1", ), + # ): + # vuetify.VBtn( + # "Run", + # style="z-index: 1;", + # ) + # vuetify.VBtn( + # "Kill", + # style="z-index: 1;", + # ) + # vuetify.VBtn( + # "Clear", + # style="z-index: 1;", + # ) # input file editor with vuetify.VCol( v_show=( "tab_idx == 0", ), classes="flex-grow-1 pa-0 ma-0" ): @@ -214,3 +220,16 @@ def build_ui( self ) -> None: "The file " + self.state.input_file + " cannot be parsed.", file=sys.stderr, ) + + with vuetify.VCol( v_show=( "tab_idx == 1"), classes="flex-grow-1 pa-0 ma-0") : + if self.simulation is not None: + define_simulation_view(self.server) + else: + self.ctrl.on_add_error( + "Error", + "The execution context " + self.state.exec_context + " is not consistent.", + ) + print( + "The execution context " + self.state.exec_context + " is not consistent.", + file=sys.stderr, + ) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 01b09d6d6..4a108a18a 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -10,13 +10,46 @@ from trame_server.state import State from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner +import jinja2 +import paramiko + #TODO move outside #TODO use Jinja on real launcher @dataclass(frozen=True) class SimulationConstant: - SIMULATION_GEOS_PATH = "/some/path/" - SIMULATION_MACHINE_NAME = "p4log01" # Only run on P4 machine + SIMULATION_GEOS_PATH = "/workrd/users/" + HOST = "p4log01" # Only run on P4 machine + PORT = 22 + SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/user" + SIMULATION_DEFAULT_FILE_NAME="geosDeck.xml" + +class Authentificator:#namespacing more than anything eler + + @staticmethod + def get_key(login:str, passphrase = "trameisrunning"): + + try: + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file("~/.ssh/id_trame") + except paramiko.SSHException as e: + print(f"Error loading private key: {e}\n") + except FileNotFoundError as e: + print(f"Private key not found: {e}\n Generating key ...") + PRIVATE_KEY = Authentificator.gen_key(login, SimulationConstant.HOST, passphrase) + return PRIVATE_KEY + + return PRIVATE_KEY + + @staticmethod + def gen_key(login:str, host: str, passphrase: str): + file_path = "~/.ssh/id_trame" + cmd = f"ssh-keygen -t rsa -b 4096 -C {login}@{host} -f {file_path} -N \"{passphrase}\" " + import subprocess + print(f"Running: {''.join(cmd)}") + subprocess.run(cmd, shell=True) + print(f"SSH key generated at: {file_path}") + print(f"Public key: {file_path}.pub") + SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" @unique class SlurmJobStatus(Enum): @@ -183,25 +216,170 @@ class ISimRunner(ABC): Abstract interface for sim runner. Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. """ + pass + # @abstractmethod + # def launch_simulation(self, launcher_params: LauncherParams) -> tuple[Path, SimulationInformation]: + # pass - @abstractmethod - def launch_simulation(self, launcher_params: LauncherParams) -> tuple[Path, SimulationInformation]: - pass - - @abstractmethod - def get_user_igg(self) -> str: - pass + # @abstractmethod + # def get_user_igg(self) -> str: + # pass - @abstractmethod - def get_running_user_jobs(self) -> list[tuple[str, SlurmJobStatus]]: - pass + # @abstractmethod + # def get_running_user_jobs(self) -> list[tuple[str, SlurmJobStatus]]: + # pass class SimRunner(ISimRunner): """ - Runs sim on HPC + Runs sim on HPC. Wrap paramiko use """ - pass + + def __init__(self, user): + super().__init__() + + ssh_client = self._create_ssh_client(SimulationConstant.HOST, SimulationConstant.PORT, username=user, key=Authentificator.get_key(user)) + print(ssh_client) + + # early test + self.local_upload_file = "test_upload.txt" + import time + with open(self.local_upload_file, "w") as f: + f.write(f"This file was uploaded at {time.ctime()}\n") + print(f"Created local file: {self.local_upload_file}") + + @staticmethod + def _create_ssh_client( host, port, username, password=None, key=None): + """ + Initializes and returns an SSH client connection. + Uses context manager for automatic cleanup. + """ + client = paramiko.SSHClient() + # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + try: + if key: + print(f"Connecting to {host} using key-based authentication...") + client.connect(host, port, username, pkey=key, timeout=10) + else: + raise paramiko.SSHException("No Key Found") + + return client + except paramiko.AuthenticationException: + print("Authentication failed. Check your credentials or key.") + return None + except paramiko.SSHException as e: + print(f"Could not establish SSH connection: {e}") + return None + except Exception as e: + print(f"An unexpected error occurred: {e}") + return None + + + @staticmethod + def _execute_remote_command(client, command): + """ + Executes a single command on the remote server and prints the output. + """ + if not client: + return + + print(f"\n--- Executing Command: '{command}' ---") + try: + # Executes the command. stdin, stdout, and stderr are file-like objects. + # Ensure command ends with a newline character for some shell environments. + stdin, stdout, stderr = client.exec_command(command) + + # Wait for the command to finish and read the output + exit_status = stdout.channel.recv_exit_status() + + # Print standard output + stdout_data = stdout.read().decode().strip() + if stdout_data: + print("STDOUT:") + print(stdout_data) + + # Print standard error (if any) + stderr_data = stderr.read().decode().strip() + if stderr_data: + print("STDERR:") + print(stderr_data) + + print(f"Command exited with status: {exit_status}") + return exit_status + + except Exception as e: + print(f"Error executing command: {e}") + return -1 + + @staticmethod + def _transfer_file_sftp(client, local_path, remote_path, direction="put"): + """ + Transfers a file using SFTP (Secure File Transfer Protocol). + Direction can be 'put' (upload) or 'get' (download). + """ + if not client: + return + + print(f"\n--- Starting SFTP Transfer ({direction.upper()}) ---") + + try: + # Establish an SFTP connection session + sftp = client.open_sftp() + + if direction == "put": + print(f"Uploading '{local_path}' to '{remote_path}'...") + sftp.put(local_path, remote_path) + print("Upload complete.") + elif direction == "get": + print(f"Downloading '{remote_path}' to '{local_path}'...") + sftp.get(remote_path, local_path) + print("Download complete.") + else: + print("Invalid transfer direction. Use 'put' or 'get'.") + + sftp.close() + return True + + except FileNotFoundError: + print(f"Error: Local file '{local_path}' not found.") + return False + except IOError as e: + print(f"Error accessing remote file or path: {e}") + return False + except Exception as e: + print(f"An error occurred during SFTP: {e}") + return False + + + def launch_simulation(self): + + if self.ssh_client: + try: + # --- 3. Execute a Remote Command --- + self._execute_remote_command(self.ssh_client, "ls -l /tmp") + + # --- 4. Upload a File (PUT) --- + remote_path_upload = f"/tmp/{self.local_upload_file}" + self._transfer_file_sftp(self.ssh_client, self.local_upload_file, remote_path_upload, direction="put") + + # --- 5. Verify Upload by Listing Remote Directory --- + self._execute_remote_command(self.ssh_client, f"ls -l /tmp") + + # --- 6. Download a File (GET) --- + remote_download_file = f"/workrd/{self.local_upload_file}" # Use a known remote file + local_download_file = "downloaded_hostname.txt" + self._transfer_file_sftp(self.ssh_client, local_download_file, remote_download_file, direction="get") + + # --- 7. Clean up the uploaded file (Optional) --- + self._execute_remote_command(self.ssh_client, f"rm {remote_path_upload}") + + finally: + # --- 8. Close the connection --- + self.ssh_client.close() + print("\nSSH Connection closed.") + class Simulation: """ @@ -216,7 +394,7 @@ class Simulation: def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: self._server = server self._sim_runner = sim_runner - self._sim_info_dir = sim_info_dir or SIMULATIONS_INFORMATION_FOLDER_PATH + self._sim_info_dir = sim_info_dir or SimulationConstant.SIMULATIONS_INFORMATION_FOLDER_PATH self._job_status_watcher: Optional[AsyncPeriodicRunner] = None self._job_status_watcher_period_ms = 2000 @@ -235,6 +413,10 @@ def set_status_watcher_period_ms(self, period_ms): # newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) # if not newer_file: # return + # def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: + # newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) + # if not newer_file: + # return # f_name = Path(newer_file).name # if not f_name: @@ -279,11 +461,12 @@ def stop_result_streams(self): self._job_status_watcher.stop() def start_result_streams(self) -> None: - self.stop_result_streams() + pass + # self.stop_result_streams() - self._job_status_watcher = AsyncPeriodicRunner( - self._update_job_status, period_ms=self._job_status_watcher_period_ms - ) + # self._job_status_watcher = AsyncPeriodicRunner( + # self._update_job_status, period_ms=self._job_status_watcher_period_ms + # ) def start_simulation(self) -> None: state = self._server.state diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 2ad3b293a..5840dbeb7 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -7,6 +7,9 @@ from trame.app import get_server # type: ignore from trame_server import Server +import sys +sys.path.insert(0,"/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src") + from geos.trame.app.core import GeosTrame diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py new file mode 100644 index 000000000..b6b35282f --- /dev/null +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -0,0 +1,141 @@ +from trame.widgets import html +from trame.widgets import vuetify3 as vuetify + +from geos.trame.app.io.simulation import SimulationConstant +from geos.trame.app.ui.simulation_status_view import SimulationStatusView + + +def hint_config(): + + return ["P4: 1x12", "P4: 2x6"] + + +def define_simulation_view(server) -> None: + with vuetify.VContainer(): + with vuetify.VRow(): + with vuetify.VCol(cols=4): + vuetify.VTextField( + v_model=("login", None,), + label="Login", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-login" + ) + with vuetify.VCol(cols=4): + vuetify.VTextField( + v_model=("password", None,), + label="Password", + type="password", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-onepassword" + ) + + # + items = hint_config() + vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") + with vuetify.VCol(cols=2): + vuetify.VSelect(label="Cluster", + items=("items",items)) + + with vuetify.VRow(): + with vuetify.VCol(cols=8): + vuetify.VFileInput( + v_model=("key_path", None,), + label="Path to ssh key", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-key-chain-variant" + ) + + # + vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") + with vuetify.VCol(cols=2): + vuetify.VBtn("Log in", click="trigger('run_try_logging')"), # type: ignore + + + vuetify.VDivider(thickness=5, classes="my-4") + + with vuetify.VRow(): + with vuetify.VCol(): + vuetify.VFileInput( + v_model=("simulation_cmd_filename", SimulationConstant.SIMULATION_DEFAULT_FILE_NAME), + label="Simulation file name", + dense=True, + hide_details=True, + clearable=True, + ) + # with vuetify.VCol(cols=1): + # vuetify.VFileInput( + # v_model=("cmd_file", None), + # prepend_icon="mdi-file-upload-outline", + # hide_input=True, + # style="padding: 0;", + # disabled=("!simulation_files_path",), + # ) + + with vuetify.VRow(), vuetify.VCol(): + vuetify.VTextField( + v_model=( + "simulation_files_path", + None, + ), + label="Path where to write files and launch code", + prepend_icon="mdi-upload", + dense=True, + hide_details=True, + clearable=True, + # TODO callback validation of path + ) + + with vuetify.VRow(), vuetify.VCol(): + # must_be_greater_than_0 = ( + # "[value => Number.isInteger(Number(value)) && value > 0 || 'Must be an integer greater than 0']" + # ) + # vuetify.VTextField( + # v_model=("simulation_nb_process", 1), + # label="Processes number", + # dense=True, + # hide_details=True, + # clearable=True, + # rules=(must_be_greater_than_0,), + # ) + vuetify.VTextField( + v_model=("simulation_dl_path",), + label="Simulation download path", + dense=True, + clearable=True, + prepend_icon="mdi-download", + # TODO callback validation of path + ) + + with vuetify.VRow(), vuetify.VCol(): + vuetify.VTextField( + v_model=("simulation_job_name", "geosJob"), + label="Job Name", + dense=True, + hide_details=True, + clearable=True, + ) + with vuetify.VRow(): + vuetify.VSpacer() + with vuetify.VCol(cols=1): + vuetify.VBtn("Run", click="trigger('run_simulation')"), # type: ignore + with vuetify.VCol(cols=1): + vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore + # with vuetify.VCol(cols=1): + # vuetify.VBtn("Clear", click="trigger('clear_simulation')"), # type: ignore + + vuetify.VDivider(thickness=5, classes="my-4") + + with vuetify.VRow(): + with vuetify.VCol(cols=2): + SimulationStatusView(server=server) + + + + with vuetify.VRow(v_if="simulation_error"): + html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") diff --git a/geos-trame/src/geos/trame/app/ui/timeline.py b/geos-trame/src/geos/trame/app/ui/timeline.py index d6961c0ed..6d3559f9f 100644 --- a/geos-trame/src/geos/trame/app/ui/timeline.py +++ b/geos-trame/src/geos/trame/app/ui/timeline.py @@ -3,7 +3,7 @@ # SPDX-FileContributor: Lionel Untereiner from typing import Any -from trame.widgets import gantt +# from trame.widgets import gantt from trame.widgets import vuetify3 as vuetify from trame_simput import get_simput_manager @@ -72,18 +72,18 @@ def __init__( self, source: DeckTree, **kwargs: Any ) -> None: vuetify.VAlert( "{{ item.summary }}" ) vuetify.Template( "{{ item.start_date }}", raw_attrs=[ "v-slot:opposite" ] ) - with vuetify.VContainer( "Events chart" ): - gantt.Gantt( - canEdit=True, - dateLimit=30, - startDate="2024-11-01 00:00", - endDate="2024-12-01 00:00", - # title='Gantt-pre-test', - fields=fields, - update=( self.update_from_js, "items" ), - items=( "items", items ), - classes="fill_height", - ) + # with vuetify.VContainer( "Events chart" ): + # gantt.Gantt( + # canEdit=True, + # dateLimit=30, + # startDate="2024-11-01 00:00", + # endDate="2024-12-01 00:00", + # # title='Gantt-pre-test', + # fields=fields, + # update=( self.update_from_js, "items" ), + # items=( "items", items ), + # classes="fill_height", + # ) def update_from_js( self, *items: tuple ) -> None: """Update method called from javascript.""" diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json new file mode 100644 index 000000000..e3ba6a238 --- /dev/null +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -0,0 +1,24 @@ +{ + "clusters": [ + { + "name": "p4", + "simulation_default_path": "/www", + "geos_version_default": "daily_rhel", + "simulation_information_default_path": "/www", + "simulation_default_filename": "geosDeck.xml", + "n_nodes": 20, + "cpu": { "types": ["Intel Xeon"], "per_node": 64 }, + "gpu": { "types": ["NVIDIA A100"], "per_node": 8 } + }, + { + "name": "elba", + "n_nodes": 10, + "simulation_default_path": "/www", + "geos_version_default": "daily_rhel", + "simulation_information_default_path": "/www", + "simulation_default_filename": "geosDeck.xml", + "cpu": { "types": ["AMD EPYC"], "per_node": 32 }, + "gpu": { "types": ["NVIDIA V100"],"per_node": 4 } + } + ] +} From e9dd40db86b02aba3b6d37089b6127c9bbc60818 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 19 Nov 2025 11:11:12 +0100 Subject: [PATCH 05/70] wip --- .../src/geos/trame/app/ui/simulation_view.py | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index b6b35282f..69347bcf9 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -34,6 +34,7 @@ def define_simulation_view(server) -> None: ) # + access_granted = False # link to login button callback run_try_logging results items = hint_config() vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): @@ -67,6 +68,7 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, + disabled=("!access_granted") ) # with vuetify.VCol(cols=1): # vuetify.VFileInput( @@ -88,27 +90,18 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, + disabled=("!access_granted") # TODO callback validation of path ) with vuetify.VRow(), vuetify.VCol(): - # must_be_greater_than_0 = ( - # "[value => Number.isInteger(Number(value)) && value > 0 || 'Must be an integer greater than 0']" - # ) - # vuetify.VTextField( - # v_model=("simulation_nb_process", 1), - # label="Processes number", - # dense=True, - # hide_details=True, - # clearable=True, - # rules=(must_be_greater_than_0,), - # ) vuetify.VTextField( v_model=("simulation_dl_path",), label="Simulation download path", dense=True, clearable=True, prepend_icon="mdi-download", + disabled=("!access_granted") # TODO callback validation of path ) @@ -119,6 +112,7 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, + disabled=("!access_granted") ) with vuetify.VRow(): vuetify.VSpacer() @@ -126,8 +120,6 @@ def define_simulation_view(server) -> None: vuetify.VBtn("Run", click="trigger('run_simulation')"), # type: ignore with vuetify.VCol(cols=1): vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore - # with vuetify.VCol(cols=1): - # vuetify.VBtn("Clear", click="trigger('clear_simulation')"), # type: ignore vuetify.VDivider(thickness=5, classes="my-4") @@ -135,7 +127,5 @@ def define_simulation_view(server) -> None: with vuetify.VCol(cols=2): SimulationStatusView(server=server) - - with vuetify.VRow(v_if="simulation_error"): html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") From 218c735247344f17d50fa7351585a8afe835e65a Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 19 Nov 2025 11:20:49 +0100 Subject: [PATCH 06/70] wip --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 69347bcf9..f130d96b2 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -70,14 +70,6 @@ def define_simulation_view(server) -> None: clearable=True, disabled=("!access_granted") ) - # with vuetify.VCol(cols=1): - # vuetify.VFileInput( - # v_model=("cmd_file", None), - # prepend_icon="mdi-file-upload-outline", - # hide_input=True, - # style="padding: 0;", - # disabled=("!simulation_files_path",), - # ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( @@ -114,6 +106,10 @@ def define_simulation_view(server) -> None: clearable=True, disabled=("!access_granted") ) + + + vuetify.VDivider(thickness=5, classes="my-4") + with vuetify.VRow(): vuetify.VSpacer() with vuetify.VCol(cols=1): @@ -121,7 +117,6 @@ def define_simulation_view(server) -> None: with vuetify.VCol(cols=1): vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore - vuetify.VDivider(thickness=5, classes="my-4") with vuetify.VRow(): with vuetify.VCol(cols=2): From 8a361d216901b1e3b7e05dbcfd7e2889c9f4be3c Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 19 Nov 2025 15:47:50 +0100 Subject: [PATCH 07/70] wip --- .../src/geos/trame/app/io/simulation.py | 4 +- .../src/geos/trame/app/ui/simulation_view.py | 106 +++++++++++++++--- geos-trame/src/geos/trame/assets/cluster.json | 16 +-- 3 files changed, 94 insertions(+), 32 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 4a108a18a..434e612f2 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -21,10 +21,10 @@ class SimulationConstant: SIMULATION_GEOS_PATH = "/workrd/users/" HOST = "p4log01" # Only run on P4 machine PORT = 22 - SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/user" + SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" SIMULATION_DEFAULT_FILE_NAME="geosDeck.xml" -class Authentificator:#namespacing more than anything eler +class Authentificator:#namespacing more than anything else @staticmethod def get_key(login:str, passphrase = "trameisrunning"): diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index f130d96b2..c225512db 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -3,11 +3,77 @@ from geos.trame.app.io.simulation import SimulationConstant from geos.trame.app.ui.simulation_status_view import SimulationStatusView - - -def hint_config(): - - return ["P4: 1x12", "P4: 2x6"] +import json + +def suggest_decomposition(n_unknowns, + memory_per_unknown_bytes, + node_memory_gb, + cores_per_node, + min_unknowns_per_rank=10000, + strong_scaling=True): + """ + Suggests node/rank distribution for a cluster computation. + + Parameters: + - n_unknowns: total number of unknowns + - memory_per_unknown_bytes: estimated memory per unknown + - node_memory_gb: available memory per node + - cores_per_node: cores available per node + - min_unknowns_per_rank: minimum for efficiency + - strong_scaling: True if problem size is fixed + + Note: + - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers + - Use power-of-2 decompositions when possible (helps with communication patterns) + - For 3D problems, try to maintain cubic subdomains (minimizes surface-to-volume ratio, reducing communication) + - Don't oversubscribe: avoid using more ranks than provide parallel efficiency + + """ + + # Memory constraint + node_memory_bytes = node_memory_gb * 1e9 + max_unknowns_per_node = int(0.8 * node_memory_bytes / memory_per_unknown_bytes) + + # Compute minimum nodes needed + min_nodes = max(1, (n_unknowns + max_unknowns_per_node - 1) // max_unknowns_per_node) + + # Determine ranks per node + unknowns_per_node = n_unknowns // min_nodes + unknowns_per_rank = max(min_unknowns_per_rank, unknowns_per_node // cores_per_node) + + # Calculate total ranks needed + n_ranks = max(1, n_unknowns // unknowns_per_rank) + + # Distribute across nodes + ranks_per_node = min(cores_per_node, (n_ranks + min_nodes - 1) // min_nodes) + n_nodes = (n_ranks + ranks_per_node - 1) // ranks_per_node + + return { + 'nodes': n_nodes, + 'ranks_per_node': ranks_per_node, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // (n_nodes * ranks_per_node) + } + +def hint_config(cluster_name, n_unknowns, job_type = 'cpu'): + + # return ["P4: 1x22", "P4: 2x11"] + with open('/data/pau901/SIM_CS/04_WORKSPACE/USERS/jfranc/geosPythonPackages/geos-trame/src/geos/trame/assets/cluster.json','r') as file: + all_cluster = json.load(file) + selected_cluster = list(filter(lambda d: d.get('name')==cluster_name, all_cluster["clusters"]))[0] + + if job_type == 'cpu': #make it an enum + sd = suggest_decomposition(n_unknowns, + 64, + selected_cluster['mem_per_node'], + selected_cluster['cpu']['per_node'] + ) + # elif job_type == 'gpu': + # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] + + + return [ f"{selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] + def define_simulation_view(server) -> None: @@ -35,7 +101,7 @@ def define_simulation_view(server) -> None: # access_granted = False # link to login button callback run_try_logging results - items = hint_config() + items = hint_config('p4', 12e6) vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): vuetify.VSelect(label="Cluster", @@ -97,23 +163,29 @@ def define_simulation_view(server) -> None: # TODO callback validation of path ) - with vuetify.VRow(), vuetify.VCol(): - vuetify.VTextField( - v_model=("simulation_job_name", "geosJob"), - label="Job Name", - dense=True, - hide_details=True, - clearable=True, - disabled=("!access_granted") - ) + with vuetify.VRow(): + with vuetify.VCol(cols=4): + vuetify.VTextField( + v_model=("simulation_job_name", "geosJob"), + label="Job Name", + dense=True, + hide_details=True, + clearable=True, + disabled=("!access_granted") + ) + + vuetify.VSpacer() + with vuetify.VCol(cols=1): + vuetify.VBtn("Run", + click="trigger('run_simulation')", + disabled=("!access_granted"), + classes="ml-auto"), # type: ignore vuetify.VDivider(thickness=5, classes="my-4") with vuetify.VRow(): vuetify.VSpacer() - with vuetify.VCol(cols=1): - vuetify.VBtn("Run", click="trigger('run_simulation')"), # type: ignore with vuetify.VCol(cols=1): vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index e3ba6a238..d8bfa4e3c 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -6,19 +6,9 @@ "geos_version_default": "daily_rhel", "simulation_information_default_path": "/www", "simulation_default_filename": "geosDeck.xml", - "n_nodes": 20, - "cpu": { "types": ["Intel Xeon"], "per_node": 64 }, - "gpu": { "types": ["NVIDIA A100"], "per_node": 8 } - }, - { - "name": "elba", - "n_nodes": 10, - "simulation_default_path": "/www", - "geos_version_default": "daily_rhel", - "simulation_information_default_path": "/www", - "simulation_default_filename": "geosDeck.xml", - "cpu": { "types": ["AMD EPYC"], "per_node": 32 }, - "gpu": { "types": ["NVIDIA V100"],"per_node": 4 } + "n_nodes": 212, + "cpu": { "types": ["AMD EPYC 4th gen"], "per_node": 192 }, + "mem_per_node": 768 } ] } From cad9c39bcfbc7196aa54b9caed38904a3cd98623 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 20 Nov 2025 13:59:37 +0100 Subject: [PATCH 08/70] wip --- .../src/geos/trame/app/io/simulation.py | 29 ++++++++++--------- .../src/geos/trame/app/ui/simulation_view.py | 8 ++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 434e612f2..51d7c5c83 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -12,6 +12,7 @@ import jinja2 import paramiko +import os #TODO move outside #TODO use Jinja on real launcher @@ -22,12 +23,11 @@ class SimulationConstant: HOST = "p4log01" # Only run on P4 machine PORT = 22 SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" - SIMULATION_DEFAULT_FILE_NAME="geosDeck.xml" - + SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" class Authentificator:#namespacing more than anything else @staticmethod - def get_key(login:str, passphrase = "trameisrunning"): + def get_key(): try: PRIVATE_KEY = paramiko.RSAKey.from_private_key_file("~/.ssh/id_trame") @@ -35,21 +35,25 @@ def get_key(login:str, passphrase = "trameisrunning"): print(f"Error loading private key: {e}\n") except FileNotFoundError as e: print(f"Private key not found: {e}\n Generating key ...") - PRIVATE_KEY = Authentificator.gen_key(login, SimulationConstant.HOST, passphrase) + PRIVATE_KEY = Authentificator.gen_key() return PRIVATE_KEY return PRIVATE_KEY @staticmethod - def gen_key(login:str, host: str, passphrase: str): + def gen_key(): file_path = "~/.ssh/id_trame" - cmd = f"ssh-keygen -t rsa -b 4096 -C {login}@{host} -f {file_path} -N \"{passphrase}\" " - import subprocess - print(f"Running: {''.join(cmd)}") - subprocess.run(cmd, shell=True) - print(f"SSH key generated at: {file_path}") - print(f"Public key: {file_path}.pub") - SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" + key = paramiko.RSAKey.generate(bits=4096) + + # Get public key in OpenSSH format + public_key = f"{key.get_name()} {key.get_base64()}" + with open(file_path, "w") as pub_file: + pub_file.write(public_key) + + print("SSH key pair generated: id_trame (private), id_trame.pub (public)") + + + @unique class SlurmJobStatus(Enum): @@ -71,7 +75,6 @@ def from_string(cls, job_str) -> "SlurmJobStatus": # @dataclass_json @dataclass class SimulationInformation: - pass def get_simulation_status( self, diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index c225512db..8cf167e2d 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -73,7 +73,13 @@ def hint_config(cluster_name, n_unknowns, job_type = 'cpu'): return [ f"{selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] - + + +class Login: + + @controller.trigger("run_try_login") + def try_logging(): + pass def define_simulation_view(server) -> None: From 84f44529813d3355adb093bdb8638a68df7ddad0 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 20 Nov 2025 18:18:29 +0100 Subject: [PATCH 09/70] start login backend --- geos-trame/src/geos/trame/app/core.py | 17 ----------- .../src/geos/trame/app/io/simulation.py | 29 +++++++++++++++---- .../src/geos/trame/app/ui/simulation_view.py | 8 +---- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index 17fdd41a3..4d74d7aaf 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -189,23 +189,6 @@ def build_ui( self ) -> None: ): vuetify.VIcon( "mdi-content-save-outline" ) - # with html.Div( - # style= - # "height: 100%; width: 300px; display: flex; align-items: center; justify-content: space-between;", - # v_if=( "tab_idx == 1", ), - # ): - # vuetify.VBtn( - # "Run", - # style="z-index: 1;", - # ) - # vuetify.VBtn( - # "Kill", - # style="z-index: 1;", - # ) - # vuetify.VBtn( - # "Clear", - # style="z-index: 1;", - # ) # input file editor with vuetify.VCol( v_show=( "tab_idx == 0", ), classes="flex-grow-1 pa-0 ma-0" ): diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 51d7c5c83..44371d40f 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -24,13 +24,18 @@ class SimulationConstant: PORT = 22 SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" + + + + + class Authentificator:#namespacing more than anything else @staticmethod - def get_key(): + def get_key(id=os.environ.get("USER")): try: - PRIVATE_KEY = paramiko.RSAKey.from_private_key_file("~/.ssh/id_trame") + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file(f"/users/{id}/.ssh/id_trame") except paramiko.SSHException as e: print(f"Error loading private key: {e}\n") except FileNotFoundError as e: @@ -41,8 +46,8 @@ def get_key(): return PRIVATE_KEY @staticmethod - def gen_key(): - file_path = "~/.ssh/id_trame" + def gen_key(id=os.environ.get("USER")): + file_path = f"/users/{id}/.ssh/id_trame" key = paramiko.RSAKey.generate(bits=4096) # Get public key in OpenSSH format @@ -241,7 +246,7 @@ class SimRunner(ISimRunner): def __init__(self, user): super().__init__() - ssh_client = self._create_ssh_client(SimulationConstant.HOST, SimulationConstant.PORT, username=user, key=Authentificator.get_key(user)) + ssh_client = self._create_ssh_client(SimulationConstant.HOST, SimulationConstant.PORT, username=user, key=Authentificator.get_key()) print(ssh_client) # early test @@ -396,6 +401,7 @@ class Simulation: def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: self._server = server + controller = server.controller self._sim_runner = sim_runner self._sim_info_dir = sim_info_dir or SimulationConstant.SIMULATIONS_INFORMATION_FOLDER_PATH @@ -403,6 +409,19 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona self._job_status_watcher_period_ms = 2000 self.start_result_streams() + + #define triggers + @controller.trigger("run_try_login") + def run_try_login() -> None: + print("login login login") + + @controller.trigger("run_simulation") + def run_simulation()-> None: + pass + + @controller.trigger("kill_simulation") + def kill_simulation(pid)->None: + pass def __del__(self): self.stop_result_streams() diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 8cf167e2d..0145b05f7 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -75,12 +75,6 @@ def hint_config(cluster_name, n_unknowns, job_type = 'cpu'): return [ f"{selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] -class Login: - - @controller.trigger("run_try_login") - def try_logging(): - pass - def define_simulation_view(server) -> None: with vuetify.VContainer(): @@ -127,7 +121,7 @@ def define_simulation_view(server) -> None: # vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): - vuetify.VBtn("Log in", click="trigger('run_try_logging')"), # type: ignore + vuetify.VBtn("Log in", click="trigger('run_try_login')"), # type: ignore vuetify.VDivider(thickness=5, classes="my-4") From ce820e799c31605c52978e4f4718804fa0a41b8b Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 21 Nov 2025 15:22:11 +0100 Subject: [PATCH 10/70] ssh login --- .../src/geos/trame/app/io/simulation.py | 254 ++++++++++-------- 1 file changed, 141 insertions(+), 113 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 44371d40f..670c726bd 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -20,7 +20,8 @@ @dataclass(frozen=True) class SimulationConstant: SIMULATION_GEOS_PATH = "/workrd/users/" - HOST = "p4log01" # Only run on P4 machine + HOST = "fr-vmx00368.main.glb.corp.local" #"p4log01" # Only run on P4 machine + REMOTE_HOME_BASE = "/users" PORT = 22 SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" @@ -31,33 +32,153 @@ class SimulationConstant: class Authentificator:#namespacing more than anything else + ssh_client : paramiko.SSHClient + @staticmethod - def get_key(id=os.environ.get("USER")): + def get_key( id, pword ): try: - PRIVATE_KEY = paramiko.RSAKey.from_private_key_file(f"/users/{id}/.ssh/id_trame") + home = os.environ.get("HOME") + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file(f"{home}/.ssh/id_trame") + return PRIVATE_KEY except paramiko.SSHException as e: print(f"Error loading private key: {e}\n") except FileNotFoundError as e: print(f"Private key not found: {e}\n Generating key ...") PRIVATE_KEY = Authentificator.gen_key() + temp_client = paramiko.SSHClient() + temp_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + temp_client.connect(SimulationConstant.HOST, SimulationConstant.PORT, username=id, password=pword, timeout=10) + Authentificator._transfer_file_sftp(temp_client,f"{home}/.ssh/id_trame.pub",f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub") + Authentificator._execute_remote_command(temp_client,f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys") + return PRIVATE_KEY - return PRIVATE_KEY @staticmethod - def gen_key(id=os.environ.get("USER")): - file_path = f"/users/{id}/.ssh/id_trame" + def gen_key(): + + home = os.environ.get("HOME") + file_path = f"{home}/.ssh/id_trame" key = paramiko.RSAKey.generate(bits=4096) + key.write_private_key_file(file_path) # Get public key in OpenSSH format public_key = f"{key.get_name()} {key.get_base64()}" - with open(file_path, "w") as pub_file: + with open(file_path + ".pub", "w") as pub_file: pub_file.write(public_key) print("SSH key pair generated: id_trame (private), id_trame.pub (public)") + + return key + @staticmethod + def _create_ssh_client( host, port, username, password=None, key=None) -> paramiko.SSHClient: + """ + Initializes and returns an SSH client connection. + Uses context manager for automatic cleanup. + """ + client = paramiko.SSHClient() + # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + try: + # if key: + print(f"Connecting to {host} using key-based authentication...") + client.connect(host, port, username, pkey=key, timeout=10) + # elif password: + # print(f"Connecting to {host} using uid-password authentication...") + # client.connect(host, port, username, password=password, timeout=10) + # else: + # raise paramiko.SSHException("No Key Found") + + return client + except paramiko.AuthenticationException: + print("Authentication failed. Check your credentials or key.") + return None + except paramiko.SSHException as e: + print(f"Could not establish SSH connection: {e}") + return None + except Exception as e: + print(f"An unexpected error occurred: {e}") + return None + + + @staticmethod + def _execute_remote_command(client, command): + """ + Executes a single command on the remote server and prints the output. + """ + if not client: + return + + print(f"\n--- Executing Command: '{command}' ---") + try: + # Executes the command. stdin, stdout, and stderr are file-like objects. + # Ensure command ends with a newline character for some shell environments. + stdin, stdout, stderr = client.exec_command(command) + + # Wait for the command to finish and read the output + exit_status = stdout.channel.recv_exit_status() + + # Print standard output + stdout_data = stdout.read().decode().strip() + if stdout_data: + print("STDOUT:") + print(stdout_data) + + # Print standard error (if any) + stderr_data = stderr.read().decode().strip() + if stderr_data: + print("STDERR:") + print(stderr_data) + + print(f"Command exited with status: {exit_status}") + return exit_status + + except Exception as e: + print(f"Error executing command: {e}") + return -1 + + @staticmethod + def _transfer_file_sftp(client, local_path, remote_path, direction="put"): + """ + Transfers a file using SFTP (Secure File Transfer Protocol). + Direction can be 'put' (upload) or 'get' (download). + """ + if not client: + return + + print(f"\n--- Starting SFTP Transfer ({direction.upper()}) ---") + + try: + # Establish an SFTP connection session + sftp = client.open_sftp() + + if direction == "put": + print(f"Uploading '{local_path}' to '{remote_path}'...") + sftp.put(local_path, remote_path) + print("Upload complete.") + elif direction == "get": + print(f"Downloading '{remote_path}' to '{local_path}'...") + sftp.get(remote_path, local_path) + print("Download complete.") + else: + print("Invalid transfer direction. Use 'put' or 'get'.") + + sftp.close() + return True + + except FileNotFoundError: + print(f"Error: Local file '{local_path}' not found.") + return False + except IOError as e: + print(f"Error accessing remote file or path: {e}") + return False + except Exception as e: + print(f"An error occurred during SFTP: {e}") + return False @unique @@ -246,9 +367,6 @@ class SimRunner(ISimRunner): def __init__(self, user): super().__init__() - ssh_client = self._create_ssh_client(SimulationConstant.HOST, SimulationConstant.PORT, username=user, key=Authentificator.get_key()) - print(ssh_client) - # early test self.local_upload_file = "test_upload.txt" import time @@ -256,109 +374,7 @@ def __init__(self, user): f.write(f"This file was uploaded at {time.ctime()}\n") print(f"Created local file: {self.local_upload_file}") - @staticmethod - def _create_ssh_client( host, port, username, password=None, key=None): - """ - Initializes and returns an SSH client connection. - Uses context manager for automatic cleanup. - """ - client = paramiko.SSHClient() - # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) - client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - - try: - if key: - print(f"Connecting to {host} using key-based authentication...") - client.connect(host, port, username, pkey=key, timeout=10) - else: - raise paramiko.SSHException("No Key Found") - - return client - except paramiko.AuthenticationException: - print("Authentication failed. Check your credentials or key.") - return None - except paramiko.SSHException as e: - print(f"Could not establish SSH connection: {e}") - return None - except Exception as e: - print(f"An unexpected error occurred: {e}") - return None - - - @staticmethod - def _execute_remote_command(client, command): - """ - Executes a single command on the remote server and prints the output. - """ - if not client: - return - - print(f"\n--- Executing Command: '{command}' ---") - try: - # Executes the command. stdin, stdout, and stderr are file-like objects. - # Ensure command ends with a newline character for some shell environments. - stdin, stdout, stderr = client.exec_command(command) - - # Wait for the command to finish and read the output - exit_status = stdout.channel.recv_exit_status() - - # Print standard output - stdout_data = stdout.read().decode().strip() - if stdout_data: - print("STDOUT:") - print(stdout_data) - - # Print standard error (if any) - stderr_data = stderr.read().decode().strip() - if stderr_data: - print("STDERR:") - print(stderr_data) - - print(f"Command exited with status: {exit_status}") - return exit_status - - except Exception as e: - print(f"Error executing command: {e}") - return -1 - - @staticmethod - def _transfer_file_sftp(client, local_path, remote_path, direction="put"): - """ - Transfers a file using SFTP (Secure File Transfer Protocol). - Direction can be 'put' (upload) or 'get' (download). - """ - if not client: - return - - print(f"\n--- Starting SFTP Transfer ({direction.upper()}) ---") - - try: - # Establish an SFTP connection session - sftp = client.open_sftp() - - if direction == "put": - print(f"Uploading '{local_path}' to '{remote_path}'...") - sftp.put(local_path, remote_path) - print("Upload complete.") - elif direction == "get": - print(f"Downloading '{remote_path}' to '{local_path}'...") - sftp.get(remote_path, local_path) - print("Download complete.") - else: - print("Invalid transfer direction. Use 'put' or 'get'.") - - sftp.close() - return True - - except FileNotFoundError: - print(f"Error: Local file '{local_path}' not found.") - return False - except IOError as e: - print(f"Error accessing remote file or path: {e}") - return False - except Exception as e: - print(f"An error occurred during SFTP: {e}") - return False + def launch_simulation(self): @@ -413,6 +429,18 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona #define triggers @controller.trigger("run_try_login") def run_try_login() -> None: + + # if server.state.key: + Authentificator.ssh_client = Authentificator._create_ssh_client(SimulationConstant.HOST,#test + SimulationConstant.PORT, + server.state.login, + key=Authentificator.get_key(server.state.login, server.state.password)) + + if Authentificator.ssh_client : + home = os.environ.get('HOME') + server.state.key_path = f"{home}/.ssh/id_trame" + Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {home}") + print("login login login") @controller.trigger("run_simulation") From 727c9273665c5aa611f3da4f56d29244228ac361 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 21 Nov 2025 16:53:41 +0100 Subject: [PATCH 11/70] Unlock runner --- .../src/geos/trame/app/io/simulation.py | 14 +++++--------- .../src/geos/trame/app/ui/simulation_view.py | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 670c726bd..92b395891 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -84,14 +84,8 @@ def _create_ssh_client( host, port, username, password=None, key=None) -> parami client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) try: - # if key: print(f"Connecting to {host} using key-based authentication...") client.connect(host, port, username, pkey=key, timeout=10) - # elif password: - # print(f"Connecting to {host} using uid-password authentication...") - # client.connect(host, port, username, password=password, timeout=10) - # else: - # raise paramiko.SSHException("No Key Found") return client except paramiko.AuthenticationException: @@ -437,10 +431,12 @@ def run_try_login() -> None: key=Authentificator.get_key(server.state.login, server.state.password)) if Authentificator.ssh_client : - home = os.environ.get('HOME') - server.state.key_path = f"{home}/.ssh/id_trame" - Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {home}") + id = os.environ.get('USER') + Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") + # server.state.update({"access_granted" : True, "key_path" : f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame" }) + # server.state.flush() + server.state.access_granted = True print("login login login") @controller.trigger("run_simulation") diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 0145b05f7..8b86dc80f 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -100,7 +100,7 @@ def define_simulation_view(server) -> None: ) # - access_granted = False # link to login button callback run_try_logging results + server.state.access_granted = False# link to login button callback run_try_logging results items = hint_config('p4', 12e6) vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): @@ -109,7 +109,7 @@ def define_simulation_view(server) -> None: with vuetify.VRow(): with vuetify.VCol(cols=8): - vuetify.VFileInput( + vuetify.VTextField( v_model=("key_path", None,), label="Path to ssh key", dense=True, @@ -121,7 +121,10 @@ def define_simulation_view(server) -> None: # vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): - vuetify.VBtn("Log in", click="trigger('run_try_login')"), # type: ignore + vuetify.VBtn("Log in", + click="trigger('run_try_login')", + disabled=("access_granted",) + ) # type: ignore vuetify.VDivider(thickness=5, classes="my-4") @@ -134,7 +137,7 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, - disabled=("!access_granted") + disabled=("!access_granted",) ) with vuetify.VRow(), vuetify.VCol(): @@ -148,7 +151,7 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, - disabled=("!access_granted") + disabled=("!access_granted",) # TODO callback validation of path ) @@ -159,7 +162,7 @@ def define_simulation_view(server) -> None: dense=True, clearable=True, prepend_icon="mdi-download", - disabled=("!access_granted") + disabled=("!access_granted",) # TODO callback validation of path ) @@ -171,14 +174,14 @@ def define_simulation_view(server) -> None: dense=True, hide_details=True, clearable=True, - disabled=("!access_granted") + disabled=("!access_granted",) ) vuetify.VSpacer() with vuetify.VCol(cols=1): vuetify.VBtn("Run", click="trigger('run_simulation')", - disabled=("!access_granted"), + disabled=("!access_granted",), classes="ml-auto"), # type: ignore From 3dc8a83d8b99065fa28d5aefec50892b8c5cd140 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 21 Nov 2025 18:31:25 +0100 Subject: [PATCH 12/70] wip --- .../src/geos/trame/app/io/simulation.py | 56 ++++++- .../src/geos/trame/app/ui/simulation_view.py | 148 ++++++++++-------- 2 files changed, 136 insertions(+), 68 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 92b395891..1f9e6a6d5 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -10,7 +10,7 @@ from trame_server.state import State from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner -import jinja2 +from jinja2 import Template import paramiko import os @@ -27,6 +27,35 @@ class SimulationConstant: SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" +# Load template from file +# with open("slurm_job_template.j2") as f: + # template = Template(f.read()) + +#TODO from private-assets +template_str = """#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('24:00:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err + +ulimit -s unlimited +ulimit -c unlimited + +#module purge +#module geos +#run --mpi=pmix_v3 --hint=nomultithread \ +# -n {{ ntasks }} geos \ +# -o Outputs_{{ slurm_jobid | default('${SLURM_JOBID}') }} \ +# -i {{ input_file | default('geosDeck.xml') }} + +echo "Hello world" >> hello.out +""" @@ -441,10 +470,35 @@ def run_try_login() -> None: @controller.trigger("run_simulation") def run_simulation()-> None: + + if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: + template = Template(template_str) + sdi = server.state.sd + ci ={'nodes': 2 , 'total_ranks': 96 } + rendered = template.render(job_name=server.state.simulation_job_name, + input_file=server.state.simulation_xml_filename, + nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"{ci['nodes']*sdi.selected_cluster['mem_per_node']}GB", + commment='mycomment', partition='mypart', account='myaccount' ) + + with open('job.slurm','w') as f: + f.write(rendered) + + if Authentificator.ssh_client: + Authentificator._transfer_file_sftp(Authentificator.ssh_client, + local_path='job.slurm', + remote_path=server.state.simulation_remote_path) + Authentificator._transfer_file_sftp(Authentificator.ssh_client, + remote_path=server.state.simulation_remote_path+'/job.slurm', + local_path=server.state.simulation_dl_path+'/dl.test', + direction="get") + else: + raise paramiko.SSHException + pass @controller.trigger("kill_simulation") def kill_simulation(pid)->None: + # exec scancel jobid pass def __del__(self): diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 8b86dc80f..531b65178 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -5,78 +5,92 @@ from geos.trame.app.ui.simulation_status_view import SimulationStatusView import json -def suggest_decomposition(n_unknowns, +class SuggestDecomposition: + + def __init__(self, cluster_name, n_unknowns, job_type = 'cpu'): + + # return ["P4: 1x22", "P4: 2x11"] + with open('/data/pau901/SIM_CS/04_WORKSPACE/USERS/jfranc/geosPythonPackages/geos-trame/src/geos/trame/assets/cluster.json','r') as file: + all_cluster = json.load(file) + self.selected_cluster = list(filter(lambda d: d.get('name')==cluster_name, all_cluster["clusters"]))[0] + self.n_unknowns = n_unknowns + self.job_type = job_type + + # @property + # def selected_cluster(self): + # return self.selected_cluster + + @staticmethod + def compute( n_unknowns, memory_per_unknown_bytes, node_memory_gb, cores_per_node, min_unknowns_per_rank=10000, strong_scaling=True): - """ - Suggests node/rank distribution for a cluster computation. - - Parameters: - - n_unknowns: total number of unknowns - - memory_per_unknown_bytes: estimated memory per unknown - - node_memory_gb: available memory per node - - cores_per_node: cores available per node - - min_unknowns_per_rank: minimum for efficiency - - strong_scaling: True if problem size is fixed - - Note: - - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers - - Use power-of-2 decompositions when possible (helps with communication patterns) - - For 3D problems, try to maintain cubic subdomains (minimizes surface-to-volume ratio, reducing communication) - - Don't oversubscribe: avoid using more ranks than provide parallel efficiency - - """ - - # Memory constraint - node_memory_bytes = node_memory_gb * 1e9 - max_unknowns_per_node = int(0.8 * node_memory_bytes / memory_per_unknown_bytes) - - # Compute minimum nodes needed - min_nodes = max(1, (n_unknowns + max_unknowns_per_node - 1) // max_unknowns_per_node) - - # Determine ranks per node - unknowns_per_node = n_unknowns // min_nodes - unknowns_per_rank = max(min_unknowns_per_rank, unknowns_per_node // cores_per_node) - - # Calculate total ranks needed - n_ranks = max(1, n_unknowns // unknowns_per_rank) - - # Distribute across nodes - ranks_per_node = min(cores_per_node, (n_ranks + min_nodes - 1) // min_nodes) - n_nodes = (n_ranks + ranks_per_node - 1) // ranks_per_node - - return { - 'nodes': n_nodes, - 'ranks_per_node': ranks_per_node, - 'total_ranks': n_nodes * ranks_per_node, - 'unknowns_per_rank': n_unknowns // (n_nodes * ranks_per_node) - } - -def hint_config(cluster_name, n_unknowns, job_type = 'cpu'): + """ + Suggests node/rank distribution for a cluster computation. + + Parameters: + - n_unknowns: total number of unknowns + - memory_per_unknown_bytes: estimated memory per unknown + - node_memory_gb: available memory per node + - cores_per_node: cores available per node + - min_unknowns_per_rank: minimum for efficiency + - strong_scaling: True if problem size is fixed + + Note: + - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers + - Use power-of-2 decompositions when possible (helps with communication patterns) + - For 3D problems, try to maintain cubic subdomains (minimizes surface-to-volume ratio, reducing communication) + - Don't oversubscribe: avoid using more ranks than provide parallel efficiency + + """ + + # Memory constraint + node_memory_bytes = node_memory_gb * 1e9 + max_unknowns_per_node = int(0.8 * node_memory_bytes / memory_per_unknown_bytes) + + # Compute minimum nodes needed + min_nodes = max(1, (n_unknowns + max_unknowns_per_node - 1) // max_unknowns_per_node) + + # Determine ranks per node + unknowns_per_node = n_unknowns // min_nodes + unknowns_per_rank = max(min_unknowns_per_rank, unknowns_per_node // cores_per_node) + + # Calculate total ranks needed + n_ranks = max(1, n_unknowns // unknowns_per_rank) + + # Distribute across nodes + ranks_per_node = min(cores_per_node, (n_ranks + min_nodes - 1) // min_nodes) + n_nodes = (n_ranks + ranks_per_node - 1) // ranks_per_node + + + return { + 'nodes': n_nodes, + 'ranks_per_node': ranks_per_node, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // (n_nodes * ranks_per_node) + } - # return ["P4: 1x22", "P4: 2x11"] - with open('/data/pau901/SIM_CS/04_WORKSPACE/USERS/jfranc/geosPythonPackages/geos-trame/src/geos/trame/assets/cluster.json','r') as file: - all_cluster = json.load(file) - selected_cluster = list(filter(lambda d: d.get('name')==cluster_name, all_cluster["clusters"]))[0] - if job_type == 'cpu': #make it an enum - sd = suggest_decomposition(n_unknowns, - 64, - selected_cluster['mem_per_node'], - selected_cluster['cpu']['per_node'] - ) - # elif job_type == 'gpu': - # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] + def to_list(self): + + if self.job_type == 'cpu': #make it an enum + sd = SuggestDecomposition.compute(self.n_unknowns, + 64, + self.selected_cluster['mem_per_node'], + self.selected_cluster['cpu']['per_node'] + ) + # elif job_type == 'gpu': + # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] - return [ f"{selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] + return [ f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] def define_simulation_view(server) -> None: + with vuetify.VContainer(): with vuetify.VRow(): with vuetify.VCol(cols=4): @@ -101,7 +115,9 @@ def define_simulation_view(server) -> None: # server.state.access_granted = False# link to login button callback run_try_logging results - items = hint_config('p4', 12e6) + server.state.simulation_xml_filename = "geosDeck.xml" + server.state.sd = SuggestDecomposition('p4', 12e6) + items = server.state.sd.to_list() vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): vuetify.VSelect(label="Cluster", @@ -131,21 +147,19 @@ def define_simulation_view(server) -> None: with vuetify.VRow(): with vuetify.VCol(): - vuetify.VFileInput( - v_model=("simulation_cmd_filename", SimulationConstant.SIMULATION_DEFAULT_FILE_NAME), + vuetify.VTextField( + v_model=("simulation_xml_filename",), label="Simulation file name", dense=True, hide_details=True, clearable=True, + readonly=True, disabled=("!access_granted",) ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( - v_model=( - "simulation_files_path", - None, - ), + v_model=("simulation_remote_path",None), label="Path where to write files and launch code", prepend_icon="mdi-upload", dense=True, @@ -157,7 +171,7 @@ def define_simulation_view(server) -> None: with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( - v_model=("simulation_dl_path",), + v_model=("simulation_dl_path", None), label="Simulation download path", dense=True, clearable=True, From 077e6faa5314b688317c7692835b6097d0132e3a Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 21 Nov 2025 20:29:34 +0100 Subject: [PATCH 13/70] full PoC --- geos-trame/src/geos/trame/app/io/simulation.py | 7 ++++--- geos-trame/src/geos/trame/app/ui/simulation_view.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 1f9e6a6d5..e6c34b772 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -486,10 +486,11 @@ def run_simulation()-> None: if Authentificator.ssh_client: Authentificator._transfer_file_sftp(Authentificator.ssh_client, local_path='job.slurm', - remote_path=server.state.simulation_remote_path) + remote_path=f'{server.state.simulation_remote_path}/job.slurm', + direction="put") Authentificator._transfer_file_sftp(Authentificator.ssh_client, - remote_path=server.state.simulation_remote_path+'/job.slurm', - local_path=server.state.simulation_dl_path+'/dl.test', + remote_path=f'{server.state.simulation_remote_path}/job.slurm', + local_path=f'{server.state.simulation_dl_path}/dl.test', direction="get") else: raise paramiko.SSHException diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 531b65178..94bc1bc87 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -114,7 +114,7 @@ def define_simulation_view(server) -> None: ) # - server.state.access_granted = False# link to login button callback run_try_logging results + server.state.access_granted = False server.state.simulation_xml_filename = "geosDeck.xml" server.state.sd = SuggestDecomposition('p4', 12e6) items = server.state.sd.to_list() From 19c04e364953a1e44c3e25265e0268052b97a964 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Sat, 22 Nov 2025 17:20:13 +0100 Subject: [PATCH 14/70] wip --- geos-trame/src/geos/trame/app/io/simulation.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index e6c34b772..738f3cd96 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -461,7 +461,8 @@ def run_try_login() -> None: if Authentificator.ssh_client : id = os.environ.get('USER') - Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") + Authentificator._execute_remote_command(Authentificator.ssh_client, f"ps aux") + # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") # server.state.update({"access_granted" : True, "key_path" : f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame" }) # server.state.flush() @@ -492,6 +493,20 @@ def run_simulation()-> None: remote_path=f'{server.state.simulation_remote_path}/job.slurm', local_path=f'{server.state.simulation_dl_path}/dl.test', direction="get") + + + # TODO later ASYNC and subprocess # Submit job using subprocess (local ssh call) + # import subprocess + # result = subprocess.run(["ssh", "user@remote.host", "sbatch /remote/path/job.slurm"], + # capture_output=True, text=True) + + # PARAMIKO >> subprocess + # # Execute command remotely + # stdin, stdout, stderr = client.exec_command("ls -l /tmp") + # print(stdout.read().decode()) + # parse stdout + + else: raise paramiko.SSHException From 3eaabe912f8f9a7a4e200914b091a7e0a9de3b7d Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 25 Nov 2025 15:48:42 +0100 Subject: [PATCH 15/70] authorship --- geos-trame/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/geos-trame/pyproject.toml b/geos-trame/pyproject.toml index 4e98f6ce1..a68dafe0a 100644 --- a/geos-trame/pyproject.toml +++ b/geos-trame/pyproject.toml @@ -8,7 +8,8 @@ version = "1.0.0" description = "Geos Simulation Modeler" authors = [{name = "GEOS Contributors" }] maintainers = [{name = "Alexandre Benedicto", email = "alexandre.benedicto@external.totalenergies.com" }, - {name = "Paloma Martinez", email = "paloma.martinez@external.totalenergies.com" }] + {name = "Paloma Martinez", email = "paloma.martinez@external.totalenergies.com" }, + {name = "Jacques Franc", email = "jacques.franc@external.totalenergies.com" },] license = {text = "Apache-2.0"} classifiers = [ "Development Status :: 4 - Beta", From d54f8b3ea1f8aa53912e75c12fa69b86bb2bbffb Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 27 Nov 2025 10:06:00 +0100 Subject: [PATCH 16/70] change host --- geos-trame/src/geos/trame/app/io/simulation.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 738f3cd96..01f9d467e 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -20,7 +20,7 @@ @dataclass(frozen=True) class SimulationConstant: SIMULATION_GEOS_PATH = "/workrd/users/" - HOST = "fr-vmx00368.main.glb.corp.local" #"p4log01" # Only run on P4 machine + HOST = "p4log01" # Only run on P4 machine REMOTE_HOME_BASE = "/users" PORT = 22 SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" @@ -478,8 +478,8 @@ def run_simulation()-> None: ci ={'nodes': 2 , 'total_ranks': 96 } rendered = template.render(job_name=server.state.simulation_job_name, input_file=server.state.simulation_xml_filename, - nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"{ci['nodes']*sdi.selected_cluster['mem_per_node']}GB", - commment='mycomment', partition='mypart', account='myaccount' ) + nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"{2}GB", + commment="GEOS,CCS,testTrame", partition='p4_general', account='myaccount' ) with open('job.slurm','w') as f: f.write(rendered) @@ -489,8 +489,13 @@ def run_simulation()-> None: local_path='job.slurm', remote_path=f'{server.state.simulation_remote_path}/job.slurm', direction="put") + + Authentificator._execute_remote_command(Authentificator.ssh_client, + f'cd {server.state.simulation_remote_path} && sbatch job.slurm') + Authentificator._execute_remote_command(Authentificator.ssh_client, + f'squeue -u $USER') Authentificator._transfer_file_sftp(Authentificator.ssh_client, - remote_path=f'{server.state.simulation_remote_path}/job.slurm', + remote_path=f'{server.state.simulation_remote_path}/hello.out', local_path=f'{server.state.simulation_dl_path}/dl.test', direction="get") From c1a8395b80b8adf734ea9186145af24793d3f343 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 28 Nov 2025 15:17:56 +0100 Subject: [PATCH 17/70] wip --- .../src/geos/trame/app/io/simulation.py | 81 +++++++++++++++---- .../src/geos/trame/app/ui/simulation_view.py | 18 ++++- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 01f9d467e..745d4a4b0 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -47,14 +47,18 @@ class SimulationConstant: ulimit -s unlimited ulimit -c unlimited -#module purge -#module geos -#run --mpi=pmix_v3 --hint=nomultithread \ -# -n {{ ntasks }} geos \ -# -o Outputs_{{ slurm_jobid | default('${SLURM_JOBID}') }} \ -# -i {{ input_file | default('geosDeck.xml') }} - -echo "Hello world" >> hello.out +module purge +module use /workrd/SCR/GEOS/l1092082/modules +module load geos-develop-d36028cb-hypreUpdate + +export HDF5_USE_FILE_LOCKING=FALSE +export OMP_NUM_THREADS=1 + +srun --mpi=pmix_v3 --hint=nomultithread \ + -n {{ ntasks }} geos \ + -o Outputs_{{ slurm_jobid | default('${SLURM_JOBID}') }} \ + -i {{ input_file | default('geosDeck.xml') }} | tee log.out + """ @@ -63,6 +67,29 @@ class Authentificator:#namespacing more than anything else ssh_client : paramiko.SSHClient + @staticmethod + def _sftp_copy_tree(ssh_client, local_root, remote_root): + # Connect to remote server + sftp = ssh_client.open_sftp() + + local_root = Path(local_root).resolve() + + for path in local_root.rglob("*"): + remote_path = f"{remote_root}/{path.relative_to(local_root)}" + + if path.is_dir(): + # Create remote directory if it doesn't exist + try: + sftp.mkdir(remote_path) + except IOError: + # Directory may already exist + pass + else: + # Upload file + sftp.put(str(path), remote_path) + + sftp.close() + @staticmethod def get_key( id, pword ): @@ -478,24 +505,44 @@ def run_simulation()-> None: ci ={'nodes': 2 , 'total_ranks': 96 } rendered = template.render(job_name=server.state.simulation_job_name, input_file=server.state.simulation_xml_filename, - nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"{2}GB", - commment="GEOS,CCS,testTrame", partition='p4_general', account='myaccount' ) + nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0",#TODO profile to use the correct amount + commment=server.state.slurm_comment, partition='p4_general', account='myaccount' ) - with open('job.slurm','w') as f: - f.write(rendered) + # with open(Path(server.state.simulation_xml_filename).parent/Path('job.slurm'),'w') as f: + # f.write(rendered) if Authentificator.ssh_client: - Authentificator._transfer_file_sftp(Authentificator.ssh_client, - local_path='job.slurm', - remote_path=f'{server.state.simulation_remote_path}/job.slurm', - direction="put") + #write slurm directly on remote + try: + sftp = Authentificator.ssh_client.open_sftp() + remote_path = Path(server.state.simulation_xml_filename).parent/Path('job.slurm') + with sftp.file(remote_path,'w') as f: + f.write(rendered) + + # except FileExistsError: + # print(f"Error: Local file '{remote_path}' not found.") + except PermissionError as e: + print(f"Permission error: {e}") + except IOError as e: + print(f"Error accessing remote file or path: {e}") + except Exception as e: + print(f"An error occurred during SFTP: {e}") + + Authentificator._sftp_copy_tree(Authentificator.ssh_client, + Path(server.state.simulation_xml_filename).parent, + Path(server.state.simulation_remote_path)) + Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch job.slurm') + + Authentificator._execute_remote_command(Authentificator.ssh_client, f'squeue -u $USER') + + Authentificator._transfer_file_sftp(Authentificator.ssh_client, - remote_path=f'{server.state.simulation_remote_path}/hello.out', + remote_path=f'{server.state.simulation_remote_path}/log.out', local_path=f'{server.state.simulation_dl_path}/dl.test', direction="get") diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 94bc1bc87..40b0c5c63 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -136,24 +136,36 @@ def define_simulation_view(server) -> None: # vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") - with vuetify.VCol(cols=2): + with vuetify.VCol(cols=1): vuetify.VBtn("Log in", click="trigger('run_try_login')", disabled=("access_granted",) ) # type: ignore + # + vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") + with vuetify.VCol(cols=1): + vuetify.VTextField( + v_model=("slurm_comment", None,), + label="Comment to slurm", + dense=True, + hide_details=True, + clearable=True, + ) # type: ignore + vuetify.VDivider(thickness=5, classes="my-4") with vuetify.VRow(): with vuetify.VCol(): - vuetify.VTextField( + vuetify.VFileInput( v_model=("simulation_xml_filename",), label="Simulation file name", dense=True, hide_details=True, clearable=True, - readonly=True, + multiple=True, + # readonly=True, disabled=("!access_granted",) ) From 85964724a34e77b9a2326a5129e323d3c3fc5bad Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 28 Nov 2025 17:56:23 +0100 Subject: [PATCH 18/70] update versions f/ VFileUpload --- geos-trame/pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/geos-trame/pyproject.toml b/geos-trame/pyproject.toml index a68dafe0a..1c823a34b 100644 --- a/geos-trame/pyproject.toml +++ b/geos-trame/pyproject.toml @@ -32,12 +32,12 @@ keywords = [ dependencies = [ "typing-extensions==4.12.2", "trame==3.6.5", - "trame-vuetify==2.7.1", + "trame-vuetify==3.1.0", "trame-code==1.0.1", "trame-server==3.2.3", - "trame-client==3.5.0", + "trame-client==3.11.2", "trame-simput==2.4.3", - "trame-vtk>=2.8.14", + "trame-vtk==2.10.0", "matplotlib==3.9.4", "trame-matplotlib==2.0.3", "trame-components==2.4.2", From db83e7a1206fec614153c4494a52ff5d6b13012e Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 1 Dec 2025 15:19:34 +0100 Subject: [PATCH 19/70] another view --- .../src/geos/trame/app/io/simulation.py | 5 +++-- .../src/geos/trame/app/ui/simulation_view.py | 21 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 745d4a4b0..435e90f4a 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -499,9 +499,10 @@ def run_try_login() -> None: @controller.trigger("run_simulation") def run_simulation()-> None: - if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: + # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: + if server.state.access_granted and server.state.simulation_xml_filename: template = Template(template_str) - sdi = server.state.sd + # sdi = server.state.sd ci ={'nodes': 2 , 'total_ranks': 96 } rendered = template.render(job_name=server.state.simulation_job_name, input_file=server.state.simulation_xml_filename, diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 40b0c5c63..be6ea3900 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -115,9 +115,9 @@ def define_simulation_view(server) -> None: # server.state.access_granted = False - server.state.simulation_xml_filename = "geosDeck.xml" - server.state.sd = SuggestDecomposition('p4', 12e6) - items = server.state.sd.to_list() + server.state.simulation_xml_filename = [ ] + sd = SuggestDecomposition('p4', 12e6) + items = sd.to_list() vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): vuetify.VSelect(label="Cluster", @@ -157,17 +157,22 @@ def define_simulation_view(server) -> None: vuetify.VDivider(thickness=5, classes="my-4") with vuetify.VRow(): - with vuetify.VCol(): - vuetify.VFileInput( + with vuetify.VCol(cols=4): + vuetify.VFileUpload( v_model=("simulation_xml_filename",), - label="Simulation file name", - dense=True, + title="Simulation file name", + density='comfortable', hide_details=True, - clearable=True, + # clearable=True, multiple=True, + filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', # readonly=True, disabled=("!access_granted",) ) + with vuetify.VCol(cols=4): + with vuetify.VList(): + with vuetify.VListItem( v_for=(f"file in {server.state.simulation_xml_filename}"), key="i", value="file" ): + vuetify.VListItemTitle( "{{ file.name }}" ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( From 21b4492fdac379fc35c27bc427f549b172bed5e3 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 1 Dec 2025 17:39:17 +0100 Subject: [PATCH 20/70] new list update --- .../src/geos/trame/app/ui/simulation_view.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index be6ea3900..a5295cc08 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -87,10 +87,19 @@ def to_list(self): return [ f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] + def define_simulation_view(server) -> None: + @server.state.change("simulation_xml_temp") + def on_temp_change(simulation_xml_temp : list, **kw): + current_list = server.state.simulation_xml_filename + + new_list = current_list + simulation_xml_temp + server.state.simulation_xml_filename = new_list + server.state.simulation_xml_temp = [] + with vuetify.VContainer(): with vuetify.VRow(): with vuetify.VCol(cols=4): @@ -116,6 +125,9 @@ def define_simulation_view(server) -> None: # server.state.access_granted = False server.state.simulation_xml_filename = [ ] + # server.state.simulation_xml_temp = [ ] + + sd = SuggestDecomposition('p4', 12e6) items = sd.to_list() vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") @@ -159,7 +171,7 @@ def define_simulation_view(server) -> None: with vuetify.VRow(): with vuetify.VCol(cols=4): vuetify.VFileUpload( - v_model=("simulation_xml_filename",), + v_model=("simulation_xml_temp",[]), title="Simulation file name", density='comfortable', hide_details=True, @@ -167,12 +179,13 @@ def define_simulation_view(server) -> None: multiple=True, filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', # readonly=True, - disabled=("!access_granted",) + disabled=("access_granted",) ) with vuetify.VCol(cols=4): with vuetify.VList(): - with vuetify.VListItem( v_for=(f"file in {server.state.simulation_xml_filename}"), key="i", value="file" ): + with vuetify.VListItem( v_for=("(file,i) in simulation_xml_filename"), key="i", value="file" ): vuetify.VListItemTitle( "{{ file.name }}" ) + vuetify.VListItemSubtitle("{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}") with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( From 6c50a55e6d1562b2da313d8237b1bc9a4b681fb5 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 2 Dec 2025 08:42:01 +0100 Subject: [PATCH 21/70] loading files complete --- .../src/geos/trame/app/io/simulation.py | 2 ++ .../src/geos/trame/app/ui/simulation_view.py | 34 ++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 435e90f4a..7bc879a0b 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -496,6 +496,8 @@ def run_try_login() -> None: server.state.access_granted = True print("login login login") + + @controller.trigger("run_simulation") def run_simulation()-> None: diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index a5295cc08..f5f45de7d 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -93,13 +93,37 @@ def to_list(self): def define_simulation_view(server) -> None: @server.state.change("simulation_xml_temp") - def on_temp_change(simulation_xml_temp : list, **kw): + def on_temp_change(simulation_xml_temp : list, **_): current_list = server.state.simulation_xml_filename new_list = current_list + simulation_xml_temp server.state.simulation_xml_filename = new_list server.state.simulation_xml_temp = [] + @server.state.change("simulation_xml_filename") + def on_simfiles_change(simulation_xml_filename : list, **_): + import re + pattern = re.compile(r"\.xml$", re.IGNORECASE) + has_xml = any(pattern.search(file if isinstance(file, str) else file.get("name", "")) for file in simulation_xml_filename) + server.state.is_valid_jobfiles = has_xml + + + + # @controller.trigger("run_remove_jobfile") + def run_remove_jobfile(index_to_remove : int) -> None: + # for now just check there is an xml + current_files = list(server.state.simulation_xml_filename) # On prend une copie de la liste + if 0 <= index_to_remove < len(current_files): + # 1. Supprimer l'élément de la copie de la liste + del current_files[index_to_remove] + + # 2. Remplacer la variable d'état par la nouvelle liste. + # Ceci est CRITIQUE pour la réactivité, car cela force Vue.js à se mettre à jour. + server.state.simulation_xml_filename = current_files + print(f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}") + else: + print(f"Erreur: Index de suppression invalide ({index_to_remove}).") + with vuetify.VContainer(): with vuetify.VRow(): with vuetify.VCol(cols=4): @@ -124,9 +148,8 @@ def on_temp_change(simulation_xml_temp : list, **kw): # server.state.access_granted = False + server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [ ] - # server.state.simulation_xml_temp = [ ] - sd = SuggestDecomposition('p4', 12e6) items = sd.to_list() @@ -186,6 +209,9 @@ def on_temp_change(simulation_xml_temp : list, **kw): with vuetify.VListItem( v_for=("(file,i) in simulation_xml_filename"), key="i", value="file" ): vuetify.VListItemTitle( "{{ file.name }}" ) vuetify.VListItemSubtitle("{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}") + with vuetify.VListItemAction(): + vuetify.VBtn(small=True, icon=True, children=[vuetify.VIcon("mdi-minus-circle-outline")], + click=(run_remove_jobfile, "[i]") ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( @@ -225,7 +251,7 @@ def on_temp_change(simulation_xml_temp : list, **kw): with vuetify.VCol(cols=1): vuetify.VBtn("Run", click="trigger('run_simulation')", - disabled=("!access_granted",), + disabled=("!is_valid_jobfiles",), classes="ml-auto"), # type: ignore From 45358900c6eb808b864221a5fef8e6b68b6ac310 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 2 Dec 2025 14:00:45 +0100 Subject: [PATCH 22/70] first working v --- .../src/geos/trame/app/io/simulation.py | 93 ++++++++++++++----- .../src/geos/trame/app/ui/simulation_view.py | 4 +- 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 7bc879a0b..290f86471 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -68,27 +68,45 @@ class Authentificator:#namespacing more than anything else ssh_client : paramiko.SSHClient @staticmethod - def _sftp_copy_tree(ssh_client, local_root, remote_root): + def _sftp_copy_tree(ssh_client, file_tree, remote_root): # Connect to remote server sftp = ssh_client.open_sftp() + + Authentificator.dfs_tree(file_tree["structure"], file_tree["root"], sftp=sftp, remote_root=remote_root) - local_root = Path(local_root).resolve() - - for path in local_root.rglob("*"): - remote_path = f"{remote_root}/{path.relative_to(local_root)}" + sftp.close() - if path.is_dir(): - # Create remote directory if it doesn't exist - try: - sftp.mkdir(remote_path) - except IOError: - # Directory may already exist - pass - else: - # Upload file - sftp.put(str(path), remote_path) + @staticmethod + def dfs_tree(node, path, sftp, remote_root): + + lp = Path(path) + rp = Path(remote_root)/lp + + if isinstance(node, list): + for file in node: + # sftp.put(lp/Path(file), rp/Path(file)) + with sftp.file( str(rp/Path(file.get('name'))), 'w') as f: + f.write(file.get('content')) + print(f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") + elif isinstance(node, dict): + if "files" in node: + for file in node["files"]: + # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) + with sftp.file( str(rp/Path(file.get('name'))), 'w') as f: + f.write(file.get('content')) + print(f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") + if "subfolders" in node: + for subfolder, content in node["subfolders"].items(): + sftp.mkdir( str(rp/Path(subfolder))) + print(f"creating {rp/Path(subfolder)}") + Authentificator.dfs_tree(content, lp/Path(subfolder), sftp, remote_root) + + for folder, content in node.items(): + if folder not in ["files", "subfolders"]: + sftp.mkdir( str(rp/Path(folder)) ) + print(f"creating {rp/Path(folder)}") + Authentificator.dfs_tree(content, lp/Path(folder), sftp, remote_root) - sftp.close() @staticmethod def get_key( id, pword ): @@ -496,7 +514,40 @@ def run_try_login() -> None: server.state.access_granted = True print("login login login") - + @staticmethod + def gen_tree(xml_filename): + + import re + xml_pattern = re.compile(r"\.xml$", re.IGNORECASE) + mesh_pattern = re.compile(r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE) + table_pattern = re.compile(r"\.(txt|dat|csv)$", re.IGNORECASE) + xml_matches = [] + mesh_matches = [] + table_matches = [] + + for file in xml_filename: + if xml_pattern.search(file.get("name","")): + xml_matches.append(file) + elif mesh_pattern.search(file.get("name","")): + mesh_matches.append(file) + elif table_pattern.search(file.get("name","")): + table_matches.append(file) + + file_tree = { + 'root' : '.', + "structure": { + "files" : xml_matches, + "subfolders": { + "mesh": mesh_matches, + "tables": table_matches + # "subfolders": { + # "inner_tables_1": ["placeholder.txt"], + # "inner_tables_2": ["placeholder.txt"] + # } + } + } + } + return file_tree @controller.trigger("run_simulation") def run_simulation()-> None: @@ -518,8 +569,8 @@ def run_simulation()-> None: #write slurm directly on remote try: sftp = Authentificator.ssh_client.open_sftp() - remote_path = Path(server.state.simulation_xml_filename).parent/Path('job.slurm') - with sftp.file(remote_path,'w') as f: + remote_path = Path(server.state.simulation_remote_path)/Path('job.slurm') + with sftp.file( str(remote_path),'w' ) as f: f.write(rendered) # except FileExistsError: @@ -532,8 +583,8 @@ def run_simulation()-> None: print(f"An error occurred during SFTP: {e}") Authentificator._sftp_copy_tree(Authentificator.ssh_client, - Path(server.state.simulation_xml_filename).parent, - Path(server.state.simulation_remote_path)) + gen_tree(server.state.simulation_xml_filename), + server.state.simulation_remote_path) Authentificator._execute_remote_command(Authentificator.ssh_client, diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index f5f45de7d..2f7fc95d7 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -202,7 +202,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: multiple=True, filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', # readonly=True, - disabled=("access_granted",) + disabled=("!access_granted",) ) with vuetify.VCol(cols=4): with vuetify.VList(): @@ -210,7 +210,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: vuetify.VListItemTitle( "{{ file.name }}" ) vuetify.VListItemSubtitle("{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}") with vuetify.VListItemAction(): - vuetify.VBtn(small=True, icon=True, children=[vuetify.VIcon("mdi-minus-circle-outline")], + vuetify.VBtn(small=True, icon="mdi-minus-circle-outline", click=(run_remove_jobfile, "[i]") ) with vuetify.VRow(), vuetify.VCol(): From 0c437b0c03381ad3a6b6499be481265bf240e649 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 2 Dec 2025 15:50:10 +0100 Subject: [PATCH 23/70] start status --- .../src/geos/trame/app/io/simulation.py | 125 ++++++++---------- .../src/geos/trame/app/ui/simulation_view.py | 4 +- 2 files changed, 60 insertions(+), 69 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 290f86471..656c57b9b 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -11,7 +11,8 @@ from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner from jinja2 import Template -import paramiko +import paramiko +import re import os #TODO move outside @@ -26,6 +27,8 @@ class SimulationConstant: SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" + # replace by conf-file json + # Load template from file # with open("slurm_job_template.j2") as f: @@ -65,7 +68,7 @@ class SimulationConstant: class Authentificator:#namespacing more than anything else - ssh_client : paramiko.SSHClient + ssh_client : Optional[paramiko.SSHClient] = None @staticmethod def _sftp_copy_tree(ssh_client, file_tree, remote_root): @@ -97,14 +100,20 @@ def dfs_tree(node, path, sftp, remote_root): print(f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") if "subfolders" in node: for subfolder, content in node["subfolders"].items(): - sftp.mkdir( str(rp/Path(subfolder))) - print(f"creating {rp/Path(subfolder)}") + try: + sftp.stat( str(rp/Path(subfolder)) ) + except FileNotFoundError: + print(f"creating {rp/Path(subfolder)}") + sftp.mkdir( str(rp/Path(subfolder)) ) Authentificator.dfs_tree(content, lp/Path(subfolder), sftp, remote_root) for folder, content in node.items(): if folder not in ["files", "subfolders"]: - sftp.mkdir( str(rp/Path(folder)) ) - print(f"creating {rp/Path(folder)}") + try: + sftp.stat( str(rp/Path(folder)) ) + except FileNotFoundError: + print(f"creating {rp/Path(folder)}") + sftp.mkdir( str(rp/Path(folder)) ) Authentificator.dfs_tree(content, lp/Path(folder), sftp, remote_root) @@ -203,11 +212,11 @@ def _execute_remote_command(client, command): print(stderr_data) print(f"Command exited with status: {exit_status}") - return exit_status + return (exit_status,stdout_data, stderr_data) except Exception as e: print(f"Error executing command: {e}") - return -1 + return (-1,"","") @staticmethod def _transfer_file_sftp(client, local_path, remote_path, direction="put"): @@ -363,23 +372,6 @@ def parse_launcher_output(output: str) -> SimulationInformation: # json.dumps(info.to_dict()), # type: ignore # ) - -##TODO yay slurm -def get_launcher_command(launcher_params: LauncherParams) -> str: - launcher_cmd_args = ( - f"{SimulationConstant.SIMULATION_GEOS_PATH} " - f"--nprocs {launcher_params.simulation_nb_process} " - f"--fname {launcher_params.simulation_cmd_filename} " - f"--job_name {launcher_params.simulation_job_name}" - ) - - # state.simulation_nb_process is supposed to be an integer, but the UI present a VTextField, - # so if user changes it, then it can be defined as a str - if int(launcher_params.simulation_nb_process) > 1: - launcher_cmd_args += " --partition" - return launcher_cmd_args - - # def get_simulation_screenshot_timestep(filename: str) -> int: # """ # From a given file name returns the time step. @@ -444,41 +436,11 @@ def __init__(self, user): - - def launch_simulation(self): - - if self.ssh_client: - try: - # --- 3. Execute a Remote Command --- - self._execute_remote_command(self.ssh_client, "ls -l /tmp") - - # --- 4. Upload a File (PUT) --- - remote_path_upload = f"/tmp/{self.local_upload_file}" - self._transfer_file_sftp(self.ssh_client, self.local_upload_file, remote_path_upload, direction="put") - - # --- 5. Verify Upload by Listing Remote Directory --- - self._execute_remote_command(self.ssh_client, f"ls -l /tmp") - - # --- 6. Download a File (GET) --- - remote_download_file = f"/workrd/{self.local_upload_file}" # Use a known remote file - local_download_file = "downloaded_hostname.txt" - self._transfer_file_sftp(self.ssh_client, local_download_file, remote_download_file, direction="get") - - # --- 7. Clean up the uploaded file (Optional) --- - self._execute_remote_command(self.ssh_client, f"rm {remote_path_upload}") - - finally: - # --- 8. Close the connection --- - self.ssh_client.close() - print("\nSSH Connection closed.") - - class Simulation: """ Simulation component. Fills the UI with the screenshot as read from the simulation outputs folder and a graph with the time series from the simulation. - Requires a simulation runner providing information on the output path of the simulation to monitor and ways to trigger the simulation. """ @@ -488,11 +450,11 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona controller = server.controller self._sim_runner = sim_runner self._sim_info_dir = sim_info_dir or SimulationConstant.SIMULATIONS_INFORMATION_FOLDER_PATH + server.state.job_ids = [] self._job_status_watcher: Optional[AsyncPeriodicRunner] = None self._job_status_watcher_period_ms = 2000 - self.start_result_streams() #define triggers @controller.trigger("run_try_login") @@ -505,14 +467,14 @@ def run_try_login() -> None: key=Authentificator.get_key(server.state.login, server.state.password)) if Authentificator.ssh_client : - id = os.environ.get('USER') - Authentificator._execute_remote_command(Authentificator.ssh_client, f"ps aux") + # id = os.environ.get('USER') + # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ps aux") # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") # server.state.update({"access_granted" : True, "key_path" : f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame" }) # server.state.flush() server.state.access_granted = True - print("login login login") + print("login login login") @staticmethod def gen_tree(xml_filename): @@ -587,12 +549,22 @@ def run_simulation()-> None: server.state.simulation_remote_path) - Authentificator._execute_remote_command(Authentificator.ssh_client, + _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch job.slurm') - Authentificator._execute_remote_command(Authentificator.ssh_client, - f'squeue -u $USER') + + job_lines = sout.strip() + job_id = re.search(r"\b\d+\b", job_lines[0]) + + server.state.job_ids.append(job_id) + + + + + # Authentificator._execute_remote_command(Authentificator.ssh_client, + # f'squeue -u $USER') + self.start_result_streams() Authentificator._transfer_file_sftp(Authentificator.ssh_client, @@ -683,13 +655,32 @@ def stop_result_streams(self): self._job_status_watcher.stop() def start_result_streams(self) -> None: - pass - # self.stop_result_streams() + self.stop_result_streams() - # self._job_status_watcher = AsyncPeriodicRunner( - # self._update_job_status, period_ms=self._job_status_watcher_period_ms - # ) + self._job_status_watcher = AsyncPeriodicRunner( + self.check_jobs, period_ms=self._job_status_watcher_period_ms + ) + def check_jobs(self): + if Authentificator.ssh_client: + try: + _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'date && squeue -u $USER') + job_lines = sout.strip().split("\n")[2:] + job_id = job_lines[0].split()[0] + job_status = job_lines[0].split()[4] + job_name = job_lines[0].split()[2] + print(f"{job_lines}\n job id:{job_id}\n status:{job_status}\n name:{job_name}") + + except PermissionError as e: + print(f"Permission error: {e}") + except IOError as e: + print(f"Error accessing remote file or path: {e}") + except Exception as e: + print(f"An error occurred during SFTP: {e}") + else: + return None + + def start_simulation(self) -> None: state = self._server.state script_path = None diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 2f7fc95d7..b6783a3fd 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -215,7 +215,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( - v_model=("simulation_remote_path",None), + v_model=("simulation_remote_path", "/workrd/users/l1165478/Example"), label="Path where to write files and launch code", prepend_icon="mdi-upload", dense=True, @@ -227,7 +227,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( - v_model=("simulation_dl_path", None), + v_model=("simulation_dl_path", "/users/l1165478/tmp/Example"), label="Simulation download path", dense=True, clearable=True, From 81c1fc5f67b8bb093de9610df7c707aa46afdfc9 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 2 Dec 2025 16:13:59 +0100 Subject: [PATCH 24/70] nicer --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index b6783a3fd..2637e96f2 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -206,12 +206,11 @@ def run_remove_jobfile(index_to_remove : int) -> None: ) with vuetify.VCol(cols=4): with vuetify.VList(): - with vuetify.VListItem( v_for=("(file,i) in simulation_xml_filename"), key="i", value="file" ): + with vuetify.VListItem( v_for=("(file,i) in simulation_xml_filename"), key="i", value="file", + prepend_icon="mdi-minus-circle-outline", + click=(run_remove_jobfile, "[i]") ): vuetify.VListItemTitle( "{{ file.name }}" ) vuetify.VListItemSubtitle("{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}") - with vuetify.VListItemAction(): - vuetify.VBtn(small=True, icon="mdi-minus-circle-outline", - click=(run_remove_jobfile, "[i]") ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( @@ -267,5 +266,6 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VCol(cols=2): SimulationStatusView(server=server) + with vuetify.VRow(v_if="simulation_error"): html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") From 640553387890c75a3c26f48a40e8641dcf6b89e0 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 2 Dec 2025 18:44:30 +0100 Subject: [PATCH 25/70] job status track --- .../src/geos/trame/app/io/simulation.py | 28 +++++++++++++++---- .../src/geos/trame/app/ui/simulation_view.py | 20 +++++++++++-- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 656c57b9b..bcf3a0e13 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -452,6 +452,13 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona self._sim_info_dir = sim_info_dir or SimulationConstant.SIMULATIONS_INFORMATION_FOLDER_PATH server.state.job_ids = [] + server.state.status_colors = { + "PD": "#4CAF50", + "R": "#3F51B5", + "CA": "#FFC107", + "CG": "#484B45", + "F": "#E53935", + } self._job_status_watcher: Optional[AsyncPeriodicRunner] = None self._job_status_watcher_period_ms = 2000 @@ -555,9 +562,9 @@ def run_simulation()-> None: job_lines = sout.strip() - job_id = re.search(r"\b\d+\b", job_lines[0]) + job_id = re.search(r"Submitted batch job (\d+)", job_lines) - server.state.job_ids.append(job_id) + server.state.job_ids.append({'job_id':job_id[1]}) @@ -665,11 +672,20 @@ def check_jobs(self): if Authentificator.ssh_client: try: _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'date && squeue -u $USER') + #sacct -j --format --format=JobID,State --noheader job_lines = sout.strip().split("\n")[2:] - job_id = job_lines[0].split()[0] - job_status = job_lines[0].split()[4] - job_name = job_lines[0].split()[2] - print(f"{job_lines}\n job id:{job_id}\n status:{job_status}\n name:{job_name}") + jid = self._server.state.job_ids + for job_line in job_lines: + job_id = job_line.split()[0] + index = next((i for i, item in enumerate(jid) if item.get("job_id") == job_id), None) + if index is None: + continue + else: + jid[index]['status'] = job_line.split()[4] + jid[index]['name'] = job_line.split()[2] + print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") + self._server.state.job_ids = jid + # self._server.state.flush() except PermissionError as e: print(f"Permission error: {e}") diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 2637e96f2..50fd45b01 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -260,12 +260,26 @@ def run_remove_jobfile(index_to_remove : int) -> None: vuetify.VSpacer() with vuetify.VCol(cols=1): vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore - + + color_expression = "status_colors[job_ids[i].status] || '#607D8B'" with vuetify.VRow(): - with vuetify.VCol(cols=2): - SimulationStatusView(server=server) + with vuetify.VCol(cols=4): + # SimulationStatusView(server=server) + with vuetify.VList(): + with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,)): + vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") + # vuetify.VListItemTitle("{{ jobs.job_id }}") with vuetify.VRow(v_if="simulation_error"): html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") + +def get_color(status): + return { + 'PD': "#4CAF50", + 'R': "#3F51B5", + 'CA': "#FFC107", + 'CG': "#484B45", + 'F': "#E53935", + }.get(status, "#607D8B") \ No newline at end of file From c0a5364a9b701f67ee8edcd38cbb8b0014fd274a Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 11:16:15 +0100 Subject: [PATCH 26/70] kill jobs enable --- .../src/geos/trame/app/io/simulation.py | 35 ++++++++-------- .../src/geos/trame/app/ui/simulation_view.py | 40 +++++++++++-------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index bcf3a0e13..1b0a196b5 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -116,6 +116,11 @@ def dfs_tree(node, path, sftp, remote_root): sftp.mkdir( str(rp/Path(folder)) ) Authentificator.dfs_tree(content, lp/Path(folder), sftp, remote_root) + @staticmethod + def kill_job( id ): + if Authentificator.ssh_client: + Authentificator._execute_remote_command(Authentificator.ssh_client, f"scancel {id}") + return None @staticmethod def get_key( id, pword ): @@ -445,6 +450,9 @@ class Simulation: trigger the simulation. """ + + + def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: self._server = server controller = server.controller @@ -518,6 +526,9 @@ def gen_tree(xml_filename): } return file_tree + + + @controller.trigger("run_simulation") def run_simulation()-> None: @@ -579,28 +590,15 @@ def run_simulation()-> None: local_path=f'{server.state.simulation_dl_path}/dl.test', direction="get") - - # TODO later ASYNC and subprocess # Submit job using subprocess (local ssh call) - # import subprocess - # result = subprocess.run(["ssh", "user@remote.host", "sbatch /remote/path/job.slurm"], - # capture_output=True, text=True) - - # PARAMIKO >> subprocess - # # Execute command remotely - # stdin, stdout, stderr = client.exec_command("ls -l /tmp") - # print(stdout.read().decode()) - # parse stdout - - else: raise paramiko.SSHException - pass - @controller.trigger("kill_simulation") - def kill_simulation(pid)->None: + @controller.trigger("kill_all_simulations") + def kill_all_simulations()->None: # exec scancel jobid - pass + for jobs in server.state.job_ids: + Authentificator.kill_job(jobs['job_id']) def __del__(self): self.stop_result_streams() @@ -685,7 +683,8 @@ def check_jobs(self): jid[index]['name'] = job_line.split()[2] print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") self._server.state.job_ids = jid - # self._server.state.flush() + self._server.state.dirty("job_ids") + self._server.state.flush() except PermissionError as e: print(f"Permission error: {e}") diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 50fd45b01..ba1f30895 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -1,7 +1,7 @@ from trame.widgets import html from trame.widgets import vuetify3 as vuetify -from geos.trame.app.io.simulation import SimulationConstant +from geos.trame.app.io.simulation import SimulationConstant, Authentificator from geos.trame.app.ui.simulation_status_view import SimulationStatusView import json @@ -106,10 +106,23 @@ def on_simfiles_change(simulation_xml_filename : list, **_): pattern = re.compile(r"\.xml$", re.IGNORECASE) has_xml = any(pattern.search(file if isinstance(file, str) else file.get("name", "")) for file in simulation_xml_filename) server.state.is_valid_jobfiles = has_xml - + + + def kill_job(index_to_remove : int) -> None: + # for now just check there is an xml + jid = list(server.state.job_ids) + if 0 <= index_to_remove < len(jid): + # 1. Supprimer l'élément de la copie de la liste + removed_id = jid[index_to_remove]['job_id'] + Authentificator.kill_job(removed_id) + del jid[index_to_remove] + + server.state.job_ids = jid + print(f"Job {removed_id} kill. Still running: {len(jid)}") + else: + print(f"Error: supress index does not exist ({index_to_remove}).") - # @controller.trigger("run_remove_jobfile") def run_remove_jobfile(index_to_remove : int) -> None: # for now just check there is an xml current_files = list(server.state.simulation_xml_filename) # On prend une copie de la liste @@ -123,6 +136,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: print(f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}") else: print(f"Erreur: Index de suppression invalide ({index_to_remove}).") + with vuetify.VContainer(): with vuetify.VRow(): @@ -151,7 +165,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [ ] - sd = SuggestDecomposition('p4', 12e6) + sd = SuggestDecomposition('p4', 12) items = sd.to_list() vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=2): @@ -180,7 +194,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") with vuetify.VCol(cols=1): vuetify.VTextField( - v_model=("slurm_comment", None,), + v_model=("slurm_comment", "GEOS,CCS,testTrame",), label="Comment to slurm", dense=True, hide_details=True, @@ -259,7 +273,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VRow(): vuetify.VSpacer() with vuetify.VCol(cols=1): - vuetify.VBtn("Kill", click="trigger('kill_simulation')"), # type: ignore + vuetify.VBtn("Kill", click="trigger('kill_all_simulations')"), # type: ignore color_expression = "status_colors[job_ids[i].status] || '#607D8B'" @@ -269,17 +283,9 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VList(): with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,)): vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") - # vuetify.VListItemTitle("{{ jobs.job_id }}") + vuetify.VTooltip(text="here is a test for future display") + vuetify.VBtn(icon="mdi-delete",click=(kill_job,"[i]")) with vuetify.VRow(v_if="simulation_error"): - html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") - -def get_color(status): - return { - 'PD': "#4CAF50", - 'R': "#3F51B5", - 'CA': "#FFC107", - 'CG': "#484B45", - 'F': "#E53935", - }.get(status, "#607D8B") \ No newline at end of file + html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") \ No newline at end of file From 4195ef8866db62a39d6fd441304d6ec33d52c14b Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 11:58:42 +0100 Subject: [PATCH 27/70] working w/o copyback --- geos-trame/src/geos/trame/app/io/simulation.py | 4 ---- geos-trame/src/geos/trame/app/ui/simulation_view.py | 8 +++----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 1b0a196b5..e2bb5baaf 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -517,10 +517,6 @@ def gen_tree(xml_filename): "subfolders": { "mesh": mesh_matches, "tables": table_matches - # "subfolders": { - # "inner_tables_1": ["placeholder.txt"], - # "inner_tables_2": ["placeholder.txt"] - # } } } } diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index ba1f30895..44d1c2a74 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -273,7 +273,7 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VRow(): vuetify.VSpacer() with vuetify.VCol(cols=1): - vuetify.VBtn("Kill", click="trigger('kill_all_simulations')"), # type: ignore + vuetify.VBtn("Kill All", click="trigger('kill_all_simulations')"), # type: ignore color_expression = "status_colors[job_ids[i].status] || '#607D8B'" @@ -281,10 +281,8 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VCol(cols=4): # SimulationStatusView(server=server) with vuetify.VList(): - with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,)): - vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") - vuetify.VTooltip(text="here is a test for future display") - vuetify.VBtn(icon="mdi-delete",click=(kill_job,"[i]")) + with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,),prepend_icon="mdi-minus-circle-outline",click=(kill_job,"[i]") ): + vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") with vuetify.VRow(v_if="simulation_error"): From 8c04e469ee8e062b67842756c88ea762ba37f562 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 15:32:53 +0100 Subject: [PATCH 28/70] regexp working --- .../src/geos/trame/app/io/simulation.py | 40 +++++++++---------- .../src/geos/trame/app/ui/simulation_view.py | 1 - 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index e2bb5baaf..e33c8e654 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -497,11 +497,16 @@ def gen_tree(xml_filename): import re xml_pattern = re.compile(r"\.xml$", re.IGNORECASE) mesh_pattern = re.compile(r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE) - table_pattern = re.compile(r"\.(txt|dat|csv)$", re.IGNORECASE) + table_pattern = re.compile(r"\.(txt|dat|csv|geos)$", re.IGNORECASE) xml_matches = [] mesh_matches = [] table_matches = [] + pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files + pattern_xml_path = r"\"(.*/)([\w\-.]+\.(?:xml))\b" + pattern_mesh_path = r"\"(.*/)([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" + pattern_table_curly_path = r"((?:[\w\-/]+/)+)([\w\-.]+\.(?:geos|csv|dat|txt))" + for file in xml_filename: if xml_pattern.search(file.get("name","")): xml_matches.append(file) @@ -510,6 +515,19 @@ def gen_tree(xml_filename): elif table_pattern.search(file.get("name","")): table_matches.append(file) + + #assume the first XML is the main xml + # TODO relocate + xml_expected_file_matches = re.findall(pattern_file, xml_matches[0]['content'].decode("utf-8")) + test_assert = {item.get("name") for item in xml_filename}.intersection(set(xml_expected_file_matches)) + + decoded = re.sub(pattern_xml_path,r'"\2', xml_matches[0]['content'].decode("utf-8")) + decoded = re.sub(pattern_mesh_path,r'"mesh/\2', decoded) + decoded = re.sub(pattern_table_curly_path,r"tables/\2", decoded) + + xml_matches[0]['content'] = decoded.encode("utf-8") + + file_tree = { 'root' : '.', "structure": { @@ -568,6 +586,7 @@ def run_simulation()-> None: + #TODO encapsulate job_lines = sout.strip() job_id = re.search(r"Submitted batch job (\d+)", job_lines) @@ -604,25 +623,6 @@ def set_status_watcher_period_ms(self, period_ms): if self._job_status_watcher: self._job_status_watcher.set_period_ms(period_ms) - # def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: - # newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) - # if not newer_file: - # return - # def _update_screenshot_display(self, screenshots_folder_path: Path) -> None: - # newer_file = get_most_recent_simulation_screenshot(screenshots_folder_path) - # if not newer_file: - # return - - # f_name = Path(newer_file).name - # if not f_name: - # return - - # self._server.state.active_screenshot_folder_path = str(screenshots_folder_path) - # self._server.state.dirty("active_screenshot_folder_path") - # self._server.state.active_screenshot_relative_path = f_name - # self._server.state.dirty("active_screenshot_relative_path") - # self._server.state.flush() - def _update_job_status(self) -> None: sim_info = self.get_last_user_simulation_info() job_status = sim_info.get_simulation_status(self._sim_runner.get_running_user_jobs) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 44d1c2a74..10dd1a8a1 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -276,7 +276,6 @@ def run_remove_jobfile(index_to_remove : int) -> None: vuetify.VBtn("Kill All", click="trigger('kill_all_simulations')"), # type: ignore color_expression = "status_colors[job_ids[i].status] || '#607D8B'" - with vuetify.VRow(): with vuetify.VCol(cols=4): # SimulationStatusView(server=server) From 485446c92e39d11c2bd2acd2871574a2281a9fe7 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 16:13:14 +0100 Subject: [PATCH 29/70] less cumbersome --- .../src/geos/trame/app/io/simulation.py | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index e33c8e654..efb946353 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -59,8 +59,8 @@ class SimulationConstant: srun --mpi=pmix_v3 --hint=nomultithread \ -n {{ ntasks }} geos \ - -o Outputs_{{ slurm_jobid | default('${SLURM_JOBID}') }} \ - -i {{ input_file | default('geosDeck.xml') }} | tee log.out + -o Outputs_${SLURM_JOBID} \ + -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out """ @@ -265,7 +265,7 @@ def _transfer_file_sftp(client, local_path, remote_path, direction="put"): @unique class SlurmJobStatus(Enum): - PENDING = "PD" + PENDING = "PEND" RUNNING = "R" COMPLETING = "CG" COMPLETED = "CD" @@ -461,11 +461,11 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona server.state.job_ids = [] server.state.status_colors = { - "PD": "#4CAF50", - "R": "#3F51B5", - "CA": "#FFC107", - "CG": "#484B45", - "F": "#E53935", + "PENDING": "#4CAF50", #PD + "RUNNING": "#3F51B5", #R + "CANCELLED": "#FFC107", #CA + "COMPLETED": "#484B45", #CD + "FAILED": "#E53935", #F } self._job_status_watcher: Optional[AsyncPeriodicRunner] = None self._job_status_watcher_period_ms = 2000 @@ -540,8 +540,6 @@ def gen_tree(xml_filename): } return file_tree - - @controller.trigger("run_simulation") def run_simulation()-> None: @@ -665,19 +663,20 @@ def start_result_streams(self) -> None: def check_jobs(self): if Authentificator.ssh_client: try: - _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'date && squeue -u $USER') + # _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'date && squeue -u $USER') #sacct -j --format --format=JobID,State --noheader - job_lines = sout.strip().split("\n")[2:] jid = self._server.state.job_ids - for job_line in job_lines: - job_id = job_line.split()[0] - index = next((i for i, item in enumerate(jid) if item.get("job_id") == job_id), None) - if index is None: - continue - else: - jid[index]['status'] = job_line.split()[4] - jid[index]['name'] = job_line.split()[2] - print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") + for index,job in enumerate(jid): + job_id = job['job_id'] + _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader') + job_line = sout.strip().split("\n")[-1] + # index = next((i for i, item in enumerate(jid) if item.get("job_id") == job_id), None) + # if index is None: + # continue + # else: + jid[index]['status'] = job_line.split()[2] + jid[index]['name'] = job_line.split()[1] + print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") self._server.state.job_ids = jid self._server.state.dirty("job_ids") self._server.state.flush() From 51f112f456dd4d37e94b08f1e8400f1e05ba3c10 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 16:25:31 +0100 Subject: [PATCH 30/70] first attempt at copy back --- geos-trame/src/geos/trame/app/io/simulation.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index efb946353..bed32cee4 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -548,9 +548,9 @@ def run_simulation()-> None: if server.state.access_granted and server.state.simulation_xml_filename: template = Template(template_str) # sdi = server.state.sd - ci ={'nodes': 2 , 'total_ranks': 96 } + ci ={'nodes': 1 , 'total_ranks': 2 } rendered = template.render(job_name=server.state.simulation_job_name, - input_file=server.state.simulation_xml_filename, + input_file=[ item for item in server.state.simulation_xml_filename if item.get('type') == 'text/xml' ][0].get('name'), nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0",#TODO profile to use the correct amount commment=server.state.slurm_comment, partition='p4_general', account='myaccount' ) @@ -663,18 +663,20 @@ def start_result_streams(self) -> None: def check_jobs(self): if Authentificator.ssh_client: try: - # _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'date && squeue -u $USER') - #sacct -j --format --format=JobID,State --noheader jid = self._server.state.job_ids for index,job in enumerate(jid): job_id = job['job_id'] _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader') job_line = sout.strip().split("\n")[-1] - # index = next((i for i, item in enumerate(jid) if item.get("job_id") == job_id), None) - # if index is None: - # continue - # else: + jid[index]['status'] = job_line.split()[2] + if (jid[index]['status'] == 'COMPLETED'): + # tar and copy back + Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {self._server.simulation_remote_path} && tar cvfz Outputs_{job_id} {job_id}.tgz') + Authentificator._transfer_file_sftp(Authentificator.ssh_client, + f'{self._server.simulation_dl_path}/{job_id}.tgz', + f'{self._server.simulation_remote_path}/{job_id}.tgz') + jid[index]['name'] = job_line.split()[1] print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") self._server.state.job_ids = jid From 7c43b0ea709c688243f2ef4dece0c738fd9862d1 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 3 Dec 2025 22:32:27 +0100 Subject: [PATCH 31/70] last details --- geos-trame/src/geos/trame/app/io/simulation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index bed32cee4..aab191476 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -672,10 +672,11 @@ def check_jobs(self): jid[index]['status'] = job_line.split()[2] if (jid[index]['status'] == 'COMPLETED'): # tar and copy back - Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {self._server.simulation_remote_path} && tar cvfz Outputs_{job_id} {job_id}.tgz') + Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/') Authentificator._transfer_file_sftp(Authentificator.ssh_client, - f'{self._server.simulation_dl_path}/{job_id}.tgz', - f'{self._server.simulation_remote_path}/{job_id}.tgz') + f'{self._server.state.simulation_dl_path}/{job_id}.tgz', + f'{self._server.state.simulation_remote_path}/{job_id}.tgz', + direction='get') jid[index]['name'] = job_line.split()[1] print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") From 1b01967110cb6f3bc46c72184f51fe8c2840a5c9 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 4 Dec 2025 17:36:26 +0100 Subject: [PATCH 32/70] update regex --- geos-trame/src/geos/trame/app/io/simulation.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index aab191476..fadad0612 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -503,9 +503,9 @@ def gen_tree(xml_filename): table_matches = [] pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files - pattern_xml_path = r"\"(.*/)([\w\-.]+\.(?:xml))\b" - pattern_mesh_path = r"\"(.*/)([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" - pattern_table_curly_path = r"((?:[\w\-/]+/)+)([\w\-.]+\.(?:geos|csv|dat|txt))" + pattern_xml_path = r"\"(.*/)*([\w\-.]+\.(?:xml))\b" + pattern_mesh_path = r"\"(.*/)*([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" + pattern_table_curly_path = r"((?:[\w\-/]+/)+)*([\w\-.]+\.(?:geos|csv|dat|txt))" for file in xml_filename: if xml_pattern.search(file.get("name","")): @@ -553,9 +553,6 @@ def run_simulation()-> None: input_file=[ item for item in server.state.simulation_xml_filename if item.get('type') == 'text/xml' ][0].get('name'), nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0",#TODO profile to use the correct amount commment=server.state.slurm_comment, partition='p4_general', account='myaccount' ) - - # with open(Path(server.state.simulation_xml_filename).parent/Path('job.slurm'),'w') as f: - # f.write(rendered) if Authentificator.ssh_client: #write slurm directly on remote @@ -590,11 +587,6 @@ def run_simulation()-> None: server.state.job_ids.append({'job_id':job_id[1]}) - - - - # Authentificator._execute_remote_command(Authentificator.ssh_client, - # f'squeue -u $USER') self.start_result_streams() From e2dca24086fe2baf4cd9294d5494eb01a268defd Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 4 Dec 2025 18:08:12 +0100 Subject: [PATCH 33/70] fixed comment section --- geos-trame/src/geos/trame/app/io/simulation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index fadad0612..c7c8ad972 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -39,7 +39,7 @@ class SimulationConstant: #SBATCH --job-name="{{ job_name }}" #SBATCH --ntasks={{ ntasks }} #SBATCH --partition={{ partition }} -#SBATCH --comment={{ comment }} +#SBATCH --comment={{ comment_gr }} #SBACTH --account={{ account }} #SBATCH --nodes={{ nodes }} #SBATCH --time={{ time | default('24:00:00') }} @@ -549,10 +549,11 @@ def run_simulation()-> None: template = Template(template_str) # sdi = server.state.sd ci ={'nodes': 1 , 'total_ranks': 2 } + #TODO profile to use the correct amount rendered = template.render(job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get('type') == 'text/xml' ][0].get('name'), - nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0",#TODO profile to use the correct amount - commment=server.state.slurm_comment, partition='p4_general', account='myaccount' ) + nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0", + comment_gr=server.state.slurm_comment, partition='p4_general', account='myaccount' ) if Authentificator.ssh_client: #write slurm directly on remote From 6e3dfc47246705654b80a2ed9b58334d60e09fce Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 4 Dec 2025 18:26:35 +0100 Subject: [PATCH 34/70] start simulation progress --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 10dd1a8a1..8e42589a7 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -282,7 +282,8 @@ def run_remove_jobfile(index_to_remove : int) -> None: with vuetify.VList(): with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,),prepend_icon="mdi-minus-circle-outline",click=(kill_job,"[i]") ): vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") - + vuetify.VProgressLinear(v_model=("simulation_progress","0"),) + with vuetify.VRow(v_if="simulation_error"): html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") \ No newline at end of file From f3acd23c0be9374d4326d750e3205e31872d0ddd Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 8 Dec 2025 17:18:21 +0100 Subject: [PATCH 35/70] notes and cleanup --- .../src/geos/trame/app/io/simulation.py | 59 +++++++++++++------ .../src/geos/trame/app/ui/simulation_view.py | 6 +- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index c7c8ad972..eb5d1876c 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -29,6 +29,24 @@ class SimulationConstant: # replace by conf-file json +#If proxyJump are needed +# +# proxy_cmd = "ssh -W {host}:{port} proxyuser@bastion.example.com".format( +# host=ssh_host, port=ssh_port +# ) +# from paramiko import ProxyCommand +# sock = ProxyCommand(proxy_cmd) + +# client = paramiko.SSHClient() +# client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +# client.connect( +# hostname=ssh_host, +# port=ssh_port, +# username=username, +# key_filename=keyfile, +# sock=sock, # <— tunnel created by ProxyCommand +# ) + # Load template from file # with open("slurm_job_template.j2") as f: @@ -42,7 +60,7 @@ class SimulationConstant: #SBATCH --comment={{ comment_gr }} #SBACTH --account={{ account }} #SBATCH --nodes={{ nodes }} -#SBATCH --time={{ time | default('24:00:00') }} +#SBATCH --time={{ time | default('00:10:00') }} #SBATCH --mem={{ mem }} #SBATCH --output=job_GEOS_%j.out #SBATCH --error=job_GEOS_%j.err @@ -449,10 +467,6 @@ class Simulation: Requires a simulation runner providing information on the output path of the simulation to monitor and ways to trigger the simulation. """ - - - - def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: self._server = server controller = server.controller @@ -553,7 +567,7 @@ def run_simulation()-> None: rendered = template.render(job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get('type') == 'text/xml' ][0].get('name'), nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0", - comment_gr=server.state.slurm_comment, partition='p4_general', account='myaccount' ) + comment_gr=server.state.slurm_comment, partition='p4_dev', account='myaccount' ) if Authentificator.ssh_client: #write slurm directly on remote @@ -586,7 +600,7 @@ def run_simulation()-> None: job_lines = sout.strip() job_id = re.search(r"Submitted batch job (\d+)", job_lines) - server.state.job_ids.append({'job_id':job_id[1]}) + server.state.job_ids.append({'job_id': job_id[1]}) self.start_result_streams() @@ -629,18 +643,20 @@ def _update_job_status(self) -> None: if job_status == SimulationStatus.DONE: self.stop_result_streams() - def get_last_user_simulation_info(self) -> SimulationInformation: - last_sim_information = self.get_last_information_path() - return SimulationInformation.from_file(last_sim_information) - - def get_last_information_path(self) -> Optional[Path]: - user_igg = self._sim_runner.get_user_igg() + # TODO: might be useful for history + # + # def get_last_user_simulation_info(self) -> SimulationInformation: + # last_sim_information = self.get_last_information_path() + # return SimulationInformation.from_file(last_sim_information) - user_files = list(reversed(sorted(self._sim_info_dir.glob(f"{user_igg}*.json")))) - if not user_files: - return None + # def get_last_information_path(self) -> Optional[Path]: + # user_igg = self._sim_runner.get_user_igg() - return user_files[0] + # user_files = list(reversed(sorted(self._sim_info_dir.glob(f"{user_igg}*.json")))) + # if not user_files: + # return None + # + # return user_files[0] def stop_result_streams(self): if self._job_status_watcher is not None: @@ -670,6 +686,15 @@ def check_jobs(self): f'{self._server.state.simulation_dl_path}/{job_id}.tgz', f'{self._server.state.simulation_remote_path}/{job_id}.tgz', direction='get') + elif (jid[index]['status'] == 'RUNNING'): + # getthe completed status + pattern = re.compile(r'\((\d+(?:\.\d+)?)%\s*completed\)') + with Authentificator.ssh_client.open_sftp().file( str(Path(self._server.state.simulation_remote_path)/Path(f"job_GEOS_{job_id}.out")), "r") as f: + for line in f: + m = pattern.search(line) + if m: + self._server.state.simulation_progress = str(m.group(1)) + jid[index]['name'] = job_line.split()[1] print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 8e42589a7..ac49384f9 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -124,14 +124,10 @@ def kill_job(index_to_remove : int) -> None: def run_remove_jobfile(index_to_remove : int) -> None: - # for now just check there is an xml - current_files = list(server.state.simulation_xml_filename) # On prend une copie de la liste + current_files = list(server.state.simulation_xml_filename) if 0 <= index_to_remove < len(current_files): - # 1. Supprimer l'élément de la copie de la liste del current_files[index_to_remove] - # 2. Remplacer la variable d'état par la nouvelle liste. - # Ceci est CRITIQUE pour la réactivité, car cela force Vue.js à se mettre à jour. server.state.simulation_xml_filename = current_files print(f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}") else: From 6510a410c72a35c538b78e290c9c43204bbec29c Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 8 Dec 2025 17:51:14 +0100 Subject: [PATCH 36/70] alternate strat copy back --- geos-trame/src/geos/trame/app/io/simulation.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index eb5d1876c..e4f2dd08b 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -82,6 +82,22 @@ class SimulationConstant: """ +template_cb = """#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} #p4_transfer +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --err=job_GEOS_%j.err +#SBATCH --dependency=afterok:{{ dep_job_id }} + +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} + +""" class Authentificator:#namespacing more than anything else From 06c5fe93853a88927b4fc7cfb010de2ffe82cfef Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 10 Dec 2025 18:38:33 +0100 Subject: [PATCH 37/70] yapf --- geos-trame/src/geos/trame/app/core.py | 16 +- .../src/geos/trame/app/io/simulation.py | 586 +++++++++--------- geos-trame/src/geos/trame/app/main.py | 3 +- .../geos/trame/app/ui/simulationStatusView.py | 58 +- .../trame/app/ui/simulation_status_view.py | 58 +- .../src/geos/trame/app/ui/simulation_view.py | 360 ++++++----- .../trame/app/utils/async_file_watcher.py | 62 +- 7 files changed, 575 insertions(+), 568 deletions(-) diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index 4d74d7aaf..06a9a54e7 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -24,12 +24,9 @@ from geos.trame.app.ui.viewer.viewer import DeckViewer from geos.trame.app.components.alertHandler import AlertHandler - from geos.trame.app.io.simulation import Simulation, SimRunner from geos.trame.app.ui.simulation_view import define_simulation_view - - import sys @@ -44,7 +41,7 @@ def __init__( self, server: Server, file_name: str ) -> None: self.deckEditor: DeckEditor | None = None self.timelineEditor: TimelineEditor | None = None self.deckInspector: DeckInspector | None = None - self.simulationLauncher : Simulation | None = None + self.simulationLauncher: Simulation | None = None self.server = server server.enable_module( module ) @@ -76,8 +73,8 @@ def __init__( self, server: Server, file_name: str ) -> None: self.well_viewer = WellViewer( 5, 5 ) ######## Simulation runner - self.sim_runner : SimRunner = SimRunner(self.state.user_id) - self.simulation = Simulation(self.sim_runner, server=server) + self.sim_runner: SimRunner = SimRunner( self.state.user_id ) + self.simulation = Simulation( self.sim_runner, server=server ) # Data loader self.data_loader = DataLoader( self.tree, self.region_viewer, self.well_viewer, trame_server=server ) @@ -189,7 +186,6 @@ def build_ui( self ) -> None: ): vuetify.VIcon( "mdi-content-save-outline" ) - # input file editor with vuetify.VCol( v_show=( "tab_idx == 0", ), classes="flex-grow-1 pa-0 ma-0" ): if self.tree.input_file is not None: @@ -203,10 +199,10 @@ def build_ui( self ) -> None: "The file " + self.state.input_file + " cannot be parsed.", file=sys.stderr, ) - - with vuetify.VCol( v_show=( "tab_idx == 1"), classes="flex-grow-1 pa-0 ma-0") : + + with vuetify.VCol( v_show=( "tab_idx == 1" ), classes="flex-grow-1 pa-0 ma-0" ): if self.simulation is not None: - define_simulation_view(self.server) + define_simulation_view( self.server ) else: self.ctrl.on_add_error( "Error", diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index e4f2dd08b..e07e2fb42 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -1,4 +1,3 @@ - from abc import ABC, abstractmethod from pathlib import Path from dataclasses import dataclass, field, fields @@ -18,19 +17,21 @@ #TODO move outside #TODO use Jinja on real launcher -@dataclass(frozen=True) + +@dataclass( frozen=True ) class SimulationConstant: SIMULATION_GEOS_PATH = "/workrd/users/" HOST = "p4log01" # Only run on P4 machine REMOTE_HOME_BASE = "/users" PORT = 22 - SIMULATIONS_INFORMATION_FOLDER_PATH= "/workrd/users/" + SIMULATIONS_INFORMATION_FOLDER_PATH = "/workrd/users/" SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" # replace by conf-file json + #If proxyJump are needed -# +# # proxy_cmd = "ssh -W {host}:{port} proxyuser@bastion.example.com".format( # host=ssh_host, port=ssh_port # ) @@ -47,10 +48,9 @@ class SimulationConstant: # sock=sock, # <— tunnel created by ProxyCommand # ) - # Load template from file # with open("slurm_job_template.j2") as f: - # template = Template(f.read()) +# template = Template(f.read()) #TODO from private-assets template_str = """#!/bin/sh @@ -69,8 +69,6 @@ class SimulationConstant: ulimit -c unlimited module purge -module use /workrd/SCR/GEOS/l1092082/modules -module load geos-develop-d36028cb-hypreUpdate export HDF5_USE_FILE_LOCKING=FALSE export OMP_NUM_THREADS=1 @@ -85,7 +83,7 @@ class SimulationConstant: template_cb = """#!/bin/sh #SBATCH --job-name="{{ job_name }}" #SBATCH --ntasks={{ ntasks }} -#SBATCH --partition={{ partition }} #p4_transfer +#SBATCH --partition={{ partition }} #SBATCH --comment={{ comment_gr }} #SBACTH --account={{ account }} #SBATCH --nodes={{ nodes }} @@ -100,140 +98,145 @@ class SimulationConstant: """ -class Authentificator:#namespacing more than anything else +class Authentificator: #namespacing more than anything else - ssh_client : Optional[paramiko.SSHClient] = None + ssh_client: Optional[ paramiko.SSHClient ] = None @staticmethod - def _sftp_copy_tree(ssh_client, file_tree, remote_root): + def _sftp_copy_tree( ssh_client, file_tree, remote_root ): # Connect to remote server sftp = ssh_client.open_sftp() - - Authentificator.dfs_tree(file_tree["structure"], file_tree["root"], sftp=sftp, remote_root=remote_root) + + Authentificator.dfs_tree( file_tree[ "structure" ], file_tree[ "root" ], sftp=sftp, remote_root=remote_root ) sftp.close() @staticmethod - def dfs_tree(node, path, sftp, remote_root): + def dfs_tree( node, path, sftp, remote_root ): - lp = Path(path) - rp = Path(remote_root)/lp + lp = Path( path ) + rp = Path( remote_root ) / lp - if isinstance(node, list): + if isinstance( node, list ): for file in node: # sftp.put(lp/Path(file), rp/Path(file)) - with sftp.file( str(rp/Path(file.get('name'))), 'w') as f: - f.write(file.get('content')) - print(f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") - elif isinstance(node, dict): + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) + elif isinstance( node, dict ): if "files" in node: - for file in node["files"]: + for file in node[ "files" ]: # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) - with sftp.file( str(rp/Path(file.get('name'))), 'w') as f: - f.write(file.get('content')) - print(f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) if "subfolders" in node: - for subfolder, content in node["subfolders"].items(): + for subfolder, content in node[ "subfolders" ].items(): try: - sftp.stat( str(rp/Path(subfolder)) ) + sftp.stat( str( rp / Path( subfolder ) ) ) except FileNotFoundError: - print(f"creating {rp/Path(subfolder)}") - sftp.mkdir( str(rp/Path(subfolder)) ) - Authentificator.dfs_tree(content, lp/Path(subfolder), sftp, remote_root) - + print( f"creating {rp/Path(subfolder)}" ) + sftp.mkdir( str( rp / Path( subfolder ) ) ) + Authentificator.dfs_tree( content, lp / Path( subfolder ), sftp, remote_root ) + for folder, content in node.items(): - if folder not in ["files", "subfolders"]: + if folder not in [ "files", "subfolders" ]: try: - sftp.stat( str(rp/Path(folder)) ) + sftp.stat( str( rp / Path( folder ) ) ) except FileNotFoundError: - print(f"creating {rp/Path(folder)}") - sftp.mkdir( str(rp/Path(folder)) ) - Authentificator.dfs_tree(content, lp/Path(folder), sftp, remote_root) + print( f"creating {rp/Path(folder)}" ) + sftp.mkdir( str( rp / Path( folder ) ) ) + Authentificator.dfs_tree( content, lp / Path( folder ), sftp, remote_root ) @staticmethod def kill_job( id ): if Authentificator.ssh_client: - Authentificator._execute_remote_command(Authentificator.ssh_client, f"scancel {id}") + Authentificator._execute_remote_command( Authentificator.ssh_client, f"scancel {id}" ) return None @staticmethod def get_key( id, pword ): try: - home = os.environ.get("HOME") - PRIVATE_KEY = paramiko.RSAKey.from_private_key_file(f"{home}/.ssh/id_trame") + home = os.environ.get( "HOME" ) + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) return PRIVATE_KEY except paramiko.SSHException as e: - print(f"Error loading private key: {e}\n") + print( f"Error loading private key: {e}\n" ) except FileNotFoundError as e: - print(f"Private key not found: {e}\n Generating key ...") + print( f"Private key not found: {e}\n Generating key ..." ) PRIVATE_KEY = Authentificator.gen_key() temp_client = paramiko.SSHClient() - temp_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - temp_client.connect(SimulationConstant.HOST, SimulationConstant.PORT, username=id, password=pword, timeout=10) - Authentificator._transfer_file_sftp(temp_client,f"{home}/.ssh/id_trame.pub",f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub") - Authentificator._execute_remote_command(temp_client,f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys") + temp_client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) + temp_client.connect( SimulationConstant.HOST, + SimulationConstant.PORT, + username=id, + password=pword, + timeout=10 ) + Authentificator._transfer_file_sftp( temp_client, f"{home}/.ssh/id_trame.pub", + f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub" ) + Authentificator._execute_remote_command( + temp_client, + f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys" + ) return PRIVATE_KEY - @staticmethod - def gen_key(): + def gen_key(): - home = os.environ.get("HOME") + home = os.environ.get( "HOME" ) file_path = f"{home}/.ssh/id_trame" - key = paramiko.RSAKey.generate(bits=4096) - key.write_private_key_file(file_path) - + key = paramiko.RSAKey.generate( bits=4096 ) + key.write_private_key_file( file_path ) + # Get public key in OpenSSH format public_key = f"{key.get_name()} {key.get_base64()}" - with open(file_path + ".pub", "w") as pub_file: - pub_file.write(public_key) + with open( file_path + ".pub", "w" ) as pub_file: + pub_file.write( public_key ) - print("SSH key pair generated: id_trame (private), id_trame.pub (public)") + print( "SSH key pair generated: id_trame (private), id_trame.pub (public)" ) return key - @staticmethod - def _create_ssh_client( host, port, username, password=None, key=None) -> paramiko.SSHClient: + def _create_ssh_client( host, port, username, password=None, key=None ) -> paramiko.SSHClient: """ Initializes and returns an SSH client connection. Uses context manager for automatic cleanup. """ client = paramiko.SSHClient() # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) - client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) try: - print(f"Connecting to {host} using key-based authentication...") - client.connect(host, port, username, pkey=key, timeout=10) + print( f"Connecting to {host} using key-based authentication..." ) + client.connect( host, port, username, pkey=key, timeout=10 ) return client except paramiko.AuthenticationException: - print("Authentication failed. Check your credentials or key.") + print( "Authentication failed. Check your credentials or key." ) return None except paramiko.SSHException as e: - print(f"Could not establish SSH connection: {e}") + print( f"Could not establish SSH connection: {e}" ) return None except Exception as e: - print(f"An unexpected error occurred: {e}") + print( f"An unexpected error occurred: {e}" ) return None - @staticmethod - def _execute_remote_command(client, command): + def _execute_remote_command( client, command ): """ Executes a single command on the remote server and prints the output. """ if not client: return - print(f"\n--- Executing Command: '{command}' ---") + print( f"\n--- Executing Command: '{command}' ---" ) try: # Executes the command. stdin, stdout, and stderr are file-like objects. # Ensure command ends with a newline character for some shell environments. - stdin, stdout, stderr = client.exec_command(command) + stdin, stdout, stderr = client.exec_command( command ) # Wait for the command to finish and read the output exit_status = stdout.channel.recv_exit_status() @@ -241,24 +244,24 @@ def _execute_remote_command(client, command): # Print standard output stdout_data = stdout.read().decode().strip() if stdout_data: - print("STDOUT:") - print(stdout_data) + print( "STDOUT:" ) + print( stdout_data ) # Print standard error (if any) stderr_data = stderr.read().decode().strip() if stderr_data: - print("STDERR:") - print(stderr_data) + print( "STDERR:" ) + print( stderr_data ) + + print( f"Command exited with status: {exit_status}" ) + return ( exit_status, stdout_data, stderr_data ) - print(f"Command exited with status: {exit_status}") - return (exit_status,stdout_data, stderr_data) - except Exception as e: - print(f"Error executing command: {e}") - return (-1,"","") + print( f"Error executing command: {e}" ) + return ( -1, "", "" ) @staticmethod - def _transfer_file_sftp(client, local_path, remote_path, direction="put"): + def _transfer_file_sftp( client, local_path, remote_path, direction="put" ): """ Transfers a file using SFTP (Secure File Transfer Protocol). Direction can be 'put' (upload) or 'get' (download). @@ -266,39 +269,39 @@ def _transfer_file_sftp(client, local_path, remote_path, direction="put"): if not client: return - print(f"\n--- Starting SFTP Transfer ({direction.upper()}) ---") - + print( f"\n--- Starting SFTP Transfer ({direction.upper()}) ---" ) + try: # Establish an SFTP connection session sftp = client.open_sftp() if direction == "put": - print(f"Uploading '{local_path}' to '{remote_path}'...") - sftp.put(local_path, remote_path) - print("Upload complete.") + print( f"Uploading '{local_path}' to '{remote_path}'..." ) + sftp.put( local_path, remote_path ) + print( "Upload complete." ) elif direction == "get": - print(f"Downloading '{remote_path}' to '{local_path}'...") - sftp.get(remote_path, local_path) - print("Download complete.") + print( f"Downloading '{remote_path}' to '{local_path}'..." ) + sftp.get( remote_path, local_path ) + print( "Download complete." ) else: - print("Invalid transfer direction. Use 'put' or 'get'.") + print( "Invalid transfer direction. Use 'put' or 'get'." ) sftp.close() return True - + except FileNotFoundError: - print(f"Error: Local file '{local_path}' not found.") + print( f"Error: Local file '{local_path}' not found." ) return False except IOError as e: - print(f"Error accessing remote file or path: {e}") + print( f"Error accessing remote file or path: {e}" ) return False except Exception as e: - print(f"An error occurred during SFTP: {e}") + print( f"An error occurred during SFTP: {e}" ) return False @unique -class SlurmJobStatus(Enum): +class SlurmJobStatus( Enum ): PENDING = "PEND" RUNNING = "R" COMPLETING = "CG" @@ -307,12 +310,13 @@ class SlurmJobStatus(Enum): UNKNOWN = "UNKNOWN" @classmethod - def from_string(cls, job_str) -> "SlurmJobStatus": + def from_string( cls, job_str ) -> "SlurmJobStatus": try: - return cls(job_str) + return cls( job_str ) except ValueError: return cls.UNKNOWN - + + # TODO: dataclass_json # @dataclass_json @dataclass @@ -320,7 +324,7 @@ class SimulationInformation: def get_simulation_status( self, - get_running_user_jobs_f: Callable[[], list[tuple[str, SlurmJobStatus]]], + get_running_user_jobs_f: Callable[ [], list[ tuple[ str, SlurmJobStatus ] ] ], ) -> SimulationStatus: """ Returns the simulation status given the current Jobs running for the current user. @@ -329,79 +333,80 @@ def get_simulation_status( if not self.geos_job_id: return SimulationStatus.NOT_RUN - done_sim_path = self.get_simulation_dir(SimulationStatus.DONE) - if self.get_timeseries_path(done_sim_path).exists(): + done_sim_path = self.get_simulation_dir( SimulationStatus.DONE ) + if self.get_timeseries_path( done_sim_path ).exists(): return SimulationStatus.DONE user_jobs = get_running_user_jobs_f() - if (self.geos_job_id, SlurmJobStatus.RUNNING) in user_jobs: + if ( self.geos_job_id, SlurmJobStatus.RUNNING ) in user_jobs: return SimulationStatus.RUNNING - if (self.geos_job_id, SlurmJobStatus.COMPLETING) in user_jobs: + if ( self.geos_job_id, SlurmJobStatus.COMPLETING ) in user_jobs: return SimulationStatus.COMPLETING - if (self.copy_back_job_id, SlurmJobStatus.RUNNING) in user_jobs: + if ( self.copy_back_job_id, SlurmJobStatus.RUNNING ) in user_jobs: return SimulationStatus.COPY_BACK - if (self.copy_job_id, SlurmJobStatus.RUNNING) in user_jobs: + if ( self.copy_job_id, SlurmJobStatus.RUNNING ) in user_jobs: return SimulationStatus.SCHEDULED return SimulationStatus.UNKNOWN - + + @dataclass class LauncherParams: - simulation_files_path: Optional[str] = None - simulation_cmd_filename: Optional[str] = None - simulation_job_name: Optional[str] = None + simulation_files_path: Optional[ str ] = None + simulation_cmd_filename: Optional[ str ] = None + simulation_job_name: Optional[ str ] = None simulation_nb_process: int = 1 @classmethod - def from_server_state(cls, server_state: State) -> "LauncherParams": + def from_server_state( cls, server_state: State ) -> "LauncherParams": state = cls() - for f in fields(cls): - setattr(state, f.name, server_state[f.name]) + for f in fields( cls ): + setattr( state, f.name, server_state[ f.name ] ) return state - def is_complete(self) -> bool: - return None not in [getattr(self, f.name) for f in fields(self)] + def is_complete( self ) -> bool: + return None not in [ getattr( self, f.name ) for f in fields( self ) ] - def assert_is_complete(self) -> None: + def assert_is_complete( self ) -> None: if not self.is_complete(): - raise RuntimeError(f"Incomplete simulation launch parameters : {self}.") + raise RuntimeError( f"Incomplete simulation launch parameters : {self}." ) def get_timestamp() -> str: - return datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S.%f")[:-3] + return datetime.utcnow().strftime( "%Y-%m-%d_%H-%M-%S.%f" )[ :-3 ] -def get_simulation_output_file_name(timestamp: str, user_name: str = "user_name"): +def get_simulation_output_file_name( timestamp: str, user_name: str = "user_name" ): return f"{user_name}_{timestamp}.json" -def parse_launcher_output(output: str) -> SimulationInformation: - split_output = output.split("\n") +def parse_launcher_output( output: str ) -> SimulationInformation: + split_output = output.split( "\n" ) information = SimulationInformation() information_dict = information.to_dict() # type: ignore content_to_parse = [ - ("Working directory: ", "working_directory"), - ("1. copy job id: ", "copy_job_id"), - ("2. geos job id: ", "geos_job_id"), - ("3. copy back job id: ", "copy_back_job_id"), - ("Run directory: ", "run_directory"), + ( "Working directory: ", "working_directory" ), + ( "1. copy job id: ", "copy_job_id" ), + ( "2. geos job id: ", "geos_job_id" ), + ( "3. copy back job id: ", "copy_back_job_id" ), + ( "Run directory: ", "run_directory" ), ] for line in split_output: for info_tuple in content_to_parse: - if info_tuple[0] in line: - split_line = line.split(info_tuple[0]) - if len(split_line) < 2: + if info_tuple[ 0 ] in line: + split_line = line.split( info_tuple[ 0 ] ) + if len( split_line ) < 2: continue - information_dict[info_tuple[1]] = split_line[-1] + information_dict[ info_tuple[ 1 ] ] = split_line[ -1 ] - information_dict["timestamp"] = get_timestamp() - return SimulationInformation.from_dict(information_dict) # type: ignore + information_dict[ "timestamp" ] = get_timestamp() + return SimulationInformation.from_dict( information_dict ) # type: ignore # def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: @@ -428,18 +433,16 @@ def parse_launcher_output(output: str) -> SimulationInformation: # timestep = os.path.splitext(filename)[0].split("_")[-1] # return int(timestep) if timestep else -1 - # def get_most_recent_file_from_list(files_list: list[str]) -> Optional[str]: # if not files_list: # return None # return max(files_list, key=get_simulation_screenshot_timestep) - # def get_most_recent_simulation_screenshot(folder_path: Path) -> Optional[str]: # return get_most_recent_file_from_list(os.listdir(folder_path)) if folder_path.exists() else None -class ISimRunner(ABC): +class ISimRunner( ABC ): """ Abstract interface for sim runner. Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. @@ -458,22 +461,21 @@ class ISimRunner(ABC): # pass -class SimRunner(ISimRunner): +class SimRunner( ISimRunner ): """ Runs sim on HPC. Wrap paramiko use """ - def __init__(self, user): + def __init__( self, user ): super().__init__() # early test self.local_upload_file = "test_upload.txt" import time - with open(self.local_upload_file, "w") as f: - f.write(f"This file was uploaded at {time.ctime()}\n") - print(f"Created local file: {self.local_upload_file}") + with open( self.local_upload_file, "w" ) as f: + f.write( f"This file was uploaded at {time.ctime()}\n" ) + print( f"Created local file: {self.local_upload_file}" ) - class Simulation: """ @@ -483,7 +485,8 @@ class Simulation: Requires a simulation runner providing information on the output path of the simulation to monitor and ways to trigger the simulation. """ - def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[Path] = None) -> None: + + def __init__( self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[ Path ] = None ) -> None: self._server = server controller = server.controller self._sim_runner = sim_runner @@ -491,169 +494,169 @@ def __init__(self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optiona server.state.job_ids = [] server.state.status_colors = { - "PENDING": "#4CAF50", #PD - "RUNNING": "#3F51B5", #R - "CANCELLED": "#FFC107", #CA - "COMPLETED": "#484B45", #CD - "FAILED": "#E53935", #F + "PENDING": "#4CAF50", #PD + "RUNNING": "#3F51B5", #R + "CANCELLED": "#FFC107", #CA + "COMPLETED": "#484B45", #CD + "FAILED": "#E53935", #F } - self._job_status_watcher: Optional[AsyncPeriodicRunner] = None + self._job_status_watcher: Optional[ AsyncPeriodicRunner ] = None self._job_status_watcher_period_ms = 2000 - #define triggers - @controller.trigger("run_try_login") + @controller.trigger( "run_try_login" ) def run_try_login() -> None: # if server.state.key: - Authentificator.ssh_client = Authentificator._create_ssh_client(SimulationConstant.HOST,#test - SimulationConstant.PORT, - server.state.login, - key=Authentificator.get_key(server.state.login, server.state.password)) - - if Authentificator.ssh_client : + Authentificator.ssh_client = Authentificator._create_ssh_client( + SimulationConstant.HOST, #test + SimulationConstant.PORT, + server.state.login, + key=Authentificator.get_key( server.state.login, server.state.password ) ) + + if Authentificator.ssh_client: # id = os.environ.get('USER') # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ps aux") # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") - + # server.state.update({"access_granted" : True, "key_path" : f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame" }) # server.state.flush() server.state.access_granted = True - print("login login login") + print( "login login login" ) @staticmethod - def gen_tree(xml_filename): + def gen_tree( xml_filename ): import re - xml_pattern = re.compile(r"\.xml$", re.IGNORECASE) - mesh_pattern = re.compile(r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE) - table_pattern = re.compile(r"\.(txt|dat|csv|geos)$", re.IGNORECASE) + xml_pattern = re.compile( r"\.xml$", re.IGNORECASE ) + mesh_pattern = re.compile( r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE ) + table_pattern = re.compile( r"\.(txt|dat|csv|geos)$", re.IGNORECASE ) xml_matches = [] mesh_matches = [] table_matches = [] - pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files + pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files pattern_xml_path = r"\"(.*/)*([\w\-.]+\.(?:xml))\b" pattern_mesh_path = r"\"(.*/)*([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" pattern_table_curly_path = r"((?:[\w\-/]+/)+)*([\w\-.]+\.(?:geos|csv|dat|txt))" for file in xml_filename: - if xml_pattern.search(file.get("name","")): - xml_matches.append(file) - elif mesh_pattern.search(file.get("name","")): - mesh_matches.append(file) - elif table_pattern.search(file.get("name","")): - table_matches.append(file) - - - #assume the first XML is the main xml + if xml_pattern.search( file.get( "name", "" ) ): + xml_matches.append( file ) + elif mesh_pattern.search( file.get( "name", "" ) ): + mesh_matches.append( file ) + elif table_pattern.search( file.get( "name", "" ) ): + table_matches.append( file ) + + #assume the first XML is the main xml # TODO relocate - xml_expected_file_matches = re.findall(pattern_file, xml_matches[0]['content'].decode("utf-8")) - test_assert = {item.get("name") for item in xml_filename}.intersection(set(xml_expected_file_matches)) + xml_expected_file_matches = re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) + test_assert = { item.get( "name" ) + for item in xml_filename }.intersection( set( xml_expected_file_matches ) ) - decoded = re.sub(pattern_xml_path,r'"\2', xml_matches[0]['content'].decode("utf-8")) - decoded = re.sub(pattern_mesh_path,r'"mesh/\2', decoded) - decoded = re.sub(pattern_table_curly_path,r"tables/\2", decoded) - - xml_matches[0]['content'] = decoded.encode("utf-8") + decoded = re.sub( pattern_xml_path, r'"\2', xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) + decoded = re.sub( pattern_mesh_path, r'"mesh/\2', decoded ) + decoded = re.sub( pattern_table_curly_path, r"tables/\2", decoded ) + xml_matches[ 0 ][ 'content' ] = decoded.encode( "utf-8" ) file_tree = { - 'root' : '.', - "structure": { - "files" : xml_matches, - "subfolders": { - "mesh": mesh_matches, - "tables": table_matches + 'root': '.', + "structure": { + "files": xml_matches, + "subfolders": { + "mesh": mesh_matches, + "tables": table_matches + } } } - } return file_tree + @controller.trigger( "run_simulation" ) + def run_simulation() -> None: - @controller.trigger("run_simulation") - def run_simulation()-> None: - # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: if server.state.access_granted and server.state.simulation_xml_filename: - template = Template(template_str) + template = Template( template_str ) # sdi = server.state.sd - ci ={'nodes': 1 , 'total_ranks': 2 } + ci = { 'nodes': 1, 'total_ranks': 2 } #TODO profile to use the correct amount - rendered = template.render(job_name=server.state.simulation_job_name, - input_file=[ item for item in server.state.simulation_xml_filename if item.get('type') == 'text/xml' ][0].get('name'), - nodes= ci['nodes'], ntasks=ci['total_ranks'], mem=f"0", - comment_gr=server.state.slurm_comment, partition='p4_dev', account='myaccount' ) - + rendered = template.render( job_name=server.state.simulation_job_name, + input_file=[ + item for item in server.state.simulation_xml_filename + if item.get( 'type' ) == 'text/xml' + ][ 0 ].get( 'name' ), + nodes=ci[ 'nodes' ], + ntasks=ci[ 'total_ranks' ], + mem=f"0", + comment_gr=server.state.slurm_comment, + partition='p4_dev', + account='myaccount' ) + if Authentificator.ssh_client: #write slurm directly on remote try: sftp = Authentificator.ssh_client.open_sftp() - remote_path = Path(server.state.simulation_remote_path)/Path('job.slurm') - with sftp.file( str(remote_path),'w' ) as f: - f.write(rendered) + remote_path = Path( server.state.simulation_remote_path ) / Path( 'job.slurm' ) + with sftp.file( str( remote_path ), 'w' ) as f: + f.write( rendered ) # except FileExistsError: - # print(f"Error: Local file '{remote_path}' not found.") + # print(f"Error: Local file '{remote_path}' not found.") except PermissionError as e: - print(f"Permission error: {e}") + print( f"Permission error: {e}" ) except IOError as e: - print(f"Error accessing remote file or path: {e}") + print( f"Error accessing remote file or path: {e}" ) except Exception as e: - print(f"An error occurred during SFTP: {e}") + print( f"An error occurred during SFTP: {e}" ) - Authentificator._sftp_copy_tree(Authentificator.ssh_client, - gen_tree(server.state.simulation_xml_filename), - server.state.simulation_remote_path) + Authentificator._sftp_copy_tree( Authentificator.ssh_client, + gen_tree( server.state.simulation_xml_filename ), + server.state.simulation_remote_path ) - - _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, - f'cd {server.state.simulation_remote_path} && sbatch job.slurm') - + _, sout, serr = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch job.slurm' ) - #TODO encapsulate job_lines = sout.strip() - job_id = re.search(r"Submitted batch job (\d+)", job_lines) + job_id = re.search( r"Submitted batch job (\d+)", job_lines ) + + server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) - server.state.job_ids.append({'job_id': job_id[1]}) - self.start_result_streams() - - Authentificator._transfer_file_sftp(Authentificator.ssh_client, - remote_path=f'{server.state.simulation_remote_path}/log.out', - local_path=f'{server.state.simulation_dl_path}/dl.test', - direction="get") + Authentificator._transfer_file_sftp( Authentificator.ssh_client, + remote_path=f'{server.state.simulation_remote_path}/log.out', + local_path=f'{server.state.simulation_dl_path}/dl.test', + direction="get" ) else: raise paramiko.SSHException - - @controller.trigger("kill_all_simulations") - def kill_all_simulations()->None: + @controller.trigger( "kill_all_simulations" ) + def kill_all_simulations() -> None: # exec scancel jobid for jobs in server.state.job_ids: - Authentificator.kill_job(jobs['job_id']) + Authentificator.kill_job( jobs[ 'job_id' ] ) - def __del__(self): + def __del__( self ): self.stop_result_streams() - def set_status_watcher_period_ms(self, period_ms): + def set_status_watcher_period_ms( self, period_ms ): self._job_status_watcher_period_ms = period_ms if self._job_status_watcher: - self._job_status_watcher.set_period_ms(period_ms) + self._job_status_watcher.set_period_ms( period_ms ) - def _update_job_status(self) -> None: + def _update_job_status( self ) -> None: sim_info = self.get_last_user_simulation_info() - job_status = sim_info.get_simulation_status(self._sim_runner.get_running_user_jobs) - sim_path = sim_info.get_simulation_dir(job_status) + job_status = sim_info.get_simulation_status( self._sim_runner.get_running_user_jobs ) + sim_path = sim_info.get_simulation_dir( job_status ) - self._server.controller.set_simulation_status(job_status) - self._server.controller.set_simulation_time_stamp(sim_info.timestamp) + self._server.controller.set_simulation_status( job_status ) + self._server.controller.set_simulation_time_stamp( sim_info.timestamp ) - self._update_screenshot_display(sim_info.get_screenshot_path(sim_path)) - self._update_plots(sim_info.get_timeseries_path(sim_path)) + self._update_screenshot_display( sim_info.get_screenshot_path( sim_path ) ) + self._update_plots( sim_info.get_timeseries_path( sim_path ) ) # Stop results stream if job is done if job_status == SimulationStatus.DONE: @@ -674,112 +677,119 @@ def _update_job_status(self) -> None: # # return user_files[0] - def stop_result_streams(self): + def stop_result_streams( self ): if self._job_status_watcher is not None: self._job_status_watcher.stop() - def start_result_streams(self) -> None: + def start_result_streams( self ) -> None: self.stop_result_streams() - self._job_status_watcher = AsyncPeriodicRunner( - self.check_jobs, period_ms=self._job_status_watcher_period_ms - ) + self._job_status_watcher = AsyncPeriodicRunner( self.check_jobs, period_ms=self._job_status_watcher_period_ms ) - def check_jobs(self): + def check_jobs( self ): if Authentificator.ssh_client: try: jid = self._server.state.job_ids - for index,job in enumerate(jid): - job_id = job['job_id'] - _,sout, serr = Authentificator._execute_remote_command(Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader') - job_line = sout.strip().split("\n")[-1] - - jid[index]['status'] = job_line.split()[2] - if (jid[index]['status'] == 'COMPLETED'): + for index, job in enumerate( jid ): + job_id = job[ 'job_id' ] + _, sout, serr = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader' ) + job_line = sout.strip().split( "\n" )[ -1 ] + + jid[ index ][ 'status' ] = job_line.split()[ 2 ] + if ( jid[ index ][ 'status' ] == 'COMPLETED' ): # tar and copy back - Authentificator._execute_remote_command(Authentificator.ssh_client, f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/') - Authentificator._transfer_file_sftp(Authentificator.ssh_client, - f'{self._server.state.simulation_dl_path}/{job_id}.tgz', - f'{self._server.state.simulation_remote_path}/{job_id}.tgz', - direction='get') - elif (jid[index]['status'] == 'RUNNING'): + Authentificator._execute_remote_command( + Authentificator.ssh_client, + f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/' + ) + Authentificator._transfer_file_sftp( + Authentificator.ssh_client, + f'{self._server.state.simulation_dl_path}/{job_id}.tgz', + f'{self._server.state.simulation_remote_path}/{job_id}.tgz', + direction='get' ) + elif ( jid[ index ][ 'status' ] == 'RUNNING' ): # getthe completed status - pattern = re.compile(r'\((\d+(?:\.\d+)?)%\s*completed\)') - with Authentificator.ssh_client.open_sftp().file( str(Path(self._server.state.simulation_remote_path)/Path(f"job_GEOS_{job_id}.out")), "r") as f: + pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) + with Authentificator.ssh_client.open_sftp().file( + str( + Path( self._server.state.simulation_remote_path ) / + Path( f"job_GEOS_{job_id}.out" ) ), "r" ) as f: for line in f: - m = pattern.search(line) + m = pattern.search( line ) if m: - self._server.state.simulation_progress = str(m.group(1)) + self._server.state.simulation_progress = str( m.group( 1 ) ) - - jid[index]['name'] = job_line.split()[1] - print(f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n") + jid[ index ][ 'name' ] = job_line.split()[ 1 ] + print( + f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n" + ) self._server.state.job_ids = jid - self._server.state.dirty("job_ids") + self._server.state.dirty( "job_ids" ) self._server.state.flush() - + except PermissionError as e: - print(f"Permission error: {e}") + print( f"Permission error: {e}" ) except IOError as e: - print(f"Error accessing remote file or path: {e}") + print( f"Error accessing remote file or path: {e}" ) except Exception as e: - print(f"An error occurred during SFTP: {e}") + print( f"An error occurred during SFTP: {e}" ) else: return None - - def start_simulation(self) -> None: + def start_simulation( self ) -> None: state = self._server.state script_path = None try: - launcher_params = LauncherParams.from_server_state(self._server.state) + launcher_params = LauncherParams.from_server_state( self._server.state ) launcher_params.assert_is_complete() - script_path, sim_info = self._sim_runner.launch_simulation(launcher_params) - self._write_sim_info(launcher_params, sim_info) + script_path, sim_info = self._sim_runner.launch_simulation( launcher_params ) + self._write_sim_info( launcher_params, sim_info ) self.start_result_streams() state.simulation_error = "" except Exception as e: - print("Error occurred: ", e) - state.simulation_error = str(e) + print( "Error occurred: ", e ) + state.simulation_error = str( e ) finally: state.avoid_rewriting = False - if isinstance(script_path, Path) and script_path.is_file(): - os.remove(script_path) + if isinstance( script_path, Path ) and script_path.is_file(): + os.remove( script_path ) - def _write_sim_info(self, launcher_params: LauncherParams, sim_info: Optional[SimulationInformation]) -> None: + def _write_sim_info( self, launcher_params: LauncherParams, sim_info: Optional[ SimulationInformation ] ) -> None: if sim_info is None: - raise RuntimeError("Error parsing simulation launcher output.") + raise RuntimeError( "Error parsing simulation launcher output." ) # Make sure to save the absolute path to the working directory used by the launcher in case parsed information # is a relative Path - if not Path(sim_info.working_directory).is_absolute(): - sim_info.working_directory = path_to_string( - launcher_params.simulation_files_path + "/" + sim_info.working_directory - ) - print("simulation information", sim_info) + if not Path( sim_info.working_directory ).is_absolute(): + sim_info.working_directory = path_to_string( launcher_params.simulation_files_path + "/" + + sim_info.working_directory ) + print( "simulation information", sim_info ) sim_info.user_igg = self._sim_runner.get_user_igg() - write_simulation_information_to_repo(sim_info, self._sim_info_dir) + write_simulation_information_to_repo( sim_info, self._sim_info_dir ) -def path_to_string(p: Union[str, Path]) -> str: - return Path(p).as_posix() +def path_to_string( p: Union[ str, Path ] ) -> str: + return Path( p ).as_posix() -def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: + +def write_simulation_information_to_repo( info: SimulationInformation, sim_info_path: Path ) -> Optional[ Path ]: return write_file( sim_info_path.as_posix(), - get_simulation_output_file_name(info.timestamp, info.user_igg), - json.dumps(info.to_dict()), # type: ignore + get_simulation_output_file_name( info.timestamp, info.user_igg ), + json.dumps( info.to_dict() ), # type: ignore ) -def write_file(folder_path: str, filename: str, file_content: str) -> Optional[Path]: + +def write_file( folder_path: str, filename: str, file_content: str ) -> Optional[ Path ]: try: - Path(folder_path).mkdir(exist_ok=True) - file_path = Path(f"{folder_path}/{filename}") - with open(file_path, "w") as f: - f.write(file_content) + Path( folder_path ).mkdir( exist_ok=True ) + file_path = Path( f"{folder_path}/{filename}" ) + with open( file_path, "w" ) as f: + f.write( file_content ) return file_path.absolute() except Exception as e: - print("error occurred when copying file to", folder_path, e) - return None \ No newline at end of file + print( "error occurred when copying file to", folder_path, e ) + return None diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 5840dbeb7..d2629b752 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -8,7 +8,8 @@ from trame_server import Server import sys -sys.path.insert(0,"/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src") + +sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) from geos.trame.app.core import GeosTrame diff --git a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py index 84fc4d4b3..8b85d441b 100644 --- a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py +++ b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py @@ -4,8 +4,9 @@ from trame_server import Server from trame_vuetify.widgets.vuetify3 import VCard + @unique -class SimulationStatus(Enum): +class SimulationStatus( Enum ): SCHEDULED = auto() RUNNING = auto() COMPLETING = auto() @@ -20,54 +21,55 @@ class SimulationStatusView: Simple component containing simulation status in a VCard with some coloring depending on the status. """ - def __init__(self, server: Server): - def state_name(state_str): + def __init__( self, server: Server ): + + def state_name( state_str ): return f"{type(self).__name__}_{state_str}_{id(self)}" - self._text_state = state_name("text") - self._date_state = state_name("date") - self._time_state = state_name("time") - self._color_state = state_name("color") + self._text_state = state_name( "text" ) + self._date_state = state_name( "date" ) + self._time_state = state_name( "time" ) + self._color_state = state_name( "color" ) self._state = server.state - for s in [self._text_state, self._date_state, self._time_state, self._color_state]: - self._state.client_only(s) + for s in [ self._text_state, self._date_state, self._time_state, self._color_state ]: + self._state.client_only( s ) with VCard( - classes="p-8", - style=(f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`",), + classes="p-8", + style=( f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`", ), ) as self.ui: - H3(f"{{{{{self._text_state}}}}}", style="text-align:center;") - Div(f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;") + H3( f"{{{{{self._text_state}}}}}", style="text-align:center;" ) + Div( f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;" ) - self.set_status(SimulationStatus.NOT_RUN) - self.set_time_stamp("") + self.set_status( SimulationStatus.NOT_RUN ) + self.set_time_stamp( "" ) - def set_status(self, status: SimulationStatus): - self._state[self._text_state] = status.name - self._state[self._color_state] = self.status_color(status) + def set_status( self, status: SimulationStatus ): + self._state[ self._text_state ] = status.name + self._state[ self._color_state ] = self.status_color( status ) self._state.flush() - def set_time_stamp(self, time_stamp: str): - date, time = self.split_time_stamp(time_stamp) - self._state[self._time_state] = time - self._state[self._date_state] = date + def set_time_stamp( self, time_stamp: str ): + date, time = self.split_time_stamp( time_stamp ) + self._state[ self._time_state ] = time + self._state[ self._date_state ] = date self._state.flush() @staticmethod - def split_time_stamp(time_stamp: str) -> tuple[str, str]: + def split_time_stamp( time_stamp: str ) -> tuple[ str, str ]: default_time_stamp = "", "" if not time_stamp: return default_time_stamp - time_stamp = time_stamp.split("_") - if len(time_stamp) < 2: + time_stamp = time_stamp.split( "_" ) + if len( time_stamp ) < 2: return default_time_stamp - return time_stamp[0].replace("-", "/"), time_stamp[1].split(".")[0].replace("-", ":") + return time_stamp[ 0 ].replace( "-", "/" ), time_stamp[ 1 ].split( "." )[ 0 ].replace( "-", ":" ) @staticmethod - def status_color(status: SimulationStatus) -> str: + def status_color( status: SimulationStatus ) -> str: return { SimulationStatus.DONE: "#4CAF50", SimulationStatus.RUNNING: "#3F51B5", @@ -75,4 +77,4 @@ def status_color(status: SimulationStatus) -> str: SimulationStatus.COMPLETING: "#C5E1A5", SimulationStatus.COPY_BACK: "#C5E1A5", SimulationStatus.UNKNOWN: "#E53935", - }.get(status, "#607D8B") \ No newline at end of file + }.get( status, "#607D8B" ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_status_view.py b/geos-trame/src/geos/trame/app/ui/simulation_status_view.py index 84fc4d4b3..8b85d441b 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_status_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_status_view.py @@ -4,8 +4,9 @@ from trame_server import Server from trame_vuetify.widgets.vuetify3 import VCard + @unique -class SimulationStatus(Enum): +class SimulationStatus( Enum ): SCHEDULED = auto() RUNNING = auto() COMPLETING = auto() @@ -20,54 +21,55 @@ class SimulationStatusView: Simple component containing simulation status in a VCard with some coloring depending on the status. """ - def __init__(self, server: Server): - def state_name(state_str): + def __init__( self, server: Server ): + + def state_name( state_str ): return f"{type(self).__name__}_{state_str}_{id(self)}" - self._text_state = state_name("text") - self._date_state = state_name("date") - self._time_state = state_name("time") - self._color_state = state_name("color") + self._text_state = state_name( "text" ) + self._date_state = state_name( "date" ) + self._time_state = state_name( "time" ) + self._color_state = state_name( "color" ) self._state = server.state - for s in [self._text_state, self._date_state, self._time_state, self._color_state]: - self._state.client_only(s) + for s in [ self._text_state, self._date_state, self._time_state, self._color_state ]: + self._state.client_only( s ) with VCard( - classes="p-8", - style=(f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`",), + classes="p-8", + style=( f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`", ), ) as self.ui: - H3(f"{{{{{self._text_state}}}}}", style="text-align:center;") - Div(f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;") + H3( f"{{{{{self._text_state}}}}}", style="text-align:center;" ) + Div( f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;" ) - self.set_status(SimulationStatus.NOT_RUN) - self.set_time_stamp("") + self.set_status( SimulationStatus.NOT_RUN ) + self.set_time_stamp( "" ) - def set_status(self, status: SimulationStatus): - self._state[self._text_state] = status.name - self._state[self._color_state] = self.status_color(status) + def set_status( self, status: SimulationStatus ): + self._state[ self._text_state ] = status.name + self._state[ self._color_state ] = self.status_color( status ) self._state.flush() - def set_time_stamp(self, time_stamp: str): - date, time = self.split_time_stamp(time_stamp) - self._state[self._time_state] = time - self._state[self._date_state] = date + def set_time_stamp( self, time_stamp: str ): + date, time = self.split_time_stamp( time_stamp ) + self._state[ self._time_state ] = time + self._state[ self._date_state ] = date self._state.flush() @staticmethod - def split_time_stamp(time_stamp: str) -> tuple[str, str]: + def split_time_stamp( time_stamp: str ) -> tuple[ str, str ]: default_time_stamp = "", "" if not time_stamp: return default_time_stamp - time_stamp = time_stamp.split("_") - if len(time_stamp) < 2: + time_stamp = time_stamp.split( "_" ) + if len( time_stamp ) < 2: return default_time_stamp - return time_stamp[0].replace("-", "/"), time_stamp[1].split(".")[0].replace("-", ":") + return time_stamp[ 0 ].replace( "-", "/" ), time_stamp[ 1 ].split( "." )[ 0 ].replace( "-", ":" ) @staticmethod - def status_color(status: SimulationStatus) -> str: + def status_color( status: SimulationStatus ) -> str: return { SimulationStatus.DONE: "#4CAF50", SimulationStatus.RUNNING: "#3F51B5", @@ -75,4 +77,4 @@ def status_color(status: SimulationStatus) -> str: SimulationStatus.COMPLETING: "#C5E1A5", SimulationStatus.COPY_BACK: "#C5E1A5", SimulationStatus.UNKNOWN: "#E53935", - }.get(status, "#607D8B") \ No newline at end of file + }.get( status, "#607D8B" ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index ac49384f9..2b84f7bde 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -5,28 +5,30 @@ from geos.trame.app.ui.simulation_status_view import SimulationStatusView import json + class SuggestDecomposition: - def __init__(self, cluster_name, n_unknowns, job_type = 'cpu'): - + def __init__( self, cluster_name, n_unknowns, job_type='cpu' ): + # return ["P4: 1x22", "P4: 2x11"] - with open('/data/pau901/SIM_CS/04_WORKSPACE/USERS/jfranc/geosPythonPackages/geos-trame/src/geos/trame/assets/cluster.json','r') as file: - all_cluster = json.load(file) - self.selected_cluster = list(filter(lambda d: d.get('name')==cluster_name, all_cluster["clusters"]))[0] + with open( '/assets/cluster.json', 'r' ) as file: + all_cluster = json.load( file ) + self.selected_cluster = list( filter( lambda d: d.get( 'name' ) == cluster_name, + all_cluster[ "clusters" ] ) )[ 0 ] self.n_unknowns = n_unknowns self.job_type = job_type # @property # def selected_cluster(self): # return self.selected_cluster - - @staticmethod - def compute( n_unknowns, - memory_per_unknown_bytes, - node_memory_gb, - cores_per_node, - min_unknowns_per_rank=10000, - strong_scaling=True): + + @staticmethod + def compute( n_unknowns, + memory_per_unknown_bytes, + node_memory_gb, + cores_per_node, + min_unknowns_per_rank=10000, + strong_scaling=True ): """ Suggests node/rank distribution for a cluster computation. @@ -45,166 +47,158 @@ def compute( n_unknowns, - Don't oversubscribe: avoid using more ranks than provide parallel efficiency """ - + # Memory constraint node_memory_bytes = node_memory_gb * 1e9 - max_unknowns_per_node = int(0.8 * node_memory_bytes / memory_per_unknown_bytes) - + max_unknowns_per_node = int( 0.8 * node_memory_bytes / memory_per_unknown_bytes ) + # Compute minimum nodes needed - min_nodes = max(1, (n_unknowns + max_unknowns_per_node - 1) // max_unknowns_per_node) - + min_nodes = max( 1, ( n_unknowns + max_unknowns_per_node - 1 ) // max_unknowns_per_node ) + # Determine ranks per node unknowns_per_node = n_unknowns // min_nodes - unknowns_per_rank = max(min_unknowns_per_rank, unknowns_per_node // cores_per_node) - + unknowns_per_rank = max( min_unknowns_per_rank, unknowns_per_node // cores_per_node ) + # Calculate total ranks needed - n_ranks = max(1, n_unknowns // unknowns_per_rank) - + n_ranks = max( 1, n_unknowns // unknowns_per_rank ) + # Distribute across nodes - ranks_per_node = min(cores_per_node, (n_ranks + min_nodes - 1) // min_nodes) - n_nodes = (n_ranks + ranks_per_node - 1) // ranks_per_node - + ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes ) + n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node return { 'nodes': n_nodes, 'ranks_per_node': ranks_per_node, 'total_ranks': n_nodes * ranks_per_node, - 'unknowns_per_rank': n_unknowns // (n_nodes * ranks_per_node) + 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) } - - def to_list(self): + def to_list( self ): - if self.job_type == 'cpu': #make it an enum - sd = SuggestDecomposition.compute(self.n_unknowns, - 64, - self.selected_cluster['mem_per_node'], - self.selected_cluster['cpu']['per_node'] - ) + if self.job_type == 'cpu': #make it an enum + sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster[ 'mem_per_node' ], + self.selected_cluster[ 'cpu' ][ 'per_node' ] ) # elif job_type == 'gpu': - # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] - - - return [ f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] + # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] - + return [ + f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", + f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" + ] -def define_simulation_view(server) -> None: +def define_simulation_view( server ) -> None: - @server.state.change("simulation_xml_temp") - def on_temp_change(simulation_xml_temp : list, **_): + @server.state.change( "simulation_xml_temp" ) + def on_temp_change( simulation_xml_temp: list, **_ ): current_list = server.state.simulation_xml_filename - new_list = current_list + simulation_xml_temp + new_list = current_list + simulation_xml_temp server.state.simulation_xml_filename = new_list server.state.simulation_xml_temp = [] - @server.state.change("simulation_xml_filename") - def on_simfiles_change(simulation_xml_filename : list, **_): + @server.state.change( "simulation_xml_filename" ) + def on_simfiles_change( simulation_xml_filename: list, **_ ): import re - pattern = re.compile(r"\.xml$", re.IGNORECASE) - has_xml = any(pattern.search(file if isinstance(file, str) else file.get("name", "")) for file in simulation_xml_filename) + pattern = re.compile( r"\.xml$", re.IGNORECASE ) + has_xml = any( + pattern.search( file if isinstance( file, str ) else file.get( "name", "" ) ) + for file in simulation_xml_filename ) server.state.is_valid_jobfiles = has_xml - - - def kill_job(index_to_remove : int) -> None: + + def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml - jid = list(server.state.job_ids) - if 0 <= index_to_remove < len(jid): - # 1. Supprimer l'élément de la copie de la liste - removed_id = jid[index_to_remove]['job_id'] - Authentificator.kill_job(removed_id) - del jid[index_to_remove] - - server.state.job_ids = jid - print(f"Job {removed_id} kill. Still running: {len(jid)}") - else: - print(f"Error: supress index does not exist ({index_to_remove}).") - - - def run_remove_jobfile(index_to_remove : int) -> None: - current_files = list(server.state.simulation_xml_filename) - if 0 <= index_to_remove < len(current_files): - del current_files[index_to_remove] - - server.state.simulation_xml_filename = current_files - print(f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}") - else: - print(f"Erreur: Index de suppression invalide ({index_to_remove}).") - + jid = list( server.state.job_ids ) + if 0 <= index_to_remove < len( jid ): + # 1. Supprimer l'élément de la copie de la liste + removed_id = jid[ index_to_remove ][ 'job_id' ] + Authentificator.kill_job( removed_id ) + del jid[ index_to_remove ] + + server.state.job_ids = jid + print( f"Job {removed_id} kill. Still running: {len(jid)}" ) + else: + print( f"Error: supress index does not exist ({index_to_remove})." ) + + def run_remove_jobfile( index_to_remove: int ) -> None: + current_files = list( server.state.simulation_xml_filename ) + if 0 <= index_to_remove < len( current_files ): + del current_files[ index_to_remove ] + + server.state.simulation_xml_filename = current_files + print( f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}" ) + else: + print( f"Erreur: Index de suppression invalide ({index_to_remove})." ) with vuetify.VContainer(): with vuetify.VRow(): - with vuetify.VCol(cols=4): - vuetify.VTextField( - v_model=("login", None,), - label="Login", - dense=True, - hide_details=True, - clearable=True, - prepend_icon="mdi-login" - ) - with vuetify.VCol(cols=4): - vuetify.VTextField( - v_model=("password", None,), - label="Password", - type="password", - dense=True, - hide_details=True, - clearable=True, - prepend_icon="mdi-onepassword" - ) - - # + with vuetify.VCol( cols=4 ): + vuetify.VTextField( v_model=( + "login", + None, + ), + label="Login", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-login" ) + with vuetify.VCol( cols=4 ): + vuetify.VTextField( v_model=( + "password", + None, + ), + label="Password", + type="password", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-onepassword" ) + + # server.state.access_granted = False server.state.is_valid_jobfiles = False - server.state.simulation_xml_filename = [ ] + server.state.simulation_xml_filename = [] - sd = SuggestDecomposition('p4', 12) + sd = SuggestDecomposition( 'p4', 12 ) items = sd.to_list() - vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") - with vuetify.VCol(cols=2): - vuetify.VSelect(label="Cluster", - items=("items",items)) + vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) + with vuetify.VCol( cols=2 ): + vuetify.VSelect( label="Cluster", items=( "items", items ) ) with vuetify.VRow(): - with vuetify.VCol(cols=8): - vuetify.VTextField( - v_model=("key_path", None,), - label="Path to ssh key", - dense=True, - hide_details=True, - clearable=True, - prepend_icon="mdi-key-chain-variant" - ) - + with vuetify.VCol( cols=8 ): + vuetify.VTextField( v_model=( + "key_path", + None, + ), + label="Path to ssh key", + dense=True, + hide_details=True, + clearable=True, + prepend_icon="mdi-key-chain-variant" ) + # - vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") - with vuetify.VCol(cols=1): - vuetify.VBtn("Log in", - click="trigger('run_try_login')", - disabled=("access_granted",) - ) # type: ignore + vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) + with vuetify.VCol( cols=1 ): + vuetify.VBtn( "Log in", click="trigger('run_try_login')", + disabled=( "access_granted", ) ) # type: ignore # - vuetify.VDivider(vertical=True, thickness=5, classes="mx-4") - with vuetify.VCol(cols=1): + vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) + with vuetify.VCol( cols=1 ): vuetify.VTextField( - v_model=("slurm_comment", "GEOS,CCS,testTrame",), - label="Comment to slurm", - dense=True, - hide_details=True, - clearable=True, - ) # type: ignore - + v_model=( "slurm_comment", ), + label="Comment to slurm", + dense=True, + hide_details=True, + clearable=True, + ) # type: ignore - - vuetify.VDivider(thickness=5, classes="my-4") + vuetify.VDivider( thickness=5, classes="my-4" ) with vuetify.VRow(): - with vuetify.VCol(cols=4): + with vuetify.VCol( cols=4 ): vuetify.VFileUpload( - v_model=("simulation_xml_temp",[]), + v_model=( "simulation_xml_temp", [] ), title="Simulation file name", density='comfortable', hide_details=True, @@ -212,74 +206,74 @@ def run_remove_jobfile(index_to_remove : int) -> None: multiple=True, filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', # readonly=True, - disabled=("!access_granted",) - ) - with vuetify.VCol(cols=4): + disabled=( "!access_granted", ) ) + with vuetify.VCol( cols=4 ): with vuetify.VList(): - with vuetify.VListItem( v_for=("(file,i) in simulation_xml_filename"), key="i", value="file", - prepend_icon="mdi-minus-circle-outline", - click=(run_remove_jobfile, "[i]") ): - vuetify.VListItemTitle( "{{ file.name }}" ) - vuetify.VListItemSubtitle("{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}") + with vuetify.VListItem( v_for=( "(file,i) in simulation_xml_filename" ), + key="i", + value="file", + prepend_icon="mdi-minus-circle-outline", + click=( run_remove_jobfile, "[i]" ) ): + vuetify.VListItemTitle( "{{ file.name }}" ) + vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) with vuetify.VRow(), vuetify.VCol(): - vuetify.VTextField( - v_model=("simulation_remote_path", "/workrd/users/l1165478/Example"), - label="Path where to write files and launch code", - prepend_icon="mdi-upload", - dense=True, - hide_details=True, - clearable=True, - disabled=("!access_granted",) - # TODO callback validation of path - ) + vuetify.VTextField( v_model=( "simulation_remote_path", ), + label="Path where to write files and launch code", + prepend_icon="mdi-upload", + dense=True, + hide_details=True, + clearable=True, + disabled=( "!access_granted", ) + # TODO callback validation of path + ) with vuetify.VRow(), vuetify.VCol(): - vuetify.VTextField( - v_model=("simulation_dl_path", "/users/l1165478/tmp/Example"), - label="Simulation download path", - dense=True, - clearable=True, - prepend_icon="mdi-download", - disabled=("!access_granted",) - # TODO callback validation of path - ) + vuetify.VTextField( v_model=( "simulation_dl_path", ), + label="Simulation download path", + dense=True, + clearable=True, + prepend_icon="mdi-download", + disabled=( "!access_granted", ) + # TODO callback validation of path + ) with vuetify.VRow(): - with vuetify.VCol(cols=4): - vuetify.VTextField( - v_model=("simulation_job_name", "geosJob"), - label="Job Name", - dense=True, - hide_details=True, - clearable=True, - disabled=("!access_granted",) - ) - + with vuetify.VCol( cols=4 ): + vuetify.VTextField( v_model=( "simulation_job_name", "geosJob" ), + label="Job Name", + dense=True, + hide_details=True, + clearable=True, + disabled=( "!access_granted", ) ) + vuetify.VSpacer() - with vuetify.VCol(cols=1): - vuetify.VBtn("Run", - click="trigger('run_simulation')", - disabled=("!is_valid_jobfiles",), - classes="ml-auto"), # type: ignore + with vuetify.VCol( cols=1 ): + vuetify.VBtn( "Run", + click="trigger('run_simulation')", + disabled=( "!is_valid_jobfiles", ), + classes="ml-auto" ), # type: ignore + vuetify.VDivider( thickness=5, classes="my-4" ) - vuetify.VDivider(thickness=5, classes="my-4") - with vuetify.VRow(): vuetify.VSpacer() - with vuetify.VCol(cols=1): - vuetify.VBtn("Kill All", click="trigger('kill_all_simulations')"), # type: ignore - + with vuetify.VCol( cols=1 ): + vuetify.VBtn( "Kill All", click="trigger('kill_all_simulations')" ), # type: ignore + color_expression = "status_colors[job_ids[i].status] || '#607D8B'" with vuetify.VRow(): - with vuetify.VCol(cols=4): + with vuetify.VCol( cols=4 ): # SimulationStatusView(server=server) with vuetify.VList(): - with vuetify.VListItem( v_for=("(jobs,i) in job_ids"), key="i", value="jobs", base_color=(color_expression,),prepend_icon="mdi-minus-circle-outline",click=(kill_job,"[i]") ): - vuetify.VListItemTitle("{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}") - vuetify.VProgressLinear(v_model=("simulation_progress","0"),) - + with vuetify.VListItem( v_for=( "(jobs,i) in job_ids" ), + key="i", + value="jobs", + base_color=( color_expression, ), + prepend_icon="mdi-minus-circle-outline", + click=( kill_job, "[i]" ) ): + vuetify.VListItemTitle( "{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}" ) + vuetify.VProgressLinear( v_model=( "simulation_progress", "0" ), ) - with vuetify.VRow(v_if="simulation_error"): - html.Div("An error occurred while running simulation :
{{simulation_error}}", style="color:red;") \ No newline at end of file + with vuetify.VRow( v_if="simulation_error" ): + html.Div( "An error occurred while running simulation :
{{simulation_error}}", style="color:red;" ) diff --git a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py index d5ad532f4..17b3df3aa 100644 --- a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py +++ b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py @@ -13,30 +13,30 @@ class AsyncPeriodicRunner: While started, runs given callback at given period. """ - def __init__(self, callback: Callable, period_ms=100): + def __init__( self, callback: Callable, period_ms=100 ): self.last_m_time = None self.callback = callback self.period_ms = period_ms self.task = None self.start() - def __del__(self): + def __del__( self ): self.stop() - def set_period_ms(self, period_ms): + def set_period_ms( self, period_ms ): self.period_ms = period_ms - def start(self): + def start( self ): self.stop() - self.task = asynchronous.create_task(self._runner()) + self.task = asynchronous.create_task( self._runner() ) - def stop(self): + def stop( self ): if not self.task: return - ensure_future(self._wait_for_cancel()) + ensure_future( self._wait_for_cancel() ) - async def _wait_for_cancel(self): + async def _wait_for_cancel( self ): """ Cancel and await cancel error for the task. If cancel is done outside async, it may raise warnings as cancelled exception may be triggered outside async @@ -52,60 +52,62 @@ async def _wait_for_cancel(self): except CancelledError: self.task = None - async def _runner(self): + async def _runner( self ): while True: self.callback() - await asyncio.sleep(self.period_ms / 1000.0) + await asyncio.sleep( self.period_ms / 1000.0 ) -class AsyncFileWatcher(AsyncPeriodicRunner): - def __init__(self, path_to_watch: Path, on_modified_callback: Callable, check_time_out_ms=100): - super().__init__(self._check_modified, check_time_out_ms) - self.path_to_watch = Path(path_to_watch) +class AsyncFileWatcher( AsyncPeriodicRunner ): + + def __init__( self, path_to_watch: Path, on_modified_callback: Callable, check_time_out_ms=100 ): + super().__init__( self._check_modified, check_time_out_ms ) + self.path_to_watch = Path( path_to_watch ) self.last_m_time = None self.on_modified_callback = on_modified_callback - def get_m_time(self): + def get_m_time( self ): if not self.path_to_watch.exists(): return None - return os.stat(self.path_to_watch).st_mtime + return os.stat( self.path_to_watch ).st_mtime - def _check_modified(self): + def _check_modified( self ): if self.get_m_time() != self.last_m_time: self.last_m_time = self.get_m_time() self.on_modified_callback() class AsyncSubprocess: + def __init__( self, args, - timeout: Union[float, None] = None, + timeout: Union[ float, None ] = None, ) -> None: self.args = args self.timeout = timeout - self._writer: Optional[TextIOWrapper] = None + self._writer: Optional[ TextIOWrapper ] = None - self.stdout: Optional[bytes] = None - self.stderr: Optional[bytes] = None - self.process: Optional[asyncio.subprocess.Process] = None - self.exception: Optional[RuntimeError] = None + self.stdout: Optional[ bytes ] = None + self.stderr: Optional[ bytes ] = None + self.process: Optional[ asyncio.subprocess.Process ] = None + self.exception: Optional[ RuntimeError ] = None - async def run(self) -> None: - cmd = " ".join(map(str, self.args)) - self.process = await self._init_subprocess(cmd) + async def run( self ) -> None: + cmd = " ".join( map( str, self.args ) ) + self.process = await self._init_subprocess( cmd ) try: - self.stdout, self.stderr = await asyncio.wait_for(self.process.communicate(), timeout=self.timeout) + self.stdout, self.stderr = await asyncio.wait_for( self.process.communicate(), timeout=self.timeout ) except asyncio.exceptions.TimeoutError: self.process.kill() self.stdout, self.stderr = await self.process.communicate() - self.exception = RuntimeError("Process timed out") + self.exception = RuntimeError( "Process timed out" ) finally: if self.process.returncode != 0: - self.exception = RuntimeError(f"Process exited with code {self.process.returncode}") + self.exception = RuntimeError( f"Process exited with code {self.process.returncode}" ) - async def _init_subprocess(self, cmd: str) -> asyncio.subprocess.Process: + async def _init_subprocess( self, cmd: str ) -> asyncio.subprocess.Process: return await asyncio.create_subprocess_shell( cmd=cmd, stdout=asyncio.subprocess.PIPE, From 66368fcf164d0cfdfcb527bb66abcab836a6eb98 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 10 Dec 2025 18:42:20 +0100 Subject: [PATCH 38/70] discard sim_status_view --- .../trame/app/ui/simulation_status_view.py | 80 ------------------- .../src/geos/trame/app/ui/simulation_view.py | 2 - 2 files changed, 82 deletions(-) delete mode 100644 geos-trame/src/geos/trame/app/ui/simulation_status_view.py diff --git a/geos-trame/src/geos/trame/app/ui/simulation_status_view.py b/geos-trame/src/geos/trame/app/ui/simulation_status_view.py deleted file mode 100644 index 8b85d441b..000000000 --- a/geos-trame/src/geos/trame/app/ui/simulation_status_view.py +++ /dev/null @@ -1,80 +0,0 @@ -from enum import Enum, auto, unique - -from trame_client.widgets.html import H3, Div -from trame_server import Server -from trame_vuetify.widgets.vuetify3 import VCard - - -@unique -class SimulationStatus( Enum ): - SCHEDULED = auto() - RUNNING = auto() - COMPLETING = auto() - COPY_BACK = auto() - DONE = auto() - NOT_RUN = auto() - UNKNOWN = auto() - - -class SimulationStatusView: - """ - Simple component containing simulation status in a VCard with some coloring depending on the status. - """ - - def __init__( self, server: Server ): - - def state_name( state_str ): - return f"{type(self).__name__}_{state_str}_{id(self)}" - - self._text_state = state_name( "text" ) - self._date_state = state_name( "date" ) - self._time_state = state_name( "time" ) - self._color_state = state_name( "color" ) - self._state = server.state - - for s in [ self._text_state, self._date_state, self._time_state, self._color_state ]: - self._state.client_only( s ) - - with VCard( - classes="p-8", - style=( f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`", ), - ) as self.ui: - H3( f"{{{{{self._text_state}}}}}", style="text-align:center;" ) - Div( f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;" ) - - self.set_status( SimulationStatus.NOT_RUN ) - self.set_time_stamp( "" ) - - def set_status( self, status: SimulationStatus ): - self._state[ self._text_state ] = status.name - self._state[ self._color_state ] = self.status_color( status ) - self._state.flush() - - def set_time_stamp( self, time_stamp: str ): - date, time = self.split_time_stamp( time_stamp ) - self._state[ self._time_state ] = time - self._state[ self._date_state ] = date - self._state.flush() - - @staticmethod - def split_time_stamp( time_stamp: str ) -> tuple[ str, str ]: - default_time_stamp = "", "" - if not time_stamp: - return default_time_stamp - - time_stamp = time_stamp.split( "_" ) - if len( time_stamp ) < 2: - return default_time_stamp - - return time_stamp[ 0 ].replace( "-", "/" ), time_stamp[ 1 ].split( "." )[ 0 ].replace( "-", ":" ) - - @staticmethod - def status_color( status: SimulationStatus ) -> str: - return { - SimulationStatus.DONE: "#4CAF50", - SimulationStatus.RUNNING: "#3F51B5", - SimulationStatus.SCHEDULED: "#FFC107", - SimulationStatus.COMPLETING: "#C5E1A5", - SimulationStatus.COPY_BACK: "#C5E1A5", - SimulationStatus.UNKNOWN: "#E53935", - }.get( status, "#607D8B" ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 2b84f7bde..17dbef3d8 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -2,7 +2,6 @@ from trame.widgets import vuetify3 as vuetify from geos.trame.app.io.simulation import SimulationConstant, Authentificator -from geos.trame.app.ui.simulation_status_view import SimulationStatusView import json @@ -264,7 +263,6 @@ def run_remove_jobfile( index_to_remove: int ) -> None: color_expression = "status_colors[job_ids[i].status] || '#607D8B'" with vuetify.VRow(): with vuetify.VCol( cols=4 ): - # SimulationStatusView(server=server) with vuetify.VList(): with vuetify.VListItem( v_for=( "(jobs,i) in job_ids" ), key="i", From 30f67fba0c000bee20ef2aa52e77fa23308653de Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 11 Dec 2025 08:47:44 +0100 Subject: [PATCH 39/70] import error --- .../src/geos/trame/app/io/simulation.py | 87 ------------------- 1 file changed, 87 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index e07e2fb42..d18d0a611 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -2,7 +2,6 @@ from pathlib import Path from dataclasses import dataclass, field, fields from enum import Enum, unique -from geos.trame.app.ui.simulation_status_view import SimulationStatus from typing import Callable, Optional, Union import datetime from trame_server.core import Server @@ -316,43 +315,6 @@ def from_string( cls, job_str ) -> "SlurmJobStatus": except ValueError: return cls.UNKNOWN - -# TODO: dataclass_json -# @dataclass_json -@dataclass -class SimulationInformation: - - def get_simulation_status( - self, - get_running_user_jobs_f: Callable[ [], list[ tuple[ str, SlurmJobStatus ] ] ], - ) -> SimulationStatus: - """ - Returns the simulation status given the current Jobs running for the current user. - Only runs the callback if the timeseries file is not already present in the done directory. - """ - if not self.geos_job_id: - return SimulationStatus.NOT_RUN - - done_sim_path = self.get_simulation_dir( SimulationStatus.DONE ) - if self.get_timeseries_path( done_sim_path ).exists(): - return SimulationStatus.DONE - - user_jobs = get_running_user_jobs_f() - if ( self.geos_job_id, SlurmJobStatus.RUNNING ) in user_jobs: - return SimulationStatus.RUNNING - - if ( self.geos_job_id, SlurmJobStatus.COMPLETING ) in user_jobs: - return SimulationStatus.COMPLETING - - if ( self.copy_back_job_id, SlurmJobStatus.RUNNING ) in user_jobs: - return SimulationStatus.COPY_BACK - - if ( self.copy_job_id, SlurmJobStatus.RUNNING ) in user_jobs: - return SimulationStatus.SCHEDULED - - return SimulationStatus.UNKNOWN - - @dataclass class LauncherParams: simulation_files_path: Optional[ str ] = None @@ -383,32 +345,6 @@ def get_simulation_output_file_name( timestamp: str, user_name: str = "user_name return f"{user_name}_{timestamp}.json" -def parse_launcher_output( output: str ) -> SimulationInformation: - split_output = output.split( "\n" ) - - information = SimulationInformation() - information_dict = information.to_dict() # type: ignore - - content_to_parse = [ - ( "Working directory: ", "working_directory" ), - ( "1. copy job id: ", "copy_job_id" ), - ( "2. geos job id: ", "geos_job_id" ), - ( "3. copy back job id: ", "copy_back_job_id" ), - ( "Run directory: ", "run_directory" ), - ] - - for line in split_output: - for info_tuple in content_to_parse: - if info_tuple[ 0 ] in line: - split_line = line.split( info_tuple[ 0 ] ) - if len( split_line ) < 2: - continue - information_dict[ info_tuple[ 1 ] ] = split_line[ -1 ] - - information_dict[ "timestamp" ] = get_timestamp() - return SimulationInformation.from_dict( information_dict ) # type: ignore - - # def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: # return write_file( # sim_info_path.as_posix(), @@ -756,33 +692,10 @@ def start_simulation( self ) -> None: if isinstance( script_path, Path ) and script_path.is_file(): os.remove( script_path ) - def _write_sim_info( self, launcher_params: LauncherParams, sim_info: Optional[ SimulationInformation ] ) -> None: - if sim_info is None: - raise RuntimeError( "Error parsing simulation launcher output." ) - - # Make sure to save the absolute path to the working directory used by the launcher in case parsed information - # is a relative Path - if not Path( sim_info.working_directory ).is_absolute(): - sim_info.working_directory = path_to_string( launcher_params.simulation_files_path + "/" + - sim_info.working_directory ) - print( "simulation information", sim_info ) - - sim_info.user_igg = self._sim_runner.get_user_igg() - write_simulation_information_to_repo( sim_info, self._sim_info_dir ) - def path_to_string( p: Union[ str, Path ] ) -> str: return Path( p ).as_posix() - -def write_simulation_information_to_repo( info: SimulationInformation, sim_info_path: Path ) -> Optional[ Path ]: - return write_file( - sim_info_path.as_posix(), - get_simulation_output_file_name( info.timestamp, info.user_igg ), - json.dumps( info.to_dict() ), # type: ignore - ) - - def write_file( folder_path: str, filename: str, file_content: str ) -> Optional[ Path ]: try: Path( folder_path ).mkdir( exist_ok=True ) From 495070eb03f66089bc7b3b7be0278c8734561c91 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 16 Dec 2025 17:31:14 +0100 Subject: [PATCH 40/70] spliting up big block --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 83 +++ .../trame/app/io/jinja_t/p4_copyback.jinja | 14 + .../geos/trame/app/io/jinja_t/p4_slurm.jinja | 27 + .../trame/app/io/jinja_t/pine_slurm.jinja | 29 ++ .../src/geos/trame/app/io/simulation.py | 482 +++++------------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 234 +++++++++ .../geos/trame/app/ui/simulationStatusView.py | 80 --- .../src/geos/trame/app/ui/simulation_view.py | 86 +--- geos-trame/src/geos/trame/app/ui/timeline.py | 3 +- 9 files changed, 527 insertions(+), 511 deletions(-) create mode 100644 geos-trame/src/geos/trame/app/io/hpc_tools.py create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja create mode 100644 geos-trame/src/geos/trame/app/io/ssh_tools.py delete mode 100644 geos-trame/src/geos/trame/app/ui/simulationStatusView.py diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py new file mode 100644 index 000000000..4f60a3f5e --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -0,0 +1,83 @@ +import json + + + +class SuggestDecomposition: + + def __init__( self, cluster_name, n_unknowns, job_type='cpu' ): + + # return ["P4: 1x22", "P4: 2x11"] + with open( '/assets/cluster.json', 'r' ) as file: + all_cluster = json.load( file ) + self.selected_cluster = list( filter( lambda d: d.get( 'name' ) == cluster_name, + all_cluster[ "clusters" ] ) )[ 0 ] + self.n_unknowns = n_unknowns + self.job_type = job_type + + # @property + # def selected_cluster(self): + # return self.selected_cluster + + @staticmethod + def compute( n_unknowns, + memory_per_unknown_bytes, + node_memory_gb, + cores_per_node, + min_unknowns_per_rank=10000, + strong_scaling=True ): + """ + Suggests node/rank distribution for a cluster computation. + + Parameters: + - n_unknowns: total number of unknowns + - memory_per_unknown_bytes: estimated memory per unknown + - node_memory_gb: available memory per node + - cores_per_node: cores available per node + - min_unknowns_per_rank: minimum for efficiency + - strong_scaling: True if problem size is fixed + + Note: + - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers + - Use power-of-2 decompositions when possible (helps with communication patterns) + - For 3D problems, try to maintain cubic subdomains (minimizes surface-to-volume ratio, reducing communication) + - Don't oversubscribe: avoid using more ranks than provide parallel efficiency + + """ + + # Memory constraint + node_memory_bytes = node_memory_gb * 1e9 + max_unknowns_per_node = int( 0.8 * node_memory_bytes / memory_per_unknown_bytes ) + + # Compute minimum nodes needed + min_nodes = max( 1, ( n_unknowns + max_unknowns_per_node - 1 ) // max_unknowns_per_node ) + + # Determine ranks per node + unknowns_per_node = n_unknowns // min_nodes + unknowns_per_rank = max( min_unknowns_per_rank, unknowns_per_node // cores_per_node ) + + # Calculate total ranks needed + n_ranks = max( 1, n_unknowns // unknowns_per_rank ) + + # Distribute across nodes + ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes ) + n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node + + return { + 'nodes': n_nodes, + 'ranks_per_node': ranks_per_node, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) + } + + def to_list( self ): + + if self.job_type == 'cpu': #make it an enum + sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster[ 'mem_per_node' ], + self.selected_cluster[ 'cpu' ][ 'per_node' ] ) + # elif job_type == 'gpu': + # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] + + return [ + f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", + f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" + ] diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja new file mode 100644 index 000000000..52cf6dce1 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja @@ -0,0 +1,14 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name | geosCopyBack }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --err=job_GEOS_%j.err +#SBATCH --dependency=afterok:{{ dep_job_id }} + +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja new file mode 100644 index 000000000..2fd51ff9a --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja @@ -0,0 +1,27 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err + +ulimit -s unlimited +ulimit -c unlimited + +module purge +module use {{ geos_module }} + +export HDF5_USE_FILE_LOCKING=FALSE +export OMP_NUM_THREADS=1 +export EXEC={{ geosPath }} + + +srun --mpi=pmix_v3 --hint=nomultithread \ + -n {{ ntasks }} ${EXEC} \ + -o Outputs_${SLURM_JOBID} \ + -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja new file mode 100644 index 000000000..69092ea22 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja @@ -0,0 +1,29 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err + +ulimit -s unlimited +ulimit -c unlimited + +module purge +module use {{ geos_module }} +module load genesis common proxy slurm +module load gcc/11.4.1 openmpi-gcc/5.0.5 cmake/3.27.9 + +export HDF5_USE_FILE_LOCKING=FALSE +export OMP_NUM_THREADS=1 +export EXEC={{ geosPath }} + + +mpirun -mca coll_hcoll_enable 0 -x UCX_RNDV_THRESH=131072 \ + -n {{ ntasks }} ${EXEC} \ + -o Outputs_${SLURM_JOBID} \ + -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index d18d0a611..b20bed074 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -1,303 +1,80 @@ from abc import ABC, abstractmethod from pathlib import Path from dataclasses import dataclass, field, fields -from enum import Enum, unique +from enum import Enum, unique, auto from typing import Callable, Optional, Union import datetime from trame_server.core import Server from trame_server.state import State + +from geos.trame.app.io.ssh_tools import Authentificator, SimulationConstant from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner -from jinja2 import Template +from jinja2 import Environment, FileSystemLoader import paramiko import re import os + #TODO move outside -#TODO use Jinja on real launcher - - -@dataclass( frozen=True ) -class SimulationConstant: - SIMULATION_GEOS_PATH = "/workrd/users/" - HOST = "p4log01" # Only run on P4 machine - REMOTE_HOME_BASE = "/users" - PORT = 22 - SIMULATIONS_INFORMATION_FOLDER_PATH = "/workrd/users/" - SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" - - # replace by conf-file json - - -#If proxyJump are needed -# -# proxy_cmd = "ssh -W {host}:{port} proxyuser@bastion.example.com".format( -# host=ssh_host, port=ssh_port -# ) -# from paramiko import ProxyCommand -# sock = ProxyCommand(proxy_cmd) - -# client = paramiko.SSHClient() -# client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -# client.connect( -# hostname=ssh_host, -# port=ssh_port, -# username=username, -# key_filename=keyfile, -# sock=sock, # <— tunnel created by ProxyCommand -# ) # Load template from file # with open("slurm_job_template.j2") as f: # template = Template(f.read()) #TODO from private-assets -template_str = """#!/bin/sh -#SBATCH --job-name="{{ job_name }}" -#SBATCH --ntasks={{ ntasks }} -#SBATCH --partition={{ partition }} -#SBATCH --comment={{ comment_gr }} -#SBACTH --account={{ account }} -#SBATCH --nodes={{ nodes }} -#SBATCH --time={{ time | default('00:10:00') }} -#SBATCH --mem={{ mem }} -#SBATCH --output=job_GEOS_%j.out -#SBATCH --error=job_GEOS_%j.err - -ulimit -s unlimited -ulimit -c unlimited - -module purge - -export HDF5_USE_FILE_LOCKING=FALSE -export OMP_NUM_THREADS=1 - -srun --mpi=pmix_v3 --hint=nomultithread \ - -n {{ ntasks }} geos \ - -o Outputs_${SLURM_JOBID} \ - -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out - -""" - -template_cb = """#!/bin/sh -#SBATCH --job-name="{{ job_name }}" -#SBATCH --ntasks={{ ntasks }} -#SBATCH --partition={{ partition }} -#SBATCH --comment={{ comment_gr }} -#SBACTH --account={{ account }} -#SBATCH --nodes={{ nodes }} -#SBATCH --time={{ time | default('00:10:00') }} -#SBATCH --mem={{ mem }} -#SBATCH --output=job_GEOS_%j.out -#SBATCH --err=job_GEOS_%j.err -#SBATCH --dependency=afterok:{{ dep_job_id }} - -srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} - -""" - - -class Authentificator: #namespacing more than anything else - - ssh_client: Optional[ paramiko.SSHClient ] = None - - @staticmethod - def _sftp_copy_tree( ssh_client, file_tree, remote_root ): - # Connect to remote server - sftp = ssh_client.open_sftp() - - Authentificator.dfs_tree( file_tree[ "structure" ], file_tree[ "root" ], sftp=sftp, remote_root=remote_root ) - - sftp.close() - - @staticmethod - def dfs_tree( node, path, sftp, remote_root ): - - lp = Path( path ) - rp = Path( remote_root ) / lp - - if isinstance( node, list ): - for file in node: - # sftp.put(lp/Path(file), rp/Path(file)) - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) - elif isinstance( node, dict ): - if "files" in node: - for file in node[ "files" ]: - # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) - if "subfolders" in node: - for subfolder, content in node[ "subfolders" ].items(): - try: - sftp.stat( str( rp / Path( subfolder ) ) ) - except FileNotFoundError: - print( f"creating {rp/Path(subfolder)}" ) - sftp.mkdir( str( rp / Path( subfolder ) ) ) - Authentificator.dfs_tree( content, lp / Path( subfolder ), sftp, remote_root ) - - for folder, content in node.items(): - if folder not in [ "files", "subfolders" ]: - try: - sftp.stat( str( rp / Path( folder ) ) ) - except FileNotFoundError: - print( f"creating {rp/Path(folder)}" ) - sftp.mkdir( str( rp / Path( folder ) ) ) - Authentificator.dfs_tree( content, lp / Path( folder ), sftp, remote_root ) - - @staticmethod - def kill_job( id ): - if Authentificator.ssh_client: - Authentificator._execute_remote_command( Authentificator.ssh_client, f"scancel {id}" ) - return None - - @staticmethod - def get_key( id, pword ): - - try: - home = os.environ.get( "HOME" ) - PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) - return PRIVATE_KEY - except paramiko.SSHException as e: - print( f"Error loading private key: {e}\n" ) - except FileNotFoundError as e: - print( f"Private key not found: {e}\n Generating key ..." ) - PRIVATE_KEY = Authentificator.gen_key() - temp_client = paramiko.SSHClient() - temp_client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) - temp_client.connect( SimulationConstant.HOST, - SimulationConstant.PORT, - username=id, - password=pword, - timeout=10 ) - Authentificator._transfer_file_sftp( temp_client, f"{home}/.ssh/id_trame.pub", - f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub" ) - Authentificator._execute_remote_command( - temp_client, - f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys" - ) - - return PRIVATE_KEY - - @staticmethod - def gen_key(): - - home = os.environ.get( "HOME" ) - file_path = f"{home}/.ssh/id_trame" - key = paramiko.RSAKey.generate( bits=4096 ) - key.write_private_key_file( file_path ) - - # Get public key in OpenSSH format - public_key = f"{key.get_name()} {key.get_base64()}" - with open( file_path + ".pub", "w" ) as pub_file: - pub_file.write( public_key ) - - print( "SSH key pair generated: id_trame (private), id_trame.pub (public)" ) - - return key - - @staticmethod - def _create_ssh_client( host, port, username, password=None, key=None ) -> paramiko.SSHClient: - """ - Initializes and returns an SSH client connection. - Uses context manager for automatic cleanup. - """ - client = paramiko.SSHClient() - # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) - client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) - - try: - print( f"Connecting to {host} using key-based authentication..." ) - client.connect( host, port, username, pkey=key, timeout=10 ) - - return client - except paramiko.AuthenticationException: - print( "Authentication failed. Check your credentials or key." ) - return None - except paramiko.SSHException as e: - print( f"Could not establish SSH connection: {e}" ) - return None - except Exception as e: - print( f"An unexpected error occurred: {e}" ) - return None - - @staticmethod - def _execute_remote_command( client, command ): - """ - Executes a single command on the remote server and prints the output. - """ - if not client: - return - - print( f"\n--- Executing Command: '{command}' ---" ) - try: - # Executes the command. stdin, stdout, and stderr are file-like objects. - # Ensure command ends with a newline character for some shell environments. - stdin, stdout, stderr = client.exec_command( command ) - - # Wait for the command to finish and read the output - exit_status = stdout.channel.recv_exit_status() - - # Print standard output - stdout_data = stdout.read().decode().strip() - if stdout_data: - print( "STDOUT:" ) - print( stdout_data ) - - # Print standard error (if any) - stderr_data = stderr.read().decode().strip() - if stderr_data: - print( "STDERR:" ) - print( stderr_data ) - - print( f"Command exited with status: {exit_status}" ) - return ( exit_status, stdout_data, stderr_data ) - - except Exception as e: - print( f"Error executing command: {e}" ) - return ( -1, "", "" ) - - @staticmethod - def _transfer_file_sftp( client, local_path, remote_path, direction="put" ): - """ - Transfers a file using SFTP (Secure File Transfer Protocol). - Direction can be 'put' (upload) or 'get' (download). - """ - if not client: - return - - print( f"\n--- Starting SFTP Transfer ({direction.upper()}) ---" ) - - try: - # Establish an SFTP connection session - sftp = client.open_sftp() - - if direction == "put": - print( f"Uploading '{local_path}' to '{remote_path}'..." ) - sftp.put( local_path, remote_path ) - print( "Upload complete." ) - elif direction == "get": - print( f"Downloading '{remote_path}' to '{local_path}'..." ) - sftp.get( remote_path, local_path ) - print( "Download complete." ) - else: - print( "Invalid transfer direction. Use 'put' or 'get'." ) - - sftp.close() - return True - - except FileNotFoundError: - print( f"Error: Local file '{local_path}' not found." ) - return False - except IOError as e: - print( f"Error accessing remote file or path: {e}" ) - return False - except Exception as e: - print( f"An error occurred during SFTP: {e}" ) - return False - +# template_str = """#!/bin/sh +# #SBATCH --job-name="{{ job_name }}" +# #SBATCH --ntasks={{ ntasks }} +# #SBATCH --partition={{ partition }} +# #SBATCH --comment={{ comment_gr }} +# #SBACTH --account={{ account }} +# #SBATCH --nodes={{ nodes }} +# #SBATCH --time={{ time | default('00:10:00') }} +# #SBATCH --mem={{ mem }} +# #SBATCH --output=job_GEOS_%j.out +# #SBATCH --error=job_GEOS_%j.err + +# ulimit -s unlimited +# ulimit -c unlimited + +# module purge + +# export HDF5_USE_FILE_LOCKING=FALSE +# export OMP_NUM_THREADS=1 + +# srun --mpi=pmix_v3 --hint=nomultithread \ +# -n {{ ntasks }} geos \ +# -o Outputs_${SLURM_JOBID} \ +# -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out + +# """ + +# template_cb = """#!/bin/sh +# #SBATCH --job-name="{{ job_name }}" +# #SBATCH --ntasks={{ ntasks }} +# #SBATCH --partition={{ partition }} +# #SBATCH --comment={{ comment_gr }} +# #SBACTH --account={{ account }} +# #SBATCH --nodes={{ nodes }} +# #SBATCH --time={{ time | default('00:10:00') }} +# #SBATCH --mem={{ mem }} +# #SBATCH --output=job_GEOS_%j.out +# #SBATCH --err=job_GEOS_%j.err +# #SBATCH --dependency=afterok:{{ dep_job_id }} + +# srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} + +# """ +@unique +class SimulationStatus( Enum ): + SCHEDULED = auto() + RUNNING = auto() + COMPLETING = auto() + COPY_BACK = auto() + DONE = auto() + NOT_RUN = auto() + UNKNOWN = auto() @unique class SlurmJobStatus( Enum ): @@ -451,14 +228,7 @@ def run_try_login() -> None: key=Authentificator.get_key( server.state.login, server.state.password ) ) if Authentificator.ssh_client: - # id = os.environ.get('USER') - # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ps aux") - # Authentificator._execute_remote_command(Authentificator.ssh_client, f"ls -l {SimulationConstant.REMOTE_HOME_BASE}/{id}") - - # server.state.update({"access_granted" : True, "key_path" : f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame" }) - # server.state.flush() server.state.access_granted = True - print( "login login login" ) @staticmethod def gen_tree( xml_filename ): @@ -485,10 +255,12 @@ def gen_tree( xml_filename ): table_matches.append( file ) #assume the first XML is the main xml - # TODO relocate xml_expected_file_matches = re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) + + #TODO all the needed files test_assert = { item.get( "name" ) for item in xml_filename }.intersection( set( xml_expected_file_matches ) ) + assert test_assert decoded = re.sub( pattern_xml_path, r'"\2', xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) decoded = re.sub( pattern_mesh_path, r'"mesh/\2', decoded ) @@ -496,7 +268,7 @@ def gen_tree( xml_filename ): xml_matches[ 0 ][ 'content' ] = decoded.encode( "utf-8" ) - file_tree = { + FILE_TREE = { 'root': '.', "structure": { "files": xml_matches, @@ -506,14 +278,16 @@ def gen_tree( xml_filename ): } } } - return file_tree + return FILE_TREE @controller.trigger( "run_simulation" ) def run_simulation() -> None: # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: if server.state.access_granted and server.state.simulation_xml_filename: - template = Template( template_str ) + + template = Environment(load=FileSystemLoader('jinja_t')).get_template('p4_slurm.jinja') + template_2 = Environment(load=FileSystemLoader('jinja_t')).get_template('p4_copyback.jinja') # sdi = server.state.sd ci = { 'nodes': 1, 'total_ranks': 2 } #TODO profile to use the correct amount @@ -529,6 +303,19 @@ def run_simulation() -> None: partition='p4_dev', account='myaccount' ) + rendered_2 = template_2.render( job_name=server.state.simulation_job_name, + input_file=[ + item for item in server.state.simulation_xml_filename + if item.get( 'type' ) == 'text/xml' + ][ 0 ].get( 'name' ), + nodes=ci[ 'nodes' ], + ntasks=ci[ 'total_ranks' ], + mem=f"0", + dep_job_id={runjob}, ##### HERE + comment_gr=server.state.slurm_comment, + partition='p4_transfert', + account='myaccount' ) + if Authentificator.ssh_client: #write slurm directly on remote try: @@ -536,6 +323,10 @@ def run_simulation() -> None: remote_path = Path( server.state.simulation_remote_path ) / Path( 'job.slurm' ) with sftp.file( str( remote_path ), 'w' ) as f: f.write( rendered ) + + remote_path = Path( server.state.simulation_remote_path ) / Path( 'copyback.slurm' ) + with sftp.file( str( remote_path ), 'w' ) as f: + f.write( rendered_2 ) # except FileExistsError: # print(f"Error: Local file '{remote_path}' not found.") @@ -550,22 +341,22 @@ def run_simulation() -> None: gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) - _, sout, serr = Authentificator._execute_remote_command( + #first the job + _, sout, _ = Authentificator._execute_remote_command( Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch job.slurm' ) - - #TODO encapsulate job_lines = sout.strip() job_id = re.search( r"Submitted batch job (\d+)", job_lines ) + server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) + # then the copy back (that will fail if main job failed) + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch copy_back.slurm' ) + job_lines = sout.strip() + job_id = re.search( r"Submitted batch job (\d+)", job_lines ) server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) self.start_result_streams() - Authentificator._transfer_file_sftp( Authentificator.ssh_client, - remote_path=f'{server.state.simulation_remote_path}/log.out', - local_path=f'{server.state.simulation_dl_path}/dl.test', - direction="get" ) - else: raise paramiko.SSHException @@ -628,23 +419,24 @@ def check_jobs( self ): jid = self._server.state.job_ids for index, job in enumerate( jid ): job_id = job[ 'job_id' ] - _, sout, serr = Authentificator._execute_remote_command( + _, sout, _ = Authentificator._execute_remote_command( Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader' ) job_line = sout.strip().split( "\n" )[ -1 ] jid[ index ][ 'status' ] = job_line.split()[ 2 ] - if ( jid[ index ][ 'status' ] == 'COMPLETED' ): - # tar and copy back - Authentificator._execute_remote_command( - Authentificator.ssh_client, - f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/' - ) - Authentificator._transfer_file_sftp( - Authentificator.ssh_client, - f'{self._server.state.simulation_dl_path}/{job_id}.tgz', - f'{self._server.state.simulation_remote_path}/{job_id}.tgz', - direction='get' ) - elif ( jid[ index ][ 'status' ] == 'RUNNING' ): + # OLD COPY BACK POLICY + # if ( jid[ index ][ 'status' ] == 'COMPLETED' ): + # # tar and copy back + # Authentificator._execute_remote_command( + # Authentificator.ssh_client, + # f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/' + # ) + # Authentificator._transfer_file_sftp( + # Authentificator.ssh_client, + # f'{self._server.state.simulation_dl_path}/{job_id}.tgz', + # f'{self._server.state.simulation_remote_path}/{job_id}.tgz', + # direction='get' ) + if ( jid[ index ][ 'status' ] == 'RUNNING' ): # getthe completed status pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) with Authentificator.ssh_client.open_sftp().file( @@ -673,36 +465,36 @@ def check_jobs( self ): else: return None - def start_simulation( self ) -> None: - state = self._server.state - script_path = None - try: - launcher_params = LauncherParams.from_server_state( self._server.state ) - launcher_params.assert_is_complete() - - script_path, sim_info = self._sim_runner.launch_simulation( launcher_params ) - self._write_sim_info( launcher_params, sim_info ) - self.start_result_streams() - state.simulation_error = "" - except Exception as e: - print( "Error occurred: ", e ) - state.simulation_error = str( e ) - finally: - state.avoid_rewriting = False - if isinstance( script_path, Path ) and script_path.is_file(): - os.remove( script_path ) - - -def path_to_string( p: Union[ str, Path ] ) -> str: - return Path( p ).as_posix() - -def write_file( folder_path: str, filename: str, file_content: str ) -> Optional[ Path ]: - try: - Path( folder_path ).mkdir( exist_ok=True ) - file_path = Path( f"{folder_path}/{filename}" ) - with open( file_path, "w" ) as f: - f.write( file_content ) - return file_path.absolute() - except Exception as e: - print( "error occurred when copying file to", folder_path, e ) - return None + # def start_simulation( self ) -> None: + # state = self._server.state + # script_path = None + # try: + # launcher_params = LauncherParams.from_server_state( self._server.state ) + # launcher_params.assert_is_complete() + + # script_path, sim_info = self._sim_runner.launch_simulation( launcher_params ) + # self._write_sim_info( launcher_params, sim_info ) + # self.start_result_streams() + # state.simulation_error = "" + # except Exception as e: + # print( "Error occurred: ", e ) + # state.simulation_error = str( e ) + # finally: + # state.avoid_rewriting = False + # if isinstance( script_path, Path ) and script_path.is_file(): + # os.remove( script_path ) + + +# def path_to_string( p: Union[ str, Path ] ) -> str: +# return Path( p ).as_posix() + +# def write_file( folder_path: str, filename: str, file_content: str ) -> Optional[ Path ]: +# try: +# Path( folder_path ).mkdir( exist_ok=True ) +# file_path = Path( f"{folder_path}/{filename}" ) +# with open( file_path, "w" ) as f: +# f.write( file_content ) +# return file_path.absolute() +# except Exception as e: +# print( "error occurred when copying file to", folder_path, e ) +# return None diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py new file mode 100644 index 000000000..18cff2f83 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -0,0 +1,234 @@ +from typing import Optional +from pathlib import Path +import paramiko + + +# replace by conf-file json +from dataclasses import dataclass +@dataclass( frozen=True ) +class SimulationConstant: + SIMULATION_GEOS_PATH = "/workrd/users/" + HOST = "p4log01" # Only run on P4 machine + REMOTE_HOME_BASE = "/users" + PORT = 22 + SIMULATIONS_INFORMATION_FOLDER_PATH = "/workrd/users/" + SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" + +#If proxyJump are needed +# +# proxy_cmd = "ssh -W {host}:{port} proxyuser@bastion.example.com".format( +# host=ssh_host, port=ssh_port +# ) +# from paramiko import ProxyCommand +# sock = ProxyCommand(proxy_cmd) + +# client = paramiko.SSHClient() +# client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +# client.connect( +# hostname=ssh_host, +# port=ssh_port, +# username=username, +# key_filename=keyfile, +# sock=sock, # <— tunnel created by ProxyCommand +# ) + +class Authentificator: #namespacing more than anything else + + ssh_client: Optional[ paramiko.SSHClient ] = None + + @staticmethod + def _sftp_copy_tree( ssh_client, file_tree, remote_root ): + # Connect to remote server + sftp = ssh_client.open_sftp() + + Authentificator.dfs_tree( file_tree[ "structure" ], file_tree[ "root" ], sftp=sftp, remote_root=remote_root ) + + sftp.close() + + @staticmethod + def dfs_tree( node, path, sftp, remote_root ): + + lp = Path( path ) + rp = Path( remote_root ) / lp + + if isinstance( node, list ): + for file in node: + # sftp.put(lp/Path(file), rp/Path(file)) + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) + elif isinstance( node, dict ): + if "files" in node: + for file in node[ "files" ]: + # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) + if "subfolders" in node: + for subfolder, content in node[ "subfolders" ].items(): + try: + sftp.stat( str( rp / Path( subfolder ) ) ) + except FileNotFoundError: + print( f"creating {rp/Path(subfolder)}" ) + sftp.mkdir( str( rp / Path( subfolder ) ) ) + Authentificator.dfs_tree( content, lp / Path( subfolder ), sftp, remote_root ) + + for folder, content in node.items(): + if folder not in [ "files", "subfolders" ]: + try: + sftp.stat( str( rp / Path( folder ) ) ) + except FileNotFoundError: + print( f"creating {rp/Path(folder)}" ) + sftp.mkdir( str( rp / Path( folder ) ) ) + Authentificator.dfs_tree( content, lp / Path( folder ), sftp, remote_root ) + + @staticmethod + def kill_job( id ): + if Authentificator.ssh_client: + Authentificator._execute_remote_command( Authentificator.ssh_client, f"scancel {id}" ) + return None + + @staticmethod + def get_key( id, pword ): + + try: + home = os.environ.get( "HOME" ) + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) + return PRIVATE_KEY + except paramiko.SSHException as e: + print( f"Error loading private key: {e}\n" ) + except FileNotFoundError as e: + print( f"Private key not found: {e}\n Generating key ..." ) + PRIVATE_KEY = Authentificator.gen_key() + temp_client = paramiko.SSHClient() + temp_client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) + temp_client.connect( SimulationConstant.HOST, + SimulationConstant.PORT, + username=id, + password=pword, + timeout=10 ) + Authentificator._transfer_file_sftp( temp_client, f"{home}/.ssh/id_trame.pub", + f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub" ) + Authentificator._execute_remote_command( + temp_client, + f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys" + ) + + return PRIVATE_KEY + + @staticmethod + def gen_key(): + + home = os.environ.get( "HOME" ) + file_path = f"{home}/.ssh/id_trame" + key = paramiko.RSAKey.generate( bits=4096 ) + key.write_private_key_file( file_path ) + + # Get public key in OpenSSH format + public_key = f"{key.get_name()} {key.get_base64()}" + with open( file_path + ".pub", "w" ) as pub_file: + pub_file.write( public_key ) + + print( "SSH key pair generated: id_trame (private), id_trame.pub (public)" ) + + return key + + @staticmethod + def _create_ssh_client( host, port, username, password=None, key=None ) -> paramiko.SSHClient: + """ + Initializes and returns an SSH client connection. + Uses context manager for automatic cleanup. + """ + client = paramiko.SSHClient() + # Automatically adds the hostname and new host keys to the host files (~/.ssh/known_hosts) + client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) + + try: + print( f"Connecting to {host} using key-based authentication..." ) + client.connect( host, port, username, pkey=key, timeout=10 ) + + return client + except paramiko.AuthenticationException: + print( "Authentication failed. Check your credentials or key." ) + return None + except paramiko.SSHException as e: + print( f"Could not establish SSH connection: {e}" ) + return None + except Exception as e: + print( f"An unexpected error occurred: {e}" ) + return None + + @staticmethod + def _execute_remote_command( client, command ): + """ + Executes a single command on the remote server and prints the output. + """ + if not client: + return + + print( f"\n--- Executing Command: '{command}' ---" ) + try: + # Executes the command. stdin, stdout, and stderr are file-like objects. + # Ensure command ends with a newline character for some shell environments. + stdin, stdout, stderr = client.exec_command( command ) + + # Wait for the command to finish and read the output + exit_status = stdout.channel.recv_exit_status() + + # Print standard output + stdout_data = stdout.read().decode().strip() + if stdout_data: + print( "STDOUT:" ) + print( stdout_data ) + + # Print standard error (if any) + stderr_data = stderr.read().decode().strip() + if stderr_data: + print( "STDERR:" ) + print( stderr_data ) + + print( f"Command exited with status: {exit_status}" ) + return ( exit_status, stdout_data, stderr_data ) + + except Exception as e: + print( f"Error executing command: {e}" ) + return ( -1, "", "" ) + + @staticmethod + def _transfer_file_sftp( client, local_path, remote_path, direction="put" ): + """ + Transfers a file using SFTP (Secure File Transfer Protocol). + Direction can be 'put' (upload) or 'get' (download). + """ + if not client: + return + + print( f"\n--- Starting SFTP Transfer ({direction.upper()}) ---" ) + + try: + # Establish an SFTP connection session + sftp = client.open_sftp() + + if direction == "put": + print( f"Uploading '{local_path}' to '{remote_path}'..." ) + sftp.put( local_path, remote_path ) + print( "Upload complete." ) + elif direction == "get": + print( f"Downloading '{remote_path}' to '{local_path}'..." ) + sftp.get( remote_path, local_path ) + print( "Download complete." ) + else: + print( "Invalid transfer direction. Use 'put' or 'get'." ) + + sftp.close() + return True + + except FileNotFoundError: + print( f"Error: Local file '{local_path}' not found." ) + return False + except IOError as e: + print( f"Error accessing remote file or path: {e}" ) + return False + except Exception as e: + print( f"An error occurred during SFTP: {e}" ) + return False \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py b/geos-trame/src/geos/trame/app/ui/simulationStatusView.py deleted file mode 100644 index 8b85d441b..000000000 --- a/geos-trame/src/geos/trame/app/ui/simulationStatusView.py +++ /dev/null @@ -1,80 +0,0 @@ -from enum import Enum, auto, unique - -from trame_client.widgets.html import H3, Div -from trame_server import Server -from trame_vuetify.widgets.vuetify3 import VCard - - -@unique -class SimulationStatus( Enum ): - SCHEDULED = auto() - RUNNING = auto() - COMPLETING = auto() - COPY_BACK = auto() - DONE = auto() - NOT_RUN = auto() - UNKNOWN = auto() - - -class SimulationStatusView: - """ - Simple component containing simulation status in a VCard with some coloring depending on the status. - """ - - def __init__( self, server: Server ): - - def state_name( state_str ): - return f"{type(self).__name__}_{state_str}_{id(self)}" - - self._text_state = state_name( "text" ) - self._date_state = state_name( "date" ) - self._time_state = state_name( "time" ) - self._color_state = state_name( "color" ) - self._state = server.state - - for s in [ self._text_state, self._date_state, self._time_state, self._color_state ]: - self._state.client_only( s ) - - with VCard( - classes="p-8", - style=( f"`border: 4px solid ${{{self._color_state}}}; width: 300px; margin:auto; padding: 4px;`", ), - ) as self.ui: - H3( f"{{{{{self._text_state}}}}}", style="text-align:center;" ) - Div( f"{{{{{self._date_state}}}}} {{{{{self._time_state}}}}}", style="text-align:center;" ) - - self.set_status( SimulationStatus.NOT_RUN ) - self.set_time_stamp( "" ) - - def set_status( self, status: SimulationStatus ): - self._state[ self._text_state ] = status.name - self._state[ self._color_state ] = self.status_color( status ) - self._state.flush() - - def set_time_stamp( self, time_stamp: str ): - date, time = self.split_time_stamp( time_stamp ) - self._state[ self._time_state ] = time - self._state[ self._date_state ] = date - self._state.flush() - - @staticmethod - def split_time_stamp( time_stamp: str ) -> tuple[ str, str ]: - default_time_stamp = "", "" - if not time_stamp: - return default_time_stamp - - time_stamp = time_stamp.split( "_" ) - if len( time_stamp ) < 2: - return default_time_stamp - - return time_stamp[ 0 ].replace( "-", "/" ), time_stamp[ 1 ].split( "." )[ 0 ].replace( "-", ":" ) - - @staticmethod - def status_color( status: SimulationStatus ) -> str: - return { - SimulationStatus.DONE: "#4CAF50", - SimulationStatus.RUNNING: "#3F51B5", - SimulationStatus.SCHEDULED: "#FFC107", - SimulationStatus.COMPLETING: "#C5E1A5", - SimulationStatus.COPY_BACK: "#C5E1A5", - SimulationStatus.UNKNOWN: "#E53935", - }.get( status, "#607D8B" ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 17dbef3d8..c6e6865b3 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -1,90 +1,8 @@ from trame.widgets import html from trame.widgets import vuetify3 as vuetify -from geos.trame.app.io.simulation import SimulationConstant, Authentificator -import json - - -class SuggestDecomposition: - - def __init__( self, cluster_name, n_unknowns, job_type='cpu' ): - - # return ["P4: 1x22", "P4: 2x11"] - with open( '/assets/cluster.json', 'r' ) as file: - all_cluster = json.load( file ) - self.selected_cluster = list( filter( lambda d: d.get( 'name' ) == cluster_name, - all_cluster[ "clusters" ] ) )[ 0 ] - self.n_unknowns = n_unknowns - self.job_type = job_type - - # @property - # def selected_cluster(self): - # return self.selected_cluster - - @staticmethod - def compute( n_unknowns, - memory_per_unknown_bytes, - node_memory_gb, - cores_per_node, - min_unknowns_per_rank=10000, - strong_scaling=True ): - """ - Suggests node/rank distribution for a cluster computation. - - Parameters: - - n_unknowns: total number of unknowns - - memory_per_unknown_bytes: estimated memory per unknown - - node_memory_gb: available memory per node - - cores_per_node: cores available per node - - min_unknowns_per_rank: minimum for efficiency - - strong_scaling: True if problem size is fixed - - Note: - - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers - - Use power-of-2 decompositions when possible (helps with communication patterns) - - For 3D problems, try to maintain cubic subdomains (minimizes surface-to-volume ratio, reducing communication) - - Don't oversubscribe: avoid using more ranks than provide parallel efficiency - - """ - - # Memory constraint - node_memory_bytes = node_memory_gb * 1e9 - max_unknowns_per_node = int( 0.8 * node_memory_bytes / memory_per_unknown_bytes ) - - # Compute minimum nodes needed - min_nodes = max( 1, ( n_unknowns + max_unknowns_per_node - 1 ) // max_unknowns_per_node ) - - # Determine ranks per node - unknowns_per_node = n_unknowns // min_nodes - unknowns_per_rank = max( min_unknowns_per_rank, unknowns_per_node // cores_per_node ) - - # Calculate total ranks needed - n_ranks = max( 1, n_unknowns // unknowns_per_rank ) - - # Distribute across nodes - ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes ) - n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node - - return { - 'nodes': n_nodes, - 'ranks_per_node': ranks_per_node, - 'total_ranks': n_nodes * ranks_per_node, - 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) - } - - def to_list( self ): - - if self.job_type == 'cpu': #make it an enum - sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster[ 'mem_per_node' ], - self.selected_cluster[ 'cpu' ][ 'per_node' ] ) - # elif job_type == 'gpu': - # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] - - return [ - f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", - f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" - ] - +from geos.trame.app.io.simulation import Authentificator +from geos.trame.app.io.hpc_tools import SuggestDecomposition def define_simulation_view( server ) -> None: diff --git a/geos-trame/src/geos/trame/app/ui/timeline.py b/geos-trame/src/geos/trame/app/ui/timeline.py index 6d3559f9f..aee23ef1d 100644 --- a/geos-trame/src/geos/trame/app/ui/timeline.py +++ b/geos-trame/src/geos/trame/app/ui/timeline.py @@ -1,9 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. -# SPDX-FileContributor: Lionel Untereiner +# SPDX-FileContributor: Lionel Untereiner, Jacques Franc from typing import Any -# from trame.widgets import gantt from trame.widgets import vuetify3 as vuetify from trame_simput import get_simput_manager From 4d705bf4e7978e26db1e4227548822ccd6548fe2 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 17 Dec 2025 11:21:56 +0100 Subject: [PATCH 41/70] continuig --- .../src/geos/trame/app/io/simulation.py | 112 +++++++++--------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 36 ++++-- geos-trame/src/geos/trame/assets/cluster.json | 14 ++- 3 files changed, 86 insertions(+), 76 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index b20bed074..4d867475d 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -280,81 +280,42 @@ def gen_tree( xml_filename ): } return FILE_TREE + + @controller.trigger( "run_simulation" ) def run_simulation() -> None: # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: - if server.state.access_granted and server.state.simulation_xml_filename: - - template = Environment(load=FileSystemLoader('jinja_t')).get_template('p4_slurm.jinja') - template_2 = Environment(load=FileSystemLoader('jinja_t')).get_template('p4_copyback.jinja') - # sdi = server.state.sd - ci = { 'nodes': 1, 'total_ranks': 2 } - #TODO profile to use the correct amount - rendered = template.render( job_name=server.state.simulation_job_name, - input_file=[ - item for item in server.state.simulation_xml_filename - if item.get( 'type' ) == 'text/xml' - ][ 0 ].get( 'name' ), + if server.state.access_granted and server.state.simulation_xml_filename: + if Authentificator.ssh_client: + + Authentificator._sftp_copy_tree( Authentificator.ssh_client, + gen_tree( server.state.simulation_xml_filename ), + server.state.simulation_remote_path ) + + # sdi = server.state.sd + ci = { 'nodes': 1, 'total_ranks': 2 } + run_id : int = Simulation.render_and_run('p4_slurm.jinja','job.slurm', server, + job_name=server.state.simulation_job_name, + input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml'][ 0 ].get( 'name' ), nodes=ci[ 'nodes' ], ntasks=ci[ 'total_ranks' ], mem=f"0", comment_gr=server.state.slurm_comment, partition='p4_dev', - account='myaccount' ) - - rendered_2 = template_2.render( job_name=server.state.simulation_job_name, - input_file=[ - item for item in server.state.simulation_xml_filename - if item.get( 'type' ) == 'text/xml' - ][ 0 ].get( 'name' ), + account='myaccount') + + Simulation.render_and_run('p4_copyback.jinja', 'copyback.slurm', server, + job_name=server.state.simulation_job_name, + input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), nodes=ci[ 'nodes' ], ntasks=ci[ 'total_ranks' ], mem=f"0", - dep_job_id={runjob}, ##### HERE + dep_job_id=run_id, comment_gr=server.state.slurm_comment, partition='p4_transfert', account='myaccount' ) - if Authentificator.ssh_client: - #write slurm directly on remote - try: - sftp = Authentificator.ssh_client.open_sftp() - remote_path = Path( server.state.simulation_remote_path ) / Path( 'job.slurm' ) - with sftp.file( str( remote_path ), 'w' ) as f: - f.write( rendered ) - - remote_path = Path( server.state.simulation_remote_path ) / Path( 'copyback.slurm' ) - with sftp.file( str( remote_path ), 'w' ) as f: - f.write( rendered_2 ) - - # except FileExistsError: - # print(f"Error: Local file '{remote_path}' not found.") - except PermissionError as e: - print( f"Permission error: {e}" ) - except IOError as e: - print( f"Error accessing remote file or path: {e}" ) - except Exception as e: - print( f"An error occurred during SFTP: {e}" ) - - Authentificator._sftp_copy_tree( Authentificator.ssh_client, - gen_tree( server.state.simulation_xml_filename ), - server.state.simulation_remote_path ) - - #first the job - _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch job.slurm' ) - job_lines = sout.strip() - job_id = re.search( r"Submitted batch job (\d+)", job_lines ) - server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) - - # then the copy back (that will fail if main job failed) - _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch copy_back.slurm' ) - job_lines = sout.strip() - job_id = re.search( r"Submitted batch job (\d+)", job_lines ) - server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) - self.start_result_streams() else: @@ -465,6 +426,39 @@ def check_jobs( self ): else: return None + @staticmethod + def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> int : + """Render the slurm template and run it. Return it job_id""" + + if server.state.access_granted and server.state.simulation_xml_filename: + template = Environment(load=FileSystemLoader('jinja_t')).get_template(template) + rendered = template.render(kwargs) + + if Authentificator.ssh_client: + #write slurm directly on remote + try: + sftp = Authentificator.ssh_client.open_sftp() + remote_path = Path( server.state.simulation_remote_path ) / Path( dest_name ) + with sftp.file( str( remote_path ), 'w' ) as f: + f.write( rendered ) + + # except FileExistsError: + # print(f"Error: Local file '{remote_path}' not found.") + except PermissionError as e: + print( f"Permission error: {e}" ) + except IOError as e: + print( f"Error accessing remote file or path: {e}" ) + except Exception as e: + print( f"An error occurred during SFTP: {e}" ) + + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch {dest_name}' ) + job_lines = sout.strip() + job_id = re.search( r"Submitted batch job (\d+)", job_lines ) + server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) + + return job_id[1] + # def start_simulation( self ) -> None: # state = self._server.state # script_path = None diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 18cff2f83..56e36c443 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -1,18 +1,25 @@ from typing import Optional from pathlib import Path import paramiko - +import json # replace by conf-file json from dataclasses import dataclass -@dataclass( frozen=True ) +@dataclass class SimulationConstant: - SIMULATION_GEOS_PATH = "/workrd/users/" - HOST = "p4log01" # Only run on P4 machine - REMOTE_HOME_BASE = "/users" - PORT = 22 - SIMULATIONS_INFORMATION_FOLDER_PATH = "/workrd/users/" - SIMULATION_DEFAULT_FILE_NAME = "geosDeck.xml" + name: str + host : str + port : int = 22 + remote_home_base : str + simulation_default_filename : str + simulation_default_path : str + simulation_dl_default_path : str + geos_default_version : str + simulation_information_default_path : str + n_nodes : int + cores_per_node : int + mem_per_node : int + # return ["P4: 1x22", "P4: 2x11"] #If proxyJump are needed # @@ -36,6 +43,8 @@ class Authentificator: #namespacing more than anything else ssh_client: Optional[ paramiko.SSHClient ] = None + sim_constants = SimulationConstant(**json.load(open( '/assets/cluster.json', 'r' ))) + @staticmethod def _sftp_copy_tree( ssh_client, file_tree, remote_root ): # Connect to remote server @@ -92,6 +101,7 @@ def kill_job( id ): def get_key( id, pword ): try: + import os home = os.environ.get( "HOME" ) PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) return PRIVATE_KEY @@ -102,16 +112,16 @@ def get_key( id, pword ): PRIVATE_KEY = Authentificator.gen_key() temp_client = paramiko.SSHClient() temp_client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) - temp_client.connect( SimulationConstant.HOST, - SimulationConstant.PORT, + temp_client.connect( SimulationConstant.host, + SimulationConstant.port, username=id, password=pword, timeout=10 ) Authentificator._transfer_file_sftp( temp_client, f"{home}/.ssh/id_trame.pub", - f"{SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub" ) + f"{SimulationConstant.remote_home_base}/{id}/.ssh/id_trame.pub" ) Authentificator._execute_remote_command( temp_client, - f" cat {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.REMOTE_HOME_BASE}/{id}/.ssh/authorized_keys" + f" cat {SimulationConstant.remote_home_base}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.remote_home_base}/{id}/.ssh/authorized_keys" ) return PRIVATE_KEY @@ -119,6 +129,8 @@ def get_key( id, pword ): @staticmethod def gen_key(): + import os + home = os.environ.get( "HOME" ) file_path = f"{home}/.ssh/id_trame" key = paramiko.RSAKey.generate( bits=4096 ) diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index d8bfa4e3c..3a4a23eb2 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -2,13 +2,17 @@ "clusters": [ { "name": "p4", - "simulation_default_path": "/www", - "geos_version_default": "daily_rhel", - "simulation_information_default_path": "/www", + "host": "p4log01", + "port": 22, + "remote_home_base": "/users/", "simulation_default_filename": "geosDeck.xml", + "simulation_default_path": "/workrd/users", + "simulation_dl_default_path": "/data/", + "geos_default_version": "daily_rhel", + "simulation_information_default_path": "/workrd/users", "n_nodes": 212, - "cpu": { "types": ["AMD EPYC 4th gen"], "per_node": 192 }, - "mem_per_node": 768 + "cores_per_nodes" : 192, + "mem_per_node": 747 } ] } From c14d42581f5c0b8fb39945c9496c5ed32d9e42c5 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 24 Dec 2025 17:27:20 +0100 Subject: [PATCH 42/70] wip --- .gitignore | 3 ++ geos-trame/pyproject.toml | 1 + geos-trame/src/geos/trame/app/io/hpc_tools.py | 6 ++-- .../src/geos/trame/app/io/simulation.py | 3 +- geos-trame/src/geos/trame/app/io/ssh_tools.py | 9 +++--- geos-trame/src/geos/trame/app/main.py | 3 +- geos-trame/src/geos/trame/assets/cluster.json | 28 ++++++++----------- 7 files changed, 27 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 5f08477d3..d8f712970 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,9 @@ MANIFEST *.manifest *.spec +#env +.env + # Installer logs pip-log.txt pip-delete-this-directory.txt diff --git a/geos-trame/pyproject.toml b/geos-trame/pyproject.toml index 1c823a34b..af6b3a394 100644 --- a/geos-trame/pyproject.toml +++ b/geos-trame/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "trame-matplotlib==2.0.3", "trame-components==2.4.2", "trame-gantt==0.1.5", + "python-dotenv>=1.2.1", "mpld3<0.5.11", "xsdata==24.5", "xsdata-pydantic[lxml]==24.5", diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 4f60a3f5e..824733cf9 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -1,5 +1,5 @@ import json - +import os class SuggestDecomposition: @@ -7,10 +7,10 @@ class SuggestDecomposition: def __init__( self, cluster_name, n_unknowns, job_type='cpu' ): # return ["P4: 1x22", "P4: 2x11"] - with open( '/assets/cluster.json', 'r' ) as file: + with open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) as file: all_cluster = json.load( file ) self.selected_cluster = list( filter( lambda d: d.get( 'name' ) == cluster_name, - all_cluster[ "clusters" ] ) )[ 0 ] + all_cluster ) ) self.n_unknowns = n_unknowns self.job_type = job_type diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 4d867475d..ae8cd837d 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -182,6 +182,7 @@ class SimRunner( ISimRunner ): def __init__( self, user ): super().__init__() + #TODO delete # early test self.local_upload_file = "test_upload.txt" import time @@ -203,7 +204,7 @@ def __init__( self, sim_runner: ISimRunner, server: Server, sim_info_dir: Option self._server = server controller = server.controller self._sim_runner = sim_runner - self._sim_info_dir = sim_info_dir or SimulationConstant.SIMULATIONS_INFORMATION_FOLDER_PATH + self._sim_info_dir = sim_info_dir server.state.job_ids = [] server.state.status_colors = { diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 56e36c443..2a774e0e0 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -1,15 +1,16 @@ from typing import Optional from pathlib import Path import paramiko +import os import json # replace by conf-file json from dataclasses import dataclass @dataclass class SimulationConstant: - name: str + name : str host : str - port : int = 22 + port : int remote_home_base : str simulation_default_filename : str simulation_default_path : str @@ -43,7 +44,7 @@ class Authentificator: #namespacing more than anything else ssh_client: Optional[ paramiko.SSHClient ] = None - sim_constants = SimulationConstant(**json.load(open( '/assets/cluster.json', 'r' ))) + sim_constants = SimulationConstant(**json.load(open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ))) @staticmethod def _sftp_copy_tree( ssh_client, file_tree, remote_root ): @@ -62,13 +63,11 @@ def dfs_tree( node, path, sftp, remote_root ): if isinstance( node, list ): for file in node: - # sftp.put(lp/Path(file), rp/Path(file)) with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: f.write( file.get( 'content' ) ) print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) elif isinstance( node, dict ): if "files" in node: - for file in node[ "files" ]: # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: f.write( file.get( 'content' ) ) diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index d2629b752..4e2c2f5c8 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -3,6 +3,7 @@ # SPDX-FileContributor: Lionel Untereiner from pathlib import Path from typing import Any +from dotenv import load_dotenv from trame.app import get_server # type: ignore from trame_server import Server @@ -11,9 +12,9 @@ sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) +assert load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env") from geos.trame.app.core import GeosTrame - def main( server: Server = None, **kwargs: Any ) -> None: """Main function.""" # Get or create server diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index 3a4a23eb2..bb47e1c5c 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -1,18 +1,14 @@ { - "clusters": [ - { - "name": "p4", - "host": "p4log01", - "port": 22, - "remote_home_base": "/users/", - "simulation_default_filename": "geosDeck.xml", - "simulation_default_path": "/workrd/users", - "simulation_dl_default_path": "/data/", - "geos_default_version": "daily_rhel", - "simulation_information_default_path": "/workrd/users", - "n_nodes": 212, - "cores_per_nodes" : 192, - "mem_per_node": 747 - } - ] + "name": "p4", + "host": "p4log01", + "port": 22, + "remote_home_base": "/users/", + "simulation_default_filename": "geosDeck.xml", + "simulation_default_path": "/workrd/users", + "simulation_dl_default_path": "/data/", + "geos_default_version": "daily_rhel", + "simulation_information_default_path": "/workrd/users", + "n_nodes": 212, + "cores_per_node" : 192, + "mem_per_node": 747 } From 80ebd8fcbfe2f4f0d1730c20eef25e8b294ad4d5 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 29 Dec 2025 10:49:39 +0100 Subject: [PATCH 43/70] refactor cluster entries --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 20 ++++++------ .../src/geos/trame/app/io/simulation.py | 5 +-- geos-trame/src/geos/trame/app/io/ssh_tools.py | 7 +++- .../src/geos/trame/app/ui/simulation_view.py | 13 +++++--- geos-trame/src/geos/trame/assets/cluster.json | 32 ++++++++++++++++++- 5 files changed, 59 insertions(+), 18 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 824733cf9..43c172523 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -1,16 +1,14 @@ import json import os +from geos.trame.app.io.ssh_tools import SimulationConstant, Authentificator class SuggestDecomposition: - def __init__( self, cluster_name, n_unknowns, job_type='cpu' ): + def __init__( self, selected_cluster, n_unknowns, job_type='cpu' ): # return ["P4: 1x22", "P4: 2x11"] - with open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) as file: - all_cluster = json.load( file ) - self.selected_cluster = list( filter( lambda d: d.get( 'name' ) == cluster_name, - all_cluster ) ) + self.selected_cluster = selected_cluster self.n_unknowns = n_unknowns self.job_type = job_type @@ -71,13 +69,15 @@ def compute( n_unknowns, def to_list( self ): - if self.job_type == 'cpu': #make it an enum - sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster[ 'mem_per_node' ], - self.selected_cluster[ 'cpu' ][ 'per_node' ] ) + if self.job_type == 'cpu' and self.selected_cluster: #make it an enum + sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, + self.selected_cluster.cores_per_node ) + else: + sd = {'nodes': 0, 'ranks_per_node': 0 , 'total_ranks': 0, 'unknowns_per_rank': 0} # elif job_type == 'gpu': # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] return [ - f"{self.selected_cluster['name']}: {sd['nodes']} x {sd['ranks_per_node']}", - f"{self.selected_cluster['name']}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" + f"{self.selected_cluster.name} : {sd['nodes']} x {sd['ranks_per_node']}", + f"{self.selected_cluster.name}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" ] diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index ae8cd837d..5de6dae96 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -206,6 +206,7 @@ def __init__( self, sim_runner: ISimRunner, server: Server, sim_info_dir: Option self._sim_runner = sim_runner self._sim_info_dir = sim_info_dir server.state.job_ids = [] + server.state.selected_cluster = None server.state.status_colors = { "PENDING": "#4CAF50", #PD @@ -223,8 +224,8 @@ def run_try_login() -> None: # if server.state.key: Authentificator.ssh_client = Authentificator._create_ssh_client( - SimulationConstant.HOST, #test - SimulationConstant.PORT, + SimulationConstant.host, #test + SimulationConstant.port, server.state.login, key=Authentificator.get_key( server.state.login, server.state.password ) ) diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 2a774e0e0..dc627b884 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -44,7 +44,12 @@ class Authentificator: #namespacing more than anything else ssh_client: Optional[ paramiko.SSHClient ] = None - sim_constants = SimulationConstant(**json.load(open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ))) + sim_constants = [ SimulationConstant(**item) for item in json.load(open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' )) ] + + @staticmethod + def get_cluster( name : str ): + match = next(( item for item in Authentificator.sim_constants if item.name == name ), None) + return match @staticmethod def _sftp_copy_tree( ssh_client, file_tree, remote_root ): diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index c6e6865b3..a8ced433d 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -75,12 +75,17 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.access_granted = False server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [] + server.state.selected_cluster_names = [cluster.name for cluster in Authentificator.sim_constants] + server.state.selected_cluster_name = 'local' - sd = SuggestDecomposition( 'p4', 12 ) - items = sd.to_list() + sd = SuggestDecomposition( Authentificator.get_cluster(server.state.selected_cluster_name) , 12 ) #TODO reactive + # items = sd.to_list() vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) - with vuetify.VCol( cols=2 ): - vuetify.VSelect( label="Cluster", items=( "items", items ) ) + with vuetify.VCol( cols=1 ): + vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), model_value=("selected_cluster_name",)) + vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) + with vuetify.VCol( cols=1 ): + vuetify.VSelect( label="Decomposition", items=( "decomposition", sd.to_list() ) ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index bb47e1c5c..3cc30941e 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -1,4 +1,5 @@ -{ +[ + { "name": "p4", "host": "p4log01", "port": 22, @@ -11,4 +12,33 @@ "n_nodes": 212, "cores_per_node" : 192, "mem_per_node": 747 +}, + { + "name": "pine", + "host": "pine-1", + "port": 22, + "remote_home_base": "/users/", + "simulation_default_filename": "geosDeck.xml", + "simulation_default_path": "/workrd/users", + "simulation_dl_default_path": "/data/", + "geos_default_version": "/some/path/to/geos", + "simulation_information_default_path": "/workrd/users", + "n_nodes": 212, + "cores_per_node" : 192, + "mem_per_node": 747 +}, + { + "name": "local", + "host": "127.0.0.1", + "port": 22, + "remote_home_base": "/home", + "simulation_default_filename": "geosDeck.xml", + "simulation_default_path": "/workrd/users", + "simulation_dl_default_path": "/data/", + "geos_default_version": "/some/path/to/geos", + "simulation_information_default_path": "/workrd/users", + "n_nodes": 1, + "cores_per_node" : 8, + "mem_per_node": 32 } +] From 5d88f14239adc95d6b582784a68e9279c6213a9b Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 29 Dec 2025 11:46:02 +0100 Subject: [PATCH 44/70] correct update --- .../src/geos/trame/app/ui/simulation_view.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index a8ced433d..f1ae20812 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -6,6 +6,12 @@ def define_simulation_view( server ) -> None: + @server.state.change( "selected_cluster_name") + def on_cluster_change( selected_cluster_name : str , **_): + print(selected_cluster_name) + server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster(selected_cluster_name) , 12 ).to_list()#discard 12 + + @server.state.change( "simulation_xml_temp" ) def on_temp_change( simulation_xml_temp: list, **_ ): current_list = server.state.simulation_xml_filename @@ -43,9 +49,9 @@ def run_remove_jobfile( index_to_remove: int ) -> None: del current_files[ index_to_remove ] server.state.simulation_xml_filename = current_files - print( f"Fichier à l'index {index_to_remove} supprimé. Nouveaux fichiers: {len(current_files)}" ) + print( f"File at {index_to_remove} deleted. New files: {len(current_files)}" ) else: - print( f"Erreur: Index de suppression invalide ({index_to_remove})." ) + print( f"Erreur: Wrong deletion index ({index_to_remove})." ) with vuetify.VContainer(): with vuetify.VRow(): @@ -76,16 +82,15 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [] server.state.selected_cluster_names = [cluster.name for cluster in Authentificator.sim_constants] - server.state.selected_cluster_name = 'local' + server.state.decompositions = [] - sd = SuggestDecomposition( Authentificator.get_cluster(server.state.selected_cluster_name) , 12 ) #TODO reactive # items = sd.to_list() vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): - vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), model_value=("selected_cluster_name",)) + vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), v_model=("selected_cluster_name", 'local') ) vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): - vuetify.VSelect( label="Decomposition", items=( "decomposition", sd.to_list() ) ) + vuetify.VSelect( label="Decomposition", items=( "decompositions", ), v_model=("decomposition", []) ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): From 4fe819ba5ef82eba1b560cd7566c116f4612c9e4 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 29 Dec 2025 15:42:42 +0100 Subject: [PATCH 45/70] wip --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 30 ++++--- .../src/geos/trame/app/io/simulation.py | 80 +++++++++---------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 11 ++- .../src/geos/trame/app/ui/simulation_view.py | 21 +++-- 4 files changed, 79 insertions(+), 63 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 43c172523..debe6637e 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -11,6 +11,7 @@ def __init__( self, selected_cluster, n_unknowns, job_type='cpu' ): self.selected_cluster = selected_cluster self.n_unknowns = n_unknowns self.job_type = job_type + self.sd = [] # @property # def selected_cluster(self): @@ -60,24 +61,33 @@ def compute( n_unknowns, ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes ) n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node - return { + return [{ 'nodes': n_nodes, 'ranks_per_node': ranks_per_node, 'total_ranks': n_nodes * ranks_per_node, 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) - } - - def to_list( self ): - + }, + { + 'nodes': n_nodes * 2, + 'ranks_per_node': ranks_per_node // 2, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) + },] + + def get_sd( self ): + if self.job_type == 'cpu' and self.selected_cluster: #make it an enum - sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, + self.sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, self.selected_cluster.cores_per_node ) else: - sd = {'nodes': 0, 'ranks_per_node': 0 , 'total_ranks': 0, 'unknowns_per_rank': 0} + self.sd = [{'nodes': 0, 'ranks_per_node': 0 , 'total_ranks': 0, 'unknowns_per_rank': 0 },] # elif job_type == 'gpu': # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] + return self.sd + + def to_list( self ): + sd = self.get_sd() return [ - f"{self.selected_cluster.name} : {sd['nodes']} x {sd['ranks_per_node']}", - f"{self.selected_cluster.name}: {sd['nodes'] * 2} x {sd['ranks_per_node'] // 2}" - ] + f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd + ] diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 5de6dae96..9f28367a0 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -16,12 +16,6 @@ import os -#TODO move outside - -# Load template from file -# with open("slurm_job_template.j2") as f: -# template = Template(f.read()) - #TODO from private-assets # template_str = """#!/bin/sh # #SBATCH --job-name="{{ job_name }}" @@ -224,8 +218,8 @@ def run_try_login() -> None: # if server.state.key: Authentificator.ssh_client = Authentificator._create_ssh_client( - SimulationConstant.host, #test - SimulationConstant.port, + Authentificator.get_cluster(server.state.selected_cluster_name).host, #test + Authentificator.get_cluster(server.state.selected_cluster_name).port, server.state.login, key=Authentificator.get_key( server.state.login, server.state.password ) ) @@ -296,12 +290,12 @@ def run_simulation() -> None: server.state.simulation_remote_path ) # sdi = server.state.sd - ci = { 'nodes': 1, 'total_ranks': 2 } + # ci = { 'nodes': 1, 'total_ranks': 2 } run_id : int = Simulation.render_and_run('p4_slurm.jinja','job.slurm', server, job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml'][ 0 ].get( 'name' ), - nodes=ci[ 'nodes' ], - ntasks=ci[ 'total_ranks' ], + nodes=server.state.sd[ 'nodes' ], + ntasks=server.state.sd[ 'total_ranks' ], mem=f"0", comment_gr=server.state.slurm_comment, partition='p4_dev', @@ -310,8 +304,8 @@ def run_simulation() -> None: Simulation.render_and_run('p4_copyback.jinja', 'copyback.slurm', server, job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), - nodes=ci[ 'nodes' ], - ntasks=ci[ 'total_ranks' ], + nodes=1, + ntasks=1, mem=f"0", dep_job_id=run_id, comment_gr=server.state.slurm_comment, @@ -428,38 +422,38 @@ def check_jobs( self ): else: return None - @staticmethod - def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> int : - """Render the slurm template and run it. Return it job_id""" - - if server.state.access_granted and server.state.simulation_xml_filename: - template = Environment(load=FileSystemLoader('jinja_t')).get_template(template) - rendered = template.render(kwargs) + @staticmethod + def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> int : + """Render the slurm template and run it. Return it job_id""" - if Authentificator.ssh_client: - #write slurm directly on remote - try: - sftp = Authentificator.ssh_client.open_sftp() - remote_path = Path( server.state.simulation_remote_path ) / Path( dest_name ) - with sftp.file( str( remote_path ), 'w' ) as f: - f.write( rendered ) - - # except FileExistsError: - # print(f"Error: Local file '{remote_path}' not found.") - except PermissionError as e: - print( f"Permission error: {e}" ) - except IOError as e: - print( f"Error accessing remote file or path: {e}" ) - except Exception as e: - print( f"An error occurred during SFTP: {e}" ) - - _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch {dest_name}' ) - job_lines = sout.strip() - job_id = re.search( r"Submitted batch job (\d+)", job_lines ) - server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) + if server.state.access_granted and server.state.simulation_xml_filename: + template = Environment(loader=FileSystemLoader(f'{os.getenv("TRAME_DIR")}/app/io/jinja_t')).get_template(template_name) + rendered = template.render(kwargs) - return job_id[1] + if Authentificator.ssh_client: + #write slurm directly on remote + try: + sftp = Authentificator.ssh_client.open_sftp() + remote_path = Path( server.state.simulation_remote_path ) / Path( dest_name ) + with sftp.file( str( remote_path ), 'w' ) as f: + f.write( rendered ) + + # except FileExistsError: + # print(f"Error: Local file '{remote_path}' not found.") + except PermissionError as e: + print( f"Permission error: {e}" ) + except IOError as e: + print( f"Error accessing remote file or path: {e}" ) + except Exception as e: + print( f"An error occurred during SFTP: {e}" ) + + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch {dest_name}' ) + job_lines = sout.strip() + job_id = re.search( r"Submitted batch job (\d+)", job_lines ) + server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) + + return job_id[1] # def start_simulation( self ) -> None: # state = self._server.state diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index dc627b884..cea6feda7 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -63,6 +63,9 @@ def _sftp_copy_tree( ssh_client, file_tree, remote_root ): @staticmethod def dfs_tree( node, path, sftp, remote_root ): + if path is None or remote_root is None: + return + lp = Path( path ) rp = Path( remote_root ) / lp @@ -74,9 +77,11 @@ def dfs_tree( node, path, sftp, remote_root ): elif isinstance( node, dict ): if "files" in node: # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) + files = node['files'] + for file in files: + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) if "subfolders" in node: for subfolder, content in node[ "subfolders" ].items(): try: diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index f1ae20812..cc9b94cfe 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -6,11 +6,18 @@ def define_simulation_view( server ) -> None: - @server.state.change( "selected_cluster_name") + @server.state.change( "selected_cluster_name" ) def on_cluster_change( selected_cluster_name : str , **_): print(selected_cluster_name) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster(selected_cluster_name) , 12 ).to_list()#discard 12 + @server.state.change( "decomposition" ) + def on_decomposition_selected( decomposition : str, **_): + ll = SuggestDecomposition( Authentificator.get_cluster(server.state.selected_cluster_name) , 12 ).get_sd() + if server.state.decomposition: + server.state.sd = ll[ server.state.decompositions.index(decomposition) ] + else: + server.state.sd = {'nodes': 0, 'total_ranks': 0} @server.state.change( "simulation_xml_temp" ) def on_temp_change( simulation_xml_temp: list, **_ ): @@ -82,15 +89,15 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [] server.state.selected_cluster_names = [cluster.name for cluster in Authentificator.sim_constants] - server.state.decompositions = [] + # server.state.decompositions = [] + server.state.sd = None - # items = sd.to_list() vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), v_model=("selected_cluster_name", 'local') ) vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): - vuetify.VSelect( label="Decomposition", items=( "decompositions", ), v_model=("decomposition", []) ) + vuetify.VSelect( label="Decomposition", items=( "decompositions", []), v_model=("decomposition",'') ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): @@ -113,7 +120,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): vuetify.VTextField( - v_model=( "slurm_comment", ), + v_model=( "slurm_comment", None ), label="Comment to slurm", dense=True, hide_details=True, @@ -145,7 +152,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) with vuetify.VRow(), vuetify.VCol(): - vuetify.VTextField( v_model=( "simulation_remote_path", ), + vuetify.VTextField( v_model=( "simulation_remote_path", None ), label="Path where to write files and launch code", prepend_icon="mdi-upload", dense=True, @@ -156,7 +163,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: ) with vuetify.VRow(), vuetify.VCol(): - vuetify.VTextField( v_model=( "simulation_dl_path", ), + vuetify.VTextField( v_model=( "simulation_dl_path", None ), label="Simulation download path", dense=True, clearable=True, From b85c76a728532d3073dd769047e377af313ac9f7 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 29 Dec 2025 16:53:38 +0100 Subject: [PATCH 46/70] some fixes --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 2 -- .../trame/app/io/jinja_t/p4_copyback.jinja | 4 ++-- .../geos/trame/app/io/jinja_t/p4_slurm.jinja | 6 ++--- .../src/geos/trame/app/io/simulation.py | 9 +++++--- geos-trame/src/geos/trame/app/io/ssh_tools.py | 8 ++++--- .../src/geos/trame/app/ui/simulation_view.py | 2 ++ geos-trame/src/geos/trame/assets/cluster.json | 22 ++++++++++++------- 7 files changed, 32 insertions(+), 21 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index debe6637e..246c95604 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -1,8 +1,6 @@ import json import os -from geos.trame.app.io.ssh_tools import SimulationConstant, Authentificator - class SuggestDecomposition: def __init__( self, selected_cluster, n_unknowns, job_type='cpu' ): diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja index 52cf6dce1..9db17c2b1 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja @@ -1,5 +1,5 @@ #!/bin/sh -#SBATCH --job-name="{{ job_name | geosCopyBack }}" +#SBATCH --job-name="{{ job_name }}" #SBATCH --ntasks={{ ntasks }} #SBATCH --partition={{ partition }} #SBATCH --comment={{ comment_gr }} @@ -11,4 +11,4 @@ #SBATCH --err=job_GEOS_%j.err #SBATCH --dependency=afterok:{{ dep_job_id }} -srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ log_{{ dep_job_id }}.out && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja index 2fd51ff9a..2bc3a36aa 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja @@ -15,13 +15,13 @@ ulimit -c unlimited module purge module use {{ geos_module }} +module load {{ geos_load_list }} export HDF5_USE_FILE_LOCKING=FALSE export OMP_NUM_THREADS=1 -export EXEC={{ geosPath }} - +export EXEC={{ geos_path }} srun --mpi=pmix_v3 --hint=nomultithread \ -n {{ ntasks }} ${EXEC} \ -o Outputs_${SLURM_JOBID} \ - -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out \ No newline at end of file + -i {{ input_file | default('geosDeck.xml') }} | tee log_${SLURM_JOBID}.out \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 9f28367a0..809914afa 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -292,24 +292,27 @@ def run_simulation() -> None: # sdi = server.state.sd # ci = { 'nodes': 1, 'total_ranks': 2 } run_id : int = Simulation.render_and_run('p4_slurm.jinja','job.slurm', server, - job_name=server.state.simulation_job_name, + job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml'][ 0 ].get( 'name' ), nodes=server.state.sd[ 'nodes' ], ntasks=server.state.sd[ 'total_ranks' ], + geos_module=Authentificator.get_cluster(server.state.selected_cluster_name).geos_module, + geos_load_list=" ".join(Authentificator.get_cluster(server.state.selected_cluster_name).geos_load_list), + geos_path=Authentificator.get_cluster(server.state.selected_cluster_name).geos_path, mem=f"0", comment_gr=server.state.slurm_comment, partition='p4_dev', account='myaccount') Simulation.render_and_run('p4_copyback.jinja', 'copyback.slurm', server, - job_name=server.state.simulation_job_name, + job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), nodes=1, ntasks=1, mem=f"0", dep_job_id=run_id, comment_gr=server.state.slurm_comment, - partition='p4_transfert', + partition='p4_transfer', account='myaccount' ) self.start_result_streams() diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index cea6feda7..cff70a420 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -11,11 +11,13 @@ class SimulationConstant: name : str host : str port : int - remote_home_base : str + geos_path : str + geos_module : str + geos_load_list : list + remote_home_base : str # for ssh key simulation_default_filename : str - simulation_default_path : str + simulation_remote_path : str simulation_dl_default_path : str - geos_default_version : str simulation_information_default_path : str n_nodes : int cores_per_node : int diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index cc9b94cfe..ba7aea05c 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -16,6 +16,8 @@ def on_decomposition_selected( decomposition : str, **_): ll = SuggestDecomposition( Authentificator.get_cluster(server.state.selected_cluster_name) , 12 ).get_sd() if server.state.decomposition: server.state.sd = ll[ server.state.decompositions.index(decomposition) ] + server.state.simulation_remote_path = Authentificator.get_cluster(server.state.selected_cluster_name).simulation_remote_path + server.state.simulation_dl_path = Authentificator.get_cluster(server.state.selected_cluster_name).simulation_dl_default_path else: server.state.sd = {'nodes': 0, 'total_ranks': 0} diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index 3cc30941e..83e89bdd3 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -3,11 +3,13 @@ "name": "p4", "host": "p4log01", "port": 22, - "remote_home_base": "/users/", + "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module":"/workrd/users/$USER/modulesRHEL88", + "geos_load_list":["geos-daily-rhel88"], + "remote_home_base": "/users/$USER", "simulation_default_filename": "geosDeck.xml", - "simulation_default_path": "/workrd/users", - "simulation_dl_default_path": "/data/", - "geos_default_version": "daily_rhel", + "simulation_remote_path": "/workrd/users/$USER/Example", + "simulation_dl_default_path": "/users/$USER/Example", "simulation_information_default_path": "/workrd/users", "n_nodes": 212, "cores_per_node" : 192, @@ -17,11 +19,13 @@ "name": "pine", "host": "pine-1", "port": 22, + "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module":"/workrd/users/$USER/modulesRHEL88", + "geos_load_list":["geos-daily-rhel88"], "remote_home_base": "/users/", "simulation_default_filename": "geosDeck.xml", - "simulation_default_path": "/workrd/users", + "simulation_remote_path": "/workrd/users", "simulation_dl_default_path": "/data/", - "geos_default_version": "/some/path/to/geos", "simulation_information_default_path": "/workrd/users", "n_nodes": 212, "cores_per_node" : 192, @@ -31,11 +35,13 @@ "name": "local", "host": "127.0.0.1", "port": 22, + "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module":"/workrd/users/$USER/modulesRHEL88", + "geos_load_list":["geos-daily-rhel88"], "remote_home_base": "/home", "simulation_default_filename": "geosDeck.xml", - "simulation_default_path": "/workrd/users", + "simulation_remote_path": "/workrd/users", "simulation_dl_default_path": "/data/", - "geos_default_version": "/some/path/to/geos", "simulation_information_default_path": "/workrd/users", "n_nodes": 1, "cores_per_node" : 8, From b1ef3dfe036321902828586db62729fdd194ef47 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 09:08:06 +0100 Subject: [PATCH 47/70] completion bars --- .../trame/app/io/jinja_t/p4_copyback.jinja | 3 ++- .../src/geos/trame/app/io/simulation.py | 24 ++++++++++++------- .../src/geos/trame/app/ui/simulation_view.py | 3 ++- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja index 9db17c2b1..417ac06e9 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_copyback.jinja @@ -11,4 +11,5 @@ #SBATCH --err=job_GEOS_%j.err #SBATCH --dependency=afterok:{{ dep_job_id }} -srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ log_{{ dep_job_id }}.out && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ log_{{ dep_job_id }}.out +srun mkdir -p {{ target_dl_path }} && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 809914afa..cac74a712 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -311,6 +311,7 @@ def run_simulation() -> None: ntasks=1, mem=f"0", dep_job_id=run_id, + target_dl_path=server.state.simulation_dl_path, comment_gr=server.state.slurm_comment, partition='p4_transfer', account='myaccount' ) @@ -384,6 +385,7 @@ def check_jobs( self ): job_line = sout.strip().split( "\n" )[ -1 ] jid[ index ][ 'status' ] = job_line.split()[ 2 ] + jid[ index ][ 'name' ] = job_line.split()[ 1 ] # OLD COPY BACK POLICY # if ( jid[ index ][ 'status' ] == 'COMPLETED' ): # # tar and copy back @@ -397,18 +399,22 @@ def check_jobs( self ): # f'{self._server.state.simulation_remote_path}/{job_id}.tgz', # direction='get' ) if ( jid[ index ][ 'status' ] == 'RUNNING' ): + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) + progress_line = sout.strip().split("|") + jid[ index ][ 'slprogress' ] = str( float(progress_line[0])/ float(progress_line[1]) / 60 * 100 ) + # jid[ index ][ 'simprogress' ] = "40" + # getthe completed status pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) - with Authentificator.ssh_client.open_sftp().file( - str( - Path( self._server.state.simulation_remote_path ) / - Path( f"job_GEOS_{job_id}.out" ) ), "r" ) as f: - for line in f: - m = pattern.search( line ) - if m: - self._server.state.simulation_progress = str( m.group( 1 ) ) + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f"grep \"completed\" {self._server.state.simulation_remote_path}/job_GEOS_{job_id}.out | tail -1" ) + m = pattern.search( sout.strip() ) + if m: + jid[ index ][ 'simprogress' ] = str( m.group( 1 ) ) + + - jid[ index ][ 'name' ] = job_line.split()[ 1 ] print( f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n" ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index ba7aea05c..46cd0e481 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -208,7 +208,8 @@ def run_remove_jobfile( index_to_remove: int ) -> None: prepend_icon="mdi-minus-circle-outline", click=( kill_job, "[i]" ) ): vuetify.VListItemTitle( "{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}" ) - vuetify.VProgressLinear( v_model=( "simulation_progress", "0" ), ) + vuetify.VProgressLinear( v_model=( "jobs.simprogress", "0" ), ) + vuetify.VProgressLinear( v_model=( "jobs.slprogress", "0" ), ) with vuetify.VRow( v_if="simulation_error" ): html.Div( "An error occurred while running simulation :
{{simulation_error}}", style="color:red;" ) From 1dd6f1b58d63f09c8532ae803925e664e049c666 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 10:06:52 +0100 Subject: [PATCH 48/70] fast clean up --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 5 - .../src/geos/trame/app/io/simulation.py | 224 +----------------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 1 - 3 files changed, 6 insertions(+), 224 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 246c95604..00dc55e60 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -5,16 +5,11 @@ class SuggestDecomposition: def __init__( self, selected_cluster, n_unknowns, job_type='cpu' ): - # return ["P4: 1x22", "P4: 2x11"] self.selected_cluster = selected_cluster self.n_unknowns = n_unknowns self.job_type = job_type self.sd = [] - # @property - # def selected_cluster(self): - # return self.selected_cluster - @staticmethod def compute( n_unknowns, memory_per_unknown_bytes, diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index cac74a712..8fbadf540 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -1,13 +1,10 @@ -from abc import ABC, abstractmethod +from abc import ABC from pathlib import Path -from dataclasses import dataclass, field, fields from enum import Enum, unique, auto -from typing import Callable, Optional, Union -import datetime +from typing import Optional from trame_server.core import Server -from trame_server.state import State -from geos.trame.app.io.ssh_tools import Authentificator, SimulationConstant +from geos.trame.app.io.ssh_tools import Authentificator from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner from jinja2 import Environment, FileSystemLoader @@ -15,51 +12,6 @@ import re import os - -#TODO from private-assets -# template_str = """#!/bin/sh -# #SBATCH --job-name="{{ job_name }}" -# #SBATCH --ntasks={{ ntasks }} -# #SBATCH --partition={{ partition }} -# #SBATCH --comment={{ comment_gr }} -# #SBACTH --account={{ account }} -# #SBATCH --nodes={{ nodes }} -# #SBATCH --time={{ time | default('00:10:00') }} -# #SBATCH --mem={{ mem }} -# #SBATCH --output=job_GEOS_%j.out -# #SBATCH --error=job_GEOS_%j.err - -# ulimit -s unlimited -# ulimit -c unlimited - -# module purge - -# export HDF5_USE_FILE_LOCKING=FALSE -# export OMP_NUM_THREADS=1 - -# srun --mpi=pmix_v3 --hint=nomultithread \ -# -n {{ ntasks }} geos \ -# -o Outputs_${SLURM_JOBID} \ -# -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out - -# """ - -# template_cb = """#!/bin/sh -# #SBATCH --job-name="{{ job_name }}" -# #SBATCH --ntasks={{ ntasks }} -# #SBATCH --partition={{ partition }} -# #SBATCH --comment={{ comment_gr }} -# #SBACTH --account={{ account }} -# #SBATCH --nodes={{ nodes }} -# #SBATCH --time={{ time | default('00:10:00') }} -# #SBATCH --mem={{ mem }} -# #SBATCH --output=job_GEOS_%j.out -# #SBATCH --err=job_GEOS_%j.err -# #SBATCH --dependency=afterok:{{ dep_job_id }} - -# srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} - -# """ @unique class SimulationStatus( Enum ): SCHEDULED = auto() @@ -86,105 +38,22 @@ def from_string( cls, job_str ) -> "SlurmJobStatus": except ValueError: return cls.UNKNOWN -@dataclass -class LauncherParams: - simulation_files_path: Optional[ str ] = None - simulation_cmd_filename: Optional[ str ] = None - simulation_job_name: Optional[ str ] = None - simulation_nb_process: int = 1 - - @classmethod - def from_server_state( cls, server_state: State ) -> "LauncherParams": - state = cls() - for f in fields( cls ): - setattr( state, f.name, server_state[ f.name ] ) - return state - - def is_complete( self ) -> bool: - return None not in [ getattr( self, f.name ) for f in fields( self ) ] - - def assert_is_complete( self ) -> None: - if not self.is_complete(): - raise RuntimeError( f"Incomplete simulation launch parameters : {self}." ) - - -def get_timestamp() -> str: - return datetime.utcnow().strftime( "%Y-%m-%d_%H-%M-%S.%f" )[ :-3 ] - - -def get_simulation_output_file_name( timestamp: str, user_name: str = "user_name" ): - return f"{user_name}_{timestamp}.json" - - -# def write_simulation_information_to_repo(info: SimulationInformation, sim_info_path: Path) -> Optional[Path]: -# return write_file( -# sim_info_path.as_posix(), -# get_simulation_output_file_name(info.timestamp, info.user_igg), -# json.dumps(info.to_dict()), # type: ignore -# ) - -# def get_simulation_screenshot_timestep(filename: str) -> int: -# """ -# From a given file name returns the time step. -# Filename is defined as: RenderView0_000000.png with 000000 the time step to parse and return -# """ -# if not filename: -# print("Simulation filename is not defined") -# return -1 - -# pattern = re.compile(r"RenderView[0-9]_[0-9]{6}\.png", re.IGNORECASE) -# if pattern.match(filename) is None: -# print("Simulation filename does not match the pattern: RenderView0_000000.png") -# return -1 - -# timestep = os.path.splitext(filename)[0].split("_")[-1] -# return int(timestep) if timestep else -1 - -# def get_most_recent_file_from_list(files_list: list[str]) -> Optional[str]: -# if not files_list: -# return None -# return max(files_list, key=get_simulation_screenshot_timestep) - -# def get_most_recent_simulation_screenshot(folder_path: Path) -> Optional[str]: -# return get_most_recent_file_from_list(os.listdir(folder_path)) if folder_path.exists() else None - - class ISimRunner( ABC ): """ Abstract interface for sim runner. Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. """ pass - # @abstractmethod - # def launch_simulation(self, launcher_params: LauncherParams) -> tuple[Path, SimulationInformation]: - # pass - - # @abstractmethod - # def get_user_igg(self) -> str: - # pass - - # @abstractmethod - # def get_running_user_jobs(self) -> list[tuple[str, SlurmJobStatus]]: - # pass class SimRunner( ISimRunner ): """ - Runs sim on HPC. Wrap paramiko use + Runs sim on HPC. """ def __init__( self, user ): super().__init__() - #TODO delete - # early test - self.local_upload_file = "test_upload.txt" - import time - with open( self.local_upload_file, "w" ) as f: - f.write( f"This file was uploaded at {time.ctime()}\n" ) - print( f"Created local file: {self.local_upload_file}" ) - - class Simulation: """ Simulation component. @@ -289,8 +158,6 @@ def run_simulation() -> None: gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) - # sdi = server.state.sd - # ci = { 'nodes': 1, 'total_ranks': 2 } run_id : int = Simulation.render_and_run('p4_slurm.jinja','job.slurm', server, job_name=server.state.simulation_job_name, input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml'][ 0 ].get( 'name' ), @@ -335,43 +202,12 @@ def set_status_watcher_period_ms( self, period_ms ): if self._job_status_watcher: self._job_status_watcher.set_period_ms( period_ms ) - def _update_job_status( self ) -> None: - sim_info = self.get_last_user_simulation_info() - job_status = sim_info.get_simulation_status( self._sim_runner.get_running_user_jobs ) - sim_path = sim_info.get_simulation_dir( job_status ) - - self._server.controller.set_simulation_status( job_status ) - self._server.controller.set_simulation_time_stamp( sim_info.timestamp ) - - self._update_screenshot_display( sim_info.get_screenshot_path( sim_path ) ) - self._update_plots( sim_info.get_timeseries_path( sim_path ) ) - - # Stop results stream if job is done - if job_status == SimulationStatus.DONE: - self.stop_result_streams() - - # TODO: might be useful for history - # - # def get_last_user_simulation_info(self) -> SimulationInformation: - # last_sim_information = self.get_last_information_path() - # return SimulationInformation.from_file(last_sim_information) - - # def get_last_information_path(self) -> Optional[Path]: - # user_igg = self._sim_runner.get_user_igg() - - # user_files = list(reversed(sorted(self._sim_info_dir.glob(f"{user_igg}*.json")))) - # if not user_files: - # return None - # - # return user_files[0] - def stop_result_streams( self ): if self._job_status_watcher is not None: self._job_status_watcher.stop() def start_result_streams( self ) -> None: self.stop_result_streams() - self._job_status_watcher = AsyncPeriodicRunner( self.check_jobs, period_ms=self._job_status_watcher_period_ms ) def check_jobs( self ): @@ -386,24 +222,12 @@ def check_jobs( self ): jid[ index ][ 'status' ] = job_line.split()[ 2 ] jid[ index ][ 'name' ] = job_line.split()[ 1 ] - # OLD COPY BACK POLICY - # if ( jid[ index ][ 'status' ] == 'COMPLETED' ): - # # tar and copy back - # Authentificator._execute_remote_command( - # Authentificator.ssh_client, - # f'cd {self._server.state.simulation_remote_path} && tar cvfz {job_id}.tgz Outputs_{job_id}/' - # ) - # Authentificator._transfer_file_sftp( - # Authentificator.ssh_client, - # f'{self._server.state.simulation_dl_path}/{job_id}.tgz', - # f'{self._server.state.simulation_remote_path}/{job_id}.tgz', - # direction='get' ) + if ( jid[ index ][ 'status' ] == 'RUNNING' ): _, sout, _ = Authentificator._execute_remote_command( Authentificator.ssh_client, f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) progress_line = sout.strip().split("|") jid[ index ][ 'slprogress' ] = str( float(progress_line[0])/ float(progress_line[1]) / 60 * 100 ) - # jid[ index ][ 'simprogress' ] = "40" # getthe completed status pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) @@ -447,8 +271,6 @@ def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> in with sftp.file( str( remote_path ), 'w' ) as f: f.write( rendered ) - # except FileExistsError: - # print(f"Error: Local file '{remote_path}' not found.") except PermissionError as e: print( f"Permission error: {e}" ) except IOError as e: @@ -462,38 +284,4 @@ def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> in job_id = re.search( r"Submitted batch job (\d+)", job_lines ) server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) - return job_id[1] - - # def start_simulation( self ) -> None: - # state = self._server.state - # script_path = None - # try: - # launcher_params = LauncherParams.from_server_state( self._server.state ) - # launcher_params.assert_is_complete() - - # script_path, sim_info = self._sim_runner.launch_simulation( launcher_params ) - # self._write_sim_info( launcher_params, sim_info ) - # self.start_result_streams() - # state.simulation_error = "" - # except Exception as e: - # print( "Error occurred: ", e ) - # state.simulation_error = str( e ) - # finally: - # state.avoid_rewriting = False - # if isinstance( script_path, Path ) and script_path.is_file(): - # os.remove( script_path ) - - -# def path_to_string( p: Union[ str, Path ] ) -> str: -# return Path( p ).as_posix() - -# def write_file( folder_path: str, filename: str, file_content: str ) -> Optional[ Path ]: -# try: -# Path( folder_path ).mkdir( exist_ok=True ) -# file_path = Path( f"{folder_path}/{filename}" ) -# with open( file_path, "w" ) as f: -# f.write( file_content ) -# return file_path.absolute() -# except Exception as e: -# print( "error occurred when copying file to", folder_path, e ) -# return None + return job_id[1] \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index cff70a420..268c9ed4f 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -22,7 +22,6 @@ class SimulationConstant: n_nodes : int cores_per_node : int mem_per_node : int - # return ["P4: 1x22", "P4: 2x11"] #If proxyJump are needed # From b02fa3d9fc9ebaa40801a45cfdf6d332dd13c1e8 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 10:41:59 +0100 Subject: [PATCH 49/70] update --- geos-trame/README.rst | 16 ++++++++++ .../trame/app/io/jinja_t/local_copyback.jinja | 15 +++++++++ .../trame/app/io/jinja_t/local_slurm.jinja | 27 ++++++++++++++++ .../trame/app/io/jinja_t/pine_copyback.jinja | 15 +++++++++ .../trame/app/io/jinja_t/pine_slurm.jinja | 6 ++-- geos-trame/src/geos/trame/assets/cluster.json | 32 +++++++++---------- 6 files changed, 91 insertions(+), 20 deletions(-) create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/local_copyback.jinja create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/local_slurm.jinja create mode 100644 geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja diff --git a/geos-trame/README.rst b/geos-trame/README.rst index a1c54deb2..74ca61d26 100644 --- a/geos-trame/README.rst +++ b/geos-trame/README.rst @@ -23,6 +23,22 @@ Build and install the Vue components npm run build cd - +Associate it with `dotenv` environement file defining path to trame + +.. code-block:: console + + cat .env + TRAME_DIR=/path/to/geosPythonPackages/geos-trame/src/geos/trame + +Then generic launcher templates and configuration are found under + +.. code-block:: console + + ls ${TRAME_DIR}/app/io/jinja_t + ... + ls ${TRAME_DIR}/assets/cluster.json + ... + Install the application .. code-block:: console diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/local_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/local_copyback.jinja new file mode 100644 index 000000000..8038d5c01 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/local_copyback.jinja @@ -0,0 +1,15 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err +#SBATCH --dependency=afterok:{{ dep_job_id }} + +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ log_{{ dep_job_id }}.out +srun mkdir -p {{ target_dl_path }} && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/local_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/local_slurm.jinja new file mode 100644 index 000000000..deb4ce7f8 --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/local_slurm.jinja @@ -0,0 +1,27 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err + +ulimit -s unlimited +ulimit -c unlimited + +module purge +module use {{ geos_module }} +module load {{ geos_load_list }} + +export HDF5_USE_FILE_LOCKING=FALSE +export OMP_NUM_THREADS=1 +export EXEC={{ geos_path }} + +srun --hint=nomultithread \ + -n {{ ntasks }} ${EXEC} \ + -o Outputs_${SLURM_JOBID} \ + -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out \ No newline at end of file diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja new file mode 100644 index 000000000..ffdffe09f --- /dev/null +++ b/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja @@ -0,0 +1,15 @@ +#!/bin/sh +#SBATCH --job-name="{{ job_name }}" +#SBATCH --ntasks={{ ntasks }} +#SBATCH --partition={{ partition }} +#SBATCH --comment={{ comment_gr }} +#SBACTH --account={{ account }} +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ time | default('00:10:00') }} +#SBATCH --mem={{ mem }} +#SBATCH --output=job_GEOS_%j.out +#SBATCH --error=job_GEOS_%j.err +#SBATCH --dependency=afterok:{{ dep_job_id }} + +srun tar cfz {{ dep_job_id }}.tgz Outputs_{{ dep_job_id }}/ log_{{ dep_job_id }}.out +srun mkdir -p {{ target_dl_path }} && mv -v {{ dep_job_id }}.tgz {{ target_dl_path }} diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja index 69092ea22..06f2d5ac7 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja @@ -15,13 +15,11 @@ ulimit -c unlimited module purge module use {{ geos_module }} -module load genesis common proxy slurm -module load gcc/11.4.1 openmpi-gcc/5.0.5 cmake/3.27.9 +module load {{ geos_load_list }} export HDF5_USE_FILE_LOCKING=FALSE export OMP_NUM_THREADS=1 -export EXEC={{ geosPath }} - +export EXEC={{ geos_path }} mpirun -mca coll_hcoll_enable 0 -x UCX_RNDV_THRESH=131072 \ -n {{ ntasks }} ${EXEC} \ diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index 83e89bdd3..55e037ddc 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -10,7 +10,7 @@ "simulation_default_filename": "geosDeck.xml", "simulation_remote_path": "/workrd/users/$USER/Example", "simulation_dl_default_path": "/users/$USER/Example", - "simulation_information_default_path": "/workrd/users", + "simulation_information_default_path": "/users/$USER/.trame-logs", "n_nodes": 212, "cores_per_node" : 192, "mem_per_node": 747 @@ -20,29 +20,29 @@ "host": "pine-1", "port": 22, "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", - "geos_module":"/workrd/users/$USER/modulesRHEL88", - "geos_load_list":["geos-daily-rhel88"], - "remote_home_base": "/users/", + "geos_module":"/apps/modules/modulefiles3", + "geos_load_list":["genesis","common","proxy","slurm","gcc/11.4.1","openmpi-gcc/5.0.5","cmake/3.27.9"], + "remote_home_base": "/home/$USER", "simulation_default_filename": "geosDeck.xml", - "simulation_remote_path": "/workrd/users", - "simulation_dl_default_path": "/data/", - "simulation_information_default_path": "/workrd/users", - "n_nodes": 212, - "cores_per_node" : 192, - "mem_per_node": 747 + "simulation_remote_path": "/shared/data1/Users/$USER/Example", + "simulation_dl_default_path": "/shared/data1/Users/$USER/Example", + "simulation_information_default_path": "/home/$USER/.trame-logs", + "n_nodes": 48, + "cores_per_node" : 64, + "mem_per_node": 768 }, { "name": "local", "host": "127.0.0.1", "port": 22, - "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", - "geos_module":"/workrd/users/$USER/modulesRHEL88", - "geos_load_list":["geos-daily-rhel88"], - "remote_home_base": "/home", + "geos_path":"/opt/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module":"/workrd/users/$USER/geos-generated", + "geos_load_list":["geos-toolchains"], + "remote_home_base": "/home/$USER", "simulation_default_filename": "geosDeck.xml", - "simulation_remote_path": "/workrd/users", + "simulation_remote_path": "/work/", "simulation_dl_default_path": "/data/", - "simulation_information_default_path": "/workrd/users", + "simulation_information_default_path": "/home/.trame-logs", "n_nodes": 1, "cores_per_node" : 8, "mem_per_node": 32 From a3ae2fc16da23dbfafc6459a000ee360b867f684 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 10:56:25 +0100 Subject: [PATCH 50/70] Headers and some typing --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 78 +++++++----- .../src/geos/trame/app/io/simulation.py | 116 +++++++++++------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 63 ++++++---- geos-trame/src/geos/trame/app/main.py | 5 +- .../src/geos/trame/app/ui/simulation_view.py | 36 ++++-- .../trame/app/utils/async_file_watcher.py | 6 +- 6 files changed, 181 insertions(+), 123 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 00dc55e60..35dc1de8f 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -1,22 +1,23 @@ -import json -import os +from geos.trame.app.io.ssh_tools import SimulationConstant + class SuggestDecomposition: - def __init__( self, selected_cluster, n_unknowns, job_type='cpu' ): + def __init__( self, selected_cluster: SimulationConstant, n_unknowns: int, job_type: str = 'cpu' ) -> None: + """Initialize the decomposition hinter for HPC.""" - self.selected_cluster = selected_cluster - self.n_unknowns = n_unknowns - self.job_type = job_type - self.sd = [] + self.selected_cluster: SimulationConstant = selected_cluster + self.n_unknowns: int = n_unknowns + self.job_type: str = job_type #TODO should be an enum + self.sd: list[ dict ] = [] @staticmethod - def compute( n_unknowns, - memory_per_unknown_bytes, - node_memory_gb, - cores_per_node, - min_unknowns_per_rank=10000, - strong_scaling=True ): + def compute( n_unknowns: int, + memory_per_unknown_bytes: int, + node_memory_gb: int, + cores_per_node: int, + min_unknowns_per_rank: int = 10000, + strong_scaling: bool = True ): """ Suggests node/rank distribution for a cluster computation. @@ -54,33 +55,42 @@ def compute( n_unknowns, ranks_per_node = min( cores_per_node, ( n_ranks + min_nodes - 1 ) // min_nodes ) n_nodes = ( n_ranks + ranks_per_node - 1 ) // ranks_per_node - return [{ - 'nodes': n_nodes, - 'ranks_per_node': ranks_per_node, - 'total_ranks': n_nodes * ranks_per_node, - 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) - }, - { - 'nodes': n_nodes * 2, - 'ranks_per_node': ranks_per_node // 2, - 'total_ranks': n_nodes * ranks_per_node, - 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) - },] - - def get_sd( self ): - - if self.job_type == 'cpu' and self.selected_cluster: #make it an enum + return [ + { + 'nodes': n_nodes, + 'ranks_per_node': ranks_per_node, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) + }, + { + 'nodes': n_nodes * 2, + 'ranks_per_node': ranks_per_node // 2, + 'total_ranks': n_nodes * ranks_per_node, + 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) + }, + ] + + def get_sd( self ): + """Get the suggested decomposition popoulated.""" + + if self.job_type == 'cpu' and self.selected_cluster: #make it an enum self.sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, - self.selected_cluster.cores_per_node ) + self.selected_cluster.cores_per_node ) else: - self.sd = [{'nodes': 0, 'ranks_per_node': 0 , 'total_ranks': 0, 'unknowns_per_rank': 0 },] + self.sd = [ + { + 'nodes': 0, + 'ranks_per_node': 0, + 'total_ranks': 0, + 'unknowns_per_rank': 0 + }, + ] # elif job_type == 'gpu': # selected_cluster['n_nodes']*selected_cluster['gpu']['per_node'] return self.sd def to_list( self ): + """Pretty printer to list of string for display in UI.""" sd = self.get_sd() - return [ - f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd - ] + return [ f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd ] diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 8fbadf540..cc5e22b0f 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -1,10 +1,14 @@ -from abc import ABC +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. +# SPDX-FileContributor: Jacques Franc + +from abc import ABC from pathlib import Path from enum import Enum, unique, auto -from typing import Optional +from typing import Optional from trame_server.core import Server -from geos.trame.app.io.ssh_tools import Authentificator +from geos.trame.app.io.ssh_tools import Authentificator from geos.trame.app.utils.async_file_watcher import AsyncPeriodicRunner from jinja2 import Environment, FileSystemLoader @@ -12,6 +16,7 @@ import re import os + @unique class SimulationStatus( Enum ): SCHEDULED = auto() @@ -22,6 +27,7 @@ class SimulationStatus( Enum ): NOT_RUN = auto() UNKNOWN = auto() + @unique class SlurmJobStatus( Enum ): PENDING = "PEND" @@ -38,6 +44,7 @@ def from_string( cls, job_str ) -> "SlurmJobStatus": except ValueError: return cls.UNKNOWN + class ISimRunner( ABC ): """ Abstract interface for sim runner. @@ -54,6 +61,7 @@ class SimRunner( ISimRunner ): def __init__( self, user ): super().__init__() + class Simulation: """ Simulation component. @@ -67,7 +75,7 @@ def __init__( self, sim_runner: ISimRunner, server: Server, sim_info_dir: Option self._server = server controller = server.controller self._sim_runner = sim_runner - self._sim_info_dir = sim_info_dir + self._sim_info_dir = sim_info_dir server.state.job_ids = [] server.state.selected_cluster = None @@ -87,8 +95,8 @@ def run_try_login() -> None: # if server.state.key: Authentificator.ssh_client = Authentificator._create_ssh_client( - Authentificator.get_cluster(server.state.selected_cluster_name).host, #test - Authentificator.get_cluster(server.state.selected_cluster_name).port, + Authentificator.get_cluster( server.state.selected_cluster_name ).host, #test + Authentificator.get_cluster( server.state.selected_cluster_name ).port, server.state.login, key=Authentificator.get_key( server.state.login, server.state.password ) ) @@ -121,7 +129,7 @@ def gen_tree( xml_filename ): #assume the first XML is the main xml xml_expected_file_matches = re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) - + #TODO all the needed files test_assert = { item.get( "name" ) for item in xml_filename }.intersection( set( xml_expected_file_matches ) ) @@ -145,43 +153,52 @@ def gen_tree( xml_filename ): } return FILE_TREE - - @controller.trigger( "run_simulation" ) def run_simulation() -> None: # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: - if server.state.access_granted and server.state.simulation_xml_filename: + if server.state.access_granted and server.state.simulation_xml_filename: if Authentificator.ssh_client: - + Authentificator._sftp_copy_tree( Authentificator.ssh_client, gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) - - run_id : int = Simulation.render_and_run('p4_slurm.jinja','job.slurm', server, - job_name=server.state.simulation_job_name, - input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml'][ 0 ].get( 'name' ), - nodes=server.state.sd[ 'nodes' ], - ntasks=server.state.sd[ 'total_ranks' ], - geos_module=Authentificator.get_cluster(server.state.selected_cluster_name).geos_module, - geos_load_list=" ".join(Authentificator.get_cluster(server.state.selected_cluster_name).geos_load_list), - geos_path=Authentificator.get_cluster(server.state.selected_cluster_name).geos_path, - mem=f"0", - comment_gr=server.state.slurm_comment, - partition='p4_dev', - account='myaccount') - - Simulation.render_and_run('p4_copyback.jinja', 'copyback.slurm', server, - job_name=server.state.simulation_job_name, - input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), - nodes=1, - ntasks=1, - mem=f"0", - dep_job_id=run_id, - target_dl_path=server.state.simulation_dl_path, - comment_gr=server.state.slurm_comment, - partition='p4_transfer', - account='myaccount' ) + + run_id: int = Simulation.render_and_run( + 'p4_slurm.jinja', + 'job.slurm', + server, + job_name=server.state.simulation_job_name, + input_file=[ + item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' + ][ 0 ].get( 'name' ), + nodes=server.state.sd[ 'nodes' ], + ntasks=server.state.sd[ 'total_ranks' ], + geos_module=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_module, + geos_load_list=" ".join( + Authentificator.get_cluster( server.state.selected_cluster_name ).geos_load_list ), + geos_path=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_path, + mem=f"0", + comment_gr=server.state.slurm_comment, + partition='p4_dev', + account='myaccount' ) + + Simulation.render_and_run( 'p4_copyback.jinja', + 'copyback.slurm', + server, + job_name=server.state.simulation_job_name, + input_file=[ + item for item in server.state.simulation_xml_filename + if item.get( 'type' ) == 'text/xml' + ][ 0 ].get( 'name' ), + nodes=1, + ntasks=1, + mem=f"0", + dep_job_id=run_id, + target_dl_path=server.state.simulation_dl_path, + comment_gr=server.state.slurm_comment, + partition='p4_transfer', + account='myaccount' ) self.start_result_streams() @@ -210,7 +227,7 @@ def start_result_streams( self ) -> None: self.stop_result_streams() self._job_status_watcher = AsyncPeriodicRunner( self.check_jobs, period_ms=self._job_status_watcher_period_ms ) - def check_jobs( self ): + def check_jobs( self ) -> None: if Authentificator.ssh_client: try: jid = self._server.state.job_ids @@ -225,20 +242,22 @@ def check_jobs( self ): if ( jid[ index ][ 'status' ] == 'RUNNING' ): _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) - progress_line = sout.strip().split("|") - jid[ index ][ 'slprogress' ] = str( float(progress_line[0])/ float(progress_line[1]) / 60 * 100 ) + Authentificator.ssh_client, + f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) + progress_line = sout.strip().split( "|" ) + jid[ index ][ 'slprogress' ] = str( + float( progress_line[ 0 ] ) / float( progress_line[ 1 ] ) / 60 * 100 ) # getthe completed status pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f"grep \"completed\" {self._server.state.simulation_remote_path}/job_GEOS_{job_id}.out | tail -1" ) + Authentificator.ssh_client, + f"grep \"completed\" {self._server.state.simulation_remote_path}/job_GEOS_{job_id}.out | tail -1" + ) m = pattern.search( sout.strip() ) if m: jid[ index ][ 'simprogress' ] = str( m.group( 1 ) ) - - print( f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n" ) @@ -252,16 +271,19 @@ def check_jobs( self ): print( f"Error accessing remote file or path: {e}" ) except Exception as e: print( f"An error occurred during SFTP: {e}" ) + + return None else: return None @staticmethod - def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> int : + def render_and_run( template_name: str, dest_name: str, server, **kwargs ) -> int: """Render the slurm template and run it. Return it job_id""" if server.state.access_granted and server.state.simulation_xml_filename: - template = Environment(loader=FileSystemLoader(f'{os.getenv("TRAME_DIR")}/app/io/jinja_t')).get_template(template_name) - rendered = template.render(kwargs) + template = Environment( + loader=FileSystemLoader( f'{os.getenv("TRAME_DIR")}/app/io/jinja_t' ) ).get_template( template_name ) + rendered = template.render( kwargs ) if Authentificator.ssh_client: #write slurm directly on remote @@ -284,4 +306,4 @@ def render_and_run(template_name: str, dest_name: str , server, **kwargs) -> in job_id = re.search( r"Submitted batch job (\d+)", job_lines ) server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) - return job_id[1] \ No newline at end of file + return job_id[ 1 ] diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 268c9ed4f..78e57fd20 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -1,27 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. +# SPDX-FileContributor: Jacques Franc + from typing import Optional from pathlib import Path import paramiko import os import json - -# replace by conf-file json from dataclasses import dataclass + + @dataclass class SimulationConstant: - name : str - host : str - port : int - geos_path : str - geos_module : str - geos_load_list : list - remote_home_base : str # for ssh key - simulation_default_filename : str - simulation_remote_path : str - simulation_dl_default_path : str - simulation_information_default_path : str - n_nodes : int - cores_per_node : int - mem_per_node : int + name: str + host: str + port: int + geos_path: str + geos_module: str + geos_load_list: list + remote_home_base: str # for ssh key + simulation_default_filename: str + simulation_remote_path: str + simulation_dl_default_path: str + simulation_information_default_path: str + n_nodes: int + cores_per_node: int + mem_per_node: int + #If proxyJump are needed # @@ -41,16 +46,20 @@ class SimulationConstant: # sock=sock, # <— tunnel created by ProxyCommand # ) + class Authentificator: #namespacing more than anything else ssh_client: Optional[ paramiko.SSHClient ] = None - sim_constants = [ SimulationConstant(**item) for item in json.load(open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' )) ] + sim_constants = [ + SimulationConstant( **item ) + for item in json.load( open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) ) + ] @staticmethod - def get_cluster( name : str ): - match = next(( item for item in Authentificator.sim_constants if item.name == name ), None) - return match + def get_cluster( name: str ): + match = next( ( item for item in Authentificator.sim_constants if item.name == name ), None ) + return match @staticmethod def _sftp_copy_tree( ssh_client, file_tree, remote_root ): @@ -77,12 +86,12 @@ def dfs_tree( node, path, sftp, remote_root ): print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) elif isinstance( node, dict ): if "files" in node: - # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) - files = node['files'] - for file in files: - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) + # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) + files = node[ 'files' ] + for file in files: + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) if "subfolders" in node: for subfolder, content in node[ "subfolders" ].items(): try: @@ -253,4 +262,4 @@ def _transfer_file_sftp( client, local_path, remote_path, direction="put" ): return False except Exception as e: print( f"An error occurred during SFTP: {e}" ) - return False \ No newline at end of file + return False diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 4e2c2f5c8..7f959328d 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. -# SPDX-FileContributor: Lionel Untereiner +# SPDX-FileContributor: Lionel Untereiner, Jacques Franc from pathlib import Path from typing import Any from dotenv import load_dotenv @@ -12,9 +12,10 @@ sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) -assert load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env") +assert load_dotenv( dotenv_path=Path( __file__ ).parent.parent / ".env" ) from geos.trame.app.core import GeosTrame + def main( server: Server = None, **kwargs: Any ) -> None: """Main function.""" # Get or create server diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 46cd0e481..fa9efa5ba 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -1,25 +1,33 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. +# SPDX-FileContributor: Jacques Franc + from trame.widgets import html from trame.widgets import vuetify3 as vuetify from geos.trame.app.io.simulation import Authentificator from geos.trame.app.io.hpc_tools import SuggestDecomposition + def define_simulation_view( server ) -> None: @server.state.change( "selected_cluster_name" ) - def on_cluster_change( selected_cluster_name : str , **_): - print(selected_cluster_name) - server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster(selected_cluster_name) , 12 ).to_list()#discard 12 + def on_cluster_change( selected_cluster_name: str, **_ ): + print( selected_cluster_name ) + server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), + 12 ).to_list() #discard 12 @server.state.change( "decomposition" ) - def on_decomposition_selected( decomposition : str, **_): - ll = SuggestDecomposition( Authentificator.get_cluster(server.state.selected_cluster_name) , 12 ).get_sd() + def on_decomposition_selected( decomposition: str, **_ ): + ll = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), 12 ).get_sd() if server.state.decomposition: - server.state.sd = ll[ server.state.decompositions.index(decomposition) ] - server.state.simulation_remote_path = Authentificator.get_cluster(server.state.selected_cluster_name).simulation_remote_path - server.state.simulation_dl_path = Authentificator.get_cluster(server.state.selected_cluster_name).simulation_dl_default_path + server.state.sd = ll[ server.state.decompositions.index( decomposition ) ] + server.state.simulation_remote_path = Authentificator.get_cluster( + server.state.selected_cluster_name ).simulation_remote_path + server.state.simulation_dl_path = Authentificator.get_cluster( + server.state.selected_cluster_name ).simulation_dl_default_path else: - server.state.sd = {'nodes': 0, 'total_ranks': 0} + server.state.sd = { 'nodes': 0, 'total_ranks': 0 } @server.state.change( "simulation_xml_temp" ) def on_temp_change( simulation_xml_temp: list, **_ ): @@ -90,16 +98,20 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.access_granted = False server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [] - server.state.selected_cluster_names = [cluster.name for cluster in Authentificator.sim_constants] + server.state.selected_cluster_names = [ cluster.name for cluster in Authentificator.sim_constants ] # server.state.decompositions = [] server.state.sd = None vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): - vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), v_model=("selected_cluster_name", 'local') ) + vuetify.VSelect( label="Cluster", + items=( "selected_cluster_names", ), + v_model=( "selected_cluster_name", 'local' ) ) vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): - vuetify.VSelect( label="Decomposition", items=( "decompositions", []), v_model=("decomposition",'') ) + vuetify.VSelect( label="Decomposition", + items=( "decompositions", [] ), + v_model=( "decomposition", '' ) ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): diff --git a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py index 17b3df3aa..10a8a4cc9 100644 --- a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py +++ b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py @@ -1,3 +1,7 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. +# SPDX-FileContributor: Jacques Franc + import asyncio import os from asyncio import CancelledError, ensure_future @@ -34,7 +38,7 @@ def stop( self ): if not self.task: return - ensure_future( self._wait_for_cancel() ) + ensure_future( self._wait_for_cancel() ) #ignore async def _wait_for_cancel( self ): """ From 81081517120f79e04db03e7e0283c9ecee6c66d2 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 11:17:43 +0100 Subject: [PATCH 51/70] clean up and typing --- .github/workflows/typing-check.yml | 2 +- geos-trame/src/geos/trame/app/io/hpc_tools.py | 16 ++-- .../src/geos/trame/app/io/simulation.py | 27 +++--- geos-trame/src/geos/trame/app/io/ssh_tools.py | 28 +++--- .../src/geos/trame/app/ui/simulation_view.py | 49 +++++----- geos-trame/src/geos/trame/app/ui/timeline.py | 29 ------ .../trame/app/utils/async_file_watcher.py | 92 ++++--------------- 7 files changed, 74 insertions(+), 169 deletions(-) diff --git a/.github/workflows/typing-check.yml b/.github/workflows/typing-check.yml index 0a00276dc..952402041 100644 --- a/.github/workflows/typing-check.yml +++ b/.github/workflows/typing-check.yml @@ -30,7 +30,7 @@ jobs: # working-directory: ./${{ matrix.package-name }} run: | python -m pip install --upgrade pip - python -m pip install mypy ruff types-PyYAML + python -m pip install mypy ruff types-PyYAML types-paramiko - name: Typing check with mypy # working-directory: ./${{ matrix.package-name }} diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 35dc1de8f..d8528c9a8 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -5,7 +5,6 @@ class SuggestDecomposition: def __init__( self, selected_cluster: SimulationConstant, n_unknowns: int, job_type: str = 'cpu' ) -> None: """Initialize the decomposition hinter for HPC.""" - self.selected_cluster: SimulationConstant = selected_cluster self.n_unknowns: int = n_unknowns self.job_type: str = job_type #TODO should be an enum @@ -17,10 +16,9 @@ def compute( n_unknowns: int, node_memory_gb: int, cores_per_node: int, min_unknowns_per_rank: int = 10000, - strong_scaling: bool = True ): - """ - Suggests node/rank distribution for a cluster computation. - + strong_scaling: bool = True ) -> list[ dict ]: + """Suggests node/rank distribution for a cluster computation. + Parameters: - n_unknowns: total number of unknowns - memory_per_unknown_bytes: estimated memory per unknown @@ -28,7 +26,7 @@ def compute( n_unknowns: int, - cores_per_node: cores available per node - min_unknowns_per_rank: minimum for efficiency - strong_scaling: True if problem size is fixed - + Note: - 10,000-100,000 unknowns per rank is often a sweet spot for many PDE solvers - Use power-of-2 decompositions when possible (helps with communication patterns) @@ -36,7 +34,6 @@ def compute( n_unknowns: int, - Don't oversubscribe: avoid using more ranks than provide parallel efficiency """ - # Memory constraint node_memory_bytes = node_memory_gb * 1e9 max_unknowns_per_node = int( 0.8 * node_memory_bytes / memory_per_unknown_bytes ) @@ -70,9 +67,8 @@ def compute( n_unknowns: int, }, ] - def get_sd( self ): + def get_sd( self ) -> list[ dict ]: """Get the suggested decomposition popoulated.""" - if self.job_type == 'cpu' and self.selected_cluster: #make it an enum self.sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, self.selected_cluster.cores_per_node ) @@ -90,7 +86,7 @@ def get_sd( self ): return self.sd - def to_list( self ): + def to_list( self ) -> list[ str ]: """Pretty printer to list of string for display in UI.""" sd = self.get_sd() return [ f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd ] diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index cc5e22b0f..4f32fee6c 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -46,25 +46,21 @@ def from_string( cls, job_str ) -> "SlurmJobStatus": class ISimRunner( ABC ): - """ - Abstract interface for sim runner. + """Abstract interface for sim runner. Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. """ pass class SimRunner( ISimRunner ): - """ - Runs sim on HPC. - """ + """Runs sim on HPC.""" - def __init__( self, user ): + def __init__( self, user ) -> None: super().__init__() class Simulation: - """ - Simulation component. + """Simulation component. Fills the UI with the screenshot as read from the simulation outputs folder and a graph with the time series from the simulation. Requires a simulation runner providing information on the output path of the simulation to monitor and ways to @@ -95,7 +91,7 @@ def run_try_login() -> None: # if server.state.key: Authentificator.ssh_client = Authentificator._create_ssh_client( - Authentificator.get_cluster( server.state.selected_cluster_name ).host, #test + Authentificator.get_cluster( server.state.selected_cluster_name ).host, #test Authentificator.get_cluster( server.state.selected_cluster_name ).port, server.state.login, key=Authentificator.get_key( server.state.login, server.state.password ) ) @@ -178,7 +174,7 @@ def run_simulation() -> None: geos_load_list=" ".join( Authentificator.get_cluster( server.state.selected_cluster_name ).geos_load_list ), geos_path=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_path, - mem=f"0", + mem="0", comment_gr=server.state.slurm_comment, partition='p4_dev', account='myaccount' ) @@ -193,7 +189,7 @@ def run_simulation() -> None: ][ 0 ].get( 'name' ), nodes=1, ntasks=1, - mem=f"0", + mem="0", dep_job_id=run_id, target_dl_path=server.state.simulation_dl_path, comment_gr=server.state.slurm_comment, @@ -211,15 +207,15 @@ def kill_all_simulations() -> None: for jobs in server.state.job_ids: Authentificator.kill_job( jobs[ 'job_id' ] ) - def __del__( self ): + def __del__( self ) -> None: self.stop_result_streams() - def set_status_watcher_period_ms( self, period_ms ): + def set_status_watcher_period_ms( self, period_ms ) -> None: self._job_status_watcher_period_ms = period_ms if self._job_status_watcher: self._job_status_watcher.set_period_ms( period_ms ) - def stop_result_streams( self ): + def stop_result_streams( self ) -> None: if self._job_status_watcher is not None: self._job_status_watcher.stop() @@ -278,8 +274,7 @@ def check_jobs( self ) -> None: @staticmethod def render_and_run( template_name: str, dest_name: str, server, **kwargs ) -> int: - """Render the slurm template and run it. Return it job_id""" - + """Render the slurm template and run it. Return it job_id.""" if server.state.access_granted and server.state.simulation_xml_filename: template = Environment( loader=FileSystemLoader( f'{os.getenv("TRAME_DIR")}/app/io/jinja_t' ) ).get_template( template_name ) diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 78e57fd20..4392b5774 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -47,7 +47,7 @@ class SimulationConstant: # ) -class Authentificator: #namespacing more than anything else +class Authentificator: ssh_client: Optional[ paramiko.SSHClient ] = None @@ -62,7 +62,7 @@ def get_cluster( name: str ): return match @staticmethod - def _sftp_copy_tree( ssh_client, file_tree, remote_root ): + def _sftp_copy_tree( ssh_client, file_tree, remote_root ) -> None: # Connect to remote server sftp = ssh_client.open_sftp() @@ -71,7 +71,7 @@ def _sftp_copy_tree( ssh_client, file_tree, remote_root ): sftp.close() @staticmethod - def dfs_tree( node, path, sftp, remote_root ): + def dfs_tree( node, path, sftp, remote_root ) -> None: if path is None or remote_root is None: return @@ -111,13 +111,13 @@ def dfs_tree( node, path, sftp, remote_root ): Authentificator.dfs_tree( content, lp / Path( folder ), sftp, remote_root ) @staticmethod - def kill_job( id ): + def kill_job( id ) -> None: if Authentificator.ssh_client: Authentificator._execute_remote_command( Authentificator.ssh_client, f"scancel {id}" ) return None @staticmethod - def get_key( id, pword ): + def get_key( id, pword ) -> paramiko.RSAKey: try: import os @@ -146,7 +146,7 @@ def get_key( id, pword ): return PRIVATE_KEY @staticmethod - def gen_key(): + def gen_key() -> paramiko.RSAKey: import os @@ -166,8 +166,8 @@ def gen_key(): @staticmethod def _create_ssh_client( host, port, username, password=None, key=None ) -> paramiko.SSHClient: - """ - Initializes and returns an SSH client connection. + """Initializes and returns an SSH client connection. + Uses context manager for automatic cleanup. """ client = paramiko.SSHClient() @@ -190,10 +190,8 @@ def _create_ssh_client( host, port, username, password=None, key=None ) -> param return None @staticmethod - def _execute_remote_command( client, command ): - """ - Executes a single command on the remote server and prints the output. - """ + def _execute_remote_command( client, command ) -> None: + """Executes a single command on the remote server and prints the output.""" if not client: return @@ -226,9 +224,9 @@ def _execute_remote_command( client, command ): return ( -1, "", "" ) @staticmethod - def _transfer_file_sftp( client, local_path, remote_path, direction="put" ): - """ - Transfers a file using SFTP (Secure File Transfer Protocol). + def _transfer_file_sftp( client, local_path, remote_path, direction="put" ) -> Optional[ bool ]: + """Transfers a file using SFTP (Secure File Transfer Protocol). + Direction can be 'put' (upload) or 'get' (download). """ if not client: diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index fa9efa5ba..4b2c6b84f 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -2,6 +2,8 @@ # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. # SPDX-FileContributor: Jacques Franc +from typing import Any + from trame.widgets import html from trame.widgets import vuetify3 as vuetify @@ -12,13 +14,13 @@ def define_simulation_view( server ) -> None: @server.state.change( "selected_cluster_name" ) - def on_cluster_change( selected_cluster_name: str, **_ ): + def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: print( selected_cluster_name ) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), 12 ).to_list() #discard 12 @server.state.change( "decomposition" ) - def on_decomposition_selected( decomposition: str, **_ ): + def on_decomposition_selected( decomposition: str, **_: Any ) -> None: ll = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), 12 ).get_sd() if server.state.decomposition: server.state.sd = ll[ server.state.decompositions.index( decomposition ) ] @@ -30,7 +32,7 @@ def on_decomposition_selected( decomposition: str, **_ ): server.state.sd = { 'nodes': 0, 'total_ranks': 0 } @server.state.change( "simulation_xml_temp" ) - def on_temp_change( simulation_xml_temp: list, **_ ): + def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: current_list = server.state.simulation_xml_filename new_list = current_list + simulation_xml_temp @@ -38,7 +40,7 @@ def on_temp_change( simulation_xml_temp: list, **_ ): server.state.simulation_xml_temp = [] @server.state.change( "simulation_xml_filename" ) - def on_simfiles_change( simulation_xml_filename: list, **_ ): + def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: import re pattern = re.compile( r"\.xml$", re.IGNORECASE ) has_xml = any( @@ -155,15 +157,14 @@ def run_remove_jobfile( index_to_remove: int ) -> None: filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', # readonly=True, disabled=( "!access_granted", ) ) - with vuetify.VCol( cols=4 ): - with vuetify.VList(): - with vuetify.VListItem( v_for=( "(file,i) in simulation_xml_filename" ), - key="i", - value="file", - prepend_icon="mdi-minus-circle-outline", - click=( run_remove_jobfile, "[i]" ) ): - vuetify.VListItemTitle( "{{ file.name }}" ) - vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) + with vuetify.VCol( cols=4 ), vuetify.VList(): + with vuetify.VListItem( v_for=( "(file,i) in simulation_xml_filename" ), + key="i", + value="file", + prepend_icon="mdi-minus-circle-outline", + click=( run_remove_jobfile, "[i]" ) ): + vuetify.VListItemTitle( "{{ file.name }}" ) + vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( v_model=( "simulation_remote_path", None ), @@ -210,18 +211,16 @@ def run_remove_jobfile( index_to_remove: int ) -> None: vuetify.VBtn( "Kill All", click="trigger('kill_all_simulations')" ), # type: ignore color_expression = "status_colors[job_ids[i].status] || '#607D8B'" - with vuetify.VRow(): - with vuetify.VCol( cols=4 ): - with vuetify.VList(): - with vuetify.VListItem( v_for=( "(jobs,i) in job_ids" ), - key="i", - value="jobs", - base_color=( color_expression, ), - prepend_icon="mdi-minus-circle-outline", - click=( kill_job, "[i]" ) ): - vuetify.VListItemTitle( "{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}" ) - vuetify.VProgressLinear( v_model=( "jobs.simprogress", "0" ), ) - vuetify.VProgressLinear( v_model=( "jobs.slprogress", "0" ), ) + with vuetify.VRow(), vuetify.VCol( cols=4 ), vuetify.VList(): + with vuetify.VListItem( v_for=( "(jobs,i) in job_ids" ), + key="i", + value="jobs", + base_color=( color_expression, ), + prepend_icon="mdi-minus-circle-outline", + click=( kill_job, "[i]" ) ): + vuetify.VListItemTitle( "{{ jobs.status }} -- {{ jobs.name }} -- {{ jobs.job_id }}" ) + vuetify.VProgressLinear( v_model=( "jobs.simprogress", "0" ), ) + vuetify.VProgressLinear( v_model=( "jobs.slprogress", "0" ), ) with vuetify.VRow( v_if="simulation_error" ): html.Div( "An error occurred while running simulation :
{{simulation_error}}", style="color:red;" ) diff --git a/geos-trame/src/geos/trame/app/ui/timeline.py b/geos-trame/src/geos/trame/app/ui/timeline.py index aee23ef1d..1ff6a6093 100644 --- a/geos-trame/src/geos/trame/app/ui/timeline.py +++ b/geos-trame/src/geos/trame/app/ui/timeline.py @@ -20,35 +20,6 @@ def __init__( self, source: DeckTree, **kwargs: Any ) -> None: items = self.tree.timeline() - fields = [ { - "summary": { - "label": "Summary", - "component": "gantt-text", - "width": 300, - "placeholder": "Add a new task...", - }, - "start_date": { - "label": "Start", - "component": "gantt-date", - "width": 75, - "placeholder": "Start", - "sort": "date", - }, - "end_date": { - "label": "End", - "component": "gantt-date", - "width": 75, - "placeholder": "End", - "sort": "date", - }, - "duration": { - "label": "Days", - "component": "gantt-number", - "width": 50, - "placeholder": "0", - }, - } ] - with self: vuetify.VCardTitle( "Events View" ) vuetify.VDateInput( diff --git a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py index 10a8a4cc9..da1d406fa 100644 --- a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py +++ b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py @@ -3,46 +3,49 @@ # SPDX-FileContributor: Jacques Franc import asyncio -import os from asyncio import CancelledError, ensure_future -from io import TextIOWrapper -from pathlib import Path -from typing import Callable, Optional, Union +from typing import Callable from trame_server.utils import asynchronous class AsyncPeriodicRunner: - """ - While started, runs given callback at given period. - """ + """While started, runs given callback at given period.""" - def __init__( self, callback: Callable, period_ms=100 ): + def __init__( self, callback: Callable, period_ms: int = 100 ) -> None: + """Init the async watcher object.""" self.last_m_time = None self.callback = callback self.period_ms = period_ms self.task = None self.start() - def __del__( self ): + def __del__( self ) -> None: + """Clean up async watch on destruction.""" self.stop() - def set_period_ms( self, period_ms ): + def set_period_ms( self, period_ms: int ) -> None: + """Set the async watch period. + + :params:period_ms period in ms + """ self.period_ms = period_ms - def start( self ): + def start( self ) -> None: + """Stop existing async watch and start a new stream.""" self.stop() self.task = asynchronous.create_task( self._runner() ) - def stop( self ): + def stop( self ) -> None: + """Stop the async watch.""" if not self.task: return ensure_future( self._wait_for_cancel() ) #ignore - async def _wait_for_cancel( self ): - """ - Cancel and await cancel error for the task. + async def _wait_for_cancel( self ) -> None: + """Cancel and await cancel error for the task. + If cancel is done outside async, it may raise warnings as cancelled exception may be triggered outside async loop. """ @@ -56,64 +59,7 @@ async def _wait_for_cancel( self ): except CancelledError: self.task = None - async def _runner( self ): + async def _runner( self ) -> None: while True: self.callback() await asyncio.sleep( self.period_ms / 1000.0 ) - - -class AsyncFileWatcher( AsyncPeriodicRunner ): - - def __init__( self, path_to_watch: Path, on_modified_callback: Callable, check_time_out_ms=100 ): - super().__init__( self._check_modified, check_time_out_ms ) - self.path_to_watch = Path( path_to_watch ) - self.last_m_time = None - self.on_modified_callback = on_modified_callback - - def get_m_time( self ): - if not self.path_to_watch.exists(): - return None - return os.stat( self.path_to_watch ).st_mtime - - def _check_modified( self ): - if self.get_m_time() != self.last_m_time: - self.last_m_time = self.get_m_time() - self.on_modified_callback() - - -class AsyncSubprocess: - - def __init__( - self, - args, - timeout: Union[ float, None ] = None, - ) -> None: - self.args = args - self.timeout = timeout - self._writer: Optional[ TextIOWrapper ] = None - - self.stdout: Optional[ bytes ] = None - self.stderr: Optional[ bytes ] = None - self.process: Optional[ asyncio.subprocess.Process ] = None - self.exception: Optional[ RuntimeError ] = None - - async def run( self ) -> None: - cmd = " ".join( map( str, self.args ) ) - self.process = await self._init_subprocess( cmd ) - - try: - self.stdout, self.stderr = await asyncio.wait_for( self.process.communicate(), timeout=self.timeout ) - except asyncio.exceptions.TimeoutError: - self.process.kill() - self.stdout, self.stderr = await self.process.communicate() - self.exception = RuntimeError( "Process timed out" ) - finally: - if self.process.returncode != 0: - self.exception = RuntimeError( f"Process exited with code {self.process.returncode}" ) - - async def _init_subprocess( self, cmd: str ) -> asyncio.subprocess.Process: - return await asyncio.create_subprocess_shell( - cmd=cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) From 0e623016a42b5bd77bfe6940ecdc2d0a969e69c9 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 30 Dec 2025 12:03:02 +0100 Subject: [PATCH 52/70] some typing --- geos-trame/src/geos/trame/app/core.py | 5 +- .../src/geos/trame/app/io/simulation.py | 157 ++++++++---------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 41 +++-- .../src/geos/trame/app/ui/simulation_view.py | 8 +- .../trame/app/utils/async_file_watcher.py | 6 +- 5 files changed, 109 insertions(+), 108 deletions(-) diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index 06a9a54e7..f14c02d20 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -1,7 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. -# SPDX-FileContributor: Lionel Untereiner - +# SPDX-FileContributor: Lionel Untereiner, Jacques Franc +# ignore context collapsing as it is clearer this way +# ruff: noqa: SIM117 from trame.ui.vuetify3 import VAppLayout from trame.decorators import TrameApp from trame.widgets import html, simput diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 4f32fee6c..3e089f2c4 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -2,10 +2,9 @@ # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. # SPDX-FileContributor: Jacques Franc -from abc import ABC from pathlib import Path from enum import Enum, unique, auto -from typing import Optional +from typing import Optional, Any from trame_server.core import Server from geos.trame.app.io.ssh_tools import Authentificator @@ -37,40 +36,20 @@ class SlurmJobStatus( Enum ): SUSPENDED = "S" UNKNOWN = "UNKNOWN" - @classmethod - def from_string( cls, job_str ) -> "SlurmJobStatus": - try: - return cls( job_str ) - except ValueError: - return cls.UNKNOWN - - -class ISimRunner( ABC ): - """Abstract interface for sim runner. - Provides methods to trigger simulation, get simulation output path and knowing if simulation is done or not. - """ - pass - - -class SimRunner( ISimRunner ): - """Runs sim on HPC.""" - - def __init__( self, user ) -> None: - super().__init__() - class Simulation: """Simulation component. + Fills the UI with the screenshot as read from the simulation outputs folder and a graph with the time series from the simulation. Requires a simulation runner providing information on the output path of the simulation to monitor and ways to trigger the simulation. """ - def __init__( self, sim_runner: ISimRunner, server: Server, sim_info_dir: Optional[ Path ] = None ) -> None: + def __init__( self, server: Server, sim_info_dir: Optional[ Path ] = None ) -> None: + """Initialize the Simulation object with logging and sim triggers among other callbacks.""" self._server = server controller = server.controller - self._sim_runner = sim_runner self._sim_info_dir = sim_info_dir server.state.job_ids = [] server.state.selected_cluster = None @@ -99,56 +78,6 @@ def run_try_login() -> None: if Authentificator.ssh_client: server.state.access_granted = True - @staticmethod - def gen_tree( xml_filename ): - - import re - xml_pattern = re.compile( r"\.xml$", re.IGNORECASE ) - mesh_pattern = re.compile( r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE ) - table_pattern = re.compile( r"\.(txt|dat|csv|geos)$", re.IGNORECASE ) - xml_matches = [] - mesh_matches = [] - table_matches = [] - - pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files - pattern_xml_path = r"\"(.*/)*([\w\-.]+\.(?:xml))\b" - pattern_mesh_path = r"\"(.*/)*([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" - pattern_table_curly_path = r"((?:[\w\-/]+/)+)*([\w\-.]+\.(?:geos|csv|dat|txt))" - - for file in xml_filename: - if xml_pattern.search( file.get( "name", "" ) ): - xml_matches.append( file ) - elif mesh_pattern.search( file.get( "name", "" ) ): - mesh_matches.append( file ) - elif table_pattern.search( file.get( "name", "" ) ): - table_matches.append( file ) - - #assume the first XML is the main xml - xml_expected_file_matches = re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) - - #TODO all the needed files - test_assert = { item.get( "name" ) - for item in xml_filename }.intersection( set( xml_expected_file_matches ) ) - assert test_assert - - decoded = re.sub( pattern_xml_path, r'"\2', xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) - decoded = re.sub( pattern_mesh_path, r'"mesh/\2', decoded ) - decoded = re.sub( pattern_table_curly_path, r"tables/\2", decoded ) - - xml_matches[ 0 ][ 'content' ] = decoded.encode( "utf-8" ) - - FILE_TREE = { - 'root': '.', - "structure": { - "files": xml_matches, - "subfolders": { - "mesh": mesh_matches, - "tables": table_matches - } - } - } - return FILE_TREE - @controller.trigger( "run_simulation" ) def run_simulation() -> None: @@ -157,7 +86,7 @@ def run_simulation() -> None: if Authentificator.ssh_client: Authentificator._sftp_copy_tree( Authentificator.ssh_client, - gen_tree( server.state.simulation_xml_filename ), + Authentificator.gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) run_id: int = Simulation.render_and_run( @@ -196,7 +125,7 @@ def run_simulation() -> None: partition='p4_transfer', account='myaccount' ) - self.start_result_streams() + self._start_result_streams() else: raise paramiko.SSHException @@ -208,22 +137,25 @@ def kill_all_simulations() -> None: Authentificator.kill_job( jobs[ 'job_id' ] ) def __del__( self ) -> None: - self.stop_result_streams() + """Clean up running streams on destruction.""" + self._stop_result_streams() - def set_status_watcher_period_ms( self, period_ms ) -> None: + def set_status_watcher_period_ms( self, period_ms: int ) -> None: + """Set the watcher period in ms.""" self._job_status_watcher_period_ms = period_ms if self._job_status_watcher: self._job_status_watcher.set_period_ms( period_ms ) - def stop_result_streams( self ) -> None: + def _stop_result_streams( self ) -> None: if self._job_status_watcher is not None: self._job_status_watcher.stop() - def start_result_streams( self ) -> None: - self.stop_result_streams() + def _start_result_streams( self ) -> None: + self._stop_result_streams() self._job_status_watcher = AsyncPeriodicRunner( self.check_jobs, period_ms=self._job_status_watcher_period_ms ) def check_jobs( self ) -> None: + """Check on running jobs and update their names and progresses.""" if Authentificator.ssh_client: try: jid = self._server.state.job_ids @@ -273,7 +205,7 @@ def check_jobs( self ) -> None: return None @staticmethod - def render_and_run( template_name: str, dest_name: str, server, **kwargs ) -> int: + def render_and_run( template_name: str, dest_name: str, server: Server, **kwargs: Any ) -> str: """Render the slurm template and run it. Return it job_id.""" if server.state.access_granted and server.state.simulation_xml_filename: template = Environment( @@ -299,6 +231,61 @@ def render_and_run( template_name: str, dest_name: str, server, **kwargs ) -> in Authentificator.ssh_client, f'cd {server.state.simulation_remote_path} && sbatch {dest_name}' ) job_lines = sout.strip() job_id = re.search( r"Submitted batch job (\d+)", job_lines ) - server.state.job_ids.append( { 'job_id': job_id[ 1 ] } ) + if job_id: + server.state.job_ids.append( { 'job_id': job_id.group( 1 ) } ) + return job_id.group( 1 ) + else: + return "-1" + else: + return "-1" + else: + return "-1" - return job_id[ 1 ] + @staticmethod + def gen_tree( xml_filename: Any ) -> dict: + """Generate file tree to be copied on remote from files uploaded.""" + import re + xml_pattern = re.compile( r"\.xml$", re.IGNORECASE ) + mesh_pattern = re.compile( r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE ) + table_pattern = re.compile( r"\.(txt|dat|csv|geos)$", re.IGNORECASE ) + xml_matches = [] + mesh_matches = [] + table_matches = [] + + pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # all files + pattern_xml_path = r"\"(.*/)*([\w\-.]+\.(?:xml))\b" + pattern_mesh_path = r"\"(.*/)*([\w\-.]+\.(?:vtu|pvtu|vtm|pvtm))\b" + pattern_table_curly_path = r"((?:[\w\-/]+/)+)*([\w\-.]+\.(?:geos|csv|dat|txt))" + + for file in xml_filename: + if xml_pattern.search( file.get( "name", "" ) ): + xml_matches.append( file ) + elif mesh_pattern.search( file.get( "name", "" ) ): + mesh_matches.append( file ) + elif table_pattern.search( file.get( "name", "" ) ): + table_matches.append( file ) + + #assume the first XML is the main xml + xml_expected_file_matches = re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) + + #TODO all the needed files + test_assert = { item.get( "name" ) for item in xml_filename }.intersection( set( xml_expected_file_matches ) ) + assert test_assert + + decoded = re.sub( pattern_xml_path, r'"\2', xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) + decoded = re.sub( pattern_mesh_path, r'"mesh/\2', decoded ) + decoded = re.sub( pattern_table_curly_path, r"tables/\2", decoded ) + + xml_matches[ 0 ][ 'content' ] = decoded.encode( "utf-8" ) + + FILE_TREE = { + 'root': '.', + "structure": { + "files": xml_matches, + "subfolders": { + "mesh": mesh_matches, + "tables": table_matches + } + } + } + return FILE_TREE diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 4392b5774..c753b5947 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -53,16 +53,18 @@ class Authentificator: sim_constants = [ SimulationConstant( **item ) - for item in json.load( open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) ) + for item in json.load( open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) ) # noqa: SIM115 ] @staticmethod - def get_cluster( name: str ): + def get_cluster( name: str ) -> Optional[ SimulationConstant ]: + """Return the structured meta for cluster selected.""" match = next( ( item for item in Authentificator.sim_constants if item.name == name ), None ) return match @staticmethod - def _sftp_copy_tree( ssh_client, file_tree, remote_root ) -> None: + def _sftp_copy_tree( ssh_client: paramiko.SSHClient, file_tree: dict, remote_root: str ) -> None: + """Copy the file tree at remote root using ssh_client.""" # Connect to remote server sftp = ssh_client.open_sftp() @@ -71,10 +73,10 @@ def _sftp_copy_tree( ssh_client, file_tree, remote_root ) -> None: sftp.close() @staticmethod - def dfs_tree( node, path, sftp, remote_root ) -> None: - + def dfs_tree( node: list | dict, path: str, sftp: paramiko.SFTPClient, remote_root: str ) -> None: + """Create the tree represented by node at local path in remote pointed by sftp client at remote_root.""" if path is None or remote_root is None: - return + return # type:ignore[unreachable] lp = Path( path ) rp = Path( remote_root ) / lp @@ -86,7 +88,6 @@ def dfs_tree( node, path, sftp, remote_root ) -> None: print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) elif isinstance( node, dict ): if "files" in node: - # sftp.put( str(lp/Path(file)), str(rp/Path(file)) ) files = node[ 'files' ] for file in files: with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: @@ -99,7 +100,7 @@ def dfs_tree( node, path, sftp, remote_root ) -> None: except FileNotFoundError: print( f"creating {rp/Path(subfolder)}" ) sftp.mkdir( str( rp / Path( subfolder ) ) ) - Authentificator.dfs_tree( content, lp / Path( subfolder ), sftp, remote_root ) + Authentificator.dfs_tree( content, str( lp / Path( subfolder ) ), sftp, remote_root ) for folder, content in node.items(): if folder not in [ "files", "subfolders" ]: @@ -108,17 +109,18 @@ def dfs_tree( node, path, sftp, remote_root ) -> None: except FileNotFoundError: print( f"creating {rp/Path(folder)}" ) sftp.mkdir( str( rp / Path( folder ) ) ) - Authentificator.dfs_tree( content, lp / Path( folder ), sftp, remote_root ) + Authentificator.dfs_tree( content, str( lp / Path( folder ) ), sftp, remote_root ) @staticmethod - def kill_job( id ) -> None: + def kill_job( id: int ) -> None: + """Cancel job identified by id in slurm schedulder.""" if Authentificator.ssh_client: Authentificator._execute_remote_command( Authentificator.ssh_client, f"scancel {id}" ) return None @staticmethod - def get_key( id, pword ) -> paramiko.RSAKey: - + def get_key( id: str, pword: str ) -> paramiko.RSAKey: + """Return the ssh key if found or create and dispatch one.""" try: import os home = os.environ.get( "HOME" ) @@ -147,7 +149,7 @@ def get_key( id, pword ) -> paramiko.RSAKey: @staticmethod def gen_key() -> paramiko.RSAKey: - + """Generate RSAKey for SSH protocol.""" import os home = os.environ.get( "HOME" ) @@ -165,7 +167,11 @@ def gen_key() -> paramiko.RSAKey: return key @staticmethod - def _create_ssh_client( host, port, username, password=None, key=None ) -> paramiko.SSHClient: + def _create_ssh_client( host: str, + port: int, + username: str, + password: str | None = None, + key: paramiko.RSAKey = None ) -> paramiko.SSHClient: """Initializes and returns an SSH client connection. Uses context manager for automatic cleanup. @@ -190,7 +196,7 @@ def _create_ssh_client( host, port, username, password=None, key=None ) -> param return None @staticmethod - def _execute_remote_command( client, command ) -> None: + def _execute_remote_command( client: paramiko.SSHClient, command: str ) -> tuple[ int, str, str ]: """Executes a single command on the remote server and prints the output.""" if not client: return @@ -224,7 +230,10 @@ def _execute_remote_command( client, command ) -> None: return ( -1, "", "" ) @staticmethod - def _transfer_file_sftp( client, local_path, remote_path, direction="put" ) -> Optional[ bool ]: + def _transfer_file_sftp( client: paramiko.SSHClient, + local_path: str, + remote_path: str, + direction: str = "put" ) -> Optional[ bool ]: """Transfers a file using SFTP (Secure File Transfer Protocol). Direction can be 'put' (upload) or 'get' (download). diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 4b2c6b84f..44efa5031 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -1,17 +1,21 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright 2023-2024 TotalEnergies. # SPDX-FileContributor: Jacques Franc - +# ignore context collapsing as it is clearer this way +# ruff: noqa: SIM117 from typing import Any from trame.widgets import html from trame.widgets import vuetify3 as vuetify +from trame.app.core import Server from geos.trame.app.io.simulation import Authentificator from geos.trame.app.io.hpc_tools import SuggestDecomposition -def define_simulation_view( server ) -> None: +#TODO a class from it +def define_simulation_view( server: Server ) -> None: + """Functional definition of UI elements.""" @server.state.change( "selected_cluster_name" ) def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: diff --git a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py index da1d406fa..f8960e580 100644 --- a/geos-trame/src/geos/trame/app/utils/async_file_watcher.py +++ b/geos-trame/src/geos/trame/app/utils/async_file_watcher.py @@ -41,7 +41,7 @@ def stop( self ) -> None: if not self.task: return - ensure_future( self._wait_for_cancel() ) #ignore + ensure_future( self._wait_for_cancel() ) # type:ignore[unreachable] async def _wait_for_cancel( self ) -> None: """Cancel and await cancel error for the task. @@ -49,11 +49,11 @@ async def _wait_for_cancel( self ) -> None: If cancel is done outside async, it may raise warnings as cancelled exception may be triggered outside async loop. """ - if not self.task or self.task.done() or self.task.cancelled(): + if not self.task or self.task.done() or self.task.cancelled(): # type:ignore[unreachable] self.task = None return - try: + try: # type:ignore[unreachable] self.task.cancel() await self.task except CancelledError: From de77419cdbf2c9429b715e72fad8a5d9f76c199e Mon Sep 17 00:00:00 2001 From: jacques franc Date: Tue, 6 Jan 2026 11:51:49 +0100 Subject: [PATCH 53/70] adjust install and pathing --- geos-trame/README.rst | 20 ++++----- geos-trame/configure.sh | 4 ++ geos-trame/pyproject.toml | 6 +-- geos-trame/src/geos/trame/app/core.py | 5 +-- .../src/geos/trame/app/io/simulation.py | 6 +-- geos-trame/src/geos/trame/app/io/ssh_tools.py | 42 ++++++++++--------- geos-trame/src/geos/trame/app/main.py | 12 +++--- .../src/geos/trame/app/ui/simulation_view.py | 6 +-- install_packages.sh | 4 +- 9 files changed, 54 insertions(+), 51 deletions(-) create mode 100644 geos-trame/configure.sh diff --git a/geos-trame/README.rst b/geos-trame/README.rst index 74ca61d26..6e519cdad 100644 --- a/geos-trame/README.rst +++ b/geos-trame/README.rst @@ -21,23 +21,21 @@ Build and install the Vue components cd vue-components npm i npm run build - cd - + cd .. -Associate it with `dotenv` environement file defining path to trame +then configure the .env + + sh configure.sh + +this will generate a `dotenv` environement file defining useful path to trame, .. code-block:: console cat .env - TRAME_DIR=/path/to/geosPythonPackages/geos-trame/src/geos/trame - -Then generic launcher templates and configuration are found under - -.. code-block:: console + TEMPLATE_DIR=/path/to/geosPythonPackages/geos-trame/src/geos/trame/io/jinja_t + ASSETS_DIR=/path/to/geosPythonPackages/geos-trame/src/geos/trame/assets - ls ${TRAME_DIR}/app/io/jinja_t - ... - ls ${TRAME_DIR}/assets/cluster.json - ... +those will have lower precedence than local environement variables if defined Install the application diff --git a/geos-trame/configure.sh b/geos-trame/configure.sh new file mode 100644 index 000000000..f46d8ff9a --- /dev/null +++ b/geos-trame/configure.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "TEMPLATE_DIR=${PWD}/src/geos/trame/app/io/jinja_t/" >> ${PWD}/src/geos/trame/assets/.env +echo "ASSETS_DIR=${PWD}/src/geos/trame/assets/" >> ${PWD}/src/geos/trame/assets/.env \ No newline at end of file diff --git a/geos-trame/pyproject.toml b/geos-trame/pyproject.toml index af6b3a394..31dc64d91 100644 --- a/geos-trame/pyproject.toml +++ b/geos-trame/pyproject.toml @@ -30,7 +30,6 @@ keywords = [ ] dependencies = [ - "typing-extensions==4.12.2", "trame==3.6.5", "trame-vuetify==3.1.0", "trame-code==1.0.1", @@ -44,6 +43,7 @@ dependencies = [ "trame-gantt==0.1.5", "python-dotenv>=1.2.1", "mpld3<0.5.11", + "paramiko==4.0.0", "xsdata==24.5", "xsdata-pydantic[lxml]==24.5", "pyvista==0.45.2", @@ -51,7 +51,7 @@ dependencies = [ "colorcet==3.1.0", "funcy==2.0", "typing_inspect==0.9.0", - "typing_extensions>=4.12", + "typing_extensions>=4.15.0", "PyYAML", ] @@ -93,7 +93,7 @@ include-package-data = true # include = ['geos-trame*'] [tool.setuptools.package-data] -"*" = ["*.js", "*.css"] +"*" = ["*.js", "*.css","assets/*","*.jinja","*.json",".env"] [tool.pytest.ini_options] addopts = [ diff --git a/geos-trame/src/geos/trame/app/core.py b/geos-trame/src/geos/trame/app/core.py index f14c02d20..1804266d4 100644 --- a/geos-trame/src/geos/trame/app/core.py +++ b/geos-trame/src/geos/trame/app/core.py @@ -25,7 +25,7 @@ from geos.trame.app.ui.viewer.viewer import DeckViewer from geos.trame.app.components.alertHandler import AlertHandler -from geos.trame.app.io.simulation import Simulation, SimRunner +from geos.trame.app.io.simulation import Simulation from geos.trame.app.ui.simulation_view import define_simulation_view import sys @@ -74,8 +74,7 @@ def __init__( self, server: Server, file_name: str ) -> None: self.well_viewer = WellViewer( 5, 5 ) ######## Simulation runner - self.sim_runner: SimRunner = SimRunner( self.state.user_id ) - self.simulation = Simulation( self.sim_runner, server=server ) + self.simulation = Simulation( server=server ) # Data loader self.data_loader = DataLoader( self.tree, self.region_viewer, self.well_viewer, trame_server=server ) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 3e089f2c4..bc1cb9dee 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -73,7 +73,7 @@ def run_try_login() -> None: Authentificator.get_cluster( server.state.selected_cluster_name ).host, #test Authentificator.get_cluster( server.state.selected_cluster_name ).port, server.state.login, - key=Authentificator.get_key( server.state.login, server.state.password ) ) + key=Authentificator.get_key( server.state.login, server.state.password, server.state.key_path, server.state.selected_cluster_name ) ) if Authentificator.ssh_client: server.state.access_granted = True @@ -86,7 +86,7 @@ def run_simulation() -> None: if Authentificator.ssh_client: Authentificator._sftp_copy_tree( Authentificator.ssh_client, - Authentificator.gen_tree( server.state.simulation_xml_filename ), + Simulation.gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) run_id: int = Simulation.render_and_run( @@ -209,7 +209,7 @@ def render_and_run( template_name: str, dest_name: str, server: Server, **kwargs """Render the slurm template and run it. Return it job_id.""" if server.state.access_granted and server.state.simulation_xml_filename: template = Environment( - loader=FileSystemLoader( f'{os.getenv("TRAME_DIR")}/app/io/jinja_t' ) ).get_template( template_name ) + loader=FileSystemLoader( f'{os.getenv("TEMPLATE_DIR")}' ) ).get_template( template_name ) rendered = template.render( kwargs ) if Authentificator.ssh_client: diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index c753b5947..13c2bb9c6 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -8,7 +8,7 @@ import os import json from dataclasses import dataclass - +import trame_server.state as State @dataclass class SimulationConstant: @@ -53,7 +53,7 @@ class Authentificator: sim_constants = [ SimulationConstant( **item ) - for item in json.load( open( f'{os.getenv("TRAME_DIR")}/assets/cluster.json', 'r' ) ) # noqa: SIM115 + for item in json.load( open( f'{os.getenv("ASSETS_DIR")}/cluster.json', 'r' ) ) # noqa: SIM115 ] @staticmethod @@ -83,16 +83,16 @@ def dfs_tree( node: list | dict, path: str, sftp: paramiko.SFTPClient, remote_ro if isinstance( node, list ): for file in node: + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) elif isinstance( node, dict ): if "files" in node: files = node[ 'files' ] for file in files: + print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: f.write( file.get( 'content' ) ) - print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) if "subfolders" in node: for subfolder, content in node[ "subfolders" ].items(): try: @@ -119,50 +119,52 @@ def kill_job( id: int ) -> None: return None @staticmethod - def get_key( id: str, pword: str ) -> paramiko.RSAKey: + def get_key( id: str, pword: str, key_path : str, cluster_name: str) -> paramiko.RSAKey: """Return the ssh key if found or create and dispatch one.""" try: import os - home = os.environ.get( "HOME" ) - PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) + # home = os.environ.get( "HOME" ) + # PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( f"{home}/.ssh/id_trame" ) + PRIVATE_KEY = paramiko.RSAKey.from_private_key_file( key_path ) return PRIVATE_KEY except paramiko.SSHException as e: print( f"Error loading private key: {e}\n" ) except FileNotFoundError as e: - print( f"Private key not found: {e}\n Generating key ..." ) - PRIVATE_KEY = Authentificator.gen_key() + print( f"Private key not found: {e}\n Generating key at ... {key_path}" ) + PRIVATE_KEY = Authentificator.gen_key(key_path) temp_client = paramiko.SSHClient() temp_client.set_missing_host_key_policy( paramiko.AutoAddPolicy() ) - temp_client.connect( SimulationConstant.host, - SimulationConstant.port, + temp_client.connect( Authentificator.get_cluster(cluster_name).host, + Authentificator.get_cluster(cluster_name).port, username=id, password=pword, timeout=10 ) - Authentificator._transfer_file_sftp( temp_client, f"{home}/.ssh/id_trame.pub", - f"{SimulationConstant.remote_home_base}/{id}/.ssh/id_trame.pub" ) + Authentificator._transfer_file_sftp( temp_client, f"{key_path.split('/')[-1]}.pub", + f"{Authentificator.get_cluster(cluster_name).remote_home_base}/{id}/.ssh/{key_path.split('/')[-1]}.pub" ) Authentificator._execute_remote_command( temp_client, - f" cat {SimulationConstant.remote_home_base}/{id}/.ssh/id_trame.pub | tee -a {SimulationConstant.remote_home_base}/{id}/.ssh/authorized_keys" + f" cat {Authentificator.get_cluster(cluster_name).remote_home_base}/.ssh/{key_path.split('/')[-1]}.pub | tee -a {Authentificator.get_cluster(cluster_name).remote_home_base}/.ssh/authorized_keys" ) return PRIVATE_KEY @staticmethod - def gen_key() -> paramiko.RSAKey: + def gen_key(key_path : str ) -> paramiko.RSAKey: """Generate RSAKey for SSH protocol.""" import os - home = os.environ.get( "HOME" ) - file_path = f"{home}/.ssh/id_trame" + # home = os.environ.get( "HOME" ) + # file_path = f"{home}/.ssh/id_trame" key = paramiko.RSAKey.generate( bits=4096 ) - key.write_private_key_file( file_path ) + key.write_private_key_file( key_path ) # Get public key in OpenSSH format public_key = f"{key.get_name()} {key.get_base64()}" - with open( file_path + ".pub", "w" ) as pub_file: + with open( key_path + ".pub", "w" ) as pub_file: pub_file.write( public_key ) - print( "SSH key pair generated: id_trame (private), id_trame.pub (public)" ) + suffix = key_path.split('/')[-1] + print( f"SSH key pair generated: {suffix} (private), {suffix}.pub (public)" ) return key diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 7f959328d..1ce00f6d3 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -3,7 +3,7 @@ # SPDX-FileContributor: Lionel Untereiner, Jacques Franc from pathlib import Path from typing import Any -from dotenv import load_dotenv +from dotenv import load_dotenv, find_dotenv from trame.app import get_server # type: ignore from trame_server import Server @@ -12,7 +12,8 @@ sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) -assert load_dotenv( dotenv_path=Path( __file__ ).parent.parent / ".env" ) +#do not override if existing +assert load_dotenv( dotenv_path=Path( __file__ ).parent.parent / "assets/.env" ) from geos.trame.app.core import GeosTrame @@ -30,14 +31,11 @@ def main( server: Server = None, **kwargs: Any ) -> None: # parse args parser = server.cli - parser.add_argument( "-I", "--input", help="Input file (.xml)" ) + parser.add_argument( "-I", "--input", help="Input file (.xml)", required=True ) + parser.add_argument( "-e", "--env", help="dot_env file" , required=False ) ( args, _unknown ) = parser.parse_known_args() - if args.input is None: - print( "Usage: \n\tgeos-trame -I /path/to/input/file" ) - return - file_name = str( Path( args.input ).absolute() ) app = GeosTrame( server, file_name ) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 44efa5031..5374e6c7d 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -7,7 +7,7 @@ from trame.widgets import html from trame.widgets import vuetify3 as vuetify -from trame.app.core import Server +from trame_server import Server from geos.trame.app.io.simulation import Authentificator from geos.trame.app.io.hpc_tools import SuggestDecomposition @@ -112,7 +112,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Cluster", items=( "selected_cluster_names", ), - v_model=( "selected_cluster_name", 'local' ) ) + v_model=( "selected_cluster_name", 'p4' ) ) vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Decomposition", @@ -123,7 +123,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=8 ): vuetify.VTextField( v_model=( "key_path", - None, + "~/.ssh/id_trame", ), label="Path to ssh key", dense=True, diff --git a/install_packages.sh b/install_packages.sh index b6726ef71..bf89bfbe3 100755 --- a/install_packages.sh +++ b/install_packages.sh @@ -14,5 +14,7 @@ python -m pip install --upgrade ./geos-pv cd ./geos-trame/vue-components npm i npm run build -cd ../../ +cd .. +sh configure.sh +cd .. python -m pip install ./geos-trame \ No newline at end of file From f5f9a6c49b2aff723dffca9aca5138d35a84d0fb Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 7 Jan 2026 11:12:11 +0100 Subject: [PATCH 54/70] sd works for external mesh --- .../src/geos/trame/app/io/simulation.py | 1 + geos-trame/src/geos/trame/app/main.py | 1 - .../src/geos/trame/app/ui/simulation_view.py | 107 +++++++++++++++--- 3 files changed, 94 insertions(+), 15 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index bc1cb9dee..d9eb4293f 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -53,6 +53,7 @@ def __init__( self, server: Server, sim_info_dir: Optional[ Path ] = None ) -> N self._sim_info_dir = sim_info_dir server.state.job_ids = [] server.state.selected_cluster = None + server.state.nunknowns = 1 server.state.status_colors = { "PENDING": "#4CAF50", #PD diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 1ce00f6d3..38e709594 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -9,7 +9,6 @@ from trame_server import Server import sys - sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) #do not override if existing diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 5374e6c7d..3b6b08bd4 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -12,6 +12,61 @@ from geos.trame.app.io.simulation import Authentificator from geos.trame.app.io.hpc_tools import SuggestDecomposition +#rough estimate of n unknowns would be better from GEOS's dry-run +# unknowns (oncell,onpoint) +# for now do not take into account wells as dep on the num of wells (neg vs matrix elmts) +# for now do not take into account frac as dep on the num of frac elmts (prob neg vs matrix elmts) +solvers_to_unknowns = { + "CompositionalMultiphaseFVM" : (3, 0), + "CompositionalMultiphaseHybridFVM" : (4, 0), + "CompositionalMultiphaseReservoirPoromechanics" : (3,3), + "CompositionalMultiphaseReservoirPoromechanicsConformingFractures" : (3,6), + "CompositionalMultiphaseWell" : (3,0), + "ElasticFirstOrderSEM" : (0,3), + "ElasticSEM" : (0,3), + "ImmiscibleMultiphaseFlow": (3,0), + "LaplaceFEM" : (0,3), + "MultiphasePoromechanics" : (3,3), + "MultiphasePoromechanicsReservoir" : (3,3),#?? + "MultiphasePoromechanicsConformingFractures" : (3,6) , + "SinglePhaseFVM" : (2,0), + "SinglePhaseHybridFVM" : (3,0), + "SinglePhasePoromechanics" : (2,3), + "SinglePhasePoromechanicsConformingFractures" : (2,3), + "SinglePhasePoromechanicsConformingFracturesALM" : (2,3), + "SinglePhaseWell" : (2,0), + "SolidMechanicsEmbeddedFractures": (0,3), + "SolidMechanicsAugmentedLagrangianContact": (0,3), + "SolidMechanicsLagrangeContact": (0,3), + "SolidMechanicsLagrangeContactBubbleStab": (0,3), + "SolidMechanicsLagrangianFEM": (0,3) +} + + # helpers +def _what_solver(bcontent): + import xml.etree + sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) + nunk = [solvers_to_unknowns.get(elt.tag,0) for elt in sim_xml.find('Solvers')] + return max(nunk) + + +def _how_many_cells( bcontent ): + import vtk + name = bcontent['name'] + if name.endswith(".vtp"): + reader = vtk.vtkXMLPolyDataReader() + elif name.endswith(".vtu"): + reader = vtk.vtkXMLUnstructuredGridReader() + elif name.endswith(".vtm"): + reader = vtk.vtkXMLMultiBlockDataReader() + else: + raise ValueError("Unsupported kind (use 'vtp', 'vtu', or 'vtm').") + + reader.SetReadFromInputString(1) + reader.SetInputString(bcontent['content']) + reader.Update() + output = reader.GetOutput() + return (output.GetNumberOfCells(), output.GetNumberOfPoints()) #TODO a class from it def define_simulation_view( server: Server ) -> None: @@ -21,18 +76,21 @@ def define_simulation_view( server: Server ) -> None: def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: print( selected_cluster_name ) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), - 12 ).to_list() #discard 12 + server.state.nunknowns ).to_list() + + server.state.simulation_remote_path = Authentificator.get_cluster( + server.state.selected_cluster_name ).simulation_remote_path + + server.state.simulation_dl_path = Authentificator.get_cluster( + server.state.selected_cluster_name ).simulation_dl_default_path @server.state.change( "decomposition" ) def on_decomposition_selected( decomposition: str, **_: Any ) -> None: - ll = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), 12 ).get_sd() - if server.state.decomposition: + ll = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), server.state.nunknowns ).get_sd() + # if server.state.decomposition: + try: server.state.sd = ll[ server.state.decompositions.index( decomposition ) ] - server.state.simulation_remote_path = Authentificator.get_cluster( - server.state.selected_cluster_name ).simulation_remote_path - server.state.simulation_dl_path = Authentificator.get_cluster( - server.state.selected_cluster_name ).simulation_dl_default_path - else: + except: server.state.sd = { 'nodes': 0, 'total_ranks': 0 } @server.state.change( "simulation_xml_temp" ) @@ -43,15 +101,36 @@ def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: server.state.simulation_xml_filename = new_list server.state.simulation_xml_temp = [] + @server.state.change("nunknowns") + def on_nunknowns_change( nunknowns : int , **_ : Any) -> None: + #re-gen list + if len(server.state.decomposition) > 0: + server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), + nunknowns ).to_list() + server.state.decomposition = server.state.decompositions[0] + server.state.nunknowns = nunknowns + + @server.state.change( "simulation_xml_filename" ) def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: import re - pattern = re.compile( r"\.xml$", re.IGNORECASE ) - has_xml = any( - pattern.search( file if isinstance( file, str ) else file.get( "name", "" ) ) - for file in simulation_xml_filename ) - server.state.is_valid_jobfiles = has_xml + has_xml = list([True if file.get( "type", "" ) == 'text/xml' else False + for file in simulation_xml_filename ]) + + has_mesh = list([True if file.get( "name", "" ).endswith((".vtu",".vtm",".vtp")) else False + for file in simulation_xml_filename ]) + + server.state.is_valid_jobfiles = any(has_xml) + if any(has_mesh) and any(has_xml): + for i,_ in enumerate(has_mesh): + if has_mesh[i]: + nc, np = _how_many_cells(simulation_xml_filename[i]) + elif has_xml[i]: + uc, up = _what_solver(simulation_xml_filename[i]) + + server.state.nunknowns = uc*nc + up*np + def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml jid = list( server.state.job_ids ) @@ -117,7 +196,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Decomposition", items=( "decompositions", [] ), - v_model=( "decomposition", '' ) ) + v_model=( "decomposition", 'No: 0x0' ) ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): From 89273f0d5f87111a9f7f1d3e1a564e4f7072a504 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 7 Jan 2026 13:09:26 +0100 Subject: [PATCH 55/70] simpler --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 13 +++++--- .../src/geos/trame/app/io/simulation.py | 6 ++-- .../src/geos/trame/app/ui/simulation_view.py | 31 ++++++++++--------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index d8528c9a8..4d49f089e 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -54,12 +54,14 @@ def compute( n_unknowns: int, return [ { + 'id':1, 'nodes': n_nodes, 'ranks_per_node': ranks_per_node, 'total_ranks': n_nodes * ranks_per_node, 'unknowns_per_rank': n_unknowns // ( n_nodes * ranks_per_node ) }, { + 'id':2, 'nodes': n_nodes * 2, 'ranks_per_node': ranks_per_node // 2, 'total_ranks': n_nodes * ranks_per_node, @@ -72,9 +74,12 @@ def get_sd( self ) -> list[ dict ]: if self.job_type == 'cpu' and self.selected_cluster: #make it an enum self.sd = SuggestDecomposition.compute( self.n_unknowns, 64, self.selected_cluster.mem_per_node, self.selected_cluster.cores_per_node ) + self.sd = [{**item,'label': f"{self.selected_cluster.name} : {item['nodes']} x {item['ranks_per_node']}"} for item in self.sd ] else: self.sd = [ { + 'id': -1, + 'label':'No: 0x0', 'nodes': 0, 'ranks_per_node': 0, 'total_ranks': 0, @@ -86,7 +91,7 @@ def get_sd( self ) -> list[ dict ]: return self.sd - def to_list( self ) -> list[ str ]: - """Pretty printer to list of string for display in UI.""" - sd = self.get_sd() - return [ f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd ] + # def to_list( self ) -> list[ str ]: + # """Pretty printer to list of string for display in UI.""" + # sd = self.get_sd() + # return [ f"{self.selected_cluster.name} : {sd_item['nodes']} x {sd_item['ranks_per_node']}" for sd_item in sd ] diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index d9eb4293f..aecebed75 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -83,7 +83,7 @@ def run_try_login() -> None: def run_simulation() -> None: # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: - if server.state.access_granted and server.state.simulation_xml_filename: + if server.state.access_granted and server.state.simulation_xml_filename and server.state.decomposition: if Authentificator.ssh_client: Authentificator._sftp_copy_tree( Authentificator.ssh_client, @@ -98,8 +98,8 @@ def run_simulation() -> None: input_file=[ item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), - nodes=server.state.sd[ 'nodes' ], - ntasks=server.state.sd[ 'total_ranks' ], + nodes=server.state.decomposition[ 'nodes' ], + ntasks=server.state.decomposition[ 'total_ranks' ], geos_module=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_module, geos_load_list=" ".join( Authentificator.get_cluster( server.state.selected_cluster_name ).geos_load_list ), diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 3b6b08bd4..553006355 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -76,7 +76,7 @@ def define_simulation_view( server: Server ) -> None: def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: print( selected_cluster_name ) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), - server.state.nunknowns ).to_list() + server.state.nunknowns ).get_sd() server.state.simulation_remote_path = Authentificator.get_cluster( server.state.selected_cluster_name ).simulation_remote_path @@ -84,14 +84,12 @@ def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: server.state.simulation_dl_path = Authentificator.get_cluster( server.state.selected_cluster_name ).simulation_dl_default_path - @server.state.change( "decomposition" ) - def on_decomposition_selected( decomposition: str, **_: Any ) -> None: - ll = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), server.state.nunknowns ).get_sd() - # if server.state.decomposition: - try: - server.state.sd = ll[ server.state.decompositions.index( decomposition ) ] - except: - server.state.sd = { 'nodes': 0, 'total_ranks': 0 } + # @server.state.change( "decomposition" ) + # def on_decomposition_selected( decomposition: str, **_: Any ) -> None: + # = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), server.state.nunknowns ).get_sd() + # # if server.state.decomposition: + # except: + # server.state.sd = { 'nodes': 0, 'total_ranks': 0 } @server.state.change( "simulation_xml_temp" ) def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: @@ -104,10 +102,9 @@ def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: @server.state.change("nunknowns") def on_nunknowns_change( nunknowns : int , **_ : Any) -> None: #re-gen list - if len(server.state.decomposition) > 0: + if len(server.state.decompositions) > 0: server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), - nunknowns ).to_list() - server.state.decomposition = server.state.decompositions[0] + nunknowns ).get_sd() server.state.nunknowns = nunknowns @@ -185,7 +182,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.simulation_xml_filename = [] server.state.selected_cluster_names = [ cluster.name for cluster in Authentificator.sim_constants ] # server.state.decompositions = [] - server.state.sd = None + # server.state.sd = None vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): @@ -196,13 +193,17 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Decomposition", items=( "decompositions", [] ), - v_model=( "decomposition", 'No: 0x0' ) ) + v_model=( "decomposition", None ), + item_title="label", + item_value="id", + return_object=True + ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): vuetify.VTextField( v_model=( "key_path", - "~/.ssh/id_trame", + "/users/$USER/.ssh/id_trame", ), label="Path to ssh key", dense=True, From 50a8c10d467466df92cc1edad052d0e25e240dc6 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 7 Jan 2026 14:00:17 +0100 Subject: [PATCH 56/70] with internal mesh too --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 2 +- .../src/geos/trame/app/ui/simulation_view.py | 44 +++++++++++++++---- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 4d49f089e..27a68bad1 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -15,7 +15,7 @@ def compute( n_unknowns: int, memory_per_unknown_bytes: int, node_memory_gb: int, cores_per_node: int, - min_unknowns_per_rank: int = 10000, + min_unknowns_per_rank: int = 10, strong_scaling: bool = True ) -> list[ dict ]: """Suggests node/rank distribution for a cluster computation. diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 553006355..673d291b1 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -43,14 +43,14 @@ } # helpers -def _what_solver(bcontent): +def _what_solver(bcontent) -> int: import xml.etree sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) - nunk = [solvers_to_unknowns.get(elt.tag,0) for elt in sim_xml.find('Solvers')] + nunk = [solvers_to_unknowns.get(elt.tag, 1) for elt in sim_xml.find('Solvers')] return max(nunk) -def _how_many_cells( bcontent ): +def _how_many_cells( bcontent ) -> tuple[int,int]: import vtk name = bcontent['name'] if name.endswith(".vtp"): @@ -68,6 +68,24 @@ def _how_many_cells( bcontent ): output = reader.GetOutput() return (output.GetNumberOfCells(), output.GetNumberOfPoints()) +def _has_internalMesh(bcontent) -> bool: + import xml.etree + sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) + return (sim_xml.find('Mesh/InternalMesh') is not None) + +def _what_internalMesh(bcontent) -> tuple[int,int]: + import xml.etree + import re + sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) + nx = sim_xml.find('Mesh/InternalMesh').get('nx') + nx = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', nx)]) + ny = sim_xml.find('Mesh/InternalMesh').get('ny') + ny = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', ny)]) + nz = sim_xml.find('Mesh/InternalMesh').get('nz') + nz = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', nz)]) + return (nx*ny*nz, (nx+1)*(ny+1)*(nz+1)) + + #TODO a class from it def define_simulation_view( server: Server ) -> None: """Functional definition of UI elements.""" @@ -105,6 +123,7 @@ def on_nunknowns_change( nunknowns : int , **_ : Any) -> None: if len(server.state.decompositions) > 0: server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), nunknowns ).get_sd() + print(f'unknowns changed : {server.state.nunknowns} -> {nunknowns}') server.state.nunknowns = nunknowns @@ -114,20 +133,27 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: has_xml = list([True if file.get( "type", "" ) == 'text/xml' else False for file in simulation_xml_filename ]) - has_mesh = list([True if file.get( "name", "" ).endswith((".vtu",".vtm",".vtp")) else False + has_external_mesh = list([True if file.get( "name", "" ).endswith((".vtu",".vtm",".vtp")) else False for file in simulation_xml_filename ]) - server.state.is_valid_jobfiles = any(has_xml) - - if any(has_mesh) and any(has_xml): - for i,_ in enumerate(has_mesh): - if has_mesh[i]: + has_internal_mesh = False + for i,_ in enumerate(has_xml): + if has_xml[i]: + has_internal_mesh = _has_internalMesh(simulation_xml_filename[i]) + + if any(has_xml): + for i,_ in enumerate(has_xml): + if has_external_mesh[i]: nc, np = _how_many_cells(simulation_xml_filename[i]) elif has_xml[i]: uc, up = _what_solver(simulation_xml_filename[i]) + if has_internal_mesh: + nc,np = _what_internalMesh(simulation_xml_filename[i]) server.state.nunknowns = uc*nc + up*np + server.state.is_valid_jobfiles = any(has_xml) + def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml jid = list( server.state.job_ids ) From bb2d3f8e0c1f69169dd5073ea34f442225bbcc19 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 7 Jan 2026 14:03:50 +0100 Subject: [PATCH 57/70] beautify --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 673d291b1..e38f975f9 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -92,7 +92,7 @@ def define_simulation_view( server: Server ) -> None: @server.state.change( "selected_cluster_name" ) def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: - print( selected_cluster_name ) + print( f"selecting {selected_cluster_name}" ) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), server.state.nunknowns ).get_sd() @@ -158,7 +158,6 @@ def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml jid = list( server.state.job_ids ) if 0 <= index_to_remove < len( jid ): - # 1. Supprimer l'élément de la copie de la liste removed_id = jid[ index_to_remove ][ 'job_id' ] Authentificator.kill_job( removed_id ) del jid[ index_to_remove ] From d5bc8e4d5ccb63943ae6b3526893963f12f5f2d0 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Wed, 7 Jan 2026 15:20:46 +0100 Subject: [PATCH 58/70] init paths before copy --- geos-trame/src/geos/trame/app/io/hpc_tools.py | 2 +- .../src/geos/trame/app/io/simulation.py | 102 ++++++++++-------- geos-trame/src/geos/trame/app/io/ssh_tools.py | 40 +++++-- .../src/geos/trame/app/ui/simulation_view.py | 1 - 4 files changed, 90 insertions(+), 55 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/hpc_tools.py b/geos-trame/src/geos/trame/app/io/hpc_tools.py index 27a68bad1..4d49f089e 100644 --- a/geos-trame/src/geos/trame/app/io/hpc_tools.py +++ b/geos-trame/src/geos/trame/app/io/hpc_tools.py @@ -15,7 +15,7 @@ def compute( n_unknowns: int, memory_per_unknown_bytes: int, node_memory_gb: int, cores_per_node: int, - min_unknowns_per_rank: int = 10, + min_unknowns_per_rank: int = 10000, strong_scaling: bool = True ) -> list[ dict ]: """Suggests node/rank distribution for a cluster computation. diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index aecebed75..a2ce049c0 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -85,6 +85,28 @@ def run_simulation() -> None: # if server.state.access_granted and server.state.sd and server.state.simulation_xml_filename: if server.state.access_granted and server.state.simulation_xml_filename and server.state.decomposition: if Authentificator.ssh_client: + # create remote path + try: + sftp = Authentificator.ssh_client.open_sftp() + sftp.stat( server.state.simulation_remote_path ) + except FileNotFoundError: + import posixpath + jpart = '/' + for part in server.state.simulation_remote_path.split('/')[1:]: + try: + jpart = posixpath.join(jpart,part) + sftp.stat( str(jpart) ) # exists? + except FileNotFoundError: + sftp.mkdir( str(jpart) ) + except PermissionError: + print( f"Permission error creating root folder at {jpart}") + raise + except: + print( f"Error creating root folder at {jpart}") + raise + + # create local path + os.makedirs(server.state.simulation_dl_path, exist_ok=True) Authentificator._sftp_copy_tree( Authentificator.ssh_client, Simulation.gen_tree( server.state.simulation_xml_filename ), @@ -158,52 +180,42 @@ def _start_result_streams( self ) -> None: def check_jobs( self ) -> None: """Check on running jobs and update their names and progresses.""" if Authentificator.ssh_client: - try: - jid = self._server.state.job_ids - for index, job in enumerate( jid ): - job_id = job[ 'job_id' ] + jid = self._server.state.job_ids + for index, job in enumerate( jid ): + job_id = job[ 'job_id' ] + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader' ) + job_line = sout.strip().split( "\n" )[ -1 ] + + jid[ index ][ 'status' ] = job_line.split()[ 2 ] + jid[ index ][ 'name' ] = job_line.split()[ 1 ] + + if ( jid[ index ][ 'status' ] == 'RUNNING' ): + _, sout, _ = Authentificator._execute_remote_command( + Authentificator.ssh_client, + f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) + progress_line = sout.strip().split( "|" ) + jid[ index ][ 'slprogress' ] = str( + float( progress_line[ 0 ] ) / float( progress_line[ 1 ] ) / 60 * 100 ) + + # getthe completed status + pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, f'sacct -j {job_id} -o JobID,JobName,State --noheader' ) - job_line = sout.strip().split( "\n" )[ -1 ] - - jid[ index ][ 'status' ] = job_line.split()[ 2 ] - jid[ index ][ 'name' ] = job_line.split()[ 1 ] - - if ( jid[ index ][ 'status' ] == 'RUNNING' ): - _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, - f"sacct -j {job_id} -o ElapsedRaw,TimelimitRaw --noheader --parsable2 | head -n 1 " ) - progress_line = sout.strip().split( "|" ) - jid[ index ][ 'slprogress' ] = str( - float( progress_line[ 0 ] ) / float( progress_line[ 1 ] ) / 60 * 100 ) - - # getthe completed status - pattern = re.compile( r'\((\d+(?:\.\d+)?)%\s*completed\)' ) - _, sout, _ = Authentificator._execute_remote_command( - Authentificator.ssh_client, - f"grep \"completed\" {self._server.state.simulation_remote_path}/job_GEOS_{job_id}.out | tail -1" - ) - m = pattern.search( sout.strip() ) - if m: - jid[ index ][ 'simprogress' ] = str( m.group( 1 ) ) - - print( - f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n" + Authentificator.ssh_client, + f"grep \"completed\" {self._server.state.simulation_remote_path}/job_GEOS_{job_id}.out | tail -1" ) - self._server.state.job_ids = jid - self._server.state.dirty( "job_ids" ) - self._server.state.flush() - - except PermissionError as e: - print( f"Permission error: {e}" ) - except IOError as e: - print( f"Error accessing remote file or path: {e}" ) - except Exception as e: - print( f"An error occurred during SFTP: {e}" ) - - return None - else: - return None + m = pattern.search( sout.strip() ) + if m: + jid[ index ][ 'simprogress' ] = str( m.group( 1 ) ) + + print( + f"{job_line}-{job_id}\n job id:{jid[index]['job_id']}\n status:{jid[index]['status']}\n name:{jid[index]['name']} \n --- \n" + ) + self._server.state.job_ids = jid + self._server.state.dirty( "job_ids" ) + self._server.state.flush() + + return None @staticmethod def render_and_run( template_name: str, dest_name: str, server: Server, **kwargs: Any ) -> str: @@ -289,4 +301,6 @@ def gen_tree( xml_filename: Any ) -> dict: } } } + + print( f"Generated FILE_TREE: {FILE_TREE}" ) return FILE_TREE diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 13c2bb9c6..77000f078 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -84,22 +84,34 @@ def dfs_tree( node: list | dict, path: str, sftp: paramiko.SFTPClient, remote_ro if isinstance( node, list ): for file in node: print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) + try: + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + except: + print( f"Error copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") + raise elif isinstance( node, dict ): if "files" in node: files = node[ 'files' ] for file in files: print( f"copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}" ) - with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: - f.write( file.get( 'content' ) ) + try: + with sftp.file( str( rp / Path( file.get( 'name' ) ) ), 'w' ) as f: + f.write( file.get( 'content' ) ) + except: + print( f"Error copying {lp/Path(file.get('name'))} to {rp/Path(file.get('name'))}") + raise if "subfolders" in node: for subfolder, content in node[ "subfolders" ].items(): try: sftp.stat( str( rp / Path( subfolder ) ) ) except FileNotFoundError: - print( f"creating {rp/Path(subfolder)}" ) - sftp.mkdir( str( rp / Path( subfolder ) ) ) + try: + print( f"creating {rp/Path(subfolder)}" ) + sftp.mkdir( str( rp / Path( subfolder ) ) ) + except: + print( f"Error creating {rp/Path(subfolder)} on remote.") + raise Authentificator.dfs_tree( content, str( lp / Path( subfolder ) ), sftp, remote_root ) for folder, content in node.items(): @@ -108,7 +120,11 @@ def dfs_tree( node: list | dict, path: str, sftp: paramiko.SFTPClient, remote_ro sftp.stat( str( rp / Path( folder ) ) ) except FileNotFoundError: print( f"creating {rp/Path(folder)}" ) - sftp.mkdir( str( rp / Path( folder ) ) ) + try: + sftp.mkdir( str( rp / Path( folder ) ) ) + except: + print( f"Error creating {rp/Path(subfolder)} on remote.") + raise Authentificator.dfs_tree( content, str( lp / Path( folder ) ), sftp, remote_root ) @staticmethod @@ -226,9 +242,15 @@ def _execute_remote_command( client: paramiko.SSHClient, command: str ) -> tuple print( f"Command exited with status: {exit_status}" ) return ( exit_status, stdout_data, stderr_data ) - + + except PermissionError as e: + print( f"Permission error: {e}" ) + return ( -1, "", "" ) + except IOError as e: + print( f"Error accessing remote file or path: {e}" ) + return ( -1, "", "" ) except Exception as e: - print( f"Error executing command: {e}" ) + print( f"An error occurred during SFTP: {e}" ) return ( -1, "", "" ) @staticmethod diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index e38f975f9..e8bce6b46 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -207,7 +207,6 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.simulation_xml_filename = [] server.state.selected_cluster_names = [ cluster.name for cluster in Authentificator.sim_constants ] # server.state.decompositions = [] - # server.state.sd = None vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): From 9a91327621a70934c74b8dc4f8a5c15bae5748d5 Mon Sep 17 00:00:00 2001 From: Jacques Franc <49998870+jafranc@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:07:28 +0100 Subject: [PATCH 59/70] Add .geos to file type filter in simulation view --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index e8bce6b46..d2f2482bf 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -262,7 +262,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: hide_details=True, # clearable=True, multiple=True, - filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt', + filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt,.geos', # readonly=True, disabled=( "!access_granted", ) ) with vuetify.VCol( cols=4 ), vuetify.VList(): From 7405c369f5ba38a0adc780707dfcf56e6d8fa80f Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 8 Jan 2026 10:53:45 +0100 Subject: [PATCH 60/70] env and assets loading logique --- geos-trame/src/geos/trame/app/io/ssh_tools.py | 9 ++++++++- geos-trame/src/geos/trame/app/main.py | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 77000f078..31fba6184 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -51,7 +51,14 @@ class Authentificator: ssh_client: Optional[ paramiko.SSHClient ] = None - sim_constants = [ + sim_constants = [] + # SimulationConstant( **item ) + # for item in json.load( open( f'{os.getenv("ASSETS_DIR")}/cluster.json', 'r' ) ) # noqa: SIM115 + # ] + + @staticmethod + def reload_simconstants(): + Authentificator.sim_constants = [ SimulationConstant( **item ) for item in json.load( open( f'{os.getenv("ASSETS_DIR")}/cluster.json', 'r' ) ) # noqa: SIM115 ] diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 38e709594..4aa0e5cf6 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -3,7 +3,8 @@ # SPDX-FileContributor: Lionel Untereiner, Jacques Franc from pathlib import Path from typing import Any -from dotenv import load_dotenv, find_dotenv +from dotenv import load_dotenv +import os from trame.app import get_server # type: ignore from trame_server import Server @@ -12,9 +13,8 @@ sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) #do not override if existing -assert load_dotenv( dotenv_path=Path( __file__ ).parent.parent / "assets/.env" ) from geos.trame.app.core import GeosTrame - +from geos.trame.app.io.ssh_tools import Authentificator def main( server: Server = None, **kwargs: Any ) -> None: """Main function.""" @@ -34,6 +34,17 @@ def main( server: Server = None, **kwargs: Any ) -> None: parser.add_argument( "-e", "--env", help="dot_env file" , required=False ) ( args, _unknown ) = parser.parse_known_args() + + if args.env: + assert load_dotenv( dotenv_path=Path(args.env) ) + else: + assert load_dotenv( dotenv_path=Path( __file__ ).parent.parent / "assets/.env" ) + + Authentificator.reload_simconstants() + + print(f"TEMPLATE_DIR .. {os.getenv('TEMPLATE_DIR')}") + print(f"ASSETS_DIR .. {os.getenv('ASSETS_DIR')}") + file_name = str( Path( args.input ).absolute() ) From a86428c2b030db497427f6c52b9b24d884a02114 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 8 Jan 2026 11:24:35 +0100 Subject: [PATCH 61/70] small fix --- geos-trame/src/geos/trame/app/io/simulation.py | 4 ++-- geos-trame/src/geos/trame/app/ui/simulation_view.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index a2ce049c0..7005f3a80 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -121,14 +121,14 @@ def run_simulation() -> None: item for item in server.state.simulation_xml_filename if item.get( 'type' ) == 'text/xml' ][ 0 ].get( 'name' ), nodes=server.state.decomposition[ 'nodes' ], - ntasks=server.state.decomposition[ 'total_ranks' ], + ntasks=server.state.decomposition[ 'nodes' ]*server.state.decomposition['ranks_per_node'], geos_module=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_module, geos_load_list=" ".join( Authentificator.get_cluster( server.state.selected_cluster_name ).geos_load_list ), geos_path=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_path, mem="0", comment_gr=server.state.slurm_comment, - partition='p4_dev', + partition='p4_general', account='myaccount' ) Simulation.render_and_run( 'p4_copyback.jinja', diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index d2f2482bf..e550e3553 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -46,7 +46,7 @@ def _what_solver(bcontent) -> int: import xml.etree sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) - nunk = [solvers_to_unknowns.get(elt.tag, 1) for elt in sim_xml.find('Solvers')] + nunk = [solvers_to_unknowns.get(elt.tag, (1,0)) for elt in sim_xml.find('Solvers')] return max(nunk) From aad6c24be3d0a9b6dd7f40f3ddb23102bba34969 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 19 Jan 2026 11:42:33 +0100 Subject: [PATCH 62/70] fine-grained conf --- .../src/geos/trame/app/io/jinja_t/p4_slurm.jinja | 2 +- .../geos/trame/app/io/jinja_t/pine_copyback.jinja | 2 +- .../geos/trame/app/io/jinja_t/pine_slurm.jinja | 5 +++-- geos-trame/src/geos/trame/app/io/simulation.py | 15 +++++++++------ geos-trame/src/geos/trame/app/io/ssh_tools.py | 2 ++ geos-trame/src/geos/trame/assets/cluster.json | 8 +++++++- 6 files changed, 23 insertions(+), 11 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja index 2bc3a36aa..dd15c075e 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/p4_slurm.jinja @@ -24,4 +24,4 @@ export EXEC={{ geos_path }} srun --mpi=pmix_v3 --hint=nomultithread \ -n {{ ntasks }} ${EXEC} \ -o Outputs_${SLURM_JOBID} \ - -i {{ input_file | default('geosDeck.xml') }} | tee log_${SLURM_JOBID}.out \ No newline at end of file + -i {{ input_file | default('geosDeck.xml') }} | tee log_${SLURM_JOBID}.out diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja index ffdffe09f..e849c07d2 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/pine_copyback.jinja @@ -3,7 +3,7 @@ #SBATCH --ntasks={{ ntasks }} #SBATCH --partition={{ partition }} #SBATCH --comment={{ comment_gr }} -#SBACTH --account={{ account }} +#SBATCH --account={{ account }} #SBATCH --nodes={{ nodes }} #SBATCH --time={{ time | default('00:10:00') }} #SBATCH --mem={{ mem }} diff --git a/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja index 06f2d5ac7..0c47ae296 100644 --- a/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja +++ b/geos-trame/src/geos/trame/app/io/jinja_t/pine_slurm.jinja @@ -3,7 +3,7 @@ #SBATCH --ntasks={{ ntasks }} #SBATCH --partition={{ partition }} #SBATCH --comment={{ comment_gr }} -#SBACTH --account={{ account }} +#SBATCH --account={{ account }} #SBATCH --nodes={{ nodes }} #SBATCH --time={{ time | default('00:10:00') }} #SBATCH --mem={{ mem }} @@ -21,7 +21,8 @@ export HDF5_USE_FILE_LOCKING=FALSE export OMP_NUM_THREADS=1 export EXEC={{ geos_path }} +mkdir -p Outputs_${SLURM_JOBID} && touch log_${SLURM_JOBID}.out mpirun -mca coll_hcoll_enable 0 -x UCX_RNDV_THRESH=131072 \ -n {{ ntasks }} ${EXEC} \ -o Outputs_${SLURM_JOBID} \ - -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out \ No newline at end of file + -i {{ input_file | default('geosDeck.xml') }} | tee Outputs_${SLURM_JOBID}/log_${SLURM_JOBID}.out diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 7005f3a80..6046116c5 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -112,8 +112,11 @@ def run_simulation() -> None: Simulation.gen_tree( server.state.simulation_xml_filename ), server.state.simulation_remote_path ) + cluster_name = Authentificator.get_cluster( server.state.selected_cluster_name ).name + cluster_part = Authentificator.get_cluster( server.state.selected_cluster_name ).partition + cluster_trans_part = Authentificator.get_cluster( server.state.selected_cluster_name ).partition_transfert run_id: int = Simulation.render_and_run( - 'p4_slurm.jinja', + f'{cluster_name}_slurm.jinja', 'job.slurm', server, job_name=server.state.simulation_job_name, @@ -128,10 +131,10 @@ def run_simulation() -> None: geos_path=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_path, mem="0", comment_gr=server.state.slurm_comment, - partition='p4_general', - account='myaccount' ) + partition=cluter_part, + account=server.state.slurm_comment) - Simulation.render_and_run( 'p4_copyback.jinja', + Simulation.render_and_run( f'{cluster_name}_copyback.jinja', 'copyback.slurm', server, job_name=server.state.simulation_job_name, @@ -145,8 +148,8 @@ def run_simulation() -> None: dep_job_id=run_id, target_dl_path=server.state.simulation_dl_path, comment_gr=server.state.slurm_comment, - partition='p4_transfer', - account='myaccount' ) + partition=cluster_trans_part, + account=server.stat.slurm_comment ) self._start_result_streams() diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index 31fba6184..d1aa5716a 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -14,6 +14,8 @@ class SimulationConstant: name: str host: str + partition: str + partition_tranfert: str port: int geos_path: str geos_module: str diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index 55e037ddc..5097c3b94 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -2,6 +2,8 @@ { "name": "p4", "host": "p4log01", + "partition":"p4_general", + "partition_transfert":"p4_transfert", "port": 22, "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", "geos_module":"/workrd/users/$USER/modulesRHEL88", @@ -18,8 +20,10 @@ { "name": "pine", "host": "pine-1", + "partition":"pine", + "partition_transfert":"pine", "port": 22, - "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", + "geos_path":"/shared/data1/Users/$USER/codes/GEOS-2025-11-03/build-pine-1.pine.cluster-linux-rocky9-zen4-gcc@11.4.1-release/bin/geosx", "geos_module":"/apps/modules/modulefiles3", "geos_load_list":["genesis","common","proxy","slurm","gcc/11.4.1","openmpi-gcc/5.0.5","cmake/3.27.9"], "remote_home_base": "/home/$USER", @@ -34,6 +38,8 @@ { "name": "local", "host": "127.0.0.1", + "partition": "debug", + "partition_transfert": "debug", "port": 22, "geos_path":"/opt/GEOS/build-spack-generated-debug/bin/geosx", "geos_module":"/workrd/users/$USER/geos-generated", From ea923b49e7f2f79c95919878a979c1e87c7539a5 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 19 Jan 2026 12:01:40 +0100 Subject: [PATCH 63/70] typo --- geos-trame/src/geos/trame/app/io/ssh_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/io/ssh_tools.py b/geos-trame/src/geos/trame/app/io/ssh_tools.py index d1aa5716a..0bff770e9 100644 --- a/geos-trame/src/geos/trame/app/io/ssh_tools.py +++ b/geos-trame/src/geos/trame/app/io/ssh_tools.py @@ -15,7 +15,7 @@ class SimulationConstant: name: str host: str partition: str - partition_tranfert: str + partition_transfert: str port: int geos_path: str geos_module: str From b6d64ca45eb0e9fd19b5d7658390f28c50532ffd Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 19 Jan 2026 13:39:17 +0100 Subject: [PATCH 64/70] typos --- geos-trame/src/geos/trame/app/io/simulation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geos-trame/src/geos/trame/app/io/simulation.py b/geos-trame/src/geos/trame/app/io/simulation.py index 6046116c5..cd1df1bd1 100644 --- a/geos-trame/src/geos/trame/app/io/simulation.py +++ b/geos-trame/src/geos/trame/app/io/simulation.py @@ -131,7 +131,7 @@ def run_simulation() -> None: geos_path=Authentificator.get_cluster( server.state.selected_cluster_name ).geos_path, mem="0", comment_gr=server.state.slurm_comment, - partition=cluter_part, + partition=cluster_part, account=server.state.slurm_comment) Simulation.render_and_run( f'{cluster_name}_copyback.jinja', @@ -149,7 +149,7 @@ def run_simulation() -> None: target_dl_path=server.state.simulation_dl_path, comment_gr=server.state.slurm_comment, partition=cluster_trans_part, - account=server.stat.slurm_comment ) + account=server.state.slurm_comment ) self._start_result_streams() From 6f1a0a55bbac8634ec2ea98df28114922b3d3cde Mon Sep 17 00:00:00 2001 From: jacques franc Date: Mon, 19 Jan 2026 13:57:28 +0100 Subject: [PATCH 65/70] rename transfer partition --- geos-trame/src/geos/trame/assets/cluster.json | 118 ++++++++++-------- 1 file changed, 65 insertions(+), 53 deletions(-) diff --git a/geos-trame/src/geos/trame/assets/cluster.json b/geos-trame/src/geos/trame/assets/cluster.json index 5097c3b94..221dd11d1 100644 --- a/geos-trame/src/geos/trame/assets/cluster.json +++ b/geos-trame/src/geos/trame/assets/cluster.json @@ -1,56 +1,68 @@ [ { - "name": "p4", - "host": "p4log01", - "partition":"p4_general", - "partition_transfert":"p4_transfert", - "port": 22, - "geos_path":"/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", - "geos_module":"/workrd/users/$USER/modulesRHEL88", - "geos_load_list":["geos-daily-rhel88"], - "remote_home_base": "/users/$USER", - "simulation_default_filename": "geosDeck.xml", - "simulation_remote_path": "/workrd/users/$USER/Example", - "simulation_dl_default_path": "/users/$USER/Example", - "simulation_information_default_path": "/users/$USER/.trame-logs", - "n_nodes": 212, - "cores_per_node" : 192, - "mem_per_node": 747 -}, + "name": "p4", + "host": "p4log01", + "partition": "p4_general", + "partition_transfert": "p4_transfer", + "port": 22, + "geos_path": "/workrd/users/$USER/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module": "/workrd/users/$USER/modulesRHEL88", + "geos_load_list": [ + "geos-daily-rhel88" + ], + "remote_home_base": "/users/$USER", + "simulation_default_filename": "geosDeck.xml", + "simulation_remote_path": "/workrd/users/$USER/Example", + "simulation_dl_default_path": "/users/$USER/Example", + "simulation_information_default_path": "/users/$USER/.trame-logs", + "n_nodes": 212, + "cores_per_node": 192, + "mem_per_node": 747 + }, { - "name": "pine", - "host": "pine-1", - "partition":"pine", - "partition_transfert":"pine", - "port": 22, - "geos_path":"/shared/data1/Users/$USER/codes/GEOS-2025-11-03/build-pine-1.pine.cluster-linux-rocky9-zen4-gcc@11.4.1-release/bin/geosx", - "geos_module":"/apps/modules/modulefiles3", - "geos_load_list":["genesis","common","proxy","slurm","gcc/11.4.1","openmpi-gcc/5.0.5","cmake/3.27.9"], - "remote_home_base": "/home/$USER", - "simulation_default_filename": "geosDeck.xml", - "simulation_remote_path": "/shared/data1/Users/$USER/Example", - "simulation_dl_default_path": "/shared/data1/Users/$USER/Example", - "simulation_information_default_path": "/home/$USER/.trame-logs", - "n_nodes": 48, - "cores_per_node" : 64, - "mem_per_node": 768 -}, - { - "name": "local", - "host": "127.0.0.1", - "partition": "debug", - "partition_transfert": "debug", - "port": 22, - "geos_path":"/opt/GEOS/build-spack-generated-debug/bin/geosx", - "geos_module":"/workrd/users/$USER/geos-generated", - "geos_load_list":["geos-toolchains"], - "remote_home_base": "/home/$USER", - "simulation_default_filename": "geosDeck.xml", - "simulation_remote_path": "/work/", - "simulation_dl_default_path": "/data/", - "simulation_information_default_path": "/home/.trame-logs", - "n_nodes": 1, - "cores_per_node" : 8, - "mem_per_node": 32 -} -] + "name": "pine", + "host": "pine-1", + "partition": "pine", + "partition_transfert": "pine", + "port": 22, + "geos_path": "/shared/data1/Users/$USER/codes/GEOS-2025-11-03/build-pine-1.pine.cluster-linux-rocky9-zen4-gcc@11.4.1-release/bin/geosx", + "geos_module": "/apps/modules/modulefiles3", + "geos_load_list": [ + "genesis", + "common", + "proxy", + "slurm", + "gcc/11.4.1", + "openmpi-gcc/5.0.5", + "cmake/3.27.9" + ], + "remote_home_base": "/home/$USER", + "simulation_default_filename": "geosDeck.xml", + "simulation_remote_path": "/shared/data1/Users/$USER/Example", + "simulation_dl_default_path": "/shared/data1/Users/$USER/Example", + "simulation_information_default_path": "/home/$USER/.trame-logs", + "n_nodes": 48, + "cores_per_node": 64, + "mem_per_node": 768 + }, + { + "name": "local", + "host": "127.0.0.1", + "partition": "debug", + "partition_transfert": "debug", + "port": 22, + "geos_path": "/opt/GEOS/build-spack-generated-debug/bin/geosx", + "geos_module": "/workrd/users/$USER/geos-generated", + "geos_load_list": [ + "geos-toolchains" + ], + "remote_home_base": "/home/$USER", + "simulation_default_filename": "geosDeck.xml", + "simulation_remote_path": "/work/", + "simulation_dl_default_path": "/data/", + "simulation_information_default_path": "/home/.trame-logs", + "n_nodes": 1, + "cores_per_node": 8, + "mem_per_node": 32 + } +] \ No newline at end of file From 924fdca55e48eb89abb6d2d5902e21573e67944d Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 5 Feb 2026 14:19:54 +0100 Subject: [PATCH 66/70] fixing imporper eval of num unk --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index e550e3553..423545e61 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -142,6 +142,7 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: has_internal_mesh = _has_internalMesh(simulation_xml_filename[i]) if any(has_xml): + uc = up = nc = np = None for i,_ in enumerate(has_xml): if has_external_mesh[i]: nc, np = _how_many_cells(simulation_xml_filename[i]) @@ -150,7 +151,8 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: if has_internal_mesh: nc,np = _what_internalMesh(simulation_xml_filename[i]) - server.state.nunknowns = uc*nc + up*np + if all(i is not None for i in (uc,nc,up,np)): + server.state.nunknowns = uc*nc + up*np server.state.is_valid_jobfiles = any(has_xml) From 9576aadbb937761d91ee6ff377174be6fe238744 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 27 Feb 2026 11:07:30 +0100 Subject: [PATCH 67/70] .vtk for wells meshes --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 423545e61..54b8ea6db 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -264,7 +264,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: hide_details=True, # clearable=True, multiple=True, - filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt,.geos', + filter_by_type='.xml,.vtu,.vtm,.pvtu,.pvtm,.dat,.csv,.txt,.geos,.vtk', # readonly=True, disabled=( "!access_granted", ) ) with vuetify.VCol( cols=4 ), vuetify.VList(): From 48fd308551ca956888095a7904ab4970381170b0 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Fri, 27 Feb 2026 11:10:18 +0100 Subject: [PATCH 68/70] all req - wip --- geos-trame/src/geos/trame/app/ui/simulation_view.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 54b8ea6db..e0b358fee 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -155,6 +155,7 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: server.state.nunknowns = uc*nc + up*np server.state.is_valid_jobfiles = any(has_xml) + server.state.all_req_files = all(req_files) def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml @@ -310,7 +311,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=1 ): vuetify.VBtn( "Run", click="trigger('run_simulation')", - disabled=( "!is_valid_jobfiles", ), + disabled=( "!is_valid_jobfiles && !all_req_files", ), classes="ml-auto" ), # type: ignore vuetify.VDivider( thickness=5, classes="my-4" ) From ce37eefb29384c21b69fb9fe327699b023f85b92 Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 7 May 2026 15:18:58 +0200 Subject: [PATCH 69/70] add list of req files before run --- geos-trame/pyproject.toml | 1 - geos-trame/src/geos/trame/app/main.py | 3 - .../src/geos/trame/app/ui/simulation_view.py | 72 +++++++++++++++++-- 3 files changed, 66 insertions(+), 10 deletions(-) diff --git a/geos-trame/pyproject.toml b/geos-trame/pyproject.toml index 347ee53b3..f81870e69 100644 --- a/geos-trame/pyproject.toml +++ b/geos-trame/pyproject.toml @@ -31,7 +31,6 @@ keywords = [ dependencies = [ "setuptools", - "typing-extensions==4.12.2", "trame==3.6.5", "trame-vuetify==3.1.0", "trame-code==1.0.1", diff --git a/geos-trame/src/geos/trame/app/main.py b/geos-trame/src/geos/trame/app/main.py index 4aa0e5cf6..1fe527e74 100644 --- a/geos-trame/src/geos/trame/app/main.py +++ b/geos-trame/src/geos/trame/app/main.py @@ -9,9 +9,6 @@ from trame.app import get_server # type: ignore from trame_server import Server -import sys -sys.path.insert( 0, "/data/pau901/SIM_CS/users/jfranc/geosPythonPackages/geos-trame/src" ) - #do not override if existing from geos.trame.app.core import GeosTrame from geos.trame.app.io.ssh_tools import Authentificator diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index e0b358fee..7db432849 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -90,6 +90,18 @@ def _what_internalMesh(bcontent) -> tuple[int,int]: def define_simulation_view( server: Server ) -> None: """Functional definition of UI elements.""" + @server.state.change("other_widget_selected_file") + def on_other_widget_file_ready(other_widget_selected_file: dict, **_: Any) -> None: + if not other_widget_selected_file: + return + + current = list(server.state.simulation_xml_filename) + existing_names = {f.get("name") for f in current} + + if other_widget_selected_file.get("name") not in existing_names: + current.append(other_widget_selected_file) + server.state.simulation_xml_filename = current + @server.state.change( "selected_cluster_name" ) def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: print( f"selecting {selected_cluster_name}" ) @@ -109,11 +121,12 @@ def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: # except: # server.state.sd = { 'nodes': 0, 'total_ranks': 0 } - @server.state.change( "simulation_xml_temp" ) - def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: + @server.state.change("simulation_xml_temp") + def on_temp_change(simulation_xml_temp: list, **_: Any) -> None: current_list = server.state.simulation_xml_filename - new_list = current_list + simulation_xml_temp + + server.state.simulation_xml_filename = new_list server.state.simulation_xml_temp = [] @@ -143,6 +156,8 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: if any(has_xml): uc = up = nc = np = None + # compute unknowns and cells only for xml files, if external mesh do not take into account internal mesh info even if present, if no external mesh try to take into account internal mesh info if present + # useful for decomposition suggestion for i,_ in enumerate(has_xml): if has_external_mesh[i]: nc, np = _how_many_cells(simulation_xml_filename[i]) @@ -152,10 +167,40 @@ def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: nc,np = _what_internalMesh(simulation_xml_filename[i]) if all(i is not None for i in (uc,nc,up,np)): - server.state.nunknowns = uc*nc + up*np + server.state.nunknowns = uc*nc + up*np + + if any(has_xml): + xml_pattern = re.compile(r"\.xml$", re.IGNORECASE) + mesh_pattern = re.compile(r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE) + table_pattern = re.compile(r"\.(txt|dat|csv|geos)$", re.IGNORECASE) + + xml_matches, mesh_matches, table_matches = [], [], [] + + pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" + + # Fix: use enumerate instead of .index() to handle duplicates safely + for i, file in enumerate(simulation_xml_filename): + if not has_xml[i]: + continue + name = file.get("name", "") + if xml_pattern.search(name): + xml_matches.append(file) + elif mesh_pattern.search(name): + mesh_matches.append(file) + elif table_pattern.search(name): + table_matches.append(file) + + if xml_matches: + already_have = {file.get("name", "") for file in simulation_xml_filename} + required = set(re.findall(pattern_file, xml_matches[0]['content'].decode("utf-8"))) + required -= already_have + # Fix: store as list of dicts so the UI can use {{ file.name }} + server.state.simulation_xml_required = [{"name": f} for f in sorted(required)] + else: + server.state.simulation_xml_required = [] server.state.is_valid_jobfiles = any(has_xml) - server.state.all_req_files = all(req_files) + server.state.all_req_files = any(has_xml) and len(server.state.simulation_xml_required) == 0 def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml @@ -208,9 +253,11 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.access_granted = False server.state.is_valid_jobfiles = False server.state.simulation_xml_filename = [] + server.state.simulation_xml_required = [] server.state.selected_cluster_names = [ cluster.name for cluster in Authentificator.sim_constants ] # server.state.decompositions = [] + # --------------------------- auth block -----------------------# vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Cluster", @@ -254,6 +301,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: clearable=True, ) # type: ignore + # --------------------------- simulation block -----------------------# vuetify.VDivider( thickness=5, classes="my-4" ) with vuetify.VRow(): @@ -276,6 +324,17 @@ def run_remove_jobfile( index_to_remove: int ) -> None: click=( run_remove_jobfile, "[i]" ) ): vuetify.VListItemTitle( "{{ file.name }}" ) vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) + vuetify.VDivider( thickness=2, classes="my-2" ) + + with vuetify.VListItem( v_for=( "(file,i) in simulation_xml_required" ), + key="i", + value="file", + classes="bg-red-lighten-4 text-red-darken-4", + # base_color="red-lighten-4", + # style="background-color: rgb(var(--v-theme-error-lighten-4));", + prepend_icon="mdi-alert-circle-outline" ): + vuetify.VListItemTitle( "{{ file.name }} (required)" ) + with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( v_model=( "simulation_remote_path", None ), @@ -311,9 +370,10 @@ def run_remove_jobfile( index_to_remove: int ) -> None: with vuetify.VCol( cols=1 ): vuetify.VBtn( "Run", click="trigger('run_simulation')", - disabled=( "!is_valid_jobfiles && !all_req_files", ), + disabled=( "!is_valid_jobfiles || !all_req_files", ), classes="ml-auto" ), # type: ignore + # ------------------------------- Status block ----------------------------- # vuetify.VDivider( thickness=5, classes="my-4" ) with vuetify.VRow(): From 69f81d2425b493f6d3979571bde37e961cedd0af Mon Sep 17 00:00:00 2001 From: jacques franc Date: Thu, 7 May 2026 17:31:50 +0200 Subject: [PATCH 70/70] mypying --- .../src/geos/trame/app/ui/simulation_view.py | 289 +++++++++--------- 1 file changed, 150 insertions(+), 139 deletions(-) diff --git a/geos-trame/src/geos/trame/app/ui/simulation_view.py b/geos-trame/src/geos/trame/app/ui/simulation_view.py index 7db432849..6df604221 100644 --- a/geos-trame/src/geos/trame/app/ui/simulation_view.py +++ b/geos-trame/src/geos/trame/app/ui/simulation_view.py @@ -16,103 +16,117 @@ # unknowns (oncell,onpoint) # for now do not take into account wells as dep on the num of wells (neg vs matrix elmts) # for now do not take into account frac as dep on the num of frac elmts (prob neg vs matrix elmts) -solvers_to_unknowns = { - "CompositionalMultiphaseFVM" : (3, 0), - "CompositionalMultiphaseHybridFVM" : (4, 0), - "CompositionalMultiphaseReservoirPoromechanics" : (3,3), - "CompositionalMultiphaseReservoirPoromechanicsConformingFractures" : (3,6), - "CompositionalMultiphaseWell" : (3,0), - "ElasticFirstOrderSEM" : (0,3), - "ElasticSEM" : (0,3), - "ImmiscibleMultiphaseFlow": (3,0), - "LaplaceFEM" : (0,3), - "MultiphasePoromechanics" : (3,3), - "MultiphasePoromechanicsReservoir" : (3,3),#?? - "MultiphasePoromechanicsConformingFractures" : (3,6) , - "SinglePhaseFVM" : (2,0), - "SinglePhaseHybridFVM" : (3,0), - "SinglePhasePoromechanics" : (2,3), - "SinglePhasePoromechanicsConformingFractures" : (2,3), - "SinglePhasePoromechanicsConformingFracturesALM" : (2,3), - "SinglePhaseWell" : (2,0), - "SolidMechanicsEmbeddedFractures": (0,3), - "SolidMechanicsAugmentedLagrangianContact": (0,3), - "SolidMechanicsLagrangeContact": (0,3), - "SolidMechanicsLagrangeContactBubbleStab": (0,3), - "SolidMechanicsLagrangianFEM": (0,3) +solvers_to_unknowns = { + "CompositionalMultiphaseFVM": ( 3, 0 ), + "CompositionalMultiphaseHybridFVM": ( 4, 0 ), + "CompositionalMultiphaseReservoirPoromechanics": ( 3, 3 ), + "CompositionalMultiphaseReservoirPoromechanicsConformingFractures": ( 3, 6 ), + "CompositionalMultiphaseWell": ( 3, 0 ), + "ElasticFirstOrderSEM": ( 0, 3 ), + "ElasticSEM": ( 0, 3 ), + "ImmiscibleMultiphaseFlow": ( 3, 0 ), + "LaplaceFEM": ( 0, 3 ), + "MultiphasePoromechanics": ( 3, 3 ), + "MultiphasePoromechanicsReservoir": ( 3, 3 ), #?? + "MultiphasePoromechanicsConformingFractures": ( 3, 6 ), + "SinglePhaseFVM": ( 2, 0 ), + "SinglePhaseHybridFVM": ( 3, 0 ), + "SinglePhasePoromechanics": ( 2, 3 ), + "SinglePhasePoromechanicsConformingFractures": ( 2, 3 ), + "SinglePhasePoromechanicsConformingFracturesALM": ( 2, 3 ), + "SinglePhaseWell": ( 2, 0 ), + "SolidMechanicsEmbeddedFractures": ( 0, 3 ), + "SolidMechanicsAugmentedLagrangianContact": ( 0, 3 ), + "SolidMechanicsLagrangeContact": ( 0, 3 ), + "SolidMechanicsLagrangeContactBubbleStab": ( 0, 3 ), + "SolidMechanicsLagrangianFEM": ( 0, 3 ) } - # helpers -def _what_solver(bcontent) -> int: - import xml.etree - sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) - nunk = [solvers_to_unknowns.get(elt.tag, (1,0)) for elt in sim_xml.find('Solvers')] - return max(nunk) - - -def _how_many_cells( bcontent ) -> tuple[int,int]: - import vtk - name = bcontent['name'] - if name.endswith(".vtp"): - reader = vtk.vtkXMLPolyDataReader() - elif name.endswith(".vtu"): - reader = vtk.vtkXMLUnstructuredGridReader() - elif name.endswith(".vtm"): - reader = vtk.vtkXMLMultiBlockDataReader() - else: - raise ValueError("Unsupported kind (use 'vtp', 'vtu', or 'vtm').") - reader.SetReadFromInputString(1) - reader.SetInputString(bcontent['content']) - reader.Update() - output = reader.GetOutput() - return (output.GetNumberOfCells(), output.GetNumberOfPoints()) +# helpers +def _what_solver( bcontent: dict ) -> tuple[ int, int ]: + from xml.etree.ElementTree import Element, fromstring + sim_xml: Element = fromstring( bcontent[ 'content' ] ) + solver = sim_xml.find( 'Solvers' ) + nunk: list[ tuple[ int, int ] ] = [ solvers_to_unknowns.get( elt.tag, ( 1, 0 ) ) + for elt in solver ] if solver else [ ( 0, 0 ) ] + return max( nunk ) -def _has_internalMesh(bcontent) -> bool: - import xml.etree - sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) - return (sim_xml.find('Mesh/InternalMesh') is not None) -def _what_internalMesh(bcontent) -> tuple[int,int]: - import xml.etree - import re - sim_xml = xml.etree.ElementTree.fromstring(bcontent['content']) - nx = sim_xml.find('Mesh/InternalMesh').get('nx') - nx = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', nx)]) - ny = sim_xml.find('Mesh/InternalMesh').get('ny') - ny = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', ny)]) - nz = sim_xml.find('Mesh/InternalMesh').get('nz') - nz = sum([int(el) for el in re.findall(r'-?\d+(?:\.\d+)?', nz)]) - return (nx*ny*nz, (nx+1)*(ny+1)*(nz+1)) +def _how_many_cells( bcontent: dict ) -> tuple[ int, int ]: + import vtk + name = bcontent[ 'name' ] + if name.endswith( ".vtp" ): + reader = vtk.vtkXMLPolyDataReader() + elif name.endswith( ".vtu" ): + reader = vtk.vtkXMLUnstructuredGridReader() + elif name.endswith( ".vtm" ): + reader = vtk.vtkXMLMultiBlockDataReader() + else: + raise ValueError( "Unsupported kind (use 'vtp', 'vtu', or 'vtm')." ) + + reader.SetReadFromInputString( 1 ) + reader.SetInputString( bcontent[ 'content' ] ) + reader.Update() + output = reader.GetOutput() + return ( output.GetNumberOfCells(), output.GetNumberOfPoints() ) + + +def _has_internalMesh( bcontent: dict ) -> bool: + from xml.etree.ElementTree import Element, fromstring + sim_xml: Element = fromstring( bcontent[ 'content' ] ) + return bool( sim_xml.find( 'Mesh/InternalMesh' ) is not None ) + + +def _what_internalMesh( bcontent: dict ) -> tuple[ int, int ]: + from xml.etree.ElementTree import Element, fromstring + import re + sim_xml: Element = fromstring( bcontent[ 'content' ] ) + + mesh = sim_xml.find( 'Mesh/InternalMesh' ) + + def _parse_sum( value: str | None ) -> int: + if value is None: + return 0 + return sum( int( el ) for el in re.findall( r'-?\d+(?:\.\d+)?', value ) ) + + if mesh is None: + nx = ny = nz = 0 + else: + nx = _parse_sum( mesh.get( 'nx' ) ) + ny = _parse_sum( mesh.get( 'ny' ) ) + nz = _parse_sum( mesh.get( 'nz' ) ) + + return ( nx * ny * nz, ( nx + 1 ) * ( ny + 1 ) * ( nz + 1 ) ) #TODO a class from it def define_simulation_view( server: Server ) -> None: """Functional definition of UI elements.""" - @server.state.change("other_widget_selected_file") - def on_other_widget_file_ready(other_widget_selected_file: dict, **_: Any) -> None: + @server.state.change( "other_widget_selected_file" ) + def on_other_widget_file_ready( other_widget_selected_file: dict, **_: Any ) -> None: if not other_widget_selected_file: return - current = list(server.state.simulation_xml_filename) - existing_names = {f.get("name") for f in current} + current = list( server.state.simulation_xml_filename ) + existing_names = { f.get( "name" ) for f in current } - if other_widget_selected_file.get("name") not in existing_names: - current.append(other_widget_selected_file) + if other_widget_selected_file.get( "name" ) not in existing_names: + current.append( other_widget_selected_file ) server.state.simulation_xml_filename = current - + @server.state.change( "selected_cluster_name" ) def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: print( f"selecting {selected_cluster_name}" ) server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( selected_cluster_name ), server.state.nunknowns ).get_sd() - + server.state.simulation_remote_path = Authentificator.get_cluster( - server.state.selected_cluster_name ).simulation_remote_path - + server.state.selected_cluster_name ).simulation_remote_path + server.state.simulation_dl_path = Authentificator.get_cluster( - server.state.selected_cluster_name ).simulation_dl_default_path + server.state.selected_cluster_name ).simulation_dl_default_path # @server.state.change( "decomposition" ) # def on_decomposition_selected( decomposition: str, **_: Any ) -> None: @@ -121,87 +135,85 @@ def on_cluster_change( selected_cluster_name: str, **_: Any ) -> None: # except: # server.state.sd = { 'nodes': 0, 'total_ranks': 0 } - @server.state.change("simulation_xml_temp") - def on_temp_change(simulation_xml_temp: list, **_: Any) -> None: + @server.state.change( "simulation_xml_temp" ) + def on_temp_change( simulation_xml_temp: list, **_: Any ) -> None: current_list = server.state.simulation_xml_filename new_list = current_list + simulation_xml_temp - server.state.simulation_xml_filename = new_list server.state.simulation_xml_temp = [] - @server.state.change("nunknowns") - def on_nunknowns_change( nunknowns : int , **_ : Any) -> None: + @server.state.change( "nunknowns" ) + def on_nunknowns_change( nunknowns: int, **_: Any ) -> None: #re-gen list - if len(server.state.decompositions) > 0: - server.state.decompositions = SuggestDecomposition( Authentificator.get_cluster( server.state.selected_cluster_name ), - nunknowns ).get_sd() - print(f'unknowns changed : {server.state.nunknowns} -> {nunknowns}') + if len( server.state.decompositions ) > 0: + server.state.decompositions = SuggestDecomposition( + Authentificator.get_cluster( server.state.selected_cluster_name ), nunknowns ).get_sd() + print( f'unknowns changed : {server.state.nunknowns} -> {nunknowns}' ) server.state.nunknowns = nunknowns - @server.state.change( "simulation_xml_filename" ) def on_simfiles_change( simulation_xml_filename: list, **_: Any ) -> None: import re - has_xml = list([True if file.get( "type", "" ) == 'text/xml' else False - for file in simulation_xml_filename ]) - - has_external_mesh = list([True if file.get( "name", "" ).endswith((".vtu",".vtm",".vtp")) else False - for file in simulation_xml_filename ]) - + has_xml = [ file.get( "type", "" ) == 'text/xml' for file in simulation_xml_filename ] + + has_external_mesh = [ + bool( file.get( "name", "" ).endswith( ( ".vtu", ".vtm", ".vtp" ) ) ) for file in simulation_xml_filename + ] + has_internal_mesh = False - for i,_ in enumerate(has_xml): - if has_xml[i]: - has_internal_mesh = _has_internalMesh(simulation_xml_filename[i]) + for i, _ in enumerate( has_xml ): + if has_xml[ i ]: + has_internal_mesh = _has_internalMesh( simulation_xml_filename[ i ] ) - if any(has_xml): - uc = up = nc = np = None + if any( has_xml ): + uc = up = nc = np = 0 # compute unknowns and cells only for xml files, if external mesh do not take into account internal mesh info even if present, if no external mesh try to take into account internal mesh info if present # useful for decomposition suggestion - for i,_ in enumerate(has_xml): - if has_external_mesh[i]: - nc, np = _how_many_cells(simulation_xml_filename[i]) - elif has_xml[i]: - uc, up = _what_solver(simulation_xml_filename[i]) + for i, _ in enumerate( has_xml ): + if has_external_mesh[ i ]: + nc, np = _how_many_cells( simulation_xml_filename[ i ] ) + elif has_xml[ i ]: + uc, up = _what_solver( simulation_xml_filename[ i ] ) if has_internal_mesh: - nc,np = _what_internalMesh(simulation_xml_filename[i]) - - if all(i is not None for i in (uc,nc,up,np)): - server.state.nunknowns = uc*nc + up*np + nc, np = _what_internalMesh( simulation_xml_filename[ i ] ) + + if all( i is not None for i in ( uc, nc, up, np ) ): + server.state.nunknowns = uc * nc + up * np - if any(has_xml): - xml_pattern = re.compile(r"\.xml$", re.IGNORECASE) - mesh_pattern = re.compile(r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE) - table_pattern = re.compile(r"\.(txt|dat|csv|geos)$", re.IGNORECASE) + if any( has_xml ): + xml_pattern = re.compile( r"\.xml$", re.IGNORECASE ) + mesh_pattern = re.compile( r"\.(vtu|vtm|pvtu|pvtm)$", re.IGNORECASE ) + table_pattern = re.compile( r"\.(txt|dat|csv|geos)$", re.IGNORECASE ) xml_matches, mesh_matches, table_matches = [], [], [] pattern_file = r"[\w\-.]+\.(?:vtu|pvtu|dat|txt|xml|geos)\b" # Fix: use enumerate instead of .index() to handle duplicates safely - for i, file in enumerate(simulation_xml_filename): - if not has_xml[i]: + for i, file in enumerate( simulation_xml_filename ): + if not has_xml[ i ]: continue - name = file.get("name", "") - if xml_pattern.search(name): - xml_matches.append(file) - elif mesh_pattern.search(name): - mesh_matches.append(file) - elif table_pattern.search(name): - table_matches.append(file) + name = file.get( "name", "" ) + if xml_pattern.search( name ): + xml_matches.append( file ) + elif mesh_pattern.search( name ): + mesh_matches.append( file ) + elif table_pattern.search( name ): + table_matches.append( file ) if xml_matches: - already_have = {file.get("name", "") for file in simulation_xml_filename} - required = set(re.findall(pattern_file, xml_matches[0]['content'].decode("utf-8"))) + already_have = { file.get( "name", "" ) for file in simulation_xml_filename } + required = set( re.findall( pattern_file, xml_matches[ 0 ][ 'content' ].decode( "utf-8" ) ) ) required -= already_have # Fix: store as list of dicts so the UI can use {{ file.name }} - server.state.simulation_xml_required = [{"name": f} for f in sorted(required)] + server.state.simulation_xml_required = [ { "name": f } for f in sorted( required ) ] else: server.state.simulation_xml_required = [] - - server.state.is_valid_jobfiles = any(has_xml) - server.state.all_req_files = any(has_xml) and len(server.state.simulation_xml_required) == 0 - + + server.state.is_valid_jobfiles = any( has_xml ) + server.state.all_req_files = any( has_xml ) and len( server.state.simulation_xml_required ) == 0 + def kill_job( index_to_remove: int ) -> None: # for now just check there is an xml jid = list( server.state.job_ids ) @@ -257,7 +269,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: server.state.selected_cluster_names = [ cluster.name for cluster in Authentificator.sim_constants ] # server.state.decompositions = [] - # --------------------------- auth block -----------------------# + # --------------------------- auth block -----------------------# vuetify.VDivider( vertical=True, thickness=5, classes="mx-4" ) with vuetify.VCol( cols=1 ): vuetify.VSelect( label="Cluster", @@ -268,10 +280,9 @@ def run_remove_jobfile( index_to_remove: int ) -> None: vuetify.VSelect( label="Decomposition", items=( "decompositions", [] ), v_model=( "decomposition", None ), - item_title="label", - item_value="id", - return_object=True - ) + item_title="label", + item_value="id", + return_object=True ) with vuetify.VRow(): with vuetify.VCol( cols=8 ): @@ -301,7 +312,7 @@ def run_remove_jobfile( index_to_remove: int ) -> None: clearable=True, ) # type: ignore - # --------------------------- simulation block -----------------------# + # --------------------------- simulation block -----------------------# vuetify.VDivider( thickness=5, classes="my-4" ) with vuetify.VRow(): @@ -325,16 +336,16 @@ def run_remove_jobfile( index_to_remove: int ) -> None: vuetify.VListItemTitle( "{{ file.name }}" ) vuetify.VListItemSubtitle( "{{ file.size ? (file.size / 1024).toFixed(1) + ' KB' : 'URL' }}" ) vuetify.VDivider( thickness=2, classes="my-2" ) - - with vuetify.VListItem( v_for=( "(file,i) in simulation_xml_required" ), - key="i", - value="file", - classes="bg-red-lighten-4 text-red-darken-4", - # base_color="red-lighten-4", - # style="background-color: rgb(var(--v-theme-error-lighten-4));", - prepend_icon="mdi-alert-circle-outline" ): - vuetify.VListItemTitle( "{{ file.name }} (required)" ) + with vuetify.VListItem( + v_for=( "(file,i) in simulation_xml_required" ), + key="i", + value="file", + classes="bg-red-lighten-4 text-red-darken-4", + # base_color="red-lighten-4", + # style="background-color: rgb(var(--v-theme-error-lighten-4));", + prepend_icon="mdi-alert-circle-outline" ): + vuetify.VListItemTitle( "{{ file.name }} (required)" ) with vuetify.VRow(), vuetify.VCol(): vuetify.VTextField( v_model=( "simulation_remote_path", None ),