Blame view

src/core/pyros_django/agent/AgentSST.py 13.5 KB
05316241   Alexis Koralewski   Adding AgentSST, ...
1
2
3
4
#!/usr/bin/env python3

import os
import socket
import subprocess
import sys
import time
from datetime import datetime, timezone, timedelta
from pathlib import Path

##import utils.Logger as L
#import threading #, multiprocessing, os

#from django.db import transaction
#from common.models import Command

# NOTE(review): the project imports keep their original relative order
# (Agent, then the Django models, then the obs config) - confirm before reordering.
from Agent import Agent, build_agent, log, extract_parameters
from common.models import AgentCmd, AgentSurvey

from src.core.pyros_django.obsconfig.obsconfig_class import OBSConfig
05316241   Alexis Koralewski   Adding AgentSST, ...
19
20

class AgentSST(Agent):
    """Agent that starts the other agents of this computer and watches them.

    The agents to start are read from the observatory configuration
    (``self.get_config()``) for the current hostname. Each started agent is
    tracked in ``subprocess_dict`` together with the number of times it has
    been (re)started. On every routine pass, agents whose ``AgentSurvey``
    entry is older than its validity duration are asked to start again (if
    their process has ended) or to restart (if it is still running).
    """

    # Hostname used to select the agents in the observatory configuration.
    # Replaced by socket.gethostname() in __init__ (see also set_computer()).
    computer = "XCY1"
    _previous_dir = ""
    PROJECT_ROOT_PATH = ""
    VENV_PYTHON = ""
    # agent name -> {"process": subprocess.Popen, "nb_try_restart": int}
    subprocess_dict = {}
    # agent name -> True when that agent must be started with the "-t" flag
    agent_in_mode_test = {}

    # Commands implemented by this class (do_* methods below).
    # NOTE(review): the meaning of the two numbers in each entry is defined
    # by the Agent base class and is not visible here - confirm there.
    AGENT_SPECIFIC_COMMANDS = [
        ("do_kill_agent", 10, 0),
        ("do_restart_agent", 20, 0),
        ("do_start_agent", 20, 0),
    ]
    TEST_COMMANDS_LIST = []

    # A (re)start command that was executed at least this long ago is
    # considered outdated and may be asked again.
    _RECOVERY_CMD_MIN_AGE = timedelta(seconds=30)

    def __init__(self, name:str=None,sim_computer=None):
        """Read the environment and work out which python starts the agents.

        Args:
            name: Accepted for interface compatibility; not used here and
                not forwarded to the base class.
            sim_computer: Accepted for interface compatibility; not used here.

        Raises:
            KeyError: if the PROJECT_ROOT_PATH environment variable is unset.
        """
        super().__init__()
        # Give each instance its own registries instead of mutating the
        # class-level dicts, which would be shared by every instance.
        self.subprocess_dict = {}
        self.agent_in_mode_test = {}

        self.PROJECT_ROOT_PATH = os.environ["PROJECT_ROOT_PATH"]
        self.computer = socket.gethostname()

        if os.environ.get("WITH_DOCKER"):
            # No virtualenv with Docker: rely on the "python3" found on PATH.
            self.VENV_PYTHON = "python3"
        else:
            self.VENV_PYTHON = os.path.join(
                self.PROJECT_ROOT_PATH, "venv", "venv_py3_pyros", "bin", "python3"
            )

    def init(self):
        """Start every agent of this computer, then set the routine delay."""
        super().init()
        log.info(f"PC hostname is {self.computer}")
        # NOTE(review): start_agents() reads self.TEST_MODE before it is
        # reset below, so the attribute must already exist at this point
        # (presumably set by the Agent base class) - confirm.
        self.start_agents()
        self.TEST_MODE = False
        time.sleep(10)
        self.set_delay(3)

    def set_computer(self,computer):
        """Override the hostname used to look up agents in the obs config."""
        self.computer = computer

    def start_agents(self,agent_name=None):
        """
        Start all agents or one agent of obs_config

        When ``agent_name`` is given, only that agent is launched and its
        restart counter is incremented. Otherwise every agent configured for
        ``self.computer`` is launched with a restart counter reset to zero;
        if the computer is unknown to the configuration the program exits
        with status 1.

        Args:
            agent_name (str, optional): Specific agent name to start. Defaults to None.
        """
        obs_config = self.get_config()

        if agent_name:
            self._launch_agent(obs_config, agent_name, is_restart=True)
            return

        agents_per_computer = obs_config.get_agents_per_computer(obs_config.unit_name)
        agents = agents_per_computer.get(self.computer)
        if agents is None:
            available_hostnames = agents_per_computer.keys()
            log.info("Computer not found in obs config")
            log.info(f"Available hostnames {available_hostnames}. Current hostname is {self.computer}")
            sys.exit(1)

        log.info(f"Agents associated to this computer : {agents}")
        for agent in agents:
            self._launch_agent(obs_config, agent, is_restart=False)

    def _launch_agent(self, obs_config, agent, is_restart):
        """Launch the protocol script of ``agent`` and register its process.

        Nothing is launched when the agent has no "protocol" entry in the
        configuration or when the script file does not exist.

        Args:
            obs_config: Configuration object returned by ``self.get_config()``.
            agent: Name of the agent to launch.
            is_restart: True to keep and increment the existing restart
                counter, False to reset it to zero.

        Returns:
            bool: True when a process was launched, False otherwise.
        """
        agent_informations = obs_config.get_agent_information(obs_config.unit_name, agent)
        protocol = agent_informations.get("protocol")
        if not protocol:
            return False

        script_path = os.path.join(
            self.PROJECT_ROOT_PATH, os.path.dirname(protocol), protocol.split("/")[-1]
        )
        if not os.path.exists(script_path):
            return False

        if agent not in self.agent_in_mode_test:
            self.agent_in_mode_test[agent] = self.TEST_MODE

        argv = [self.VENV_PYTHON, script_path]
        if self.agent_in_mode_test[agent]:
            argv.append("-t")

        # An argument list (no shell) keeps paths containing spaces intact
        # and makes the Popen handle refer to the interpreter itself, so
        # force_kill_agent() kills the agent and not an intermediate shell.
        process = subprocess.Popen(argv)

        if is_restart:
            # The agent may never have been registered (e.g. its script was
            # missing at initial start), so create the entry when needed.
            entry = self.subprocess_dict.setdefault(agent, {})
            entry["process"] = process
            entry["nb_try_restart"] = entry.get("nb_try_restart", 0) + 1
        else:
            # Reset to zero nb_try when AgentSST start (launch all agents)
            self.subprocess_dict[agent] = {"process": process, "nb_try_restart": 0}

        cmd = " ".join(argv)
        log.info(f"Start agent {agent} with cmd {cmd}")
        return True

    def _save_restart_count(self, agent, count):
        """Store ``count`` as ``current_nb_restart`` of the agent's AgentSurvey.

        Raises:
            AgentSurvey.DoesNotExist: if the agent has no AgentSurvey entry.
        """
        agent_survey = AgentSurvey.objects.get(name=agent)
        agent_survey.current_nb_restart = count
        agent_survey.save()

    def do_start_agent(self, agent_name:str):
        """
        Start a specific agent of obs_config (Restart)

        The restart counter incremented by the launch is then saved in the
        agent's AgentSurvey entry.

        Args:
            agent_name (str): Name of agent to start

        Raises:
            KeyError: if the agent could not be launched and was never registered.
            AgentSurvey.DoesNotExist: if the agent has no AgentSurvey entry.
        """
        self.start_agents(agent_name)
        self._save_restart_count(
            agent_name, self.subprocess_dict[agent_name]["nb_try_restart"]
        )

    def do_kill_agent(self, agent:str)->None:
        """Ask a started agent to exit by sending it the "do_exit" command.

        Args:
            agent: Name of the agent to stop.

        Returns:
            Whatever ``send_cmd_to`` returns for the "do_exit" command, or
            None when the agent was not started by this AgentSST.
            (The ``-> None`` annotation is kept unchanged from the original
            signature.)
        """
        if agent not in self.subprocess_dict:
            return None
        #cmd = self.send_cmd_to(agent,"do_exit","asap")
        return self.send_cmd_to(agent, "do_exit")

    def do_restart_agent(self, agent:str)->None:
        """Ask the agent to exit and start it again, up to nb_restart_max times.

        Args:
            agent: Name of the agent to restart.

        Raises:
            KeyError: if the agent was never registered by this AgentSST.
            AgentSurvey.DoesNotExist: if the agent has no AgentSurvey entry.
        """
        nb_try_restart_agent = self.subprocess_dict[agent]["nb_try_restart"]
        if nb_try_restart_agent < AgentSurvey.objects.get(name=agent).nb_restart_max:
            # NOTE(review): the new process is launched right after "do_exit"
            # is sent, without waiting for the previous process to end.
            self.do_kill_agent(agent)
            # do_start_agent() increments the counter and saves the new value;
            # it must not be overwritten afterwards with the value read above.
            self.do_start_agent(agent)
        else:
            # TODO: notify an operator (send mail) when the limit is reached.
            self._save_restart_count(agent, nb_try_restart_agent)

    def force_kill_agent(self, *args)->None:
        """Kill the process of the agent named by the first argument.

        Does nothing when no argument is given or when the agent was not
        started by this AgentSST.
        """
        if not args:
            return None
        entry = self.subprocess_dict.get(args[0])
        if entry is None:
            return None
        # kill() rather than terminate(): this is the last resort, used once
        # a polite exit request has expired.
        entry.get("process").kill()

    def do_things_before_exit(self,abort_cmd_sender):
        """Ask every started agent to exit and wait until all have ended.

        Args:
            abort_cmd_sender: Not used by this implementation.
        """
        for agent in self.subprocess_dict:
            self.do_kill_agent(agent)
            try:
                # Reset counter before exiting
                self._save_restart_count(agent, 0)
            except AgentSurvey.DoesNotExist:
                # No AgentSurvey entry for this agent: there is nothing to
                # reset, and the shutdown of the other agents must go on.
                continue

        # wait 10 seconds in order to agents to exit themselves properly
        time.sleep(10)
        for entry in self.subprocess_dict.values():
            entry.get("process").wait()

    def _request_recovery(self, agent, cmd_name, executed_full_names):
        """Send ``cmd_name`` for ``agent`` to this AgentSST unless already asked.

        If one of ``executed_full_names`` was already executed, the command
        is sent again only when the most recent execution was deposited at
        least ``_RECOVERY_CMD_MIN_AGE`` ago. Otherwise the command is sent
        only when no identical command is still pending or running.

        Args:
            agent: Name of the agent to recover.
            cmd_name: "do_start_agent" or "do_restart_agent".
            executed_full_names: Full command names ("<command> <agent>")
                whose last execution is taken into account.
        """
        last_executed = (
            AgentCmd.objects.filter(
                state="CMD_EXECUTED",
                full_name__in=executed_full_names,
                recipient=self.name,
            )
            .order_by("-s_deposit_time")
            .first()
        )
        if last_executed is not None:
            outdated_limit = datetime.now(timezone.utc) - self._RECOVERY_CMD_MIN_AGE
            if last_executed.s_deposit_time <= outdated_limit:
                self.send_cmd_to(self.name, cmd_name, agent)
            return

        pending = AgentCmd.get_pending_and_running_commands_for_agent(self.name)
        if not pending.filter(full_name=f"{cmd_name} {agent}").exists():
            self.send_cmd_to(self.name, cmd_name, agent)

    def routine_process_after_body(self):
        """Force-kill expired kill requests and recover agents that timed out."""
        now_time = datetime.now(timezone.utc)

        running_commands = AgentCmd.get_commands_sent_by_agent("AgentSST").filter(
            state="CMD_RUNNING", recipient__in=list(self.subprocess_dict.keys())
        )
        for cmd in running_commands:
            if cmd.full_name == "KILL_AGENT" and cmd.is_expired():
                self.force_kill_agent(cmd.args[0])

        # checking status of agent if they are timeout
        for agent, entry in self.subprocess_dict.items():
            try:
                agent_survey = AgentSurvey.objects.get(name=agent)
            except AgentSurvey.DoesNotExist:
                # No entry yet: most likely the agent was just launched for
                # the first time and has not created its AgentSurvey entry.
                continue

            timeout_datetime = agent_survey.updated + timedelta(
                seconds=agent_survey.validity_duration
            )
            timeout_datetime = timeout_datetime.replace(tzinfo=timezone.utc)
            if timeout_datetime >= now_time:
                continue

            if entry.get("process").poll() is not None:
                # The process has ended: ask for a plain start.
                self._request_recovery(
                    agent, "do_start_agent", (f"do_start_agent {agent}",)
                )
            else:
                # The process is still running but no longer updates its
                # status: ask for a restart.
                self._request_recovery(
                    agent,
                    "do_restart_agent",
                    (f"do_start_agent {agent}", f"do_restart_agent {agent}"),
                )

        log.info("Check status of process")
        for agent, entry in self.subprocess_dict.items():
            proc = entry.get("process")
            log.info(f"{agent} poll result is {proc.poll()}")
dd27c2bc   Alexis Koralewski   Updating agent co...
275

05316241   Alexis Koralewski   Adding AgentSST, ...
276
277
278
279
280

if __name__ == "__main__":
    # Script entry point: build the AgentSST instance through the shared
    # build_agent() factory, then hand control to its run() method.
    build_agent(AgentSST).run()