AgentSST.py
17.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
#!/usr/bin/env python3
from pathlib import Path
import subprocess
import sys, os, argparse
from datetime import datetime, timezone, timedelta
##import utils.Logger as L
#import threading #, multiprocessing, os
import time
#from django.db import transaction
#from common.models import Command
from Agent import Agent, build_agent, log, __extract_parameters
import socket
from common.models import AgentCmd, AgentSurvey, Majordome
from src.core.pyros_django.obsconfig.obsconfig_class import OBSConfig
# Agent that launches the other agents the observatory configuration associates
# with this computer, and exposes commands to stop, start and restart them.
class AgentSST(Agent):
# Class-level default hostname; __init__ replaces it with socket.gethostname()
# or with the simulated computer name.
computer = "XCY1"
# Not referenced anywhere else in the code visible here.
_previous_dir = ""
# Filled in __init__ from the PROJECT_ROOT_PATH environment variable.
PROJECT_ROOT_PATH = ""
# Python interpreter used to launch the agents' scripts (computed in __init__).
VENV_PYTHON = ""
# agent name -> {"process": Popen handle, "nb_try_restart": int}.
# NOTE(review): mutable class attribute, hence shared by every instance of the class.
subprocess_dict = {}
# agent name -> TEST_MODE value at the time the agent was first launched
# (decides whether " -t" is appended to its command line).
# NOTE(review): mutable class attribute, hence shared by every instance of the class.
agent_in_mode_test = {}
# Hostname given at construction to replace the real one; forwarded to the
# launched agents through "--computer".
simulated_computer = None
_AGENT_SPECIFIC_COMMANDS = {
# Format : "cmd_name" : (timeout, exec_mode, <third field>)
# NOTE(review): the third field is an empty string for every entry; its meaning is defined by Agent - TODO confirm
"do_stop_agent" : (10, Agent.EXEC_MODE.SEQUENTIAL, ''),
"do_restart_agent" : (20, Agent.EXEC_MODE.SEQUENTIAL, ''),
"do_start_agent" : (20, Agent.EXEC_MODE.SEQUENTIAL, ''),
}
# Commands used in test mode.
# NOTE(review): each tuple presumably reads (enabled, "recipient command", timeout, expected result, expected final status) - TODO confirm against Agent
_TEST_COMMANDS_LIST = [
(True, "self get_mode", 200, "MODE = ATTENTIVE", Agent.CMD_STATUS.CMD_EXECUTED),
(True, "self set_mode ATTENTIVE", 200, "MODE = ATTENTIVE", Agent.CMD_STATUS.CMD_EXECUTED),
(True, "self do_stop", 200, None, Agent.CMD_STATUS.CMD_EXECUTED),
]
# @Override
def __init__(self, name: str = None, simulated_computer=None, agent=None):
    """
    Build the AgentSST of the current (or of a simulated) computer.

    Args:
        name (str, optional): Agent name. Defaults to the class name. It is
            replaced by the name the observatory configuration gives to the
            AgentSST of this computer, when there is one.
        simulated_computer (str, optional): Hostname to use instead of the one
            returned by socket.gethostname().
        agent (str, optional): Name of the only agent _init() must start.
            When omitted, _init() starts every agent of this computer.

    Raises:
        KeyError: If the PROJECT_ROOT_PATH environment variable is not set.
    """
    super().__init__()
    self.PROJECT_ROOT_PATH = os.environ["PROJECT_ROOT_PATH"]

    # A provisional name is set before get_config() is called below
    if name is None:
        name = self.__class__.__name__
    self.name = name

    self.computer = socket.gethostname()
    if simulated_computer is not None:
        self.computer = simulated_computer
        self.simulated_computer = simulated_computer

    # The name declared in the observatory configuration takes precedence
    name_from_config = self.get_config().get_agent_sst_of_computer(self.computer)
    if name_from_config is not None:
        self.name = name_from_config

    # Reuse the survey entry of this agent, or create it on first run
    # (single query instead of exists() followed by get())
    try:
        self._agent_survey = AgentSurvey.objects.get(name=self.name)
    except AgentSurvey.DoesNotExist:
        self._agent_survey = AgentSurvey.objects.create(
            name=self.name,
            validity_duration=60,
            mode=self.get_mode(),
            status=self.get_status(),
            iteration=-1,
        )

    if os.environ.get("WITH_DOCKER"):
        # No virtualenv path with Docker: the bare "python3" command is used
        venv_bin = ""
    else:
        # <PROJECT_ROOT_PATH>/venv/venv_py3_pyros/bin/
        venv_bin = os.sep.join(
            [self.PROJECT_ROOT_PATH, "venv", "venv_py3_pyros", "bin", ""]
        )
    self.VENV_PYTHON = venv_bin + "python3"

    # Falsy values (None, "") both mean "start every agent"
    self.init_agent = agent if agent else None
    # Set to True by _init() in test mode to disable automatic restarts
    self._no_restart = False
# @Override
def _init(self):
# Start-up hook: launches the supervised agents, then adjusts the test/restart
# flags and the delay of this agent.
# NOTE(review): the indentation of this block is not visible here; the comments
# below describe the most likely nesting - confirm against the repository.
log.info(f"PC hostname is {self.computer}")
# Start every agent of this computer, or only the one requested at construction
if self.init_agent is None:
self.start_agents()
else:
self.start_agents(self.init_agent)
# In test mode: disable the automatic restarts (_routine_process_iter_end_body
# checks _no_restart) and clear TEST_MODE on this agent. The agents launched
# above already recorded the TEST_MODE value in agent_in_mode_test.
if self.TEST_MODE:
self._no_restart = True
self.TEST_MODE = False
# presumably leaves the launched agents some time to start - TODO confirm
time.sleep(10)
# set_delay() is provided by Agent; presumably the delay (seconds) between two iterations - TODO confirm
self.set_delay(3)
def set_computer(self, computer):
    """
    Replace the hostname used to find this computer in the observatory configuration.

    Args:
        computer: New hostname.
    """
    self.computer = computer
def start_agents(self, agent_name=None):
    """
    Start all agents or one agent of obs_config

    With agent_name, only that agent is (re)started and its "nb_try_restart"
    counter in subprocess_dict is incremented. Without it, every active agent
    the observatory configuration associates with this computer (entries
    containing "AgentSST" excepted) is started and its counter is reset to 0.
    An agent is launched only when its configured "protocol" script exists
    under PROJECT_ROOT_PATH.

    The interpreter exits with status 1 when this computer is unknown to the
    configuration or when agent_name is not one of its agents.

    Args:
        agent_name (str, optional): Specific agent name to start. Defaults to None.
    """
    obs_config = self.get_config()

    if agent_name:
        agent = agent_name
        # NOTE(review): names containing "AgentSST" skip the membership check
        # but are still launched below - confirm this is the intended nesting.
        if "AgentSST" not in agent:
            agents = obs_config.get_agents_per_computer(obs_config.unit_name).get(self.computer)
            # agents is None when this computer is not declared in the configuration
            if agents is None or agent not in agents:
                log.info(f"{agent} isn't associated to this computer : {self.computer}")
                log.info(f"Agents associated to this computer : {agents}")
                sys.exit(1)
        # Start a specific agent of obs_config (restart)
        process = self._launch_agent_process(obs_config, agent)
        if process is not None:
            entry = self.subprocess_dict.setdefault(agent, {})
            entry["process"] = process
            entry["nb_try_restart"] = entry.get("nb_try_restart", 0) + 1
        return

    agents = obs_config.get_agents_per_computer(obs_config.unit_name).get(self.computer)
    if agents is None:
        available_hostnames = obs_config.get_agents_per_computer(obs_config.unit_name).keys()
        log.info("Computer not found in obs config")
        log.info(f"Available hostnames {available_hostnames}. Current hostname is {self.computer}")
        sys.exit(1)
    log.info(f"Agents associated to this computer : {agents}")
    # Start every agent of obs_config (initial start)
    for agent in agents:
        if "AgentSST" in agent:
            continue
        process = self._launch_agent_process(obs_config, agent, only_if_active=True)
        if process is not None:
            # Reset to zero nb_try when AgentSST start (launch all agents)
            self.subprocess_dict[agent] = {"process": process, "nb_try_restart": 0}


def _launch_agent_process(self, obs_config, agent, only_if_active=False):
    """
    Launch the protocol script of one agent in a child process.

    Args:
        obs_config: Observatory configuration, as returned by get_config().
        agent (str): Name of the agent to launch.
        only_if_active (bool): When True, agents whose "is_active" setting is
            not true are skipped (a missing setting counts as active).

    Returns:
        subprocess.Popen or None: Handle of the launched process, or None when
        the agent has no protocol, is skipped as inactive, or when its script
        does not exist.
    """
    agent_informations = obs_config.get_agent_information(obs_config.unit_name, agent)
    protocol = agent_informations.get("protocol")
    if not protocol:
        return None
    if only_if_active and agent_informations.get("is_active", True) != True:
        return None

    script_path = (
        os.path.join(self.PROJECT_ROOT_PATH, os.path.dirname(protocol))
        + os.sep
        + protocol.split("/")[-1]
    )
    if not os.path.exists(script_path):
        return None

    cmd = self.VENV_PYTHON + " " + script_path
    # The test mode of an agent is recorded at its first launch, so later
    # restarts keep it even if TEST_MODE has changed on this AgentSST meanwhile
    if self.agent_in_mode_test.setdefault(agent, self.TEST_MODE):
        cmd += " -t"
    # NOTE(review): "--computer" is assumed independent of the test mode - confirm
    if self.simulated_computer:
        cmd += f" --computer {self.simulated_computer}"

    # cmd is a single command-line string, hence shell=True
    process = subprocess.Popen(cmd, shell=True)
    log.info(f"Start agent {agent} with cmd {cmd}")
    return process
def do_start_agent(self, agent_name: str):
    """
    Start a specific agent of obs_config (Restart)

    After the launch, the restart counter kept in subprocess_dict for this
    agent is copied into the "current_nb_restart" field of its AgentSurvey entry.

    Args:
        agent_name (str): Name of agent to start
    """
    self.start_agents(agent_name)

    # Counter first, survey second: same lookup order as before
    restart_count = self.subprocess_dict[agent_name]["nb_try_restart"]
    survey = AgentSurvey.objects.get(name=agent_name)
    survey.current_nb_restart = restart_count
    survey.save()
def do_stop_agent(self, agent: str) -> "str | None":
    """
    Ask an agent to stop as soon as possible.

    The "do_stop asap" command is sent only to an agent launched by this
    AgentSST (a key of subprocess_dict) or to this AgentSST itself.

    Args:
        agent (str): Name of the agent to stop.

    Returns:
        str | None: Confirmation message when the command was sent, None
        when the agent is unknown and nothing was sent.
    """
    # NOTE(review): assumes nothing is reported when no command is sent - confirm
    if agent not in self.subprocess_dict and agent != self.name:
        return None

    cmd = self.send_cmd_to(agent, "do_stop", "asap")
    print(cmd)
    return f"Sent do_stop asap to {agent}"
def do_restart_agent(self, agent: str, mode: str = "hard") -> None:
    """
    Restart an agent, unless its maximum number of restarts is reached.

    Args:
        agent (str): Name of the agent to restart. It must have an entry in
            subprocess_dict and in AgentSurvey.
        mode (str): "soft" sends "do_restart asap" to the agent; any other
            value stops the agent and launches it again. It is optional
            because this AgentSST sends itself "do_restart_agent <agent>"
            without a mode.

    Raises:
        KeyError: If the agent was never launched by this AgentSST.
        AgentSurvey.DoesNotExist: If the agent has no AgentSurvey entry.
    """
    nb_try_restart_agent = self.subprocess_dict[agent]["nb_try_restart"]
    if nb_try_restart_agent < AgentSurvey.objects.get(name=agent).nb_restart_max:
        if mode == "soft":
            self.send_cmd_to(agent, "do_restart", "asap")
        else:
            self.do_stop_agent(agent)
            self.do_start_agent(agent)
    # else: maximum number of restarts reached.
    # TODO: notify an operator (the original placeholder was "sendmail")

    # The counter is read again here: do_start_agent() increments it, and the
    # value read before the restart would overwrite the one it just saved.
    agent_survey = AgentSurvey.objects.get(name=agent)
    agent_survey.current_nb_restart = self.subprocess_dict[agent]["nb_try_restart"]
    agent_survey.save()
def force_kill_agent(self, *args) -> None:
    """
    Kill the child process of an agent launched by this AgentSST.

    Nothing is done when no agent name is given or when the agent has no
    entry in subprocess_dict.

    Args:
        *args: Only the first positional argument is used: the agent name.
    """
    if not args:
        return None

    entry = self.subprocess_dict.get(args[0])
    if entry is None:
        return None

    # kill() is preferred to terminate() + wait() because the process was
    # created with Popen(shell=True)
    entry.get("process").kill()
def _do_things_before_exit(self,abort_cmd_sender):
# Exit hook: asks the launched agents to stop, resets their restart counter in
# AgentSurvey, then waits until every stop command is marked as executed.
# abort_cmd_sender is not used in this body.
# NOTE(review): the indentation of this block is not visible here; the comments
# describe the most likely nesting - confirm against the repository.
# agent name -> "do_stop asap" AgentCmd whose execution is awaited below
kill_agent_commands = {}
for agent in self.subprocess_dict.keys():
print(agent)
# AgentSurvey.objects.get() raises AgentSurvey.DoesNotExist for an agent without survey entry
if AgentSurvey.objects.get(name=agent).status != "EXITING":
self.do_stop_agent(agent)
# Most recently deposited "do_stop asap" command for this agent
# (presumably the one do_stop_agent() just sent - TODO confirm)
cmd = AgentCmd.objects.filter(full_name="do_stop asap",recipient=agent).latest("s_deposit_time")
#cmd = self.do_stop_agent(agent)
kill_agent_commands[agent] = cmd
agent_survey = AgentSurvey.objects.get(name=agent)
# Reset counter before exiting
agent_survey.current_nb_restart = 0
agent_survey.save()
for agent in kill_agent_commands.keys():
cmd = kill_agent_commands[agent]
# Re-read the command state from the database until it is "CMD_EXECUTED".
# NOTE(review): there is no timeout; the loop ends only when the state changes.
while AgentCmd.objects.get(id=cmd.id).state != "CMD_EXECUTED":
# poll() is not None once the child process has terminated: the command is
# then marked as executed from here, which ends the wait for this agent
if self.subprocess_dict[agent].get("process").poll() != None:
cmd = AgentCmd.objects.get(id=cmd.id)
cmd.state = "CMD_EXECUTED"
cmd.save()
time.sleep(0.5)
# Disabled alternative kept from the original: wait for the processes themselves
# wait 10 seconds in order to agents to exit themselves properly
# time.sleep(20)
# for agent in self.subprocess_dict.keys():
# while self.subprocess_dict[agent].get("process").poll() is None:
# time.sleep(0.5)
# agent_survey = AgentSurvey.objects.get(name=self.name)
# agent_survey.status = AgentSurvey.STATUS_EXIT
# agent_survey.save()
def _routine_process_iter_end_body(self):
    """
    End-of-iteration housekeeping for the launched agents.

    1. Force-kill the agents whose running "KILL_AGENT" command has expired.
    2. When the soft mode of the last Majordome entry is "AUTO" and restarts
       are not disabled (_no_restart), request a start (process terminated) or
       a restart (process still alive) of every agent whose AgentSurvey entry
       was not updated within its validity duration.
    3. Log the poll() result of every launched process.
    """
    now_time = datetime.now(timezone.utc)

    # NOTE(review): the sender is hard-coded to "AgentSST" although self.name
    # may come from the observatory configuration - confirm
    running_commands = AgentCmd.get_commands_sent_by_agent("AgentSST").filter(
        state="CMD_RUNNING",
        recipient__in=list(self.subprocess_dict),
    )
    for cmd in running_commands:
        if cmd.full_name == "KILL_AGENT" and cmd.is_expired():
            self.force_kill_agent(cmd.args[0])

    # QuerySet.last() returns None (it does not raise) when there is no entry
    majordome = Majordome.objects.last()
    soft_mode = majordome.soft_mode if majordome is not None else None

    # checking status of agent if they are timeout if in auto mode
    if soft_mode == "AUTO" and not self._no_restart:
        for agent in self.subprocess_dict:
            try:
                agent_survey = AgentSurvey.objects.get(name=agent)
            except AgentSurvey.DoesNotExist:
                # No entry yet: the agent was surely launched for the first
                # time and has not had time to create its AgentSurvey entry
                continue

            timeout_datetime = agent_survey.updated + timedelta(seconds=agent_survey.validity_duration)
            timeout_datetime = timeout_datetime.replace(tzinfo=timezone.utc)
            if timeout_datetime >= now_time:
                continue

            # The agent is in timeout: start it again if its process has
            # terminated, otherwise ask for a restart
            if self.subprocess_dict[agent].get("process").poll() is not None:
                self._request_agent_command(
                    "do_start_agent",
                    agent,
                    (f"do_start_agent {agent}",),
                )
            else:
                self._request_agent_command(
                    "do_restart_agent",
                    agent,
                    (f"do_start_agent {agent}", f"do_restart_agent {agent}"),
                )

    log.info("Check status of process")
    for agent, entry in self.subprocess_dict.items():
        log.info(f"{agent} poll result is {entry.get('process').poll()}")


def _request_agent_command(self, cmd_name, agent, executed_full_names):
    """
    Send "<cmd_name> <agent>" to this AgentSST unless a previous request makes it redundant.

    Args:
        cmd_name (str): "do_start_agent" or "do_restart_agent".
        agent (str): Name of the agent in timeout.
        executed_full_names (tuple): Full names of the already executed
            commands that count as a previous request for this agent.
    """
    last_executed = (
        AgentCmd.objects.filter(
            state="CMD_EXECUTED",
            full_name__in=executed_full_names,
            recipient=self.name,
        )
        .order_by("-s_deposit_time")
        .first()
    )
    if last_executed is not None:
        # A request was already executed: ask again only when it was deposited
        # at least 30 seconds ago, since the agent is still in timeout
        if last_executed.s_deposit_time <= datetime.now(timezone.utc) - timedelta(seconds=30):
            self.send_cmd_to(self.name, cmd_name, agent)
        return

    # Nothing executed yet: do not send the command again when the same one
    # is already pending or running from a previous iteration
    pending_or_running = AgentCmd.get_pending_and_running_commands_for_agent(self.name)
    if not pending_or_running.filter(full_name=f"{cmd_name} {agent}").exists():
        self.send_cmd_to(self.name, cmd_name, agent)
if __name__ == "__main__":
    # Command-line entry point: the parsed options are handed to build_agent()
    # as constructor parameters, then the agent is run.
    arg_parser = argparse.ArgumentParser(description='Start a agentSST.')
    arg_parser.add_argument(
        "--computer",
        dest="computer",
        action="store",
        help='Launch agent with simulated computer hostname',
    )
    arg_parser.add_argument(
        "--agent",
        dest="agent",
        action="store",
        help='Launch an specific agent ',
    )
    arg_parser.add_argument("-t", action="store_true")
    args = vars(arg_parser.parse_args())

    agent = build_agent(AgentSST, param_constr=args)
    agent.run()