AgentSST.py 18.4 KB
Edit Raw Blame History



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368


#!/usr/bin/env python3

from pathlib import Path
import subprocess
import sys, os, argparse

from datetime import datetime, timezone, timedelta
##import utils.Logger as L
#import threading #, multiprocessing, os
import time

#from django.db import transaction
#from common.models import Command

from Agent import Agent, build_agent, log, __extract_parameters
import socket
from common.models import AgentCmd, AgentSurvey, Majordome

from src.core.pyros_django.obsconfig.obsconfig_class import OBSConfig

class AgentSST(Agent):
    computer = "XCY1"
    _previous_dir = ""
    PROJECT_ROOT_PATH = ""
    VENV_PYTHON = ""
    subprocess_dict = {}
    agent_in_mode_test = {}
    simulated_computer = None

    _AGENT_SPECIFIC_COMMANDS = {
        # Format : “cmd_name” : (timeout, exec_mode)
        
        "do_stop_agent" : (10, Agent.EXEC_MODE.SEQUENTIAL, ''),
        "do_restart_agent" : (20, Agent.EXEC_MODE.SEQUENTIAL, ''),
        "do_start_agent" : (20, Agent.EXEC_MODE.SEQUENTIAL, ''),
    }

    _TEST_COMMANDS_LIST_OFF:list = []
    _TEST_COMMANDS_LIST_ON = [
        (True, "self get_mode", 200, "ATTENTIVE",  Agent.CMD_STATUS.CMD_EXECUTED),
        (True, "self set_mode ATTENTIVE", 200, "MODE is ATTENTIVE", Agent.CMD_STATUS.CMD_EXECUTED),
        # If AgentSST must not stop at end of test scenario, comment this line
        (True, "self do_stop", 200, None, Agent.CMD_STATUS.CMD_EXECUTED),
    ]
    # AgentSST WITH test scenario in TEST mode
    #_TEST_COMMANDS_LIST = _TEST_COMMANDS_LIST_ON
    # AgentSST WITHOUT test scenario in TEST mode
    _TEST_COMMANDS_LIST = _TEST_COMMANDS_LIST_OFF

    # @Override
    def __init__(self, name:str=None, simulated_computer=None, agent=None):
        
        super().__init__()
        self.PROJECT_ROOT_PATH = os.environ["PROJECT_ROOT_PATH"]
        if name is None:
            name = self.__class__.__name__
            self.name = name
        self.computer = socket.gethostname()    
        if simulated_computer != None:
            self.computer = simulated_computer
            self.simulated_computer = simulated_computer
        name_from_config = self.get_config().get_agent_sst_of_computer(self.computer)
        if name_from_config != None:
            name = name_from_config
            self.name = name
        if AgentSurvey.objects.filter(name=self.name).exists():
            self._agent_survey = AgentSurvey.objects.get(name=self.name)
        else:
            self._agent_survey = AgentSurvey.objects.create(
                name=self.name, 
                validity_duration=60, 
                mode=self.get_mode(), 
                status=self.get_status(), 
                iteration=-1
            )
        WITH_DOCKER = False
        if os.environ.get("WITH_DOCKER"):
            WITH_DOCKER = True
            # if WITH_DOCKER socket.gethostname() bizarre 
        if WITH_DOCKER:
            VENV_ROOT = ""
            VENV = ""
            VENV_BIN = ""
        else:
            VENV_ROOT = "venv"
            VENV = "venv_py3_pyros"
            VENV_BIN = (
                self.PROJECT_ROOT_PATH
                + os.sep + VENV_ROOT
                + os.sep + VENV
                + os.sep + "bin"
                + os.sep
            )
        self.VENV_PYTHON = VENV_BIN + "python3"
        if agent:
            self.init_agent = agent
        else:
            self.init_agent = None
        self._no_restart = False
        #log.info(f"PC hostname is {self.computer}")
    
    # @Override
    def _init(self):
        log.info(f"PC hostname is {self.computer}")
        if self.init_agent is None:
            self.start_agents()
        else:
            self.start_agents(self.init_agent)
        if self.TEST_MODE:
            self._no_restart = True
        #self.TEST_MODE = False
        time.sleep(10)
        self.set_delay(3)

    def set_computer(self,computer):
        self.computer = computer
    def start_agents(self,agent_name=None):
        """
        Start all agents or one agent of obs_config

        Args:
            agent_name (_type_, optional): Specific agent name to start. Defaults to None.
        """
        obs_config = self.get_config()
        test_mode = " -t"

        if agent_name:
            agent = agent_name
            if "AgentSST" not in agent:
                agents = obs_config.get_agents_per_computer(obs_config.unit_name).get(self.computer)
                if agent not in agents:
                    log.info(f"{agent} isn't associated to this computer : {self.computer}")
                    log.info(f"Agents associated to this computer : {agents}")
                    exit(1)
                # Start a specific agent of obs_config (restart)
                agent_informations = obs_config.get_agent_information(obs_config.unit_name,agent)
                protocol = agent_informations.get("protocol")
                if protocol:
                    protocol_folder_abs_path = os.path.join(self.PROJECT_ROOT_PATH, os.path.dirname(protocol))
                    
                    protocol_script_name = protocol.split("/")[-1]
                    if os.path.exists(protocol_folder_abs_path + os.sep + protocol_script_name):
                        cmd = self.VENV_PYTHON +" "+ protocol_folder_abs_path + os.sep + protocol_script_name
                        if not agent in self.agent_in_mode_test:
                            self.agent_in_mode_test[agent] = self.TEST_MODE
                        if self.agent_in_mode_test[agent]:
                            cmd += test_mode
                        if self.simulated_computer:
                            cmd += f" --computer {self.simulated_computer}"
                        if self.DEBUG_MODE:
                            cmd += " -d"
                        process = subprocess.Popen(f"{cmd}",shell=True)
                        process.poll()
                        if agent not in self.subprocess_dict:
                            self.subprocess_dict[agent] = {}
                        self.subprocess_dict[agent]["process"] = process
                        nb_try_restart = self.subprocess_dict[agent].get("nb_try_restart",0)
                        nb_try_restart += 1
                        self.subprocess_dict[agent]["nb_try_restart"] = nb_try_restart
                        log.info(f"Start agent {agent} with cmd {cmd}")

        else:
            agents = obs_config.get_agents_per_computer(obs_config.unit_name).get(self.computer)
            if agents is None:
                available_hostnames = obs_config.get_agents_per_computer(obs_config.unit_name).keys()
                log.info("Computer not found in obs config")
                log.info(f"Available hostnames {available_hostnames}. Current hostname is {self.computer}")
                exit(1)
            #self.change_dir(self.PROJECT_ROOT_PATH)
            else:
                log.info(f"Agents associated to this computer : {agents}")
            # Start every agent of obs_config (initial start)
            for agent in agents:
                if "AgentSST" in agent:
                    continue
                agent_informations = obs_config.get_agent_information(obs_config.unit_name,agent)
                protocol = agent_informations.get("protocol")
                is_active = agent_informations.get("is_active",True)
                if protocol and is_active == True:
                    protocol_folder_abs_path = os.path.join(self.PROJECT_ROOT_PATH, os.path.dirname(protocol))
                    
                    protocol_script_name = protocol.split("/")[-1]
                    if os.path.exists(protocol_folder_abs_path + os.sep + protocol_script_name):
                        
                        cmd = self.VENV_PYTHON +" "+ protocol_folder_abs_path + os.sep + protocol_script_name
                        if not agent in self.agent_in_mode_test:
                            self.agent_in_mode_test[agent] = self.TEST_MODE
                        if self.agent_in_mode_test[agent]:
                            cmd += test_mode
                        if self.simulated_computer:
                            cmd += f" --computer {self.simulated_computer}"
                        if self.DEBUG_MODE:
                            cmd += " -d"
                        # process = subprocess.Popen(f"{cmd}", shell=True, stdout=subprocess.DEVNULL,stderr=subprocess.STDOUT)
                        process = subprocess.Popen(f"{cmd}", shell=True)
                        self.subprocess_dict[agent] = {}
                        self.subprocess_dict[agent]["process"] = process
                        # Reset to zero nb_try when AgentSST start (launch all agents)
                        self.subprocess_dict[agent]["nb_try_restart"] = 0
                        log.info(f"Start agent {agent} with cmd {cmd}")

    def do_start_agent(self, agent_name:str):
        """
        Start a specific agent of obs_config (Restart)

        Args:
            agent_name (str): Name of agent to start
        """
        self.start_agents(agent_name)
        nb_try_restart_agent = self.subprocess_dict[agent_name]["nb_try_restart"]
        agent_survey = AgentSurvey.objects.get(name=agent_name)
        agent_survey.current_nb_restart = nb_try_restart_agent
        
        agent_survey.save()


    def do_stop_agent(self, agent:str)->None:
        # agent = args[0]
        if agent in self.subprocess_dict.keys() or agent == self.name:
            #cmd = self.send_cmd_to(agent,"do_exit")
            cmd = self.send_cmd_to(agent,"do_stop","asap")
            print(cmd)
            return f"Sent do_stop asap to {agent}"

    def do_restart_agent(self, agent:str, mode:str)->None:
        # agent = args[0]
        nb_try_restart_agent = self.subprocess_dict[agent]["nb_try_restart"]
        if nb_try_restart_agent < AgentSurvey.objects.get(name=agent).nb_restart_max:
            if mode == "soft":
                cmd = self.send_cmd_to(agent,"do_restart","asap")
            else:
                self.do_stop_agent(agent)
                self.do_start_agent(agent)
        else:
            #sendmail
            pass
        agent_survey = AgentSurvey.objects.get(name=agent)
        agent_survey.current_nb_restart = nb_try_restart_agent
        agent_survey.save()

        # if agent in self.subprocess_dict.keys():
        #     cmd.set_result(f"Agent {agent} restarted")
        #     cmd.set_as_processed()
        # else:
        #     cmd.set_result(f"Agent {agent} failed to restart")
        #     log.debug(f"Agent {agent} failed to restart")

    def force_kill_agent(self, *args)->None:
        if args:
            agent = args[0]
            if self.subprocess_dict.get(agent) is not None:
                process = self.subprocess_dict.get(agent).get("process")
                # process.terminate()
                # process.wait()
                # Kill is better when using Popen(shell=True) because it will remove the created child process
                process.kill()
            else:
                return None

    def _do_things_before_exit(self,abort_cmd_sender):
        kill_agent_commands = {}
        for agent in self.subprocess_dict.keys():
            if AgentSurvey.objects.get(name=agent).status != "EXITING":
                self.do_stop_agent(agent)
                cmd = AgentCmd.objects.filter(full_name="do_stop asap",recipient=agent).latest("s_deposit_time")
                #cmd = self.do_stop_agent(agent)    
                kill_agent_commands[agent] = cmd
                agent_survey = AgentSurvey.objects.get(name=agent)
                # Reset counter before exiting
                agent_survey.current_nb_restart = 0
                agent_survey.save()                             
        for agent in kill_agent_commands.keys():
            cmd = kill_agent_commands[agent]
            while AgentCmd.objects.get(id=cmd.id).state != "CMD_EXECUTED":
                if self.subprocess_dict[agent].get("process").poll() != None:
                    cmd = AgentCmd.objects.get(id=cmd.id)
                    cmd.state = "CMD_EXECUTED"
                    cmd.save()
                time.sleep(0.5)
        # wait 10 seconds in order to agents to exit themselves properly 
        # time.sleep(20)
        # for agent in self.subprocess_dict.keys():
        #     while self.subprocess_dict[agent].get("process").poll() is None:
        #         time.sleep(0.5)
        # agent_survey = AgentSurvey.objects.get(name=self.name)
        # agent_survey.status = AgentSurvey.STATUS_EXIT
        # agent_survey.save()
    def _routine_process_iter_end_body(self):
        now_time = datetime.now(timezone.utc) 
        last_running_commands = AgentCmd.get_commands_sent_by_agent("AgentSST").filter(state="CMD_RUNNING",recipient__in=list(self.subprocess_dict.keys()))
        for cmd in last_running_commands:
            last_running_cmd = cmd.full_name
            if last_running_cmd == "KILL_AGENT" and cmd.is_expired():
                agent = cmd.args[0]
                self.force_kill_agent(agent)

        # checking status of agent if they are timeout if in auto mode
        try:
            soft_mode = Majordome.objects.last().soft_mode
        except Majordome.DoesNotExist:
            soft_mode = None
        if (soft_mode is not None and soft_mode == "AUTO") and not self._no_restart:
            for agent in self.subprocess_dict.keys():
                try:
                    agent_survey = AgentSurvey.objects.get(name=agent)
                except AgentSurvey.DoesNotExist:
                    # If there is no entry in AgentSurvey for this agent go to next iteration (it surely means that the agentSST launched this agent for the first time, and it didn't had enough time to create an entry in AgentSurvey)
                    continue
                
                validity_duration = agent_survey.validity_duration
                last_update_from_agent = agent_survey.updated
                validity_duration_timedelta = timedelta(seconds=validity_duration)
                timeout_datetime = last_update_from_agent + validity_duration_timedelta
                timeout_datetime = timeout_datetime.replace(tzinfo=timezone.utc)
                # if agent latest state is timeout, restart it
                if timeout_datetime < now_time:
                    if self.subprocess_dict[agent].get("process").poll() != None:
                        last_executed_start_agent_cmd =  AgentCmd.objects.filter(state="CMD_EXECUTED",full_name=f"do_start_agent {agent}",recipient=self.name).order_by("-s_deposit_time")
                        if last_executed_start_agent_cmd.exists():
                            cmd_outdated_datetime_start = datetime.utcnow() - timedelta(seconds=30)
                            cmd_outdated_datetime_end = datetime.utcnow() - timedelta(seconds=25)
                            cmd_outdated_datetime_start = cmd_outdated_datetime_start.replace(tzinfo=timezone.utc)
                            cmd_outdated_datetime_end = cmd_outdated_datetime_end.replace(tzinfo=timezone.utc)
                            # cmd outdated if deposit time was between 25 and 30 seconds ago from now
                            # if last start cmd for this agent was executed and this agent isn't currently running, ask again a start.
                            if cmd_outdated_datetime_start >= last_executed_start_agent_cmd.first().s_deposit_time and cmd_outdated_datetime_end >= last_executed_start_agent_cmd.first().s_deposit_time:
                                self.send_cmd_to(self.name,"do_start_agent", agent)
                        else:
                            try:
                                # Check if do_start_agent cmd already asked by agentSST in previous iterations and not exectuted. If the query success (no exception raised), we don't send again a cmd
                                AgentCmd.get_pending_and_running_commands_for_agent(self.name).get(full_name=f"do_start_agent {agent}")
                            except:
                                self.send_cmd_to(self.name,"do_start_agent", agent)
                    else:
                        last_executed_start_or_restart_agent_cmd =  AgentCmd.objects.filter(state="CMD_EXECUTED",full_name__in=(f"do_start_agent {agent}",f"do_restart_agent {agent}"),recipient=self.name).order_by("-s_deposit_time")
                        if last_executed_start_or_restart_agent_cmd.exists():
                            cmd_outdated_datetime_start = datetime.utcnow() - timedelta(seconds=30)
                            cmd_outdated_datetime_end = datetime.utcnow() - timedelta(seconds=25)
                            cmd_outdated_datetime_start = cmd_outdated_datetime_start.replace(tzinfo=timezone.utc)
                            cmd_outdated_datetime_end = cmd_outdated_datetime_end.replace(tzinfo=timezone.utc)
                            # cmd outdated if deposit time was between 25 and 30 seconds ago from now
                            # if last start or restart cmd for this agent was executed and this agent isn't currently running, ask again a restart.
                            if cmd_outdated_datetime_start >= last_executed_start_or_restart_agent_cmd.first().s_deposit_time and cmd_outdated_datetime_end >=  last_executed_start_or_restart_agent_cmd.first().s_deposit_time:
                                self.send_cmd_to(self.name,"do_restart_agent", agent)    
                        else:
                            try:
                                # Check if do_restart_agent cmd already asked by agentSST in previous iterations and not exectuted. If the query success (no exception raised), we don't send again a cmd
                                AgentCmd.get_pending_and_running_commands_for_agent(self.name).get(full_name=f"do_restart_agent {agent}")
                            except:
                                self.send_cmd_to(self.name,"do_restart_agent", agent)

        log.info("Check status of process")
        for agent in self.subprocess_dict:
            proc = self.subprocess_dict.get(agent).get("process")
            log.info(f"{agent} poll result is {proc.poll()}")
     

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Start a agentSST.')
    parser.add_argument("--computer",dest="computer",help='Launch agent with simulated computer hostname',action="store")
    parser.add_argument("--agent",dest="agent",help='Launch an specific agent ',action="store")
    parser.add_argument("-t", action="store_true" )
    parser.add_argument("-d", action="store_true" )
    args = vars(parser.parse_args())
    agent = build_agent(AgentSST,param_constr=args)
    # agent = build_agent(AgentSST)
    agent.run()