Skip to content
Snippets Groups Projects
Commit da3b229727e7 authored by jfp's avatar jfp
Browse files

autorestart can be False, True, unexpected. Add exitcodes to .conf file

parent ff9bc7b1e6ce
No related branches found
No related tags found
No related merge requests found
...@@ -8,3 +8,6 @@ ...@@ -8,3 +8,6 @@
startretries=3 startretries=3
stopwaitsecs=10 stopwaitsecs=10
user=vmsuser user=vmsuser
autorestart=unexpected
# SS$_NORMAL, SS$_FORCEX, SS$_EXITFORCED
exitcodes=1,11228,11220
...@@ -41,7 +41,43 @@ ...@@ -41,7 +41,43 @@
supervisorctl_pwd = '' supervisorctl_pwd = ''
TRUTHY_STRINGS = ('yes', 'true', 'on', '1')
FALSY_STRINGS = ('no', 'false', 'off', '0')
class RestartWhenExitUnexpected:
pass
class RestartUnconditionally:
pass
def auto_restart(
value: str,
) -> (
Type[RestartUnconditionally]
| Type[RestartWhenExitUnexpected]
| Literal[False]
):
value = str(value.lower())
computed_value = value
if value in TRUTHY_STRINGS:
computed_value = RestartUnconditionally
elif value in FALSY_STRINGS:
computed_value = False
elif value == 'unexpected':
computed_value = RestartWhenExitUnexpected
if computed_value not in (
RestartWhenExitUnexpected,
RestartUnconditionally,
False,
):
raise ValueError("invalid 'autorestart' value %r" % value)
return computed_value
class TimerItemType(IntEnum): class TimerItemType(IntEnum):
UNDEF = 0 UNDEF = 0
PROC_STARTING = 1 PROC_STARTING = 1
PROC_BACKOFF = 2 PROC_BACKOFF = 2
...@@ -44,7 +80,8 @@ ...@@ -44,7 +80,8 @@
class TimerItemType(IntEnum): class TimerItemType(IntEnum):
UNDEF = 0 UNDEF = 0
PROC_STARTING = 1 PROC_STARTING = 1
PROC_BACKOFF = 2 PROC_BACKOFF = 2
PROC_STOPPING = 3
class TimerItem(object): class TimerItem(object):
...@@ -128,6 +165,12 @@ ...@@ -128,6 +165,12 @@
remain_startretries: int remain_startretries: int
timer_item = TimerItem | None timer_item = TimerItem | None
kill_request: bool kill_request: bool
autorestart: (
Type[RestartUnconditionally]
| Type[RestartWhenExitUnexpected]
| Literal[False]
)
exitcodes: List[int]
def __init__( def __init__(
self, self,
...@@ -141,6 +184,12 @@ ...@@ -141,6 +184,12 @@
startsecs: int, startsecs: int,
startretries: int, startretries: int,
stopwaitsecs: int, stopwaitsecs: int,
autorestart: (
Type[RestartUnconditionally]
| Type[RestartWhenExitUnexpected]
| Literal[False]
),
exitcodes: List[int],
): ):
self.name = name self.name = name
self.user = user.encode() self.user = user.encode()
...@@ -155,6 +204,8 @@ ...@@ -155,6 +204,8 @@
self.stopwaitsecs = stopwaitsecs self.stopwaitsecs = stopwaitsecs
self.timer_item = None self.timer_item = None
self.kill_request = False self.kill_request = False
self.autorestart = autorestart
self.exitcodes = exitcodes
self.process = ProcessInfo() self.process = ProcessInfo()
...@@ -158,7 +209,7 @@ ...@@ -158,7 +209,7 @@
self.process = ProcessInfo() self.process = ProcessInfo()
def process_is_running(self) -> bool: def process_exists(self) -> bool:
return self.process.pid in self.running_processes return self.process.pid in self.running_processes
def process_is_backoff(self) -> bool: def process_is_backoff(self) -> bool:
...@@ -167,7 +218,13 @@ ...@@ -167,7 +218,13 @@
def process_is_fatal(self) -> bool: def process_is_fatal(self) -> bool:
return self.process.state == ProcessStates.FATAL return self.process.state == ProcessStates.FATAL
def process_is_stopped(self) -> bool:
return self.process.state == ProcessStates.STOPPED
def process_is_stopping(self) -> bool:
return self.process.state == ProcessStates.STOPPING
def set_running(self): def set_running(self):
self.process.state = ProcessStates.RUNNING self.process.state = ProcessStates.RUNNING
self.remain_startretries = self.startretries + 1 self.remain_startretries = self.startretries + 1
...@@ -170,7 +227,8 @@ ...@@ -170,7 +227,8 @@
def set_running(self): def set_running(self):
self.process.state = ProcessStates.RUNNING self.process.state = ProcessStates.RUNNING
self.remain_startretries = self.startretries + 1 self.remain_startretries = self.startretries + 1
def create_process(self, check_finalsts: bool = True): # def create_process(self, check_finalsts: bool = True):
def create_process(self, from_terminated: bool):
global supervisord_table_name global supervisord_table_name
...@@ -175,7 +233,15 @@ ...@@ -175,7 +233,15 @@
global supervisord_table_name global supervisord_table_name
if check_finalsts and self.process.finalsts in ( if self.kill_request:
ssdef.SS__FORCEX, return
ssdef.SS__EXITFORCED,
if from_terminated:
# program has already been started
if not self.autorestart and self.process.finalsts is not None:
return
if (
self.autorestart is RestartWhenExitUnexpected
and self.process.finalsts in self.exitcodes
): ):
return return
...@@ -180,4 +246,5 @@ ...@@ -180,4 +246,5 @@
): ):
return return
if self.process_is_running():
if self.process_exists():
return return
...@@ -183,4 +250,5 @@ ...@@ -183,4 +250,5 @@
return return
try: try:
v = lib.get_logical( v = lib.get_logical(
self.process_name + b'_PID', self.process_name + b'_PID',
...@@ -224,6 +292,7 @@ ...@@ -224,6 +292,7 @@
self.timer_item.cancel = True self.timer_item.cancel = True
self.timer_item = None self.timer_item = None
self.process.finalsts = None
userpro = starlet.create_user_profile(usrnam=self.user)[1] userpro = starlet.create_user_profile(usrnam=self.user)[1]
persona_id = starlet.persona_create(usrpro=userpro)[1] persona_id = starlet.persona_create(usrpro=userpro)[1]
persona_previous_id = starlet.persona_assume(persona_id)[1] persona_previous_id = starlet.persona_assume(persona_id)[1]
...@@ -280,9 +349,13 @@ ...@@ -280,9 +349,13 @@
) )
def kill(self): def kill(self):
if self.process_is_backoff(): if self.process_is_stopped():
self.process.state = ProcessStates.FATAL return
if self.process_is_backoff() or self.process_is_fatal():
self.process.state = ProcessStates.STOPPED
self.remain_startretries = self.startretries + 1
if self.timer_item: if self.timer_item:
self.timer_item.cancel = True self.timer_item.cancel = True
self.timer_item = None self.timer_item = None
return return
...@@ -285,12 +358,9 @@ ...@@ -285,12 +358,9 @@
if self.timer_item: if self.timer_item:
self.timer_item.cancel = True self.timer_item.cancel = True
self.timer_item = None self.timer_item = None
return return
if self.process_is_fatal():
self.process.state = ProcessStates.STOPPED if not self.process_exists() or self.kill_request:
self.remain_startretries = self.startretries + 1
return
if not self.process_is_running():
return return
self.kill_request = True self.kill_request = True
...@@ -298,5 +368,6 @@ ...@@ -298,5 +368,6 @@
pid = self.process.pid pid = self.process.pid
if self.process.state == ProcessStates.STOPPING: if self.process.state == ProcessStates.STOPPING:
starlet.delprc(self.process.pid) starlet.delprc(self.process.pid)
else:
starlet.forcex(pid, code=ssdef.SS__FORCEX) starlet.forcex(pid, code=ssdef.SS__FORCEX)
self.process.state = ProcessStates.STOPPING self.process.state = ProcessStates.STOPPING
...@@ -301,5 +372,11 @@ ...@@ -301,5 +372,11 @@
starlet.forcex(pid, code=ssdef.SS__FORCEX) starlet.forcex(pid, code=ssdef.SS__FORCEX)
self.process.state = ProcessStates.STOPPING self.process.state = ProcessStates.STOPPING
if self.timer_item:
self.timer_item.cancel
self.timer_item = TimerItem(TimerItemType.PROC_STOPPING, self)
timer_queue.put(
PrioritizedItem(current_tick + self.stopwaitsecs, self.timer_item)
)
def set_terminated(self, finalsts: int, end_time: datetime.datetime): def set_terminated(self, finalsts: int, end_time: datetime.datetime):
global supervisord_table_name global supervisord_table_name
...@@ -432,6 +509,7 @@ ...@@ -432,6 +509,7 @@
except OSError: except OSError:
pass pass
def dispatch_cmd(res: bytes, fcmd_r: mbxqio.MBXQIO): def dispatch_cmd(res: bytes, fcmd_r: mbxqio.MBXQIO):
global supervisorctl_pwd global supervisorctl_pwd
jscmd = json.loads(res) jscmd = json.loads(res)
...@@ -518,8 +596,8 @@ ...@@ -518,8 +596,8 @@
f'{acc.acc_l_finalsts}' f'{acc.acc_l_finalsts}'
) )
pgm.set_terminated(acc.acc_l_finalsts, acc.acc_q_termtime) pgm.set_terminated(acc.acc_l_finalsts, acc.acc_q_termtime)
if not stsdef.vms_status_success(acc.acc_l_finalsts): # if not stsdef.vms_status_success(acc.acc_l_finalsts):
pgm.create_process() pgm.create_process(True)
except KeyError: except KeyError:
pass pass
case AstParamType.CMD: case AstParamType.CMD:
...@@ -542,5 +620,4 @@ ...@@ -542,5 +620,4 @@
timer_queue.put(itm) timer_queue.put(itm)
return return
itm.item.done = True itm.item.done = True
if itm.item.typ == TimerItemType.PROC_STARTING:
val: Program = itm.item.value val: Program = itm.item.value
...@@ -546,2 +623,4 @@ ...@@ -546,2 +623,4 @@
val: Program = itm.item.value val: Program = itm.item.value
match itm.item.typ:
case TimerItemType.PROC_STARTING:
val.set_running() val.set_running()
...@@ -547,7 +626,8 @@ ...@@ -547,7 +626,8 @@
val.set_running() val.set_running()
elif itm.item.typ == TimerItemType.PROC_BACKOFF: case TimerItemType.PROC_BACKOFF:
val: Program = itm.item.value val.create_process(False)
val.create_process() case TimerItemType.PROC_STOPPING:
val.kill()
def run(chan: int, chancmd: int, chancmd_r: int): def run(chan: int, chancmd: int, chancmd_r: int):
...@@ -562,7 +642,7 @@ ...@@ -562,7 +642,7 @@
for pgm in Program.programs.values(): for pgm in Program.programs.values():
if pgm.autostart: if pgm.autostart:
pgm.create_process() pgm.create_process(False)
timer_astctx = vmsast.AstContext(vmsast.M_WAKE | vmsast.M_QUEUE) timer_astctx = vmsast.AstContext(vmsast.M_WAKE | vmsast.M_QUEUE)
timer_astctx.param = AstParam(AstParamType.TIMER) timer_astctx.param = AstParam(AstParamType.TIMER)
...@@ -652,6 +732,10 @@ ...@@ -652,6 +732,10 @@
startsecs = config[sn].getint('startsecs', 10) startsecs = config[sn].getint('startsecs', 10)
startretries = config[sn].getint('startretries', 3) startretries = config[sn].getint('startretries', 3)
stopwaitsecs = config[sn].getint('stopwaitsescs', 10) stopwaitsecs = config[sn].getint('stopwaitsescs', 10)
autorestart = auto_restart(
config[sn].get('autorestart', 'unexpected')
)
exitcodes = config[sn].get('exitcodes', '1').split(',')
user = config[sn]['user'] user = config[sn]['user']
p = Program( p = Program(
name=name, name=name,
...@@ -664,6 +748,10 @@ ...@@ -664,6 +748,10 @@
startsecs=startsecs, startsecs=startsecs,
startretries=startretries, startretries=startretries,
stopwaitsecs=stopwaitsecs, stopwaitsecs=stopwaitsecs,
autorestart=autorestart,
exitcodes=[
int(exitcode) for exitcode in exitcodes if exitcode != ''
],
) )
Program.programs[name] = p Program.programs[name] = p
...@@ -674,4 +762,3 @@ ...@@ -674,4 +762,3 @@
if __name__ == '__main__': if __name__ == '__main__':
main() main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment