
Source Code for Module backend.daemons.backend

# coding: utf-8

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

import grp
import multiprocessing
import os
import pwd
import signal
import sys
import time
from collections import defaultdict

import lockfile
from daemon import DaemonContext
from retask.queue import Queue
from retask import ConnectionError

from ..exceptions import CoprBackendError
from ..helpers import BackendConfigReader
from .job_grab import CoprJobGrab
from .log import CoprBackendLog
from .dispatcher import Worker

class CoprBackend(object):

    """
    Core process: starts, stops, and initializes workers and other backend components.

    :param config_file: path to the backend configuration file
    :param ext_opts: additional options for the backend
    """
39 - def __init__(self, config_file=None, ext_opts=None):
40 # read in config file 41 # put all the config items into a single self.opts bunch 42 43 if not config_file: 44 raise CoprBackendError("Must specify config_file") 45 46 self.config_file = config_file 47 self.ext_opts = ext_opts # to stow our cli options for read_conf() 48 self.workers_by_group_id = defaultdict(list) 49 self.max_worker_num_by_group_id = defaultdict(int) 50 51 self.config_reader = BackendConfigReader(self.config_file, self.ext_opts) 52 self.opts = None 53 self.update_conf() 54 55 self.lock = multiprocessing.Lock() 56 57 self.task_queues = {} 58 self.events = multiprocessing.Queue() 59 # event format is a dict {when:time, who:[worker|logger|job|main], 60 # what:str} 61 62 self.abort = False 63 if not os.path.exists(self.opts.worker_logdir): 64 os.makedirs(self.opts.worker_logdir, mode=0o750)
65
    def clean_task_queues(self):
        """
        Make sure there is nothing in our task queues
        """
        try:
            for queue in self.task_queues.values():
                while queue.length:
                    queue.dequeue()
        except ConnectionError:
            raise CoprBackendError(
                "Could not connect to a task queue. Is Redis running?")

    def init_task_queues(self):
        """
        Connect to the retask.Queue for each group_id. Remove old tasks from queues.
        """
        try:
            for group in self.opts.build_groups:
                group_id = group["id"]
                queue = Queue("copr-be-{0}".format(group_id))
                queue.connect()
                self.task_queues[group_id] = queue
        except ConnectionError:
            raise CoprBackendError(
                "Could not connect to a task queue. Is Redis running?")

        self.clean_task_queues()

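    # An illustrative sketch (not part of this module) of the retask API the
    # queues above expose; presumably the job grabber produces tasks and the
    # dispatcher Worker consumes them (an assumption based on the imports
    # above), and the payload shown here is hypothetical:
    #
    #   from retask.task import Task
    #   queue = Queue("copr-be-0")
    #   queue.connect()
    #   queue.enqueue(Task({"build_id": 1}))  # producer side
    #   task = queue.dequeue()                # consumer side; Task or None
    #   print(task.data)                      # -> {"build_id": 1}
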
    def init_sub_process(self):
        """
        - Create backend logger
        - Create job grabber
        """
        self._logger = CoprBackendLog(self.opts, self.events)
        self._logger.start()

        self.event("Starting up Job Grabber")

        self._jobgrab = CoprJobGrab(self.opts, self.events, self.lock)
        self._jobgrab.start()

    def event(self, what):
        """
        Put a new event into the queue

        :param what: Event content
        """
        self.events.put({"when": time.time(), "who": "main", "what": what})

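    # Example of the resulting queue item, matching the event format
    # documented in __init__:
    #   {"when": 1400000000.0, "who": "main", "what": "Starting up Job Grabber"}
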
    def update_conf(self):
        """
        Update backend config from config file
        """
        self.opts = self.config_reader.read()

    def spin_up_workers_by_group(self, group):
        """
        Handles starting/growing the number of workers

        :param dict group: Builders group

        Utilized keys:
          - **id**
          - **max_workers**

        """
        group_id = group["id"]

        if len(self.workers_by_group_id[group_id]) < group["max_workers"]:
            self.event("Spinning up more workers")
            for _ in range(group["max_workers"] - len(self.workers_by_group_id[group_id])):
                self.max_worker_num_by_group_id[group_id] += 1
                w = Worker(
                    self.opts, self.events,
                    self.max_worker_num_by_group_id[group_id],
                    group_id, lock=self.lock
                )

                self.workers_by_group_id[group_id].append(w)
                w.start()

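    # Note: max_worker_num_by_group_id only ever grows, so worker numbers are
    # never reused within a group, even after a worker dies and is replaced.
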
    def prune_dead_workers_by_group_id(self, group_id):
        """
        Remove dead workers from the pool

        :param group_id: builders group id
        :return list: workers that are still alive

        :raises:
            :py:class:`~backend.exceptions.CoprBackendError` when a worker died
            and the "exit_on_worker" option is enabled
        """
        preserved_workers = []
        for w in self.workers_by_group_id[group_id]:
            if not w.is_alive():
                self.event("Worker {0} died unexpectedly".format(w.worker_num))
                w.terminate()  # kill it with fire
                if self.opts.exit_on_worker:
                    raise CoprBackendError(
                        "Worker died unexpectedly, exiting")
            else:
                preserved_workers.append(w)
        return preserved_workers

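    # With "exit_on_worker" disabled, a dead worker is simply dropped from the
    # pool here; spin_up_workers_by_group() then starts a replacement on the
    # next pass of the run() loop.
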
    def terminate(self):
        """
        Clean up backend processes (just workers for now) and drain all task
        queues, since their contents would otherwise survive a copr restart.
        """
        self.abort = True
        for group in self.opts.build_groups:
            group_id = group["id"]
            for w in self.workers_by_group_id[group_id][:]:
                self.workers_by_group_id[group_id].remove(w)
                w.terminate_instance()
        self.clean_task_queues()

    def run(self):
        """
        Start the backend process; controls sub-process start/stop.
        """
        self.init_task_queues()
        self.init_sub_process()

        self.abort = False
        while not self.abort:
            # re-read config into opts
            self.update_conf()

            for group in self.opts.build_groups:
                group_id = group["id"]
                self.event("# jobs in {0} queue: {1}"
                           .format(group["name"], self.task_queues[group_id].length))
                self.spin_up_workers_by_group(group)
                self.event("Finished starting worker processes")

                # FIXME - prune out workers
                #   if len(self.workers) > self.opts.num_workers:
                #       killnum = len(self.workers) - self.opts.num_workers
                #       for w in self.workers[:killnum]:
                #   insert a poison pill? Kill after something? I dunno.
                # FIXME - if a worker bombs out - we need to check them
                #   and startup a new one if it happens
                # check for dead workers and abort
                preserved_workers = self.prune_dead_workers_by_group_id(group_id)
                self.workers_by_group_id[group_id] = preserved_workers

            time.sleep(self.opts.sleeptime)


def run_backend(opts):
    """
    Start the main backend daemon.

    :param opts: Bunch object with command-line options

    Expected **opts** fields:
        - `config_file` - path to the backend config file
        - `daemonize` - boolean flag to enable daemon mode
        - `pidfile` - path to the backend pidfile

    """
    cbe = None
    try:
        context = DaemonContext(
            pidfile=lockfile.FileLock(opts.pidfile),
            gid=grp.getgrnam("copr").gr_gid,
            uid=pwd.getpwnam("copr").pw_uid,
            detach_process=opts.daemonize,
            umask=0o22,
            stderr=sys.stderr,
            signal_map={
                signal.SIGTERM: "terminate",
                signal.SIGHUP: "terminate",
            },
        )
        with context:
            cbe = CoprBackend(opts.config_file, ext_opts=opts)
            cbe.run()
    except (Exception, KeyboardInterrupt):
        sys.stderr.write("Killing/Dying\n")
        if cbe is not None:
            cbe.terminate()
        raise
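
A minimal sketch of how this daemon might be launched, assuming a
Bunch/namespace-style opts object; the real CLI builds opts from its command
line arguments (per the docstring above), and the paths below are
hypothetical:

  from types import SimpleNamespace

  from backend.daemons.backend import run_backend

  opts = SimpleNamespace(
      config_file="/etc/copr/copr-be.conf",  # hypothetical config path
      daemonize=False,                       # stay in the foreground
      pidfile="/var/run/copr-be.pid",        # hypothetical pidfile path
  )

  # Note: DaemonContext above switches to the "copr" user and group, so that
  # account must exist. run_backend() blocks in CoprBackend.run() until the
  # process receives SIGTERM/SIGHUP or a worker dies with exit_on_worker set.
  run_backend(opts)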