1
2
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 from __future__ import division
6 from __future__ import absolute_import
7
8 import grp
9 import multiprocessing
10 import os
11 import pwd
12 import signal
13 import sys
14 import time
15 from collections import defaultdict
16
17 import lockfile
18 from daemon import DaemonContext
19 from retask.queue import Queue
20 from retask import ConnectionError
21
22 from ..exceptions import CoprBackendError
23 from ..helpers import BackendConfigReader
24 from .job_grab import CoprJobGrab
25 from .log import CoprBackendLog
26 from .dispatcher import Worker
27
28
class CoprBackend(object):
    """
    Core process - starts/stops/initializes workers and other backend components

    :param config_file: path to the backend configuration file
    :param ext_opts: additional options for backend

    :raises CoprBackendError: when ``config_file`` is not given
    """

    # Message of the CoprBackendError raised when a retask queue is unreachable.
    _QUEUE_CONNECTION_ERROR = "Could not connect to a task queue. Is Redis running?"

    def __init__(self, config_file=None, ext_opts=None):
        if not config_file:
            raise CoprBackendError("Must specify config_file")

        self.config_file = config_file
        self.ext_opts = ext_opts
        # group_id -> list of Worker processes started for that group
        self.workers_by_group_id = defaultdict(list)
        # group_id -> counter incremented for every worker started in the group;
        # its current value is handed to the new Worker as its number
        self.max_worker_num_by_group_id = defaultdict(int)

        self.config_reader = BackendConfigReader(self.config_file, self.ext_opts)
        self.opts = None
        self.update_conf()

        # handed to the job grabber and to every worker
        self.lock = multiprocessing.Lock()

        # group_id -> connected retask Queue; filled by init_task_queues()
        self.task_queues = {}
        self.events = multiprocessing.Queue()

        self.abort = False
        if not os.path.exists(self.opts.worker_logdir):
            os.makedirs(self.opts.worker_logdir, mode=0o750)

    def clean_task_queues(self):
        """
        Make sure there is nothing in our task queues

        :raises CoprBackendError: when a queue cannot be reached
        """
        try:
            for queue in self.task_queues.values():
                while queue.length:
                    queue.dequeue()
        except ConnectionError:
            raise CoprBackendError(self._QUEUE_CONNECTION_ERROR)

    def init_task_queues(self):
        """
        Connect to the retask.Queue for each group_id. Remove old tasks from queues.

        :raises CoprBackendError: when a queue cannot be reached
        """
        try:
            for group in self.opts.build_groups:
                group_id = group["id"]
                queue = Queue("copr-be-{0}".format(group_id))
                queue.connect()
                self.task_queues[group_id] = queue
        except ConnectionError:
            raise CoprBackendError(self._QUEUE_CONNECTION_ERROR)

        self.clean_task_queues()

    def init_sub_process(self):
        """
        - Create backend logger
        - Create job grabber
        """
        self._logger = CoprBackendLog(self.opts, self.events)
        self._logger.start()

        self.event("Starting up Job Grabber")

        self._jobgrab = CoprJobGrab(self.opts, self.events, self.lock)
        self._jobgrab.start()

    def event(self, what):
        """
        Put a new event into the queue

        :param what: Event content
        """
        self.events.put({"when": time.time(), "who": "main", "what": what})

    def update_conf(self):
        """
        Update backend config from config file
        """
        self.opts = self.config_reader.read()

    def spin_up_workers_by_group(self, group):
        """
        Handles starting/growing the number of workers

        :param dict group: Builders group

            Utilized keys:
                - **id**
                - **max_workers**

        """
        group_id = group["id"]
        workers = self.workers_by_group_id[group_id]

        missing = group["max_workers"] - len(workers)
        if missing <= 0:
            return

        self.event("Spinning up more workers")
        for _ in range(missing):
            self.max_worker_num_by_group_id[group_id] += 1
            worker = Worker(
                self.opts, self.events,
                self.max_worker_num_by_group_id[group_id],
                group_id, lock=self.lock
            )

            workers.append(worker)
            worker.start()

    def prune_dead_workers_by_group_id(self, group_id):
        """
        Terminate dead workers of the group and report the ones still alive.

        The tracked worker list is not modified here; the caller is expected
        to store the returned list.

        :param group_id: id of the builders group to inspect
        :return list: alive workers

        :raises:
            :py:class:`~backend.exceptions.CoprBackendError` when got dead worker and
            option "exit_on_worker" is enabled
        """
        preserved_workers = []
        for worker in self.workers_by_group_id[group_id]:
            if worker.is_alive():
                preserved_workers.append(worker)
                continue

            self.event("Worker {0} died unexpectedly".format(worker.worker_num))
            worker.terminate()
            if self.opts.exit_on_worker:
                raise CoprBackendError(
                    "Worker died unexpectedly, exiting")
        return preserved_workers

    def terminate(self):
        """
        Cleanup backend processes (just workers for now)
        And also clean all task queues as they would survive copr restart

        :raises CoprBackendError: when a queue cannot be reached
        """
        self.abort = True
        # Walk the workers we actually track rather than the groups of the
        # current config: run() re-reads the config on every iteration, so a
        # group may have left the config while its workers are still running.
        for workers in list(self.workers_by_group_id.values()):
            while workers:
                # drop the worker from the pool first, then shut it down
                workers.pop(0).terminate_instance()
        self.clean_task_queues()

    def run(self):
        """
        Starts backend process. Control sub process start/stop.
        """
        self.init_task_queues()
        self.init_sub_process()

        self.abort = False
        while not self.abort:
            # re-read config into opts
            self.update_conf()

            for group in self.opts.build_groups:
                group_id = group["id"]
                # NOTE(review): queues are connected once, in
                # init_task_queues(); a group that first appears after a
                # config reload has no entry here and would raise KeyError --
                # confirm whether runtime group changes must be supported.
                self.event(
                    "# jobs in {0} queue: {1}"
                    .format(group["name"], self.task_queues[group_id].length))
                self.spin_up_workers_by_group(group)
                self.event("Finished starting worker processes")

                preserved_workers = self.prune_dead_workers_by_group_id(group_id)
                self.workers_by_group_id[group_id] = preserved_workers

            time.sleep(self.opts.sleeptime)
212
# NOTE(review): the ``def`` line of this function was absent from the reviewed
# text; the name ``run_backend`` is reconstructed -- confirm against callers.
def run_backend(opts):
    """
    Start main backend daemon

    :param opts: Bunch object with command line options

    Expected **opts** fields:
        - `config_file` - path to the backend config file
        - `daemonize` - boolean flag to enable daemon mode
        - `pidfile` - path to the backend pidfile

    Any exception (including the ``SystemExit`` raised by the daemon's
    SIGTERM/SIGHUP handler) triggers backend cleanup and is then re-raised.
    """
    cbe = None
    try:
        context = DaemonContext(
            pidfile=lockfile.FileLock(opts.pidfile),
            gid=grp.getgrnam("copr").gr_gid,
            uid=pwd.getpwnam("copr").pw_uid,
            detach_process=opts.daemonize,
            umask=0o22,
            stderr=sys.stderr,
            signal_map={
                # the string names DaemonContext.terminate, which raises
                # SystemExit in the main thread
                signal.SIGTERM: "terminate",
                signal.SIGHUP: "terminate",
            },
        )
        with context:
            cbe = CoprBackend(opts.config_file, ext_opts=opts)
            cbe.run()
    except (Exception, KeyboardInterrupt, SystemExit):
        # SystemExit must be caught explicitly: it is not an Exception
        # subclass, yet it is how SIGTERM/SIGHUP arrive here, and the workers
        # still have to be shut down on that path.
        sys.stderr.write("Killing/Dying\n")
        if cbe is not None:
            cbe.terminate()
        raise