1 import re
2 import os
3 import sys
4 import time
5 import fcntl
6 import json
7 import subprocess
8 from subprocess import CalledProcessError
9 import multiprocessing
10
11 import ansible
12 import ansible.runner
13 import ansible.utils
14
15 from ansible.errors import AnsibleError
16
17 from setproctitle import setproctitle
18 from IPy import IP
19 from retask.queue import Queue
20
21
22
23 from ..mockremote.callback import CliLogCallBack
24
25 from ..exceptions import MockRemoteError, CoprWorkerError, CoprWorkerSpawnFailError
26 from ..job import BuildJob
27
28 from ..mockremote import MockRemote
29 from ..frontend import FrontendClient
30 from ..constants import BuildStatus
31 from ..helpers import register_build_result
32
# Executable name used to build the playbook command line in this module.
ansible_playbook = "ansible-playbook"

try:
    import fedmsg
except ImportError:
    # fedmsg is optional: code below checks `fedmsg` for truthiness before
    # initializing it or publishing messages.
    fedmsg = None
def ans_extra_vars_encode(extra_vars, name):
    """
    Transform a dict into an ``--extra-vars='<json string>'`` argument.

    The variables are nested under a single top-level key ``name`` and the
    resulting JSON document is quoted for use in a shell command line.

    :param dict extra_vars: variables to hand over to the playbook
    :param str name: top-level key under which the variables are nested
    :return str: shell-safe command-line argument, or an empty string when
        there are no variables to pass
    """
    if not extra_vars:
        return ""

    # shlex.quote only exists on Python 3; pipes.quote is the Python 2 name.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Serialize the key together with the values so the key is JSON-escaped too,
    # and let the quoting helper handle single quotes embedded in the values.
    payload = json.dumps({name: extra_vars})
    return "--extra-vars=" + quote(payload)
47
class WorkerCallback(object):
    """
    Callback class for worker. Now used only for message logging

    :param logfile: path to the log file
    """

    def __init__(self, logfile=None):
        self.logfile = logfile

    def log(self, msg):
        """
        Safely writes msg to the logfile

        Does nothing when no logfile is configured. Failures to open or write
        the file are reported on stderr and never propagated to the caller.

        :param msg: message to be logged (converted to text when formatted)
        """
        if not self.logfile:
            return

        now = time.strftime("%F %T")
        # format() instead of "+" so that non-string messages (e.g. exception
        # objects) do not blow up with TypeError.
        line = "{0}: {1}\n".format(now, msg)
        try:
            with open(self.logfile, 'a') as lf:
                fcntl.flock(lf, fcntl.LOCK_EX)
                try:
                    lf.write(line)
                    # Push the buffered data out while the lock is still held;
                    # otherwise the real write happens on close(), after unlock.
                    lf.flush()
                finally:
                    fcntl.flock(lf, fcntl.LOCK_UN)
        except (IOError, OSError) as e:
            sys.stderr.write("Could not write to logfile {0} - {1}\n"
                             .format(self.logfile, str(e)))
75
76
77
78 -class Worker(multiprocessing.Process):
79
    """
    Worker process dispatches building tasks. The backend spins up multiple workers; each
    worker is associated with one group_id and processes one task at a time.

    Worker listens for new tasks from the :py:class:`retask.queue.Queue` associated with its group_id.

    :param Bunch opts: backend config
    :param events: (:py:class:`multiprocessing.Queue`) queue to announce new events
    :param int worker_num: worker number
    :param int group_id: group_id from the set of groups defined in config
    :param callback: callback object to handle internal workers events. Should implement method ``log(msg)``.
    :param lock: (:py:class:`multiprocessing.Lock`) global backend lock

    """
94
95 - def __init__(self, opts, events, worker_num, group_id,
96 callback=None, lock=None):
97
98
99 multiprocessing.Process.__init__(self, name="worker-builder")
100
101 self.opts = opts
102
103
104 self.task_queue = Queue("copr-be-{0}".format(str(group_id)))
105 self.task_queue.connect()
106
107 self.events = events
108 self.worker_num = worker_num
109 self.group_id = group_id
110
111 self.kill_received = False
112 self.lock = lock
113 self.frontend_callback = FrontendClient(opts, events)
114 self.callback = callback
115 if not self.callback:
116 log_name = "worker-{0}-{1}.log".format(
117 self.group_name,
118 self.worker_num)
119
120 self.logfile = os.path.join(self.opts.worker_logdir, log_name)
121 self.callback = WorkerCallback(logfile=self.logfile)
122
123 self.vm_name = None
124 self.vm_ip = None
125 self.callback.log("creating worker: dynamic ip")
126
127 @property
129 try:
130 return self.opts.build_groups[self.group_id]["name"]
131 except Exception as error:
132 self.callback.log("Failed to get builder group name from config, using group_id as name."
133 "Original error: {}".format(error))
134 return self.group_id
135
136 - def event(self, topic, template, content=None):
137 """ Multi-purpose logging method.
138
139 Logs messages to three different destinations:
140 - To log file
141 - The internal "events" queue for communicating back to the
142 dispatcher.
143 - The fedmsg bus. Messages are posted asynchronously to a
144 zmq.PUB socket.
145
146 """
147
148 content = content or {}
149 what = template.format(**content)
150 who = "worker-{0}".format(self.worker_num)
151
152 self.callback.log("event: who: {0}, what: {1}".format(who, what))
153 self.events.put({"when": time.time(), "who": who, "what": what})
154
155 if self.opts.fedmsg_enabled and fedmsg:
156 content["who"] = who
157 content["what"] = what
158 try:
159 fedmsg.publish(modname="copr", topic=topic, msg=content)
160
161 except Exception as e:
162
163 self.callback.log("failed to publish message: {0}".format(e))
164
166 """
167 Announce everywhere that a build process started now.
168 """
169 job.started_on = time.time()
170 self.mark_started(job)
171
172 template = "build start: user:{user} copr:{copr}" \
173 "pkg: {pkg} build:{build} ip:{ip} pid:{pid}"
174
175 content = dict(user=job.submitter, copr=job.project_name,
176 owner=job.project_owner, pkg=job.pkg_name,
177 build=job.build_id, ip=self.vm_ip, pid=self.pid)
178 self.event("build.start", template, content)
179
180 template = "chroot start: chroot:{chroot} user:{user}" \
181 "copr:{copr} pkg: {pkg} build:{build} ip:{ip} pid:{pid}"
182
183 content = dict(chroot=job.chroot, user=job.submitter,
184 owner=job.project_owner, pkg=job.pkg_name,
185 copr=job.project_name, build=job.build_id,
186 ip=self.vm_ip, pid=self.pid)
187
188 self.event("chroot.start", template, content)
189
191 """
192 Announce everywhere that a build process ended now.
193 """
194 job.ended_on = time.time()
195
196 self.return_results(job)
197 self.callback.log("worker finished build: {0}".format(self.vm_ip))
198 template = "build end: user:{user} copr:{copr} build:{build}" \
199 " pkg: {pkg} version: {version} ip:{ip} pid:{pid} status:{status}"
200
201 content = dict(user=job.submitter, copr=job.project_name,
202 owner=job.project_owner,
203 pkg=job.pkg_name, version=job.pkg_version,
204 build=job.build_id, ip=self.vm_ip, pid=self.pid,
205 status=job.status, chroot=job.chroot)
206 self.event("build.end", template, content)
207
209 """
210 Call ansible playbook:
211
212 - well mostly we run out of space in OpenStack so we rather try
213 multiple times (attempts param)
214 - dump any attempt failure
215 """
216
217
218
219
220 command = "{0} {1}".format(ansible_playbook, args)
221
222 result = None
223 for i in range(0, attempts):
224 try:
225 attempt_desc = ": retry: " if i > 0 else ": begin: "
226 self.callback.log(name + attempt_desc + command)
227 result = subprocess.check_output(command, shell=True)
228 self.callback.log("Raw playbook output:\n{0}\n".format(result))
229 break
230
231 except CalledProcessError as e:
232 self.callback.log("CalledProcessError: \n{0}\n".format(e.output))
233 sys.stderr.write("{0}\n".format(e.output))
234
235 time.sleep(self.opts.sleeptime)
236
237 self.callback.log(name + ": end")
238 return result
239
241 """
242 Test connectivity to the VM
243
244 :param ipaddr: ip address to the newly created VM
245 :raises: :py:class:`~backend.exceptions.CoprWorkerSpawnFailError`: validation fails
246 """
247
248
249 runner_options = dict(
250 remote_user="root",
251 host_list="{},".format(self.vm_ip),
252 pattern=self.vm_ip,
253 forks=1,
254 transport=self.opts.ssh.transport,
255 timeout=500
256 )
257 connection = ansible.runner.Runner(**runner_options)
258 connection.module_name = "shell"
259 connection.module_args = "echo hello"
260
261 try:
262 res = connection.run()
263 except Exception as exception:
264 raise CoprWorkerSpawnFailError(
265 "Failed to check created VM ({})"
266 "due to ansible error: {}".format(self.vm_ip, exception))
267
268 if self.vm_ip not in res.get("contacted", {}):
269 self.callback.log(
270 "Worker is not responding to the testing playbook. Terminating it."
271 "Runner options: {}".format(runner_options) +
272 "Ansible raw response:\n{}".format(res))
273 raise CoprWorkerSpawnFailError("Created VM ({}) was unresponsive "
274 "and therefore terminated".format(self.vm_ip))
275
277 """
278 Tries to spawn new vm using ansible
279
280 :param args: ansible for ansible command which spawns VM
281 :return str: valid ip address of new machine (nobody guarantee machine availability)
282 """
283 result = self.run_ansible_playbook(args, "spawning instance")
284 if not result:
285 raise CoprWorkerSpawnFailError("No result, trying again")
286 match = re.search(r'IP=([^\{\}"]+)', result, re.MULTILINE)
287
288 if not match:
289 raise CoprWorkerSpawnFailError("No ip in the result, trying again")
290 ipaddr = match.group(1)
291 match = re.search(r'vm_name=([^\{\}"]+)', result, re.MULTILINE)
292
293 if match:
294 self.vm_name = match.group(1)
295 self.callback.log("got instance ip: {0}".format(ipaddr))
296
297 try:
298 IP(ipaddr)
299 except ValueError:
300
301 msg = "Invalid IP back from spawn_instance - dumping cache output\n"
302 msg += str(result)
303 raise CoprWorkerSpawnFailError(msg)
304
305 return ipaddr
306
308 """
309 Spawn new VM, executing the following steps:
310
311 - call the spawn playbook to startup/provision a building instance
312 - get an IP and test if the builder responds
313 - repeat this until you get an IP of working builder
314
315 :param BuildJob job:
316 :return ip: of created VM
317 :return None: if couldn't find playbook to spin ip VM
318 """
319
320 start = time.time()
321
322
323
324
325 try:
326 spawn_playbook = self.opts.build_groups[self.group_id]["spawn_playbook"]
327 except KeyError:
328 return
329
330 spawn_args = "-c ssh {}".format(spawn_playbook)
331
332
333 i = 0
334 while self.vm_ip is None:
335 i += 1
336 try:
337 self.callback.log("Spawning a builder. Try No. {0}".format(i))
338
339 self.vm_ip = self.try_spawn(spawn_args)
340 self.update_process_title()
341 try:
342 self.validate_vm()
343 except CoprWorkerSpawnFailError:
344 self.terminate_instance()
345 raise
346
347 self.callback.log("Instance spawn/provision took {0} sec"
348 .format(time.time() - start))
349
350 except CoprWorkerSpawnFailError as exception:
351 self.callback.log("VM Spawn attempt failed with message: {}"
352 .format(exception.msg))
353
355 """
356 Call the terminate playbook to destroy the building instance
357 """
358 self.update_process_title(suffix="Terminating VM")
359 term_args = {}
360 if "ip" in self.opts.terminate_vars:
361 term_args["ip"] = self.vm_ip
362 if "vm_name" in self.opts.terminate_vars:
363 term_args["vm_name"] = self.vm_name
364
365 try:
366 playbook = self.opts.build_groups[self.group_id]["terminate_playbook"]
367 except KeyError:
368 self.callback.log(
369 "Fatal error: no terminate playbook for group_id: {}; exiting"
370 .format(self.group_id))
371 sys.exit(255)
372
373
374 args = "-c ssh {} {}".format(
375
376 playbook,
377 ans_extra_vars_encode(term_args, "copr_task"))
378
379 try:
380 self.run_ansible_playbook(args, "terminate instance")
381 except Exception as error:
382 self.callback.log("Failed to terminate an instance: vm_name={}, vm_ip={}. Original error: {}"
383 .format(self.vm_name, self.vm_ip, error))
384
385
386 self.vm_ip = None
387 self.vm_name = None
388 self.update_process_title()
389
391 """
392 Send data about started build to the frontend
393 """
394
395 job.status = 3
396 build = job.to_dict()
397 self.callback.log("build: {}".format(build))
398
399 data = {"builds": [build]}
400 try:
401 self.frontend_callback.update(data)
402 except:
403 raise CoprWorkerError(
404 "Could not communicate to front end to submit status info")
405
407 """
408 Send the build results to the frontend
409 """
410 self.callback.log(
411 "{0} status {1}. Took {2} seconds".format(
412 job.build_id, job.status, job.ended_on - job.started_on))
413
414 self.callback.log("build: {}".format(job.to_dict()))
415 data = {"builds": [job.to_dict()]}
416
417 try:
418 self.frontend_callback.update(data)
419 except Exception as err:
420 raise CoprWorkerError(
421 "Could not communicate to front end to submit results: {}"
422 .format(err)
423 )
424
426 """
427 Announce to the frontend that a build is starting.
428
429 :return True: if the build can start
430 :return False: if the build can not start (build is cancelled)
431 """
432
433 try:
434 can_start = self.frontend_callback.starting_build(job.build_id, job.chroot)
435 except Exception as err:
436 raise CoprWorkerError(
437 "Could not communicate to front end to submit results: {}"
438 .format(err)
439 )
440
441 return can_start
442
443 @classmethod
445 """
446 Check whether the package has already been built in this chroot.
447 """
448 s_pkg = os.path.basename(pkg)
449 pdn = s_pkg.replace(".src.rpm", "")
450 resdir = "{0}/{1}/{2}".format(destdir, chroot, pdn)
451 resdir = os.path.normpath(resdir)
452 if os.path.exists(resdir) and os.path.exists(os.path.join(resdir, "success")):
453 return True
454 return False
455
457 """
458 Wrapper around self.spawn_instance() with exception checking
459
460 :param BuildJob job:
461
462 :return str: ip of spawned vm
463 :raises:
464
465 - :py:class:`~backend.exceptions.CoprWorkerError`: spawn function doesn't return ip
466 - :py:class:`AnsibleError`: failure during anible command execution
467 """
468 self.update_process_title(suffix="Spawning a new VM")
469 try:
470 self.spawn_instance()
471 if not self.vm_ip:
472
473 raise CoprWorkerError(
474 "No IP found from creating instance")
475 except AnsibleError as e:
476 register_build_result(self.opts, failed=True)
477
478 self.callback.log("failure to setup instance: {0}".format(e))
479 raise
480
482 """
483 Initialize Fedmsg
484 (this assumes there are certs and a fedmsg config on disk)
485 """
486
487 if not (self.opts.fedmsg_enabled and fedmsg):
488 return
489
490 try:
491 fedmsg.init(name="relay_inbound", cert_prefix="copr", active=True)
492 except Exception as e:
493 self.callback.log(
494 "failed to initialize fedmsg: {0}".format(e))
495
505
507 """
508 Retrieves new build task from queue.
509 Checks if the new job can be started and not skipped.
510 """
511 self.update_process_title(suffix="No task")
512
513
514
515
516 try:
517 task = self.task_queue.dequeue()
518 except TypeError:
519 return
520 if not task:
521 return
522
523
524 job = BuildJob(task.data, self.opts)
525
526 self.update_process_title(suffix="Task: {} chroot: {}".format(job.build_id, job.chroot))
527
528
529 if not self.starting_build(job):
530 return
531
532
533 if self.pkg_built_before(job.pkg, job.chroot, job.destdir):
534 self.on_pkg_skip(job)
535 return
536
537
538
539
540
541 return job
542
544 """
545 Executes new job.
546
547 :param job: :py:class:`~backend.job.BuildJob`
548 """
549 self._announce_start(job)
550 status = BuildStatus.SUCCEEDED
551 chroot_destdir = os.path.normpath(job.destdir + '/' + job.chroot)
552
553
554 if not os.path.exists(chroot_destdir):
555 try:
556 os.makedirs(chroot_destdir)
557 except (OSError, IOError) as e:
558 msg = "Could not make results dir" \
559 " for job: {0} - {1}".format(chroot_destdir, str(e))
560
561 self.callback.log(msg)
562 status = BuildStatus.FAILURE
563
564 if status == BuildStatus.SUCCEEDED:
565
566
567
568
569
570
571
572
573 self.callback.log(
574 "Starting build: id={0} builder={1} timeout={2} destdir={3}"
575 " chroot={4} repos={5}"
576 .format(job.build_id, self.vm_ip, job.timeout, job.destdir,
577 job.chroot, str(job.repos)))
578
579 self.callback.log("Building pkgs: {0}".format(job.pkg))
580
581 chroot_repos = list(job.repos)
582 chroot_repos.append(job.results + job.chroot + '/')
583 chroot_repos.append(job.results + job.chroot + '/devel/')
584
585 chroot_logfile = "{0}/build-{1}.log".format(
586 chroot_destdir, job.build_id)
587
588 macros = {
589 "copr_username": job.project_owner,
590 "copr_projectname": job.project_name,
591 "vendor": "Fedora Project COPR ({0}/{1})".format(
592 job.project_owner, job.project_name)
593 }
594
595 try:
596 mr = MockRemote(
597 builder_host=self.vm_ip, job=job, repos=chroot_repos,
598 macros=macros, opts=self.opts, lock=self.lock,
599 callback=CliLogCallBack(quiet=True, logfn=chroot_logfile),
600 )
601 mr.check()
602
603 build_details = mr.build_pkg()
604 job.update(build_details)
605
606 if self.opts.do_sign:
607 mr.add_pubkey()
608
609 register_build_result(self.opts)
610
611 except MockRemoteError as e:
612
613 self.callback.log("{0} - {1}".format(self.vm_ip, e))
614 status = BuildStatus.FAILURE
615 register_build_result(self.opts, failed=True)
616
617 self.callback.log(
618 "Finished build: id={0} builder={1} timeout={2} destdir={3}"
619 " chroot={4} repos={5}"
620 .format(job.build_id, self.vm_ip, job.timeout, job.destdir,
621 job.chroot, str(job.repos)))
622
623 job.status = status
624 self._announce_end(job)
625 self.update_process_title(suffix="Task: {} chroot: {} done"
626 .format(job.build_id, job.chroot))
627
639
641 title = "worker-{} {} ".format(self.group_name, self.worker_num)
642 if self.vm_ip:
643 title += "VM_IP={} ".format(self.vm_ip)
644 if self.vm_name:
645 title += "VM_NAME={} ".format(self.vm_name)
646 if suffix:
647 title += str(suffix)
648
649 setproctitle(title)
650
684