From 05b161e7659908c830dc5e73d197712ab962ab10 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup Date: Apr 09 2019 11:39:29 +0000 Subject: [backend] simplify logging through redis After 3d642d711eb9cf1e10d224fa4d379c5edf6fc34a commit, I started to observe weird errors (see below). So while debugging, I noticed that the filters are unnecessary complicated, and that we can bypass LogErrors through Redis queue a more convenient way: -> start with LogRecord -> if traceback -> encode traceback as msg (as before) -> else -> encode message (as before) -> drop some fields which are not json.dump()able -> and then send to redis On the RedisLogHandler side it is then only matter of logging.makeLogRecord() call and logger.handle(record). While I was on it, I removed the router_logger mechanism, and created separate logger for each daemon (this is actually faster, because not all the loggers have to go through all log records). Hopefully this means final fight with RedisLog* stuff, should be much solid. copr_run_logger.py[26166]: --- Logging error --- copr_run_logger.py[26166]: Traceback (most recent call last): copr_run_logger.py[26166]: File "/usr/lib64/python3.6/logging/__init__.py", line 994, in emit copr_run_logger.py[26166]: msg = self.format(record) copr_run_logger.py[26166]: File "/usr/lib64/python3.6/logging/__init__.py", line 840, in format copr_run_logger.py[26166]: return fmt.format(record) copr_run_logger.py[26166]: File "/usr/lib64/python3.6/logging/__init__.py", line 585, in format copr_run_logger.py[26166]: record.exc_text = self.formatException(record.exc_info) copr_run_logger.py[26166]: File "/usr/lib64/python3.6/logging/__init__.py", line 535, in formatException copr_run_logger.py[26166]: traceback.print_exception(ei[0], ei[1], tb, None, sio) copr_run_logger.py[26166]: File "/usr/lib64/python3.6/traceback.py", line 104, in print_exception copr_run_logger.py[26166]: type(value), value, tb, limit=limit).format(chain=chain): copr_run_logger.py[26166]: File "/usr/lib64/python3.6/traceback.py", line 477, in __init__ copr_run_logger.py[26166]: if (exc_value and exc_value.__cause__ is not None copr_run_logger.py[26166]: AttributeError: 'str' object has no attribute '__cause__' copr_run_logger.py[26166]: Call stack: copr_run_logger.py[26166]: File "/usr/bin/copr_run_logger.py", line 15, in copr_run_logger.py[26166]: main() copr_run_logger.py[26166]: File "/usr/bin/copr_run_logger.py", line 11, in main copr_run_logger.py[26166]: handler.run() copr_run_logger.py[26166]: File "/usr/share/copr/backend/daemons/log.py", line 135, in run copr_run_logger.py[26166]: self.handle_msg(raw) copr_run_logger.py[26166]: File "/usr/share/copr/backend/daemons/log.py", line 120, in handle_msg copr_run_logger.py[26166]: self.router_logger.log(level, msg, extra={"event": event}) copr_run_logger.py[26166]: Message: 'Healtcheck failed for VM 172.25.150.214 with error Connection broke:\nOUT:\n\nERR:\nssh: connect to host 172.25.150.214 port 22: No route to host\n> --- diff --git a/backend/backend/daemons/log.py b/backend/backend/daemons/log.py index a6a8fc7..707f22b 100644 --- a/backend/backend/daemons/log.py +++ b/backend/backend/daemons/log.py @@ -14,49 +14,6 @@ from .. import helpers from ..constants import default_log_format -level_map = { - "info": logging.INFO, - "debug": logging.DEBUG, - "error": logging.ERROR, -} - - -class LogRouterFilter(logging.Filter): - def __init__(self, who): - """ - Value of field `who` which should be present to propagate LogRecord - """ - super(LogRouterFilter, self).__init__() - self.who = who - - def filter(self, record): - return record.event.get("who") == self.who - - -class CustomFilter(logging.Filter): - - def filter(self, record): - if not hasattr(record, "event"): - return False - - event = record.event - if "traceback" in event: - record.msg = "{}\n{}".format(record.msg, event.pop("traceback")) - - # '%s %s' % (a, b) requires tuple, while - # json.loads(json.dumps(tuple)) returns array - if "args" in event and isinstance(event["args"], list): - event["args"] = tuple(event["args"]) - - record.lineno = int(event.pop("lineno", "-1")) - record.funcName = event.pop("funcName", None) - record.pathname = event.pop("pathname", None) - for k, v in event.items(): - setattr(record, k, v) - - return True - - class RedisLogHandler(object): """ Single point to collect logs through redis pub/sub and write @@ -81,43 +38,31 @@ class RedisLogHandler(object): self.main_handler.setFormatter(default_log_format) self.main_logger.addHandler(self.main_handler) - self.router_logger = logging.Logger("log_router") - self.router_logger.addFilter(CustomFilter()) + level = getattr(logging, self.opts.log_level.upper(), None) + self.loggers = {} for component in self.components: + logger = logging.Logger(component) handler = logging.handlers.WatchedFileHandler( filename=os.path.join(self.log_dir, "{}.log".format(component))) handler.setFormatter(default_log_format) - handler.setLevel(level=level_map[self.opts.log_level]) - # not very good from performance point: - # filter called for each message, but only one handler process record - # but it shouldn't be a real problem - handler.addFilter(filter=LogRouterFilter(component)) - self.router_logger.addHandler(handler) + handler.setLevel(level) + logger.addHandler(handler) + self.loggers[component] = logger def handle_msg(self, raw): try: event = json.loads(raw["data"]) - # expected fields: - # - who: self.components - # - level: "info", "debug", "error", None --> default is "info" - # - msg: str with log msg - # [- traceback: str with error traceback ] - # [ more LogRecord kwargs, see: https://docs.python.org/2/library/logging.html#logrecord-objects] - - for key in ["who", "msg"]: - if key not in event: - raise Exception("Handler received msg without `{}` field, msg: {}".format(key, event)) - - who = event["who"] - if who not in self.components: - raise Exception("Handler received msg with unknown `who` field: {}, msg: {}".format(who, event)) - - level = level_map[event.pop("level", "info")] - msg = event.pop("msg") + who = event.get('who', None) + if not who: + raise Exception("No LogRecord.who field, raw: {}".format(event)) + if who not in self.loggers: + raise Exception("Unknown LogRecord.who field: {}, raw event: {}" + .format(who, event)) - self.router_logger.log(level, msg, extra={"event": event}) + log_record = logging.makeLogRecord(event) + self.loggers[who].handle(log_record) except Exception as err: self.main_logger.exception(err) diff --git a/backend/backend/helpers.py b/backend/backend/helpers.py index 72887bf..fe33d13 100644 --- a/backend/backend/helpers.py +++ b/backend/backend/helpers.py @@ -397,30 +397,25 @@ class RedisPublishHandler(logging.Handler): self.who = who def emit(self, record): - def default(obj): - if isinstance(obj, Thread): - return obj.name # shows Thread- - if isinstance(obj, types.ModuleType): - return "module " + obj.__name__ - # This is better than traceback. - return "can't convert " + str(type(obj)) - - try: - msg = record.__dict__ - msg["who"] = self.who - - if msg.get("exc_info"): - # from celery.contrib import rdb; rdb.set_trace() - _, error, tb = msg.get("exc_info") - msg["traceback"] = format_tb(error, tb) + # copr specific semantics + record.who = self.who + if record.exc_info: + _, error, tb = record.exc_info + record.msg = format_tb(error, tb) + else: # For the message arguments, it is better to expand them right now # instead of relying on method in json.dumps(..., default=default) # and even worse rely on it's reverse action in RedisLogHandler. - msg['msg'] = msg['msg'] % msg['args'] - msg['args'] = () + record.msg = record.msg % record.args - self.rc.publish(constants.LOG_PUB_SUB, json.dumps(msg, default=default)) + # cleanup the hard to json.dumps() stuff + record.exc_info = None + record.exc_text = None + record.args = () + + try: + self.rc.publish(constants.LOG_PUB_SUB, json.dumps(record.__dict__)) # pylint: disable=W0703 except Exception as error: _, _, ex_tb = sys.exc_info() diff --git a/backend/tests/test_helpers.py b/backend/tests/test_helpers.py index 7618252..7e18888 100644 --- a/backend/tests/test_helpers.py +++ b/backend/tests/test_helpers.py @@ -2,6 +2,7 @@ from munch import Munch import json +import logging from backend.exceptions import BuilderError from backend.helpers import get_redis_logger, get_chroot_arch, \ @@ -41,9 +42,9 @@ class TestHelpers(object): for raw in self.channel.listen(): assert raw.get("type") == "message" data = json.loads(raw['data']) - assert data.get("traceback") assert data.get("who") == "test" - assert 'backend.exceptions.BuilderError: foobar\n' in data['traceback'] + assert data.get("levelno") == logging.ERROR + assert 'backend.exceptions.BuilderError: foobar\n' in data['msg'] break def test_get_chroot_arch(self):