#149 Add audit logging subsystems
Opened 7 years ago by merlinthp. Modified 7 years ago
merlinthp/ipsilon audit  into  master

@@ -113,6 +113,8 @@ 

                      'datadir': args['data_dir'], 'dbname': 'userprefs'},

                  'transdb': args['transaction_dburi'] or args['database_url'] %

                  {'datadir': args['data_dir'], 'dbname': 'transactions'},

+                 'auditdb': args['audit_dburi'] or args['database_url'] % {

+                     'datadir': args['data_dir'], 'dbname': 'audit'},

                  'samlsessionsdb': args['samlsessions_dburi'] or args[

                      'database_url'] % {'datadir': args['data_dir'],

                                         'dbname': 'saml2sessions'},
@@ -414,6 +416,8 @@ 

                               'template)')

      parser.add_argument('--transaction-dburi',

                          help='Transaction database URI (override template)')

+     parser.add_argument('--audit-dburi',

+                         help='Audit database URI (override template)')

      parser.add_argument('--samlsessions-dburi',

                          help='SAML 2 sessions database URI (override ' +

                               'template)')

file modified
+11
@@ -10,6 +10,16 @@ 

  

  

  class TestAuth(LoginFormBase):

+     def __init__(self, site, mgr, page, template=None):

+         super(TestAuth, self).__init__(site, mgr, page, template=template)

+         self._auditname = 'testauth'

+         self.register_audit()

+ 

+     def get_audit_display(self, event_type, event_attrs):

+         if event_type == 'login':

+             return 'User logged in'

+         else:

+             return 'Unknown event'

  

      def POST(self, *args, **kwargs):

          username = kwargs.get("login_name")
@@ -18,6 +28,7 @@ 

  

          if username and password:

              if password == 'ipsilon':

+                 self.audit('login', user=username)

                  cherrypy.log("User %s successfully authenticated." % username)

                  testdata = {

                      'givenname': 'Test User 一',

file modified
+11
@@ -246,6 +246,7 @@ 

          super(LoginPageBase, self).__init__(site)

          self.lm = mgr

          self._Transaction = None

+         self._audittype = 'login'

  

      def root(self, *args, **kwargs):

          raise cherrypy.HTTPError(500)
@@ -370,9 +371,19 @@ 

      def __init__(self, *args, **kwargs):

          super(Logout, self).__init__(*args, **kwargs)

          self.handlers = {}

+         self._audittype = 'login'

+         self._auditname = 'logout'

+         self.register_audit()

+ 

+     def get_audit_display(self, event_type, event_attrs):

+         if event_type == 'logout':

+             return 'User logged out'

+         else:

+             return 'Unknown event'

  

      def root(self, *args, **kwargs):

          us = UserSession()

+         self.audit('logout')

  

          for provider in self.handlers:

              self.debug("Calling logout for provider %s" % provider)

file modified
+2
@@ -1,5 +1,6 @@ 

  # Copyright (C) 2013,2016 Ipsilon project Contributors, for license see COPYING

  

+ from ipsilon.util.audit import Audit

  from ipsilon.util.page import Page

  from ipsilon.util.webfinger import WebFinger

  from ipsilon.util import errors
@@ -37,6 +38,7 @@ 

          cherrypy.config['error_page.404'] = errors.Error_404(self._site)

          cherrypy.config['error_page.500'] = errors.Errors(self._site)

  

+         self._site['audit'] = Audit()

          self._site['authz'] = Authz(self._site)

  

          # set up WebFinger endpoint

file modified
+3 -2
@@ -6,7 +6,8 @@ 

  import os

  from jinja2 import Environment, FileSystemLoader

  import ipsilon.util.sessions

- from ipsilon.util.data import AdminStore, Store, UserStore, TranStore

+ from ipsilon.util.data import AdminStore, Store, UserStore, TranStore, \

+     AuditStore

  from ipsilon.util.sessions import SqlSession

  from ipsilon.root import Root

  
@@ -93,7 +94,7 @@ 

              return upgrade_failed()

  

      # Now handle the rest of the default datastores

-     for store in [UserStore, TranStore]:

+     for store in [UserStore, TranStore, AuditStore]:

          store = store()

          logger.info('Handling default datastore %s',

                      store.__class__.__name__)

file modified
+6
@@ -38,6 +38,11 @@ 

      def root(self, *args, **kwargs):

          us = UserSession()

          user = us.get_user()

+ 

+         events = self._site['audit'].fetch_records(user=user.name, limit=10)

+         # Data comes out newest->oldest, but oldest->newest looks better on

+         # the page.

+         events.reverse()

          consents = user.list_consents()

  

          for consent in consents:
@@ -59,4 +64,5 @@ 

                                title='',

                                baseurl=self.url,

                                menu=self.menu,

+                               events=events,

                                consents=consents)

file added
+105
@@ -0,0 +1,105 @@ 

+ # Copyright (C) 2016 Ipsilon project Contributors, for license see COPYING

+ 

+ from ipsilon.util.data import AuditStore

+ from ipsilon.util.log import Log

+ from ipsilon.util.user import UserSession

+ import datetime

+ 

+ 

+ class Audit(Log):

+     def __init__(self):

+         self._store = AuditStore()

+         self._handlers = {}

+ 

+     def audit(self, provider_type, provider_name, event_type, **event_attrs):

+         """

+         Log an audit entry in the audit database.

+ 

+         provider_type, provider_name, and event_type are all strings.

+ 

+         Callers can store arbitrary data in the audit record (so long as it can

+         be serialised into JSON format) by passing keyword arguments to the

+         method.  To render this data into a readable format, the caller can

+         implement a get_audit_display method and register with the audit

+         handler.  See the register_provider method below.

+ 

+         There are two "special" attributes:

+           'user' is the name of the user causing the event.  Many callers to

+                 this function will already have a User or UserSession object to

+                 hand, so passing this value in can save some processing.  It if

+                 isn't provided, the function will fetch the data itself.

+           'timestamp' can be used to override the timestamp related to the

+                 event.  If it is not specified, the current time will be used.

+         """

+         if 'user' in event_attrs:

+             user = event_attrs['user']

+             del event_attrs['user']

+         else:

+             us = UserSession()

+             user = us.user

+         if 'timestamp' in event_attrs:

+             timestamp = event_attrs['timestamp']

+             del event_attrs['timestamp']

+         else:

+             timestamp = datetime.datetime.now()

+ 

+         self._store.add_record(timestamp, user, provider_type, provider_name,

+                                event_type, event_attrs)

+ 

+     def fetch_records(self, user=None, limit=None, age=None):

+         """

+         Returns audit records from the audit database.  By default, it will

+         return all records in the store, which probably isn't what you want.

+ 

+         Output can be restricted to records relating to a specific user by

+         specifying the username as the argument.  The limit argument restricts

+         the output to the most recent LIMIT records.  age allows the

+         restriction of the output to records in the most recent AGE hours.

+ 

+         Records are returned as a list, newest record first.  Each record is a

+         dict with the following keys:

+           'ts' is the event timestamp, as a DateTime.

+           'user' is the username related to the event.

+           'session' is the CherryPy session ID of the event, and can be used to

+                 trace the activities of a single login session.

+           'ip' is the source IP address of the request that triggered the audit

+                 event.

+           'provtype' is the provider type specified by the audit caller.

+           'provname' is the provider name specified by the audit caller.

+           'eventtype' is the event type.

+           'eventattrs' is a dict containing any extra attributes passed to the

+                 audit call.

+           'event' is generated by the appropriate event rendering function, if

+                 one was registered.  Otherwise it will have a default value of

+                 'Unknown event'.

+         """

+         records = self._store.fetch_records(user, limit, age)

+         for r in records:

+             r['event'] = 'Unknown event'

+             if r['provtype'] in self._handlers:

+                 if r['provname'] in self._handlers[r['provtype']]:

+                     r['event'] = self._handlers[r['provtype']][r['provname']](

+                         r['eventtype'], r['eventattrs'])

+         return records

+ 

+     def register_provider(self, provider_type, provider_name, handler):

+         """

+         Registers a handler function to render audit entries into human-

+         readable display form.  The handler must be a callable matching the

+         following signature:

+ 

+             render_event_method(event_type, event_attrs)

+ 

+         event_type is the type string passed in the original audit call.  Any

+         extra data passed to audit via keyword args is passed to the rendering

+         function in the event_attrs dict.  The render function should return a

+         string.

+         """

+         if not callable(handler):

+             raise ValueError("Handler is not callable, or doesn't have a "

+                              "get_audit_display method.")

+ 

+         if provider_type not in self._handlers:

+             self._handlers[provider_type] = {}

+ 

+         self._handlers[provider_type][provider_name] = handler

file modified
+77 -1
@@ -4,7 +4,7 @@ 

  import datetime

  from ipsilon.util.log import Log

  from sqlalchemy import create_engine

- from sqlalchemy import MetaData, Table, Column, Text, String

+ from sqlalchemy import MetaData, Table, Column, Text, String, Integer, DateTime

  from sqlalchemy.pool import QueuePool, SingletonThreadPool

  from sqlalchemy.schema import (PrimaryKeyConstraint, Index, AddConstraint,

                                 CreateIndex)
@@ -14,6 +14,7 @@ 

  import uuid

  import logging

  import time

+ import json

  

  

  CURRENT_SCHEMA_VERSION = 3
@@ -27,6 +28,12 @@ 

                       'primary_key': ('uuid', 'name'),

                       'indexes': [('uuid',)]

                       }

+ AUDIT_TABLE = {'columns': [('id', Integer()), ('timestamp', DateTime()),

+                            'user', 'session', 'ip', 'provtype', 'provname',

+                            'eventtype', 'eventattrs'],

+                'primary_key': ('id',),

+                'indexes': [('id',)]

+                }

  

  

  class DatabaseError(Exception):
@@ -981,3 +988,72 @@ 

              return 3

          else:

              raise NotImplementedError()

+ 

+ 

+ class AuditStore(Store):

+     def __init__(self):

+         super(AuditStore, self).__init__('audit.db')

+         self.table = 'audit'

+ 

+     def _initialize_schema(self):

+         q = self._query(self._db, self.table, AUDIT_TABLE, trans=False)

+         q.create()

+         q._con.close()  # pylint: disable=protected-access

+ 

+     def add_record(self, timestamp, user, provider_type, provider_name,

+                    event_type, event_attrs):

+         q = self._query(self._db, self.table, AUDIT_TABLE)

+         q.insert({'timestamp': timestamp, 'user': user,

+                   'session': cherrypy.session.id,

+                   'ip': cherrypy.request.remote.ip, 'provtype': provider_type,

+                   'provname': provider_name, 'eventtype': event_type,

+                   'eventattrs': json.dumps(event_attrs)})

+         q.commit()

+ 

+     def fetch_records(self, user, limit, age):

+         # pylint: disable=protected-access

+         table = SqlQuery(self._db, self.table, AUDIT_TABLE)._table

I would prefer it if you don't use a raw SqlQuery, I'm working on getting rid of them in the rest of the code :).
Is there any chance you can either use the standard select type?
If not, I would like it if you could abstract this away in the Query or BaseStore layer.

+         sel = select([table.c.timestamp, table.c.user, table.c.session,

+                       table.c.ip, table.c.provtype, table.c.provname,

+                       table.c.eventtype, table.c.eventattrs])

+         if user:

+             sel = sel.where(table.c.user == user)

+         if age:

+             oldest = datetime.datetime.now() - datetime.timedelta(hours=age)

+             sel = sel.where(table.c.timestamp >= oldest)

+         sel = sel.order_by(table.c.timestamp.desc())

+         if limit:

+             sel.limit(limit)

+ 

+         d = []

+         for row in sel.execute():

+             d.append({

+                 'ts': row[0],

+                 'user': row[1],

+                 'session': row[2],

+                 'ip': row[3],

+                 'provtype': row[4],

+                 'provname': row[5],

+                 'eventtype': row[6],

+                 'eventattrs': json.loads(row[7])

+             })

+ 

+         return d

+ 

+     def _upgrade_schema(self, old_version):

+         if old_version == 1:

+             return 2

+         elif old_version == 2:

+             return 3

+         else:

+             raise NotImplementedError

+ 

+     def _cleanup(self):

+         # pylint: disable=protected-access

+         table = SqlQuery(self._db, self.table, AUDIT_TABLE)._table

+         expiretime = datetime.datetime.now() - \

+             datetime.timedelta(days=cherrypy.config.get('audit.purge_age', 30))

+         sel = select([table.c.id]).where(table.c.timestamp < expiretime)

+         # pylint: disable=no-value-for-parameter

+         d = table.delete().where(table.c.id.in_(sel))

+         return d.execute().rowcount

file modified
+14
@@ -35,6 +35,9 @@ 

          self.user = None

          self._is_form_page = form

          self.auth_protect = False

+         # Subclasses are expected to change these, if they want audit to work

+         self._audittype = None

+         self._auditname = None

  

      def get_url(self):

          return cherrypy.url(relative=False)
@@ -140,4 +143,15 @@ 

              msg = 'Transaction expired, or cookies not available'

              raise cherrypy.HTTPError(401, msg)

  

+     def register_audit(self):

+         self._site['audit'].register_provider(self._audittype, self._auditname,

+                                               self.get_audit_display)

+ 

+     def audit(self, event_type, **event_attrs):

+         self._site['audit'].audit(self._audittype, self._auditname, event_type,

+                                   **event_attrs)

+ 

+     def get_audit_display(self, event_type, event_attrs):

+         return ''

+ 

      exposed = True

file modified
+2
@@ -71,6 +71,7 @@ 

      subprocess.call(['/usr/bin/sqlite3', '-init', sql, users_db, '.quit'])

  

      trans_db = os.path.join(workdir, 'transactions.sqlite')

+     audit_db = os.path.join(workdir, 'audit.sqlite')

      cachedir = os.path.join(workdir, 'cache')

  

      with open(CONF_TEMPLATE) as f:
@@ -88,6 +89,7 @@ 

                           'admindb': admin_db,

                           'usersdb': users_db,

                           'transdb': trans_db,

+                          'auditdb': audit_db,

                           'sesstype': 'file',

                           'sessopt': 'path',

                           'sessval': os.path.join(workdir, 'sessions'),

@@ -12,6 +12,8 @@ 

  admin.config.db = "${admindb}"

  user.prefs.db = "${usersdb}"

  transactions.db = "${transdb}"

+ audit.db = "${auditdb}"

+ audit.purge_age = 30

  

  tools.sessions.on = True

  tools.sessions.name = "${instance}_ipsilon_session_id"

file modified
+35
@@ -3,6 +3,41 @@ 

  <div class="container">

    <div class="row">

      <div class="col-sm-12">

+       <h2>My Recent Activity</h2>

+       <div class="row">

+         <div class="col-xs-2">

+           <p><b>Event Time</b></p>

+         </div>

+         <div class="col-xs-1">

+           <p><b>IP Address</b></p>

+         </div>

+         <div class="col-xs-3">

+           <p><b>Session</b></p>

+         </div>

+         <div class="col-xs-6">

+           <p><b>Event</b></p>

+         </div>

+       </div>

+ {%- for event in events %}

+       <div class="row ipsilon-row">

+         <div class="col-xs-2">

+           <p>{{ event.ts.strftime('%Y-%m-%d %H:%M:%S') }}</p>

+         </div>

+         <div class="col-xs-1">

+           <p>{{ event.ip }}</p>

+         </div>

+         <div class="col-xs-3">

+           <p>{{ event.session }}</p>

+         </div>

+         <div class="col-xs-6">

+           <p>{{ event.event }}</p>

+         </div>

+       </div>

+ {%- endfor %}

+     </div>

+   </div>

+   <div class="row">

+     <div class="col-sm-12">

        <h2>Granted Consent</h2>

        <div class="row">

          <div class="col-xs-2">

file modified
+1 -1
@@ -257,7 +257,7 @@ 

          self.processes.append(p)

          p.wait()

          for d in ['adminconfig', 'users', 'transactions', 'sessions',

-                   'saml2.sessions.db']:

+                   'saml2.sessions.db', 'audit']:

              cmd = ['/usr/bin/createdb', '-h', addr, '-p', port, d]

              subprocess.check_call(cmd, env=env)

  

This adds an audit logging subsystem with a new database to store the audit records.

Mainly looking for comments on whether the basics look right for now, before I start adding lots of audit calls to various parts of the codebase.

So, I'm wondering whether we want to keep with the free-form text record.
I would personally prefer a type of entry_provider_type,entry_provider,entry_type and then arguments.
So something like: audit('login', 'authtest', 'login', {'attributes':'test'}) or something.

And then the particular plugin that generated the audit entry would need a function to get a user-displayed string.
This way, we would have the information in a way that it could also be used by admins that for example want all users that used openid in the last week.

A more structured approach sounds fine to me.

You might add a comment over this line explaining why you want to reverse this list.

With this line indented, it seems that the following lines will not work.

I suggest adding docblocks on all the new classes, functions, and methods so they are well explained for future readers.

Consider adding a comment explaining what this method is for, and what the meanings of 1, 2, and 3 are.

It seems overall good, but I think it would really benefit from automated tests (for example, that indentation problem would be caught if there were tests).

Yeah, I fixed that when I spotted it earlier. None of the calls in this PR set timestamp, so they worked. Mildly surprised it passed the lint test (I think I did one before pushing).

Thanks for the comments. I'm most of the way through a rewrite based on Patrick's suggestion above.

rebased

7 years ago

Patrick, is this more like what you're thinking?

rebased

7 years ago

Would it be an idea to make event_arrs a double-star keyword-argument dict?
That way, functions can do things like self.audit('login', user=username, foo='bar')

But about the general idea: yes, this looks like what I was thinking of.

rebased

7 years ago

I'm fairly happy with this at the moment, although I think the Audit class' fetch_records function will probably need more work when we come to having record search pages (paginated results and the like).

I would prefer it if you don't use a raw SqlQuery, I'm working on getting rid of them in the rest of the code :).
Is there any chance you can either use the standard select type?
If not, I would like it if you could abstract this away in the Query or BaseStore layer.