#3027 Adjust activity heatmap and logs for timezone (#1642)
Merged 6 years ago by pingou. Opened 6 years ago by adamwill.
adamwill/pagure activity-timezone-funtimes  into  master

file modified
+7 -2
@@ -481,13 +481,15 @@ 

  

      """

      date_format = flask.request.args.get('format', 'isoformat')

+     offset = flask.request.args.get('offset', 0)

  

      user = _get_user(username=username)

  

      stats = pagure.lib.get_yearly_stats_user(

          flask.g.session,

          user,

-         datetime.datetime.utcnow().date() + datetime.timedelta(days=1)

+         datetime.datetime.utcnow().date() + datetime.timedelta(days=1),

+         offset=offset

      )

  

      def format_date(d):
@@ -576,6 +578,7 @@ 

  

      """  # noqa

      grouped = str(flask.request.args.get('grouped')).lower() in ['1', 'true']

+     offset = flask.request.args.get('offset', 0)

  

      try:

          date = arrow.get(date)
@@ -586,7 +589,9 @@ 

  

      user = _get_user(username=username)

  

-     activities = pagure.lib.get_user_activity_day(flask.g.session, user, date)

+     activities = pagure.lib.get_user_activity_day(

+         flask.g.session, user, date, offset=offset

+     )

      js_act = []

      if grouped:

          commits = collections.defaultdict(list)

file modified
+44 -15
@@ -33,6 +33,7 @@ 

  import uuid

  import markdown

  import werkzeug

+ from collections import Counter

  from math import ceil

  import copy

  
@@ -4351,41 +4352,69 @@ 

          return 'Custom field %s reset (from %s)' % (key.name, old_value)

  

  

- def get_yearly_stats_user(session, user, date):

+ def get_yearly_stats_user(session, user, date, offset=0):

      """ Return the activity of the specified user in the year preceding the

-     specified date.

+     specified date. 'offset' is intended to be a timezone offset from UTC,

+     in minutes: you can discover the offset for a timezone and pass that

+     in order for the results to be relative to that timezone. Note, offset

+     should be the amount of minutes that should be added to the UTC time to

+     produce the local time - so for timezones behind UTC the number should

+     be negative, and for timezones ahead of UTC the number should be

+     positive. This is the opposite of what Javascript getTimezoneOffset()

+     does, so you have to invert any value you get from that.

      """

      start_date = datetime.datetime(date.year - 1, date.month, date.day)

  

-     query = session.query(

-         model.PagureLog.date, func.count(model.PagureLog.id)

+     events = session.query(

+         model.PagureLog

      ).filter(

          model.PagureLog.date_created.between(start_date, date)

      ).filter(

          model.PagureLog.user_id == user.id

-     ).group_by(

-         model.PagureLog.date

-     ).order_by(

-         model.PagureLog.date

-     )

- 

-     return query.all()

+     ).all()

+     # Counter very handily does exactly what we want here: it gives

+     # us a dict with the dates as keys and the number of times each

+     # date occurs in the data as the values, we return its items as

+     # a list of tuples

+     return Counter([event.date_offset(offset) for event in events]).items()

  

  

- def get_user_activity_day(session, user, date):

+ def get_user_activity_day(session, user, date, offset=0):

      """ Return the activity of the specified user on the specified date.

+     'offset' is intended to be a timezone offset from UTC, in minutes:

+     you can discover the offset for a timezone and pass that, so this

+     will return activity that occurred on the specified date in the

+     desired timezone. Note, offset should be the amount of minutes

+     that should be added to the UTC time to produce the local time -

+     so for timezones behind UTC the number should be negative, and

+     for timezones ahead of UTC the number should be positive. This is

+     the opposite of what Javascript getTimezoneOffset() does, so you

+     have to invert any value you get from that.

      """

+     dt = datetime.datetime.strptime(date, '%Y-%m-%d')

+     # if the offset is *negative* some of the events we want may be

+     # on the next day in UTC terms. if the offset is *positive* some

+     # of the events we want may be on the previous day in UTC terms.

+     # 'dt' will be at 00:00, so we subtract 1 day for prevday but add

+     # 2 days for nextday. e.g. 2018-02-15 00:00 - prevday will be

+     # 2018-02-14 00:00, nextday will be 2018-02-17 00:00. We'll get

+     # all events that occurred on 2018-02-14, 2018-02-15 or 2018-02-16

+     # in UTC time.

+     prevday = dt - datetime.timedelta(days=1)

+     nextday = dt + datetime.timedelta(days=2)

      query = session.query(

          model.PagureLog

      ).filter(

-         model.PagureLog.date == date

+         model.PagureLog.date_created.between(prevday, nextday)

      ).filter(

          model.PagureLog.user_id == user.id

      ).order_by(

          model.PagureLog.id.asc()

      )

- 

-     return query.all()

+     events = query.all()

+     # Now we filter down to the events that *really* occurred on the

+     # date we were asked for with the offset applied, and return

+     return [ev for ev in events if ev.date_offset(offset) == dt.date()]

  

  

  def log_action(session, action, obj, user_obj):

file modified
+9
@@ -2527,6 +2527,15 @@ 

  

          return desc % arg

  

+     def date_offset(self, offset):

+         '''Returns the date (as a datetime.date()) of this log entry

+         with a specified offset (in minutes) applied. Necessary if we

+         want to know what date this event occurred on in a particular

+         time zone.

+         '''

+         offsetdt = self.date_created + datetime.timedelta(minutes=int(offset))

+         return offsetdt.date()

+ 

  

  class IssueWatcher(BASE):

      """ Stores the users watching issues.

@@ -305,6 +305,8 @@ 

          $('#user-activity').hide();

        });

        var cal = new CalHeatMap();

+       var offset = new Date().getTimezoneOffset();

+       offset = -offset;

        cal.init({

          cellSize: 9,

          domain: "month",
@@ -313,7 +315,7 @@ 

          start: new Date(new Date().setMonth(new Date().getMonth() - 11)),

          data: "{{ url_for(

            'api_ns.api_view_user_activity_stats',

-           username=username, format='timestamp') }}",

+           username=username, format='timestamp') }}" + '&offset=' + offset,

          dataType: "json",

          highlight: "now",

          onClick: function(date, nb) {
@@ -323,7 +325,7 @@ 

              type: 'GET',

              url: "{{ url_for(

                'api_ns.api_view_user_activity_date',

-               username=username, date='') }}" + date + '?grouped=1',

+               username=username, date='') }}" + date + '?grouped=1&offset=' + offset,

              contentType: "application/json",

              dataType: 'json',

              success: function(data) {

@@ -468,6 +468,132 @@ 

          }

          self.assertEqual(data, exp)

  

+     @patch('pagure.lib.notify.send_email')

+     def test_api_view_user_activity_timezone_negative(self, mockemail):

+         """Test api_view_user_activity{_stats,_date} with a timezone

+         5 hours behind UTC. The activities will occur on 2018-02-15 in

+         UTC, but on 2018-02-14 in local time.

+         """

+         tests.create_projects(self.session)

+         repo = pagure.lib._get_project(self.session, 'test')

+ 

+         dateobj = datetime.datetime(2018, 2, 15, 3, 30)

+         utcdate = '2018-02-15'

+         localdate = '2018-02-14'

+         # Create a single commit log

+         log = model.PagureLog(

+             user_id=1,

+             user_email='foo@bar.com',

+             project_id=1,

+             log_type='committed',

+             ref_id='githash',

+             date=dateobj.date(),

+             date_created=dateobj

+         )

+         self.session.add(log)

+         self.session.commit()

+ 

+         # Retrieve the user's stats with no offset

+         output = self.app.get('/api/0/user/pingou/activity/stats')

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         # date in output should be UTC date

+         self.assertDictEqual(data, {utcdate: 1})

+ 

+         # Retrieve the user's stats with correct offset

+         output = self.app.get('/api/0/user/pingou/activity/stats?offset=-300')

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         # date in output should be local date

+         self.assertDictEqual(data, {localdate: 1})

+ 

+         # Retrieve the user's logs for 2018-02-15 with no offset

+         output = self.app.get(

+             '/api/0/user/pingou/activity/%s?grouped=1' % utcdate)

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         exp = {

+           "activities": [

+             {

+               "description_mk": "<p>pingou committed on test#githash</p>"

+             }

+           ],

+           "date": utcdate,

+         }

+         self.assertEqual(data, exp)

+ 

+         # Now retrieve the user's logs for 2018-02-14 with correct

+         # offset applied

+         output = self.app.get(

+             '/api/0/user/pingou/activity/%s?grouped=1&offset=-300' % localdate)

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         exp['date'] = localdate

+         self.assertEqual(data, exp)

+ 

+     @patch('pagure.lib.notify.send_email')

+     def test_api_view_user_activity_timezone_positive(self, mockemail):

+         """Test api_view_user_activity{_stats,_date} with a timezone

+         4 hours ahead of UTC. The activities will occur on 2018-02-15

+         in UTC, but on 2018-02-16 in local time.

+         """

+         tests.create_projects(self.session)

+         repo = pagure.lib._get_project(self.session, 'test')

+ 

+         dateobj = datetime.datetime(2018, 2, 15, 22, 30)

+         utcdate = '2018-02-15'

+         localdate = '2018-02-16'

+         # Create a single commit log

+         log = model.PagureLog(

+             user_id=1,

+             user_email='foo@bar.com',

+             project_id=1,

+             log_type='committed',

+             ref_id='githash',

+             date=dateobj.date(),

+             date_created=dateobj

+         )

+         self.session.add(log)

+         self.session.commit()

+ 

+         # Retrieve the user's stats with no offset

+         output = self.app.get('/api/0/user/pingou/activity/stats')

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         # date in output should be UTC date

+         self.assertDictEqual(data, {utcdate: 1})

+ 

+         # Retrieve the user's stats with correct offset

+         output = self.app.get('/api/0/user/pingou/activity/stats?offset=240')

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         # date in output should be local date

+         self.assertDictEqual(data, {localdate: 1})

+ 

+         # Retrieve the user's logs for 2018-02-15 with no offset

+         output = self.app.get(

+             '/api/0/user/pingou/activity/%s?grouped=1' % utcdate)

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         exp = {

+           "activities": [

+             {

+               "description_mk": "<p>pingou committed on test#githash</p>"

+             }

+           ],

+           "date": utcdate,

+         }

+         self.assertEqual(data, exp)

+ 

+         # Now retrieve the user's logs for 2018-02-16 with correct

+         # offset applied

+         output = self.app.get(

+             '/api/0/user/pingou/activity/%s?grouped=1&offset=240' % localdate)

+         self.assertEqual(output.status_code, 200)

+         data = json.loads(output.data)

+         exp['date'] = localdate

+         self.assertEqual(data, exp)

+ 

  

  class PagureFlaskApiUsertestrequests(tests.Modeltests):

      """ Tests for the user requests endpoints """

As discussed in #1642, there is a problem with the user activity
log page in the web UI caused by a timezone mismatch. The
library used to generate the heatmap assumes any date / time you
feed it is in the browser timezone. Pagure ultimately feeds it
the 'date' value of PagureLog instances, converted to a Unix
timestamp. This timestamp reflects 00:00 on the date in question
in the UTC timezone. If the browser timezone is behind UTC, the
heatmap library treats the event as falling on the previous
day, because in the browser timezone, that precise time falls on
the previous day.

It would be relatively easy to 'fix' this if all we wanted to do
was have the heatmap reflect what date each event occurred on in
the UTC timezone, as the activity log currently does. However, I
don't think that's the best fix. I think the best behaviour here
is for both the heatmap and the activity log to be relative to
the browser timezone. As a Pagure user in a timezone several
hours behind UTC, if I make a commit in the late afternoon of
Wednesday, I'd expect the heatmap and the activity log to show
that commit as occuring on Wednesday, not Thursday.

So, this commit dumps the use of the 'date' value in this code,
and works with the 'date_created' value instead, which is a
precise UTC date and time (as opposed to just the date relative
to UTC). We allow a timezone offset to be passed to the relevant
methods, and figure out what date the events fell on if that
offset is applied
, rather than just what date they fell on in
the UTC timezone. To facilitate this we add a custom method to
the database model, date_offset, which gives the date when an
arbitrary offset (in minutes, positive or negative) is applied.
We tweak the relevant functions in pagure.lib to get all the
necessary data from the database, then use the date_offset
method to adjust and refine the results before returning.

Fixes https://pagure.io/pagure/issue/1642

Looks good and tests are passing, so all good to me.

Thanks! :)

Pull-Request has been merged by pingou

6 years ago

Just for the record, my explanation of what the old code did was slightly wrong in one regard. When producing a timestamp to feed to the heatmap library, Pagure did not in fact always use 'the timestamp for 00:00 UTC on the date stored in the database'. It used 'the timestamp for 00:00 server local time on the date stored in the database'. I think the local time of the main 'real world' production servers (this one, and the Fedora package one) is UTC, but if you were using a local test server, this would affect its behaviour a bit.

So the key factor in determining whether the heatmap would show the event on a different date from the one stored in the database (and shown in the activity log) was the difference between the client's browser timezone and the server's local time, not the difference between the client's browser timezone and UTC.

This is just a note for the record, though, it doesn't mean the fix was wrong in any way or require any further changes. AFAICT, we are now not using this 'date' value in the database model at all, as really it's fundamentally problematic. In fact it might be a good idea to get rid of it, or at least put large warning signs around it.

Bonus note! The reason for the above is I think to do with how datetime.Date objects work. This is the type Pagure uses for the PagureLog.date database model attribute; it effectively gets set to 'a datetime.Date instance for the date when the event happened in UTC timezone'.

Dates are inherently naive - they cannot have a timezone. So while we use 'the current date in UTC timezone' to create it, the fact that the date was 'relative' to UTC then immediately gets lost. All the information that is really stored in the database is the year, month and date.

When creating the timestamp to send to the heatmap library, the old code got the date from the database as d (sqlalchemy would provide it as a Date instance again) and called d.strftime('%s'). Python documents that when calling strftime on a Date instance, "Format codes referring to hours, minutes or seconds will see 0 values." It doesn't explicitly state what it does about the timezone, but clearly it just uses the system's local timezone, so you get a timestamp for 00:00 on the date in question in the system's local time. This does seem like the 'natural' thing for it to do, given that the Date instance cannot have a 'native' timezone.