#334 [frontend] make graphs load faster using caching
Merged 5 years ago by clime. Opened 5 years ago by dturecek.
copr/ dturecek/copr graphs  into  master

@@ -0,0 +1,3 @@ 

+ #!/usr/bin/sh

+ 

+ runuser -c '/usr/share/copr/coprs_frontend/manage.py vacuum_graphs' - copr-fe

@@ -3,3 +3,4 @@ 

  runuser -c '/usr/share/copr/coprs_frontend/run/check_for_anitya_version_updates.py --backend pypi --delta=172800 &> /dev/null' - copr-fe

  runuser -c '/usr/share/copr/coprs_frontend/run/check_for_anitya_version_updates.py --backend rubygems --delta=172800 &> /dev/null' - copr-fe

  runuser -c '/usr/share/copr/coprs_frontend/manage.py update_indexes_quick 120 &> /dev/null' - copr-fe

+ runuser -c '/usr/share/copr/coprs_frontend/manage.py update_graphs' - copr-fe

file modified
+4 -1
@@ -49,7 +49,7 @@ 

  # git clone https://pagure.io/copr/copr.git

  # git checkout {{{ cached_git_name_version }}}

  # cd copr/frontend

- # rpkg spec --sources 

+ # rpkg spec --sources

  Source0:    {{{ git_dir_pack }}}

  

  BuildArch:  noarch
@@ -218,10 +218,12 @@ 

  install -d %{buildroot}%{_sharedstatedir}/copr/data/whooshee/copr_user_whoosheer

  install -d %{buildroot}%{_sharedstatedir}/copr/data/srpm_storage

  install -d %{buildroot}%{_sysconfdir}/cron.hourly

+ install -d %{buildroot}%{_sysconfdir}/cron.daily

  install -d %{buildroot}/%{_bindir}

  install -d %{buildroot}%{_unitdir}

  

  install -p -m 755 conf/cron.hourly/copr-frontend %{buildroot}%{_sysconfdir}/cron.hourly/copr-frontend

+ install -p -m 755 conf/cron.daily/copr-frontend %{buildroot}%{_sysconfdir}/cron.daily/copr-frontend

  

  cp -a coprs_frontend/* %{buildroot}%{_datadir}/copr/coprs_frontend

  sed -i "s/__RPM_BUILD_VERSION/%{version}-%{release}/" %{buildroot}%{_datadir}/copr/coprs_frontend/coprs/templates/layout.html
@@ -321,6 +323,7 @@ 

  %config(noreplace)  %{_sysconfdir}/copr/copr_unit_test.conf

  

  %config(noreplace) %attr(0755, root, root) %{_sysconfdir}/cron.hourly/copr-frontend

+ %config(noreplace) %attr(0755, root, root) %{_sysconfdir}/cron.daily/copr-frontend

  %exclude_files flavor

  %exclude_files devel

  

@@ -0,0 +1,34 @@ 

+ """add table for graph statistics

+ 

+ Revision ID: acac8d3ae868

+ Revises: 10029c92dd0d

+ Create Date: 2018-06-28 11:39:43.913783

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = 'acac8d3ae868'

+ down_revision = '10029c92dd0d'

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

+ 

+ def upgrade():

+     """Create the builds_statistics cache table and index build_chroot.started_on."""

+     # ### commands auto generated by Alembic - please adjust! ###

+     # Composite PK (time, stat_type): one cached sample per interval start
+     # and per statistic granularity.

+     op.create_table('builds_statistics',

+     sa.Column('time', sa.Integer(), nullable=False),

+     sa.Column('stat_type', sa.Text(), nullable=False),

+     sa.Column('running', sa.Integer(), nullable=True),

+     sa.Column('pending', sa.Integer(), nullable=True),

+     sa.PrimaryKeyConstraint('time', 'stat_type')

+     )

+     # started_on is queried by the graph-data SQL; ended_on is already indexed.

+     op.create_index(op.f('ix_build_chroot_started_on'), 'build_chroot', ['started_on'], unique=False)

+     # ### end Alembic commands ###

+ 

+ 

+ def downgrade():

+     """Drop the builds_statistics table and the build_chroot.started_on index."""

+     # ### commands auto generated by Alembic - please adjust! ###

+     op.drop_index(op.f('ix_build_chroot_started_on'), table_name='build_chroot')

+     op.drop_table('builds_statistics')

+     # ### end Alembic commands ###

@@ -9,6 +9,7 @@ 

  from sqlalchemy.sql import text

  from sqlalchemy import or_

  from sqlalchemy import and_

+ from sqlalchemy import func

  from sqlalchemy.orm import joinedload

  from sqlalchemy.orm.exc import NoResultFound

  from sqlalchemy.sql import false,true
@@ -84,14 +85,6 @@ 

          return list(query.all()[:4])

  

      @classmethod

-     def get_tasks_by_time(cls, start, end):

-         result = models.BuildChroot.query.join(models.Build)\

-             .filter(models.BuildChroot.ended_on >= start)\

-             .filter(models.Build.submitted_on <= end)\

-             .order_by(models.Build.id.asc())

-         return result

- 

-     @classmethod

      def get_running_tasks_by_time(cls, start, end):

          result = models.BuildChroot.query\

              .filter(models.BuildChroot.ended_on > start)\
@@ -119,6 +112,111 @@ 

          return data

  

      @classmethod

+     def get_chroot_histogram(cls, start, end):

+         """
+         Return a list of [chroot, count] pairs for build chroots that were
+         active (started before `end` and ended after `start`) in the given
+         unix-timestamp interval, ordered by mock_chroot_id.
+
+         `chroot` is the mock chroot's name when a matching MockChroot row
+         exists; otherwise the numeric mock_chroot_id is left in place.
+         """

+         chroots = []

+         chroot_query = BuildChroot.query\

+             .filter(models.BuildChroot.started_on < end)\

+             .filter(models.BuildChroot.ended_on > start)\

+             .with_entities(BuildChroot.mock_chroot_id,

+                            func.count(BuildChroot.mock_chroot_id))\

+             .group_by(BuildChroot.mock_chroot_id)\

+             .order_by(BuildChroot.mock_chroot_id)

+ 

+         # Materialize as mutable [id, count] lists so ids can be replaced
+         # by names below.

+         for chroot in chroot_query:

+             chroots.append([chroot[0], chroot[1]])

+ 

+         # Translate mock_chroot ids to human-readable names for the graph
+         # legend (O(n*m) scan; both lists are small in practice).

+         mock_chroots = coprs_logic.MockChrootsLogic.get_multiple()

+         for mock_chroot in mock_chroots:

+             for l in chroots:

+                 if l[0] == mock_chroot.id:

+                     l[0] = mock_chroot.name

+ 

+         return chroots

+ 

+     @classmethod

+     def get_tasks_histogram(cls, type, start, end, step):

+         """
+         Return columns for the pending/running tasks graph over
+         [start, end] (unix timestamps) sampled every `step` seconds.
+
+         `type` is the cache granularity key stored in BuildsStatistics
+         ('10min' or '24h' at the call sites).  Samples already cached in
+         builds_statistics are reused; only the missing trailing steps are
+         computed with raw SQL and then written back to the cache.
+
+         Returns ``data`` as four c3.js-style columns, each headed by its
+         label: [['pending', ...], ['running', ...], ['avg running', ...],
+         ['time', ...]].
+
+         NOTE(review): the `type` parameter shadows the builtin.
+         """

+         start = start - (start % step) # align graph interval to a multiple of step

+         end = end - (end % step)

+         steps = int((end - start) / step + 0.5)

+         data = [['pending'], ['running'], ['avg running'], ['time']]

+ 

+         # Cached samples for this granularity, oldest first, so they line
+         # up with the step grid computed above.

+         result = models.BuildsStatistics.query\

+             .filter(models.BuildsStatistics.stat_type == type)\

+             .filter(models.BuildsStatistics.time >= start)\

+             .filter(models.BuildsStatistics.time <= end)\

+             .order_by(models.BuildsStatistics.time)

+ 

+         for row in result:

+             data[0].append(row.pending)

+             data[1].append(row.running)

+ 

+         # Compute only the steps not covered by the cache (assumes cached
+         # rows form a contiguous prefix of the interval -- TODO confirm).

+         for i in range(len(data[0]) - 1, steps):

+             step_start = start + i * step

+             step_end = step_start + step

+ 

+             # Tasks submitted but not yet started during this step; the
+             # final NOT(...) clause excludes tasks whose whole pending
+             # phase falls outside the step window.

+             query_pending = text("""

+                 SELECT COUNT(*) as pending

+                 FROM build_chroot JOIN build on build.id = build_chroot.build_id

+                 WHERE

+                     (

+                         build_chroot.started_on > :start

+                         OR build_chroot.started_on is NULL

+                     )

+                     AND build.submitted_on < :end

+                     AND (

+                         build_chroot.ended_on > :start

+                         OR build_chroot.ended_on IS NULL

+                     )

+                     AND NOT ((build.submitted_on NOT BETWEEN :start and :end)

+                         AND (build_chroot.started_on NOT BETWEEN :start AND :end)

+                         AND (build.submitted_on >= :start OR build_chroot.started_on <= :end)

+                     )

+             """)

+ 

+             # Tasks running at some point within this step; a NULL ended_on
+             # counts only while the chroot status is still 'running'.

+             query_running = text("""

+                 SELECT COUNT(*) as running

+                 FROM build_chroot

+                 WHERE

+                     started_on < :end

+                     AND (ended_on > :start OR ended_on IS NULL)

+                     AND (

+                         (started_on BETWEEN :start AND :end)

+                         OR (ended_on BETWEEN :start AND :end)

+                         OR (started_on < :start

+                             AND (ended_on > :end OR (ended_on IS NULL AND status = :status)))

+                     )

+                 """)

+ 

+             res_pending = db.engine.execute(query_pending, start=step_start, end=step_end)

+             res_running = db.engine.execute(query_running, start=step_start, end=step_end,

+                                             status=helpers.StatusEnum('running'))

+ 

+             pending = res_pending.first().pending

+             running = res_running.first().running

+             data[0].append(pending)

+             data[1].append(running)

+ 

+             # merge() upserts on the (time, stat_type) primary key so
+             # recomputing an interval overwrites the cached sample.
+             # NOTE(review): committing inside the loop is one transaction
+             # per step -- presumably so partial progress survives a crash;
+             # confirm this is intentional.

+             statistic = models.BuildsStatistics(

+                 time = step_start,

+                 stat_type = type,

+                 running = running,

+                 pending = pending

+             )

+             db.session.merge(statistic)

+             db.session.commit()

+ 

+         # Average of the 'running' column over the graph interval, repeated
+         # for every sample so c3 draws it as a horizontal line.

+         running_total = 0

+         for i in range(1, steps + 1):

+             running_total += data[1][i]

+ 

+         data[2].extend([running_total * 1.0 / steps] * (len(data[0]) - 1))

+ 

+         # X-axis labels: one UTC timestamp per step start.

+         for i in range(start, end, step):

+             data[3].append(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(i)))

+ 

+         return data

+ 

+     @classmethod

      def get_build_importing_queue(cls, background=None):

          """

          Returns Builds which are waiting to be uploaded to dist git

@@ -961,7 +961,7 @@ 

      git_hash = db.Column(db.String(40))

      status = db.Column(db.Integer, default=StatusEnum("waiting"))

  

-     started_on = db.Column(db.Integer)

+     started_on = db.Column(db.Integer, index=True)

      ended_on = db.Column(db.Integer, index=True)

  

      # directory name on backend with build results
@@ -1217,3 +1217,10 @@ 

          Return text representation of status of this build

          """

          return helpers.ModuleStatusEnum(self.status)

+ 

+ 

+ class BuildsStatistics(db.Model):

+     """
+     Cached sample of the pending/running build counts used by the
+     /status/stats/ graphs, keyed by interval start and granularity.
+     """

+     # Unix timestamp of the sampled interval's start (part of the PK).

+     time = db.Column(db.Integer, primary_key=True)

+     # Granularity key; '10min' and '24h' are the values used by callers.

+     stat_type = db.Column(db.Text, primary_key=True)

+     # Number of build chroots running during the interval.

+     running = db.Column(db.Integer)

+     # Number of build chroots pending during the interval.

+     pending = db.Column(db.Integer)

@@ -34,10 +34,6 @@ 

          point: {r: 2.5},

          tooltip: {

              format: {

-                 title: function(d) {

-                     var a = d.toString().substring(0, 25) + 'UTC';

-                     return a;

-                 },

                  value: function(value, ratio, id) {

                      if (id === 'avg running') return value.toFixed(2);

                      else return value;
@@ -58,6 +54,16 @@ 

      chart.bindto = bind;

      chart.color.pattern = ['#0088ce', '#cc8844', '#cc0000'];

      chart.data.columns = data;

+     if (format === '%Y-%m-%d')

+ 	chart.tooltip.format.title = function(d) {

+ 	    var a = d.toString().substring(0, 15);

+ 	    return a;

+ 	}

+     if (format === '%H:%M')

+ 	chart.tooltip.format.title = function(d) {

+ 	    var a = d.toString().substring(16, 25) + '(UTC)';

+ 	    return a;

+ 	}

      var chartDay = c3.generate(chart);

  };

  

@@ -1,65 +1,11 @@ 

  import flask

- import time

+ from time import time

  

  from coprs.views.status_ns import status_ns

- from coprs.logic import builds_logic, coprs_logic

+ from coprs.logic import builds_logic

  from coprs import helpers

  

  

- def get_graph_data(start, end, step):

-     chroots_dict = {}

-     chroots = []

-     chroot_names = {}

-     tasks = builds_logic.BuildsLogic.get_tasks_by_time(start, end)

-     running_tasks = builds_logic.BuildsLogic.get_running_tasks_by_time(start, end)

-     steps = int(round((end - start) / step + 0.5))

-     current_step = 0

- 

-     data = [[0] * (steps + 1), [0] * (steps + 1), [1.0 * running_tasks.count() / steps] * (steps + 1), [0] * (steps + 1)]

-     data[0][0] = 'pending'

-     data[1][0] = 'running'

-     data[2][0] = 'avg running'

-     data[3][0] = 'time'

- 

-     for t in tasks:

-         task = t.to_dict()

-         started = task['started_on'] if task['started_on'] else end

-         ended = task['ended_on'] if task['ended_on'] else end

- 

-         start_cell = int(round((started - start) / step + 0.5))

-         end_cell  = int(round((ended - start) / step + 0.5))

-         submitted_cell = int(round((t.build.submitted_on - start) / step + 0.5))

- 

-         # pending tasks

-         for i in range(max(1, submitted_cell), max(1, start_cell) + 1):

-             if i <= steps:

-                 data[0][i] += 1

- 

-         # running tasks

-         for i in range(max(1, start_cell), end_cell + 1):

-             if i <= steps:

-                 data[1][i] += 1

- 

-         if task['mock_chroot_id'] not in chroots_dict:

-             chroots_dict[task['mock_chroot_id']] = 1

-         else:

-             chroots_dict[task['mock_chroot_id']] += 1

- 

-     for i in range(0, steps):

-         data[3][i + 1] = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(start + (i * step)))

- 

-     for key in chroots_dict:

-         chroots.append([key, chroots_dict[key]])

- 

-     mock_chroots = coprs_logic.MockChrootsLogic.get_multiple()

-     for mock_chroot in mock_chroots:

-         for l in chroots:

-             if l[0] == mock_chroot.id:

-                 l[0] = mock_chroot.name

- 

-     return data, chroots

- 

- 

  @status_ns.route("/")

  @status_ns.route("/pending/")

  def pending():
@@ -90,11 +36,14 @@ 

  

  @status_ns.route("/stats/")

  def stats():

-     current_time = int(time.time()) - int(time.time()) % 600

-     data1, chroots1 = get_graph_data(current_time - 86400, current_time - 1, 600) # last 24 hours

-     data2, chroots2 = get_graph_data(current_time - 86400 * 90, current_time - 1, 86400) # last 90 days

+     curr_time = int(time())

+     chroots_24h = builds_logic.BuildsLogic.get_chroot_histogram(curr_time - 86400, curr_time)

+     chroots_90d = builds_logic.BuildsLogic.get_chroot_histogram(curr_time - 90*86400, curr_time)

+     data_24h = builds_logic.BuildsLogic.get_tasks_histogram('10min', curr_time - 86400, curr_time, 600)

+     data_90d = builds_logic.BuildsLogic.get_tasks_histogram('24h', curr_time - 90*86400, curr_time, 86400)

+ 

      return flask.render_template("status/stats.html",

-                                  data1=data1,

-                                  data2=data2,

-                                  chroots1=chroots1,

-                                  chroots2=chroots2)

+                                  data1=data_24h,

+                                  data2=data_90d,

+                                  chroots1=chroots_24h,

+                                  chroots2=chroots_90d)

@@ -18,7 +18,7 @@ 

  from coprs.logic import coprs_logic, packages_logic, actions_logic, builds_logic, users_logic

  from coprs.views.misc import create_user_wrapper

  from coprs.whoosheers import CoprWhoosheer

- from sqlalchemy import or_

+ from sqlalchemy import and_, or_

  from coprs.helpers import chroot_to_branch,StatusEnum

  

  
@@ -461,6 +461,33 @@ 

          writer.commit()

  

  

+ class UpdateGraphsDataCommand(Command):

+     """

+     Generates newest graph data.

+ 

+     Pre-fills the BuildsStatistics cache so the stats page does not have

+     to compute histograms on first access; installed as the hourly cron

+     job (manage.py update_graphs).

+     """

+ 

+     def run(self):

+         curr_time = int(time.time())

+         # 86599 is slightly over 24 hours (86400 + 199 s) -- presumably to
+         # overlap the previous run's window; TODO confirm the intent.

+         builds_logic.BuildsLogic.get_tasks_histogram('10min', curr_time - 86599, curr_time, 600)

+         builds_logic.BuildsLogic.get_tasks_histogram('24h', curr_time - 90*86400, curr_time, 86400)

+ 

+ class RemoveGraphsDataCommand(Command):

+     """

+     Removes old cached graph data that is no longer used.

+ 

+     Installed as the daily cron job (manage.py vacuum_graphs).  Keeps 91

+     days of '24h' samples (the graph shows 90 days) and 87000 seconds of

+     '10min' samples (the graph shows the last 24 hours).

+     """

+ 

+     def run(self):

+         curr_time = int(time.time())

+         models.BuildsStatistics.query.filter(or_(

+             and_(models.BuildsStatistics.time < curr_time - 91 * 86400,

+                  models.BuildsStatistics.stat_type == '24h'),

+             and_(models.BuildsStatistics.time < curr_time - 87000,

+                  models.BuildsStatistics.stat_type == '10min')

+         )).delete()

+         db.session.commit()

+ 

+ 

  manager = Manager(app)

  manager.add_command("test", TestCommand())

  manager.add_command("create_sqlite_file", CreateSqliteFileCommand())
@@ -478,6 +505,8 @@ 

  manager.add_command("update_indexes_quick", UpdateIndexesQuickCommand())

  manager.add_command("rawhide_to_release", RawhideToReleaseCommand())

  manager.add_command("backend_rawhide_to_release", BackendRawhideToReleaseCommand())

+ manager.add_command("update_graphs", UpdateGraphsDataCommand())

+ manager.add_command("vacuum_graphs", RemoveGraphsDataCommand())

  

  if __name__ == "__main__":

      manager.run()

We tried to optimize the SQL query for graph data generation. However, it still takes way too much time -- for this reason, I created a db table for caching already generated data.

In addition, this PR adds an hourly cron job that generates graph data to prevent long loading times when no one has accessed the graph page for some time, and a daily cron job to delete old cached data that is no longer used in the graphs.

I have also slightly changed the mouseover labels in the graphs so that the 24h one shows only time and the 90d shows only date, and fixed the data for pending builds that were previously shown incorrectly.

5 new commits added

  • [frontend] improve code readability of graph data generation
  • [frontend] add script to update cached graph data
  • [frontend] use cached graph statistics
  • [frontend] change graph label format
  • [frontend] add db table for storing cached graph data
5 years ago

Looks good! The update graph script could have been part of manage.py but like this it's also acceptable.

+1

3 new commits added

  • [frontend] don't use sql.format() for passing values to sql queries
  • [frontend] move updating/removing graph data to manage.py
  • [frontend] use sqlalchemy for some graph data queries
5 years ago

The update graph script could have been part of manage.py ...

I've moved the graph update/remove code over there.

The update graph script could have been part of manage.py ...

I've moved the graph update/remove code over there.

Ok, well. I can see you did quite a lot more than that. It would be better to react to specific remarks here in the PR.

Anyway,

manage.py remove_graphs

is not very intuitive. Can you please change it to e.g. manage.py vacuum_graphs?

1 new commit added

  • [frontend] rename command to remove old graph data to vacuum_graphs
5 years ago

I agree it wasn't a very descriptive name, so I changed it to vacuum_graphs.

Pull-Request has been merged by clime

5 years ago