#366 [frontend] use cached data for small graph of usage
Merged 5 years ago by clime. Opened 5 years ago by dturecek.
copr/ dturecek/copr graphs  into  master

@@ -97,24 +97,6 @@ 

          return result

  

      @classmethod

-     def get_running_tasks_from_last_day(cls):

-         end = int(time.time())

-         start = end - 86399

-         step = 3600

-         tasks = cls.get_running_tasks_by_time(start, end)

-         steps = int(round((end - start) / step + 0.5))

-         current_step = 0

- 

-         data = [[0] * (steps + 1)]

-         data[0][0] = ''

-         for t in tasks:

-             task = t.to_dict()

-             while task['started_on'] > start + step * (current_step + 1):

-                 current_step += 1

-             data[0][current_step + 1] += 1

-         return data

- 

-     @classmethod

      def get_chroot_histogram(cls, start, end):

          chroots = []

          chroot_query = BuildChroot.query\
@@ -137,81 +119,146 @@ 

          return chroots

  

      @classmethod

-     def get_tasks_histogram(cls, type, start, end, step):

-         start = start - (start % step) # align graph interval to a multiple of step

-         end = end - (end % step)

-         steps = int((end - start) / step + 0.5)

-         data = [['pending'], ['running'], ['avg running'], ['time']]

+     def get_pending_jobs_bucket(cls, start, end):

+         query = text("""

+             SELECT COUNT(*) as result

+             FROM build_chroot JOIN build on build.id = build_chroot.build_id

+             WHERE

+                 build.submitted_on < :end

+                 AND (

+                     build_chroot.started_on > :start

+                     OR (build_chroot.started_on is NULL AND build_chroot.status = :status)

+                     -- for currently pending builds we need to filter on status=pending because there might be

+                     -- failed builds that have started_on=NULL

+                 )

+                 AND NOT build.canceled

+         """)

+ 

+         res = db.engine.execute(query, start=start, end=end, status=StatusEnum("pending"))

+         return res.first().result

  

+     @classmethod

+     def get_running_jobs_bucket(cls, start, end):

+         query = text("""

+             SELECT COUNT(*) as result

+             FROM build_chroot

+             WHERE

+                 started_on < :end

+                 AND (ended_on > :start OR (ended_on is NULL AND status = :status))

+                 -- for currently running builds we need to filter on status=running because there might be failed

+                 -- builds that have ended_on=NULL

+         """)

+ 

+         res = db.engine.execute(query, start=start, end=end, status=StatusEnum("running"))

+         return res.first().result

+ 

+     @classmethod

+     def get_cached_graph_data(cls, params):

+         data = {

+             "pending": [],

+             "running": [],

+         }

          result = models.BuildsStatistics.query\

-             .filter(models.BuildsStatistics.stat_type == type)\

-             .filter(models.BuildsStatistics.time >= start)\

-             .filter(models.BuildsStatistics.time <= end)\

+             .filter(models.BuildsStatistics.stat_type == params["type"])\

+             .filter(models.BuildsStatistics.time >= params["start"])\

+             .filter(models.BuildsStatistics.time <= params["end"])\

              .order_by(models.BuildsStatistics.time)

  

          for row in result:

-             data[0].append(row.pending)

-             data[1].append(row.running)

- 

-         for i in range(len(data[0]) - 1, steps):

-             step_start = start + i * step

-             step_end = step_start + step

- 

-             query_pending = text("""

-                 SELECT COUNT(*) as pending

-                 FROM build_chroot JOIN build on build.id = build_chroot.build_id

-                 WHERE

-                     build.submitted_on < :end

-                     AND (

-                         build_chroot.started_on > :start

-                         OR (build_chroot.started_on is NULL AND build_chroot.status = :status)

-                         -- for currently pending builds we need to filter on status=pending because there might be

-                         -- failed builds that have started_on=NULL

-                     )

-                     AND NOT build.canceled

-             """)

- 

-             query_running = text("""

-                 SELECT COUNT(*) as running

-                 FROM build_chroot

-                 WHERE

-                     started_on < :end

-                     AND (ended_on > :start OR (ended_on is NULL AND status = :status))

-                     -- for currently running builds we need to filter on status=running because there might be failed

-                     -- builds that have ended_on=NULL

-             """)

- 

-             res_pending = db.engine.execute(query_pending, start=step_start, end=step_end,

-                                             status=StatusEnum('pending'))

-             res_running = db.engine.execute(query_running, start=step_start, end=step_end,

-                                             status=StatusEnum('running'))

- 

-             pending = res_pending.first().pending

-             running = res_running.first().running

+             data["pending"].append(row.pending)

+             data["running"].append(row.running)

+ 

+         return data

+ 

+     @classmethod

+     def get_task_graph_data(cls, type):

+         data = [["pending"], ["running"], ["avg running"], ["time"]]

+         params = cls.get_graph_parameters(type)

+         cached_data = cls.get_cached_graph_data(params)

+         data[0].extend(cached_data["pending"])

+         data[1].extend(cached_data["running"])

+ 

+         for i in range(len(data[0]) - 1, params["steps"]):

+             step_start = params["start"] + i * params["step"]

+             step_end = step_start + params["step"]

+             pending = cls.get_pending_jobs_bucket(step_start, step_end)

+             running = cls.get_running_jobs_bucket(step_start, step_end)

              data[0].append(pending)

              data[1].append(running)

- 

-             statistic = models.BuildsStatistics(

-                 time = step_start,

-                 stat_type = type,

-                 running = running,

-                 pending = pending

-             )

-             db.session.merge(statistic)

-             db.session.commit()

+             cls.cache_graph_data(type, time=step_start, pending=pending, running=running)

  

          running_total = 0

-         for i in range(1, steps + 1):

+         for i in range(1, params["steps"] + 1):

              running_total += data[1][i]

  

-         data[2].extend([running_total * 1.0 / steps] * (len(data[0]) - 1))

+         data[2].extend([running_total * 1.0 / params["steps"]] * (len(data[0]) - 1))

  

-         for i in range(start, end, step):

+         for i in range(params["start"], params["end"], params["step"]):

              data[3].append(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(i)))

  

          return data

  

      @classmethod

+     def get_small_graph_data(cls, type):

+         data = [[""]]

+         params = cls.get_graph_parameters(type)

+         cached_data = cls.get_cached_graph_data(params)

+         data[0].extend(cached_data["running"])

+ 

+         for i in range(len(data[0]) - 1, params["steps"]):

+             step_start = params["start"] + i * params["step"]

+             step_end = step_start + params["step"]

+             running = cls.get_running_jobs_bucket(step_start, step_end)

+             data[0].append(running)

+             cls.cache_graph_data(type, time=step_start, running=running)

+ 

+         return data

+ 

+     @classmethod

+     def cache_graph_data(cls, type, time, pending=0, running=0):

+         result = models.BuildsStatistics.query\

+                 .filter(models.BuildsStatistics.stat_type == type)\

+                 .filter(models.BuildsStatistics.time == time).first()

+         if result:

+             return

+ 

+         cached_data = models.BuildsStatistics(

+             time = time,

+             stat_type = type,

+             running = running,

+             pending = pending

+         )

+         db.session.merge(cached_data)

+         db.session.commit()

+ 

+     @classmethod

+     def get_graph_parameters(cls, type):

+         if type is "10min":

+             # 24 hours with 10 minute intervals

+             step = 600

+             steps = 144

+         elif type is "30min":

+             # 24 hours with 30 minute intervals

+             step = 1800

+             steps = 48

+         elif type is "24h":

+             # 90 days with 24 hour intervals

+             step = 86400

+             steps = 90

+ 

+         end = int(time.time())

+         end = end - (end % step) # align graph interval to a multiple of step

+         start = end - (steps * step)

+ 

+         return {

+             "type": type,

+             "step": step,

+             "steps": steps,

+             "start": start,

+             "end": end,

+         }

+ 

+     @classmethod

      def get_build_importing_queue(cls, background=None):

          """

          Returns Builds which are waiting to be uploaded to dist git

@@ -20,11 +20,7 @@ 

          color: {pattern: colorPattern},

          data: {

              hide: ['avg running'],

-             types: {

-                 'avg-running': 'line',

-                 'running': 'area',

-                 'pending': 'area',

-             },

+             type: 'line',

Is this expected? I don't mind in particular, I just noted that the graph design has been changed.

              x: 'time',

              xFormat: '%Y-%m-%d %H:%M:%S'

          },

@@ -79,7 +79,7 @@ 

      # users_builds = builds_logic.BuildsLogic.get_recent_tasks(flask.g.user, 5)

      users_builds = builds_logic.BuildsLogic.get_recent_tasks(None, 4)

  

-     data = builds_logic.BuildsLogic.get_running_tasks_from_last_day()

+     data = builds_logic.BuildsLogic.get_small_graph_data('30min')

  

      return flask.render_template("coprs/show/all.html",

                                   coprs=coprs,
@@ -108,7 +108,7 @@ 

      # flask.g.user is none when no user is logged - showing builds from everyone

      users_builds = builds_logic.BuildsLogic.get_recent_tasks(flask.g.user, 4)

  

-     data = builds_logic.BuildsLogic.get_running_tasks_from_last_day()

+     data = builds_logic.BuildsLogic.get_small_graph_data('30min')

  

      return flask.render_template("coprs/show/user.html",

                                   user=user,
@@ -133,7 +133,7 @@ 

      paginator = helpers.Paginator(query, query.count(), page,

                                    additional_params={"fulltext": fulltext})

  

-     data = builds_logic.BuildsLogic.get_running_tasks_from_last_day()

+     data = builds_logic.BuildsLogic.get_small_graph_data('30min')

  

      coprs = paginator.sliced_query

      return render_template("coprs/show/fulltext.html",

@@ -66,7 +66,7 @@ 

  

      coprs = paginator.sliced_query

  

-     data = builds_logic.BuildsLogic.get_running_tasks_from_last_day()

+     data = builds_logic.BuildsLogic.get_small_graph_data('30min')

  

      return render_template(

          "coprs/show/group.html",

@@ -39,8 +39,8 @@ 

      curr_time = int(time())

      chroots_24h = builds_logic.BuildsLogic.get_chroot_histogram(curr_time - 86400, curr_time)

      chroots_90d = builds_logic.BuildsLogic.get_chroot_histogram(curr_time - 90*86400, curr_time)

-     data_24h = builds_logic.BuildsLogic.get_tasks_histogram('10min', curr_time - 86400, curr_time, 600)

-     data_90d = builds_logic.BuildsLogic.get_tasks_histogram('24h', curr_time - 90*86400, curr_time, 86400)

+     data_24h = builds_logic.BuildsLogic.get_task_graph_data('10min')

+     data_90d = builds_logic.BuildsLogic.get_task_graph_data('24h')

  

      return flask.render_template("status/stats.html",

                                   data1=data_24h,

@@ -7,7 +7,7 @@ 

  

  

  def render_user_info(user):

-     graph = BuildsLogic.get_running_tasks_from_last_day()

+     graph = BuildsLogic.get_small_graph_data('30min')

      return flask.render_template("user_info.html",

                                   user=user,

                                   tasks_info=ComplexLogic.get_queue_sizes(),

@@ -485,9 +485,9 @@ 

      """

  

      def run(self):

-         curr_time = int(time.time())

-         builds_logic.BuildsLogic.get_tasks_histogram('10min', curr_time - 86599, curr_time, 600)

-         builds_logic.BuildsLogic.get_tasks_histogram('24h', curr_time - 90*86400, curr_time, 86400)

+         builds_logic.BuildsLogic.get_task_graph_data('10min')

+         builds_logic.BuildsLogic.get_small_graph_data('30min')

+         builds_logic.BuildsLogic.get_task_graph_data('24h')

  

  

  class RemoveGraphsDataCommand(Command):
@@ -501,6 +501,8 @@ 

              and_(models.BuildsStatistics.time < curr_time - 91 * 86400,

                   models.BuildsStatistics.stat_type == '24h'),

              and_(models.BuildsStatistics.time < curr_time - 87000,

+                  models.BuildsStatistics.stat_type == '30min'),

+             and_(models.BuildsStatistics.time < curr_time - 87000,

                   models.BuildsStatistics.stat_type == '10min')

          )).delete()

          db.session.commit()

This fixes Issue 356 where the small graph shows incorrect data.

I changed it so now the data is cached the same way the graphs on the graph page do (minus pending builds).

I was a little unsure about the number of samples in the graph. Originally, there was one sample per hour, but I think I like two per hour a bit more.

The code would read slightly better if you made two methods: one that returns the query for pending jobs and another that returns the query for running jobs. The methods should be named accordingly. There is less indentation and better separation of the individual code chunks. If you name the methods e.g. get_running_jobs_bucket and get_pending_jobs_bucket, that will also increase the readability of the queries.

Those if running_only switches in the code are just...not good. This whole method would deserve refactoring into smaller chunks: one that handles the running_only case and a second that handles the full case. The pieces of code that would be shared across those methods should then be moved to another shared method. Actually, step_size and steps should be defined as constants somewhere outside of the function (in a dict). models.BuildsStatistics.query should be moved to a separate method. start and end should be passed from an outer scope because that makes for a more general base method (one that can shift the time window being calculated/displayed). You can think about the small graph as a special case (one that doesn't fit that well with the logic for the big graphs); it's therefore OK to have a separate path in the code for it. But the idea to share the bits that are possible to share was correct.

So, you fixed the issue #356. Cool.

You did some refactoring at the same time...that was unnecessary.

And also you mashed the code for the small and big graphs a bit too much.

Try to make more smaller method and name them properly.

You can think about the think

  • you can think about the small graph

rebased onto 199b544

5 years ago

I've refactored the code. In addition, this should solve Issue#376.

Also, I've removed the colored area under the graph lines as we discussed a while back.

This variable should be called 'result' instead of 'query'.

Thank you. This is much more reader-friendly code than the prev version.

+1 for merging.

3 new commits added

  • [frontend] don't fill the area below the graph lines
  • [frontend] refactor code for graph data generation
  • [frontend] use cached data for small graph of usage
5 years ago

Thanks. I've just renamed the query variable to result.

Is this expected? I don't mind in particular, I just noted that the graph design has been changed.

Yes, we've discussed this a while back that the graphs on the statistics page would look better without filling the areas below the graph lines. I thought this PR might be a good place to change it.

Pull-Request has been merged by clime

5 years ago