#1160 Faster package-list/build-list queries
Merged 4 years ago by praiskup. Opened 4 years ago by praiskup.
Unknown source faster-queries  into  master

@@ -0,0 +1,58 @@

+ """

+ drop unused PG-only DB functions

+ 

+ Revision ID: d230af5e05d8

+ Revises: 4ed794df3bbb

+ Create Date: 2020-01-07 20:42:39.467075

+ """

+ 

+ import sqlalchemy as sa

+ from alembic import op

+ 

+ 

+ revision = 'd230af5e05d8'

+ down_revision = '4ed794df3bbb'

+ 

+ def upgrade():

+     query_functions = """

+     DROP FUNCTION status_to_order;

+     DROP FUNCTION order_to_status;

+     """

+     op.execute(sa.text(query_functions))

+ 

+ def downgrade():

+     # copy from 465202bfb9ce_update_db_functions.py

+     query_functions = """

+ CREATE OR REPLACE FUNCTION status_to_order (x integer)

+ RETURNS integer AS $$ BEGIN

+         RETURN CASE WHEN x = 3 THEN 1

+                     WHEN x = 6 THEN 2

+                     WHEN x = 7 THEN 3

+                     WHEN x = 4 THEN 4

+                     WHEN x = 0 THEN 5

+                     WHEN x = 1 THEN 6

+                     WHEN x = 5 THEN 7

+                     WHEN x = 2 THEN 8

+                     WHEN x = 8 THEN 9

+                     WHEN x = 9 THEN 10

+                ELSE x

+         END; END;

+     $$ LANGUAGE plpgsql;

+ 

+ CREATE OR REPLACE FUNCTION order_to_status (x integer)

+ RETURNS integer AS $$ BEGIN

+         RETURN CASE WHEN x = 1 THEN 3

+                     WHEN x = 2 THEN 6

+                     WHEN x = 3 THEN 7

+                     WHEN x = 4 THEN 4

+                     WHEN x = 5 THEN 0

+                     WHEN x = 6 THEN 1

+                     WHEN x = 7 THEN 5

+                     WHEN x = 8 THEN 2

+                     WHEN x = 9 THEN 8

+                     WHEN x = 10 THEN 9

+                ELSE x

+         END; END;

+     $$ LANGUAGE plpgsql;

+     """

+     op.execute(sa.text(query_functions))

@@ -22,6 +22,3 @@

      from alembic import command

      alembic_cfg = Config(alembic_ini)

      command.stamp(alembic_cfg, "head")

-     # Functions are not covered by models.py, and no migrations are run

-     # by command.stamp() above.  Create functions explicitly:

-     builds_logic.BuildsLogic.init_db()

@@ -8,7 +8,7 @@

  

  from sqlalchemy.sql import text

  from sqlalchemy.sql.expression import not_

- from sqlalchemy.orm import joinedload

+ from sqlalchemy.orm import joinedload, selectinload

  from sqlalchemy import or_

  from sqlalchemy import and_

  from sqlalchemy import func, desc
@@ -330,138 +330,15 @@

              models.Copr.user == user)

  

      @classmethod

-     def init_db(cls):

-         if db.engine.url.drivername == "sqlite":

-             return

- 

-         status_to_order = """

-         CREATE OR REPLACE FUNCTION status_to_order (x integer)

-         RETURNS integer AS $$ BEGIN

-         RETURN CASE WHEN x = 3 THEN 1

-                 WHEN x = 6 THEN 2

-                 WHEN x = 7 THEN 3

-                 WHEN x = 4 THEN 4

-                 WHEN x = 0 THEN 5

-                 WHEN x = 1 THEN 6

-                 WHEN x = 5 THEN 7

-                 WHEN x = 2 THEN 8

-                 WHEN x = 8 THEN 9

-                 WHEN x = 9 THEN 10

-             ELSE x

-         END; END;

-         $$ LANGUAGE plpgsql;

-         """

- 

-         order_to_status = """

-         CREATE OR REPLACE FUNCTION order_to_status (x integer)

-         RETURNS integer AS $$ BEGIN

-         RETURN CASE WHEN x = 1 THEN 3

-                 WHEN x = 2 THEN 6

-                 WHEN x = 3 THEN 7

-                 WHEN x = 4 THEN 4

-                 WHEN x = 5 THEN 0

-                 WHEN x = 6 THEN 1

-                 WHEN x = 7 THEN 5

-                 WHEN x = 8 THEN 2

-                 WHEN x = 9 THEN 8

-                 WHEN x = 10 THEN 9

-             ELSE x

-         END; END;

-         $$ LANGUAGE plpgsql;

-         """

- 

-         db.engine.connect()

-         db.engine.execute(status_to_order)

-         db.engine.execute(order_to_status)

- 

-     @classmethod

-     def get_copr_builds_list(cls, copr, dirname=''):

-         query_select = """

- SELECT build.id, build.source_status, MAX(package.name) AS pkg_name, build.pkg_version, build.submitted_on,

-     MIN(statuses.started_on) AS started_on, MAX(statuses.ended_on) AS ended_on, order_to_status(MIN(statuses.st)) AS status,

-     build.canceled, MIN("group".name) AS group_name, MIN(copr.name) as copr_name, MIN("user".username) as user_name, build.copr_id

- FROM build

- LEFT OUTER JOIN package

-     ON build.package_id = package.id

- LEFT OUTER JOIN (SELECT build_chroot.build_id, started_on, ended_on, status_to_order(status) AS st FROM build_chroot) AS statuses

-     ON statuses.build_id=build.id

- LEFT OUTER JOIN copr

-     ON copr.id = build.copr_id

- LEFT OUTER JOIN copr_dir

-     ON build.copr_dir_id = copr_dir.id

- LEFT OUTER JOIN "user"

-     ON copr.user_id = "user".id

- LEFT OUTER JOIN "group"

-     ON copr.group_id = "group".id

- WHERE build.copr_id = :copr_id

-     AND (:dirname = '' OR :dirname = copr_dir.name)

- GROUP BY

-     build.id

- ORDER BY

-     build.id DESC;

- """

- 

-         if db.engine.url.drivername == "sqlite":

-             def sqlite_status_to_order(x):

-                 if x == 3:

-                     return 1

-                 elif x == 6:

-                     return 2

-                 elif x == 7:

-                     return 3

-                 elif x == 4:

-                     return 4

-                 elif x == 0:

-                     return 5

-                 elif x == 1:

-                     return 6

-                 elif x == 5:

-                     return 7

-                 elif x == 2:

-                     return 8

-                 elif x == 8:

-                     return 9

-                 elif x == 9:

-                     return 10

-                 return 1000

- 

-             def sqlite_order_to_status(x):

-                 if x == 1:

-                     return 3

-                 elif x == 2:

-                     return 6

-                 elif x == 3:

-                     return 7

-                 elif x == 4:

-                     return 4

-                 elif x == 5:

-                     return 0

-                 elif x == 6:

-                     return 1

-                 elif x == 7:

-                     return 5

-                 elif x == 8:

-                     return 2

-                 elif x == 9:

-                     return 8

-                 elif x == 10:

-                     return 9

-                 return 1000

- 

-             conn = db.engine.connect()

-             conn.connection.create_function("status_to_order", 1, sqlite_status_to_order)

-             conn.connection.create_function("order_to_status", 1, sqlite_order_to_status)

-             statement = text(query_select)

-             statement.bindparams(bindparam("copr_id", Integer))

-             statement.bindparams(bindparam("dirname", String))

-             result = conn.execute(statement, {"copr_id": copr.id, "dirname": dirname})

+     def get_copr_builds_list(cls, copr, dirname=None):

+         query = models.Build.query.filter(models.Build.copr_id==copr.id)

+         if dirname:

+             copr_dir = coprs_logic.CoprDirsLogic.get_by_copr(copr, dirname).one()

          else:

-             statement = text(query_select)

-             statement.bindparams(bindparam("copr_id", Integer))

-             statement.bindparams(bindparam("dirname", String))

-             result = db.engine.execute(statement, {"copr_id": copr.id, "dirname": dirname})

- 

-         return result

+             copr_dir = copr.main_dir

+         query = query.filter(models.Build.copr_dir_id==copr_dir.id)

+         query = query.options(selectinload('build_chroots'), selectinload('package'))

+         return list(query)

  

      @classmethod

      def join_group(cls, query):

@@ -1,8 +1,9 @@

  import json

  import re

  

- from sqlalchemy import bindparam, Integer

+ from sqlalchemy import bindparam, Integer, func

  from sqlalchemy.sql import true, text

+ from sqlalchemy.orm import selectinload

  

  from coprs import app

  from coprs import db
@@ -34,80 +35,24 @@

                  .filter(models.Package.copr_id == copr_id))

  

      @classmethod

-     def get_copr_packages_list(cls, copr_dir):

-         query_select = """

- SELECT package.name, build.pkg_version, build.submitted_on, package.webhook_rebuild, order_to_status(subquery2.min_order_for_a_build) AS status, build.source_status

- FROM package

- LEFT OUTER JOIN (select MAX(build.id) as max_build_id_for_a_package, package_id

-   FROM build

-   WHERE build.copr_dir_id = :copr_dir_id

-   GROUP BY package_id) as subquery1 ON subquery1.package_id = package.id

- LEFT OUTER JOIN build ON build.id = subquery1.max_build_id_for_a_package

- LEFT OUTER JOIN (select build_id, min(status_to_order(status)) as min_order_for_a_build

-   FROM build_chroot

-   GROUP BY build_id) as subquery2 ON subquery2.build_id = subquery1.max_build_id_for_a_package

- WHERE package.copr_dir_id = :copr_dir_id;

-         """

- 

-         if db.engine.url.drivername == "sqlite":

-             def sqlite_status_to_order(x):

-                 if x == 3:

-                     return 1

-                 elif x == 6:

-                     return 2

-                 elif x == 7:

-                     return 3

-                 elif x == 4:

-                     return 4

-                 elif x == 0:

-                     return 5

-                 elif x == 1:

-                     return 6

-                 elif x == 5:

-                     return 7

-                 elif x == 2:

-                     return 8

-                 elif x == 8:

-                     return 9

-                 elif x == 9:

-                     return 10

-                 return 1000

- 

-             def sqlite_order_to_status(x):

-                 if x == 1:

-                     return 3

-                 elif x == 2:

-                     return 6

-                 elif x == 3:

-                     return 7

-                 elif x == 4:

-                     return 4

-                 elif x == 5:

-                     return 0

-                 elif x == 6:

-                     return 1

-                 elif x == 7:

-                     return 5

-                 elif x == 8:

-                     return 2

-                 elif x == 9:

-                     return 8

-                 elif x == 10:

-                     return 9

-                 return 1000

- 

-             conn = db.engine.connect()

-             conn.connection.create_function("status_to_order", 1, sqlite_status_to_order)

-             conn.connection.create_function("order_to_status", 1, sqlite_order_to_status)

-             statement = text(query_select)

-             statement.bindparams(bindparam("copr_dir_id", Integer))

-             result = conn.execute(statement, {"copr_dir_id": copr_dir.id})

-         else:

-             statement = text(query_select)

-             statement.bindparams(bindparam("copr_dir_id", Integer))

-             result = db.engine.execute(statement, {"copr_dir_id": copr_dir.id})

+     def get_packages_with_latest_builds_for_dir(cls, copr_dir_id):

+         packages = (models.Package.query.filter_by(copr_dir_id=copr_dir_id)

+                                         .order_by(models.Package.name).all())

+         pkg_ids = [package.id for package in packages]

+         builds_ids = models.Build.query \

+             .filter(models.Build.package_id.in_(pkg_ids)) \

+             .with_entities(func.max(models.Build.id)) \

+             .group_by(models.Build.package_id)

+         packages_map = {package.id: package for package in packages}

+         builds = (

+             models.Build.query.filter(models.Build.id.in_(builds_ids))

+                               .options(selectinload('build_chroots')))

+         for build in builds:

+             if not build.package_id:

+                 continue

+             packages_map[build.package_id].latest_build = build

  

-         return result

+         return list(packages)

  

      @classmethod

      def get_list_by_copr(cls, copr_id, package_name):

@@ -880,6 +880,9 @@

      # the info that the build was resubmitted

      resubmitted_from_id = db.Column(db.Integer)

  

+     _cached_status = None

+     _cached_status_set = None

+ 

      @property

      def user_name(self):

          return self.user.name
@@ -1020,6 +1023,12 @@

  

      @property

      def status(self):

+         if getattr(self, '_cached_status_set') is None:

+             self._cached_status = self._status()

+             self._cached_status_set = True

+         return self._cached_status

+ 

+     def _status(self):

          """

          Return build status.

          """

@@ -16,29 +16,29 @@

      <tbody>

      {# packages here is not a package object #}

      {% for package in packages %}

+       {% if package.latest_build %}

+         {% set pkg_version = package.latest_build.pkg_version %}

+         {% set submitted_on = package.latest_build.submitted_on %}

+       {% endif %}

        <tr>

          <td data-order="{{ package.name }}">

            <b><a href="{{ copr_url('coprs_ns.copr_package', copr, package_name=package.name) }}">{{ package.name }}</a></b>

          </td>

  

-         <td data-order="{{ package.pkg_version }}">

-           {% if package.pkg_version %}

-             {{ package.pkg_version }}

-           {% else %}

-             -

-           {% endif %}

+         <td data-order="{{ pkg_version }}">

+             {{ pkg_version or '-' }}

          </td>

  

-         <td data-order="{{ package.submitted_on }}">

-           {% if package.submitted_on  %}

-             {{ package.submitted_on|time_ago() }} ago

+         <td data-order="{{ submitted_on or 0 }}">

+           {% if submitted_on %}

+             {{ submitted_on | time_ago() }} ago

            {% else %}

              -

            {% endif %}

          </td>

          <td>

-           {% if package.status is not none %}

-             {{ build_state(package) }}

+           {% if package.latest_build %}

+             {{ build_state(package.latest_build) }}

            {% else %}

              not built yet

            {% endif %}

@@ -53,7 +53,7 @@

  @req_with_copr

  def copr_builds(copr):

      flashes = flask.session.pop('_flashes', [])

-     dirname = flask.request.args.get('dirname', '')

+     dirname = flask.request.args.get('dirname')

      builds_query = builds_logic.BuildsLogic.get_copr_builds_list(copr, dirname)

      response = flask.Response(stream_with_context(helpers.stream_template("coprs/detail/builds.html",

                                copr=copr,

@@ -21,13 +21,15 @@

  @req_with_copr

  def copr_packages(copr):

      flashes = flask.session.pop('_flashes', [])

-     packages_query = PackagesLogic.get_copr_packages_list(copr.main_dir)

-     response = flask.Response(stream_with_context(helpers.stream_template("coprs/detail/packages.html",

-                                  copr=copr,

-                                  packages=list(packages_query),

-                                  flashes=flashes,

-                                  )))

- 

+     packages = PackagesLogic.get_packages_with_latest_builds_for_dir(

+         copr.main_dir.id)

+     response = flask.Response(

+         stream_with_context(helpers.stream_template(

+             "coprs/detail/packages.html",

+             copr=copr,

+             packages=packages,

+             flashes=flashes,

+         )))

      flask.session.pop('_flashes', [])

      return response

  

no initial comment

Metadata Update from @praiskup:
- Pull-request tagged with: needs-work

4 years ago

Using the following script, I got the following changes (before/after this
patch):

    https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/builds/       11.663    19.819
    https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/builds/   3.086    1.659
    https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/builds/       1.959    0.204
    https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/packages/      7.579    7.718
    https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/packages/ 2.56     0.098
    https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/packages/     2.575    0.09

(measured from copr-be-dev)

So.... it's not a complete win, but the "normal" copr projects receive quite a
big speedup, while the largest project in copr slows down. The important thing
is that the additional cost in calculation is done mostly in jinja (rendering
jinja) and sqlalchemy (mapping query result to ORM). So it is a) trivially
parallelizable at httpd level (so we are not blocked on postgresql) and b) it
can be easily cached (perhaps somewhat intelligently, only if the query is too
large).

measured urls='
        https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/builds/
        https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/builds/
        https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/builds/

        https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/packages/
        https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/packages/
        https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/packages/
'

analyze_url()
{
        truncate -s 0 stats
        count=20
        echo -n "average for $1 "

        for _ in $(seq $count); do
                /bin/time '--format=%e' -o stats -a curl "$1" >/dev/null 2>/dev/null
        done

        awk '{a+=$1} END{print a/NR}' stats
}

for url in $urls; do
   analyze_url "$url"
done

Another benefit is that we could live without the ugly in-database functions (which aren't giving ideal results anyways) so the code is much easier to read/understand now.

another measurement:

https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/builds/          11.2655 =>  20.604
https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/builds/     3.11    =>   2.056
https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/builds/         1.925   =>   0.279
https://copr-fe-dev.cloud.fedoraproject.org/coprs/iucar/cran/packages/        7.755   =>   8.068
https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr-dev/packages/   2.5555  =>   0.0915
https://copr-fe-dev.cloud.fedoraproject.org/coprs/g/copr/copr/packages/       2.5185  =>   0.088

rebased onto fc4182313bae7860daffdf60cd8d1c83b30bc956

4 years ago

rebased onto d875ec0e5abb84ca80caeb3355a1720a164e4a53

4 years ago

Metadata Update from @praiskup:
- Pull-request untagged with: needs-work

4 years ago

rebased onto 496c18a

4 years ago

Pull-Request has been merged by praiskup

4 years ago