#135 avoid JSONB column type
Closed 2 years ago by ralph. Opened 2 years ago by dcallagh.
dcallagh/waiverdb issue-134  into  master

avoid JSONB column type
Dan Callaghan • 2 years ago  
file modified
+3 -3
@@ -6,7 +6,7 @@ 

  from flask import Blueprint, request, current_app

  from flask_restful import Resource, Api, reqparse, marshal_with, marshal

  from werkzeug.exceptions import BadRequest, UnsupportedMediaType, Forbidden, ServiceUnavailable

- from sqlalchemy.sql.expression import func

+ from sqlalchemy.sql.expression import func, cast

  

  from waiverdb import __version__

  from waiverdb.models import db, Waiver
@@ -143,7 +143,7 @@ 

              if since_end:

                  query = query.filter(Waiver.timestamp <= since_end)

          if not args['include_obsolete']:

-             subquery = db.session.query(func.max(Waiver.id)).group_by(Waiver.subject,

+             subquery = db.session.query(func.max(Waiver.id)).group_by(cast(Waiver.subject, db.Text),

                                                                        Waiver.testcase)

              query = query.filter(Waiver.id.in_(subquery))

          query = query.order_by(Waiver.timestamp.desc())
@@ -396,7 +396,7 @@ 

              if since_end:

                  query = query.filter(Waiver.timestamp <= since_end)

          if not data.get('include_obsolete', False):

-             subquery = db.session.query(func.max(Waiver.id)).group_by(Waiver.subject,

+             subquery = db.session.query(func.max(Waiver.id)).group_by(cast(Waiver.subject, db.Text),

                                                                        Waiver.testcase)

              query = query.filter(Waiver.id.in_(subquery))

  

@@ -1,4 +1,4 @@ 

- """Change waiver.subject column type to JSONB

+ """(OBSOLETED) Change waiver.subject column type to JSONB

  

  Revision ID: 1797bff52162

  Revises: ed43eb9b221c
@@ -10,14 +10,15 @@ 

  revision = '1797bff52162'

  down_revision = 'ed43eb9b221c'

  

- from alembic import op

- from sqlalchemy import Text

- from sqlalchemy.dialects.postgresql import JSONB

- 

  

  def upgrade():

-     op.alter_column('waiver', 'subject', type_=JSONB, postgresql_using='subject::jsonb')

+     # This migration originally attempted to use the JSONB column type,

+     # which would fail on Postgres 9.2.

+     # So this is now replaced by ce8a1351ecdc_change_subject_to_json.py

+     # however we keep this as a no-op, to avoid breaking any Waiverdb

+     # deployment where this migration had successfully been applied.

+     pass

  

  

  def downgrade():

-     op.alter_column('waiver', 'subject', type_=Text)

+     pass

@@ -0,0 +1,37 @@ 

+ """Change waiver.subject column type to JSON

+ 

+ Revision ID: ce8a1351ecdc

+ Revises: 1797bff52162

+ Create Date: 2018-02-28 14:17:15.545322

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = 'ce8a1351ecdc'

+ down_revision = '1797bff52162'

+ 

+ import json

+ from alembic import op

+ from sqlalchemy import Text, text

+ from sqlalchemy.dialects.postgresql import JSON

+ from waiverdb.models.base import json_serializer

+ 

+ 

+ def upgrade():

+     # Re-serialize all subject values to ensure they match the new, consistent

+     # serialization we are using.

+     connection = op.get_bind()

+     for row in connection.execute('SELECT id, subject FROM waiver'):

+         fixed_subject = json_serializer(json.loads(row['subject']))

+         connection.execute(text('UPDATE waiver SET subject = :subject WHERE id = :id'),

+                            subject=fixed_subject, id=row['id'])

+ 

+     op.drop_index('ix_waiver_subject')

+     op.alter_column('waiver', 'subject', type_=JSON, postgresql_using='subject::json')

+     op.create_index('ix_waiver_subject', 'waiver', [text('CAST(subject AS TEXT)')])

+ 

+ 

+ def downgrade():

+     op.drop_index('ix_waiver_subject')

+     op.alter_column('waiver', 'subject', type_=Text)

+     op.create_index('ix_waiver_subject', 'waiver', ['subject'])

file modified
+13 -1
@@ -1,5 +1,17 @@ 

  # SPDX-License-Identifier: GPL-2.0+

  

+ import json

  from flask_sqlalchemy import SQLAlchemy

  

- db = SQLAlchemy()

+ 

+ json_serializer = json.encoder.JSONEncoder(sort_keys=True, separators=(',', ':')).encode

+ 

+ 

+ class SQLAlchemyWithCustomisations(SQLAlchemy):

+ 

+     def apply_driver_hacks(self, app, info, options):

+         super(SQLAlchemyWithCustomisations, self).apply_driver_hacks(app, info, options)

+         options['json_serializer'] = json_serializer

+ 

+ 

+ db = SQLAlchemyWithCustomisations()

file modified
+22 -5
@@ -2,14 +2,16 @@ 

  

  import datetime

  from .base import db

- from sqlalchemy import or_, and_

- from sqlalchemy.dialects.postgresql import JSONB

+ from sqlalchemy import or_, and_, cast

+ from sqlalchemy.dialects.postgresql import JSON

+ from sqlalchemy.ext.hybrid import hybrid_method

+ from .base import json_serializer

  

  

  class Waiver(db.Model):

      id = db.Column(db.Integer, primary_key=True)

      result_id = db.Column(db.Integer, nullable=True)

-     subject = db.Column(JSONB, nullable=False, index=True)

+     subject = db.Column(JSON, nullable=False)

      testcase = db.Column(db.Text, nullable=False, index=True)

      username = db.Column(db.String(255), nullable=False)

      proxied_by = db.Column(db.String(255))
@@ -17,6 +19,9 @@ 

      waived = db.Column(db.Boolean, nullable=False, default=False)

      comment = db.Column(db.Text)

      timestamp = db.Column(db.DateTime, default=datetime.datetime.utcnow)

+     __table_args__ = (

+         db.Index('ix_waiver_subject', cast(subject, db.Text)),

+     )

  

      def __init__(self, subject, testcase, username, product_version, waived=False,

                   comment=None, proxied_by=None):
@@ -37,11 +42,23 @@ 

      def by_results(cls, query, results):

          return query.filter(or_(*[

              and_(

-                 cls.subject == d['subject'],

+                 cls.subject_matches(d['subject']),

                  cls.testcase == d['testcase']

              ) if d.get('testcase', None) else

              and_(

-                 cls.subject == d['subject']

+                 cls.subject_matches(d['subject']),

              ) if d.get('subject', None) else

              and_() for d in results

          ]))

+ 

+     @hybrid_method

+     def subject_matches(self, other_subject):

+         return self.subject == other_subject

+ 

+     @subject_matches.expression

+     def subject_matches(cls, other_subject):  # pylint: disable=E0213

+         # Postgres JSON type does not actually permit equality comparison.

+         # We need to compare textual representations on the database side.

+         # Note this won't work if the RHS is a SQL expression, only because

+         # I am too lazy to implement that for now.

+         return cast(cls.subject, db.Text) == json_serializer(other_subject)

We want to stay compatible with Postgres 9.2 which lacks the JSONB
column type.

We can use the older JSON column type instead, but it needs some extra
massaging to allow equality comparisons across JSON values. (JSONB
handles this itself.)

The short version is: we compare JSON values as strings, which means we
also need to ensure the string serialization is always consistent when
we send it to the database. The sort_keys=True argument to JSONEncoder()
gives us that.

Fixes #134.

Pull-Request has been closed by ralph

2 years ago

Build 9b6e24b6e42d1726784b1a1fe96b0ced8272e063 FAILED!
Rebase or make new commits to rebuild.