#136 avoid JSONB column type
Merged 6 years ago by dcallagh. Opened 6 years ago by dcallagh.
dcallagh/waiverdb issue-134-take-2  into  master

avoid JSONB column type
Dan Callaghan • 6 years ago  
file modified
+3 -3
@@ -6,7 +6,7 @@ 

  from flask import Blueprint, request, current_app

  from flask_restful import Resource, Api, reqparse, marshal_with, marshal

  from werkzeug.exceptions import BadRequest, UnsupportedMediaType, Forbidden, ServiceUnavailable

- from sqlalchemy.sql.expression import func

+ from sqlalchemy.sql.expression import func, cast

  

  from waiverdb import __version__

  from waiverdb.models import db, Waiver
@@ -143,7 +143,7 @@ 

              if since_end:

                  query = query.filter(Waiver.timestamp <= since_end)

          if not args['include_obsolete']:

-             subquery = db.session.query(func.max(Waiver.id)).group_by(Waiver.subject,

+             subquery = db.session.query(func.max(Waiver.id)).group_by(cast(Waiver.subject, db.Text),

                                                                        Waiver.testcase)

              query = query.filter(Waiver.id.in_(subquery))

          query = query.order_by(Waiver.timestamp.desc())
@@ -396,7 +396,7 @@ 

              if since_end:

                  query = query.filter(Waiver.timestamp <= since_end)

          if not data.get('include_obsolete', False):

-             subquery = db.session.query(func.max(Waiver.id)).group_by(Waiver.subject,

+             subquery = db.session.query(func.max(Waiver.id)).group_by(cast(Waiver.subject, db.Text),

                                                                        Waiver.testcase)

              query = query.filter(Waiver.id.in_(subquery))

  

@@ -1,4 +1,4 @@ 

- """Change waiver.subject column type to JSONB

+ """(OBSOLETED) Change waiver.subject column type to JSONB

  

  Revision ID: 1797bff52162

  Revises: ed43eb9b221c
@@ -10,14 +10,15 @@ 

  revision = '1797bff52162'

  down_revision = 'ed43eb9b221c'

  

- from alembic import op

- from sqlalchemy import Text

- from sqlalchemy.dialects.postgresql import JSONB

- 

  

  def upgrade():

-     op.alter_column('waiver', 'subject', type_=JSONB, postgresql_using='subject::jsonb')

+     # This migration originally attempted to use the JSONB column type,

+     # which would fail on Postgres 9.2.

+     # So this is now replaced by ce8a1351ecdc_change_subject_to_json.py

+     # however we keep this as a no-op, to avoid breaking any Waiverdb

+     # deployment where this migration had successfully been applied.

+     pass

  

  

  def downgrade():

-     op.alter_column('waiver', 'subject', type_=Text)

+     pass

@@ -0,0 +1,37 @@ 

+ """Change waiver.subject column type to JSON

+ 

+ Revision ID: ce8a1351ecdc

+ Revises: 1797bff52162

+ Create Date: 2018-02-28 14:17:15.545322

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = 'ce8a1351ecdc'

+ down_revision = '1797bff52162'

+ 

+ import json

+ from alembic import op

+ from sqlalchemy import Text, text

+ from sqlalchemy.dialects.postgresql import JSON

+ from waiverdb.models.base import json_serializer

+ 

+ 

+ def upgrade():

+     # Re-serialize all subject values to ensure they match the new, consistent

+     # serialization we are using.

+     connection = op.get_bind()

+     for row in connection.execute('SELECT id, subject FROM waiver'):

+         fixed_subject = json_serializer(json.loads(row['subject']))

+         connection.execute(text('UPDATE waiver SET subject = :subject WHERE id = :id'),

+                            subject=fixed_subject, id=row['id'])

+ 

+     op.drop_index('ix_waiver_subject')

+     op.alter_column('waiver', 'subject', type_=JSON, postgresql_using='subject::json')

+     op.create_index('ix_waiver_subject', 'waiver', [text('CAST(subject AS TEXT)')])

+ 

+ 

+ def downgrade():

+     op.drop_index('ix_waiver_subject')

+     op.alter_column('waiver', 'subject', type_=Text)

+     op.create_index('ix_waiver_subject', 'waiver', ['subject'])

file modified
+59
@@ -1,5 +1,64 @@ 

  # SPDX-License-Identifier: GPL-2.0+

  

+ import json

+ from sqlalchemy.dialects.postgresql import JSON

+ from sqlalchemy.dialects.postgresql.psycopg2 import _PGJSON

+ from sqlalchemy.sql.elements import ColumnElement, Null

+ from sqlalchemy.sql.expression import cast

+ from sqlalchemy.sql.sqltypes import Text

  from flask_sqlalchemy import SQLAlchemy

  

+ 

+ json_serializer = json.encoder.JSONEncoder(sort_keys=True, separators=(',', ':')).encode

+ 

+ 

+ # Note that we have to inherit from the psycopg2-specific _PGJSON type,

+ # instead of the more general Postgres-specific JSON type. That's because

+ # SQLAlchemy helpfully "adapts" the general JSON type to _PGJSON when it sees

+ # we are using psycopg2, thereby defeating our customisations below.

+ # Inheriting from _PGJSON tells SQLAlchemy not do that adaptation.

+ # The magic occurs in SQLALchemy adapt_type() and PGDialect_psycopg2.colspecs.

+ class EqualityComparableJSONType(_PGJSON):

+     """

+     Like the standard JSON column type, but supports equality comparison of

+     entire JSON structures. Postgres doesn't permit this natively on the JSON

+     type so we have to instead cast to TEXT before comparing (and therefore we

+     have to also go to lengths to ensure the JSON we feed in is consistently

+     serialized).

+     """

+ 

+     class Comparator(JSON.Comparator):

+ 

+         def __eq__(self, other):

+             # If the RHS is a SQL expression, just assume its SQL type is

+             # already JSON and thus CAST(AS TEXT) it as well.

+             if isinstance(other, ColumnElement):

+                 rhs = cast(other, Text)

+             else:

+                 rhs = json_serializer(other)

+             return cast(self.expr, Text) == rhs

+ 

+     comparator_factory = Comparator

+ 

+     def bind_processor(self, dialect):

+         def process(value):

+             # JSON.NULL is a sentinel object meaning JSON "null" (SQLAlchemy 1.1+ only)

+             if hasattr(self, 'NULL') and value is self.NULL:

+                 value = None

+             elif isinstance(value, Null) or (value is None and self.none_as_null):

+                 return None

+             return json_serializer(value)

+         return process

+ 

+     def result_processor(self, dialect, coltype):

+         if dialect._has_native_json:

+             return None

+ 

+         def process(value):

+             if value is None:

+                 return None

+             return json.loads(value)

+         return process

+ 

+ 

  db = SQLAlchemy()

file modified
+6 -4
@@ -1,15 +1,14 @@ 

  # SPDX-License-Identifier: GPL-2.0+

  

  import datetime

- from .base import db

- from sqlalchemy import or_, and_

- from sqlalchemy.dialects.postgresql import JSONB

+ from .base import db, EqualityComparableJSONType

+ from sqlalchemy import or_, and_, cast

  

  

  class Waiver(db.Model):

      id = db.Column(db.Integer, primary_key=True)

      result_id = db.Column(db.Integer, nullable=True)

-     subject = db.Column(JSONB, nullable=False, index=True)

+     subject = db.Column(EqualityComparableJSONType, nullable=False)

      testcase = db.Column(db.Text, nullable=False, index=True)

      username = db.Column(db.String(255), nullable=False)

      proxied_by = db.Column(db.String(255))
@@ -17,6 +16,9 @@ 

      waived = db.Column(db.Boolean, nullable=False, default=False)

      comment = db.Column(db.Text)

      timestamp = db.Column(db.DateTime, default=datetime.datetime.utcnow)

+     __table_args__ = (

+         db.Index('ix_waiver_subject', cast(subject, db.Text)),

+     )

  

      def __init__(self, subject, testcase, username, product_version, waived=False,

                   comment=None, proxied_by=None):

We want to stay compatible with Postgres 9.2 which lacks the JSONB
column type.

We can use the older JSON column type instead, but it needs some extra
massaging to allow equality comparisons across JSON values. (JSONB
handles this itself.)

The short version is: we compare JSON values as strings, which means we
also need to ensure the string serialization is always consistent when
we send it to the database. The sort_keys=True argument to JSONEncoder()
gives us that.

Fixes #134.

Alternative to PR#135. This version uses a custom SQLAlchemy column type to encapsulate the necessary CAST() stuff for equality comparisons. I like this version a bit nicer, although the custom type requires some ridiculous hacks (the _PGJSON thing).

Also, this doesn't help for the GROUP BY clause which still needs an explicit CAST(), unfortunately.

Both look fine to me.
I really don't like the cast thing :( but I guess we don't have a choice.

Yeah - my vote is on this one too.

@dcallagh, merge away!

1 new commit added

  • tweak for SQLAlchemy 1.0 compatibility (EPEL7)
6 years ago

Pull-Request has been merged by dcallagh

6 years ago