#540 Use whoosh for hubs search
Opened 6 years ago by shaily. Modified 6 years ago
shaily/fedora-hubs search  into  develop

file modified
+3
@@ -12,6 +12,7 @@ 

  import hubs.models

  from hubs.utils import username2avatar, hub2groupavatar

  from hubs.utils.fedmsg import get_fedmsg_config

+ from hubs.utils.search import get_hub_index

  

  app = flask.Flask(__name__)

  
@@ -36,6 +37,8 @@ 

  

  fedmsg_config = get_fedmsg_config()

  

+ with app.app_context():

+     get_hub_index()

  

  # Database

  

file modified
+2
@@ -51,6 +51,8 @@ 

  DATAGREPPER_URI = 'https://apps.fedoraproject.org/datagrepper'

  DEFAULT_SERVER_NAME = "hubs.fedoraproject.org"

  

+ HUB_INDEX_DIR = '/var/tmp/hub_index'

+ 

  MANAGE_MEMBERSHIP_IN_FAS = True

  

  EMAIL_HOST = "localhost"

file modified
+14 -1
@@ -37,6 +37,7 @@ 

      add_group_widgets, add_user_widgets, add_stream_widgets,

  )

  from hubs.utils import username2avatar

+ from hubs.utils.search import get_hub_index

  from hubs.utils.fedmsg import publish

  from hubs.signals import hub_created

  from hubs.widgets import clean_input
@@ -132,6 +133,14 @@ 

          return [assoc.user for assoc in self.associations

                  if assoc.role == 'stargazer']

  

+     def update_index(self):

+         """ Add or update hub details in the search index. """

+         hub_index = get_hub_index()

+         writer = hub_index.writer()

+         writer.update_document(id=str(self.id), name=self.name,

+                                summary=self.config['summary'])

+         writer.commit()

+ 

      def publish(self, topic, extra_msg=None):

          msg = {

              "hub_name": self.name,
@@ -231,10 +240,11 @@ 

          return hub

  

      @classmethod

-     def create_group_hub(cls, name):

+     def create_group_hub(cls, name, summary):

          session = Session()

          hub = cls(name=name, hub_type="team")

          session.add(hub)

+         hub.config["summary"] = summary

          # Commit before sending the signal or other processes won't see it

          # (the workers). Flushing isn't enough.

          session.commit()
@@ -257,9 +267,11 @@ 

              add_user_widgets(self)

              user = User.query.get(self.name)

              self.subscribe(user, role='owner')

+             self.update_index()

          elif self.hub_type == "team":

              add_group_widgets(self)

              self.publish("hub.created")

+             self.update_index()

          elif self.hub_type == "stream":

              add_stream_widgets(self)

  
@@ -269,6 +281,7 @@ 

              key for key in new_config.keys()

              if new_config.get(key) != old_config.get(key)

          ]

+         self.update_index()

          # Notify but don't send the config values on the bus, there

          # may be private stuff there.

          self.publish("hub.updated", {

file added
+31
@@ -0,0 +1,31 @@ 

+ from __future__ import unicode_literals

+ 

+ import os

+ 

+ from flask import current_app

+ from whoosh.fields import Schema, ID, TEXT

+ from whoosh.analysis import StandardAnalyzer, CharsetFilter

+ from whoosh.index import open_dir, create_in

+ from whoosh.support.charset import accent_map

+ 

+ 

+ def get_hub_index():

+     """

+     Returns a whoosh.index.Index object for hub search

+     Used for adding, updating and searching across the

+     indexed collection of documents.

+ 

+     If the index directory is not present, creates it.

+ 

+     Returns:

+         Index: Represents an indexed collection of documents.

+     """

+     index_dir = current_app.config.get('HUB_INDEX_DIR')

+     if not os.path.exists(index_dir):

+         os.mkdir(index_dir)

+         accent_folding = StandardAnalyzer() | CharsetFilter(accent_map)

+         schema = Schema(id=ID(unique=True, stored=True), name=TEXT(),

+                         summary=TEXT(analyzer=accent_folding))

+         create_in(index_dir, schema)

+     ix = open_dir(index_dir)

+     return ix

file modified
+10 -11
@@ -12,9 +12,12 @@ 

  from six.moves.urllib import parse as urlparse

  from sqlalchemy import or_, and_

  from sqlalchemy.orm.exc import NoResultFound

+ from whoosh.qparser import MultifieldParser

+ from whoosh.query import Prefix

  

  from hubs.models import Hub, HubConfig, Widget

  from hubs.models.constants import HUB_TYPES

+ from hubs.utils.search import get_hub_index

  

  

  log = logging.getLogger(__name__)
@@ -51,17 +54,13 @@ 

  

  

  def query_hubs(querystring):

-     query = Hub.query.join(HubConfig).filter(

-         Hub.hub_type.in_(["user", "team"]),

-         or_(

-             Hub.name.ilike('%{}%'.format(querystring)),

-             and_(

-                 HubConfig.key == "summary",

-                 HubConfig.value.ilike('%{}%'.format(querystring)),

-             )

-         )

-     )

-     return query.all()

+     hub_index = get_hub_index()

+     with hub_index.searcher() as searcher:

+         query = MultifieldParser(["name", "summary"], hub_index.schema,

+                                  plugins=[], termclass=Prefix)

+         results = searcher.search(query.parse(querystring))

+         hubs = [get_hub_by_id(result['id']) for result in results]

+     return hubs

  

  

  def get_widget_instance(idx):

file modified
+5 -10
@@ -92,10 +92,9 @@ 

  

  def create_teams():

      # Ambassadors

-     hub = hubs.models.Hub(name='ambassadors', hub_type="team")

+     hub = hubs.models.Hub.create_group_hub(name='ambassadors', summary='Fedora Ambassadors')

      db.add(hub)

      hub.config.update(dict(

-         summary='Fedora Ambassadors',

          avatar=placekitten,

          description=(

              'Ambassadors are the representatives of Fedora. Ambassadors '
@@ -122,9 +121,8 @@ 

      db.commit()

  

  # ############# CommOps

-     hub = hubs.models.Hub(name='commops', hub_type="team")

+     hub = hubs.models.Hub.create_group_hub(name='commops', summary='The Fedora Community Operations Team')

      db.add(hub)

-     hub.config["summary"] = 'The Fedora Community Operations Team'

      hub.config["chat_domain"] = 'irc.freenode.net'

      hub.config["chat_channel"] = '#fedora-commops'

      hub.config["pagure"] = ['fedora-commops']
@@ -149,9 +147,8 @@ 

      db.commit()

  

  # ############# Marketing team

-     hub = hubs.models.Hub(name='marketing', hub_type="team")

+     hub = hubs.models.Hub.create_group_hub(name='marketing', summary='The Fedora Marketing Team')

      db.add(hub)

-     hub.config["summary"] = 'The Fedora Marketing Team'

      hub.config["description"] = (

          'The Fedora Marketing Team develops and executes marketing strategies'

          ' to promote the usage and support of Fedora worldwide. Through the'
@@ -182,9 +179,8 @@ 

      db.commit()

  

  # ############# Design team

-     hub = hubs.models.Hub(name='designteam', hub_type="team")

+     hub = hubs.models.Hub.create_group_hub(name='designteam', summary='The Fedora Design Team')

      db.add(hub)

-     hub.config["summary"] = 'The Fedora Design Team'

      hub.config["description"] = (

          'The Design Team is the design group of the Fedora project. Our'

          ' interests are not only in creating graphics for use by the'
@@ -216,9 +212,8 @@ 

  

  

  # ############# Infra team -- commented out, as there is no infra FAS group yet

-     # hub = hubs.models.Hub(name='infrastructure', hub_type="team")

+     # hub = hubs.models.Hub.create_group_hub(name='infrastructure', summary='The Fedora Infra Team')

      # db.add(hub)

-     # hub.config["summary"] = 'The Fedora Infra Team'

      # hub.config["description"] = """

      # The Infrastructure Team consists of dedicated volunteers and professional

      # managing the servers, building the tools and utilities, and creating new

file modified
+1
@@ -28,3 +28,4 @@ 

  six

  sqlalchemy

  txredisapi

+ whoosh

no initial comment

1 new commit added

  • Disable plugins
6 years ago

rebased onto 9f31a3b3b798ebc12431709a65c15ace3bf125ea

6 years ago

rebased onto 6c9067c

6 years ago

rebased onto 58fd905

6 years ago

rebased onto cb6049c

6 years ago

rebased onto eeb9e6a

6 years ago

rebased onto 267e2c0

6 years ago

rebased onto 266044f

6 years ago

I recommend adding a docblock here.

I recommend a docblock here.

Does hubs have an initialization function that gets called during application startup? I usually recommend against having imports cause "actions" to happen, because this can cause tricky-to-debug problems (circular imports, a hard-to-predict startup order, and difficult testing). A lot of web frameworks give you hooks that get called when the application starts up, and these are nice because they solve the above problems.

I recommend adding tests for this change, but LGTM otherwise.

rebased onto c43463c

6 years ago

2 new commits added

  • Don't execute index function on import
  • Add docblocks to search code
6 years ago

There seems to be some problem with my setup: I'm getting an `unsupported pickle protocol: 4` error in the python27 environment and missing packages in the python36 environment when running the tests with tox.

I'll try to setup the VM afresh again and add tests.

rebased onto ca5e659

6 years ago

3 new commits added

  • Add tests for search
  • Explicitly fire hub created signal for team hubs
  • Change search analyzer for summary
6 years ago

rebased onto 1c584f7

6 years ago

rebased onto 011913e

6 years ago

1 new commit added

  • Fix team hub creation in tests
6 years ago

7 new commits added

  • Fix team hub creation in tests
  • Add tests for search
  • Explicitly fire hub created signal for team hubs
  • Change search analyzer for summary
  • Don't execute index function on import
  • Add docblocks to search code
  • Use whoosh for hubs search
6 years ago

1 new commit added

  • Add search index update test
6 years ago

Added tests, requesting review.

1 new commit added

  • Update populate.py to create team hubs correctly
6 years ago

rebased onto 5012475

6 years ago

rebased onto e6cd180

6 years ago

rebased onto d688d92

6 years ago

Ideally you would have chosen another method name; this one seems a bit too generic, and could be mixed up with "getting" an index value. Maybe update_index()?

This should be a configuration value, so it can be changed by the site admin.

rebased onto 62b952a

6 years ago

1 new commit added

  • Changes after review
6 years ago

1 new commit added

  • Change config key name
6 years ago

Why are you getting the hub index here? It does not look like you're using that variable, or importing it from anywhere. Or did I miss it?

Oh, I'm just setting up the hub search index directory at the initialization of the application, as suggested by @bowlofeggs.
I'll remove the variable assignment.

1 new commit added

  • Remove redundant variable assignment
6 years ago

rebased onto 8d7a216

6 years ago

1 new commit added

  • Fix team hub creation in populate.py
6 years ago