#16 Parse and store Event dates
Merged 3 years ago by frantisekz. Opened 3 years ago by frantisekz.

@@ -0,0 +1,83 @@ 

+ """Event Dates

+ 

+ Revision ID: 31a71e8c7dfa

+ Revises: 1e0fb82777af

+ Create Date: 2020-11-19 09:50:46.323162

+ 

+ """

+ 

+ import requests

+ import re

+ import datetime

+ 

+ from sqlalchemy.orm import sessionmaker

+ 

+ from testdays.models.testday import Event

+ from testdays.lib import wiki

+ 

+ Session = sessionmaker()

+ 

+ # revision identifiers, used by Alembic.

+ revision = '31a71e8c7dfa'

+ down_revision = '1e0fb82777af'

+ branch_labels = None

+ depends_on = None

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

def process_past_event_dates():
    """Backfill testday_start/testday_end for every pre-existing Event.

    Runs inside the Alembic migration: binds a session to the migration
    connection, walks all events (newest first), resolves each event's wiki
    URL (following redirects and normalizing it) and stores the parsed
    start/end dates. Events whose dates cannot be parsed keep the sentinel
    default returned by wiki.get_td_dates() (1900-01-01).
    """
    connection = op.get_bind()
    session = Session(bind=connection)
    events = session.query(Event).order_by(Event.created_at.desc())
    for event in events:
        print("Processing TD %s" % event.name)

        # Some testday urls are redirects; follow them to reach the real page.
        # Eg. http://fedoraproject.org/wiki/Test_Day:F33_SwapOnZRAM
        # to
        # https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM?rd=Test_Day:F33_SwapOnZRAM
        r = requests.get(event.testday_url)

        # Urls after redirects contain an unnecessary "?rd=something" parameter,
        # our code would crash on that, so purge it.
        # Eg. https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM?rd=Test_Day:F33_SwapOnZRAM
        # to https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM
        to_clean = re.findall(r'\?rd.*', r.url)
        if len(to_clean) == 1:
            # Reuse the match found above instead of re-running the regex.
            cleaned_url = r.url.replace(to_clean[0], "")
        else:
            cleaned_url = r.url

        # Some more cleaning
        # https://fedoraproject.org/w/index.php?title=Test_Day:2018-04-11_Cloud-Atomic_Testday
        # to
        # https://fedoraproject.org/wiki/Test_Day:2018-04-11_Cloud-Atomic_Testday
        cleaned_url = cleaned_url.replace("fedoraproject.org/w/index.php?title=", "fedoraproject.org/wiki/")

        # Parse dates
        start, end = wiki.get_td_dates(cleaned_url)

        # wiki.get_td_dates() signals a parse failure with the 1900-01-01 sentinel.
        if start == datetime.datetime(1900, 1, 1, 0, 0):
            print("Could not parse event's date, using default")

        event.testday_start = start
        event.testday_end = end
        # Commit per event so an interrupted migration keeps already-processed rows.
        session.commit()

+ 

def upgrade():
    """Add testday_start/testday_end columns to 'event' and backfill them."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('event', sa.Column('testday_end', sa.DateTime(), nullable=True))
    op.add_column('event', sa.Column('testday_start', sa.DateTime(), nullable=True))
    # ### end Alembic commands ###

    # Process data changes
    # Fill the new columns for events created before this migration.
    process_past_event_dates()

+ 

+ 

+ 

def downgrade():
    """Drop the testday_start/testday_end columns added by upgrade()."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column('event', 'testday_start')
    op.drop_column('event', 'testday_end')
    # ### end Alembic commands ###

@@ -88,7 +88,8 @@ 

      if td_url.startswith("https"):

          td_url = td_url.replace("https", "http", 1)

  

-     event = Event(td_name, url, td_url)

+     start, end = wiki.get_td_dates(td_url)

+     event = Event(td_name, url, td_url, start, end)

  

      # Testcase Sections

      category = None
@@ -172,6 +173,11 @@ 

  

                  event.id = old_event.id

                  event.resultsdb_job_uuid = old_event.resultsdb_job_uuid

+                 # Update start/end

+                 start, end = wiki.get_td_dates(event.testday_url)

+                 event.testday_start = start

+                 event.testday_end = end

+ 

                  db.session.merge(event)

                  db.session.commit()

                  flash('Event was successfully updated')

file modified
+41 -1
@@ -5,6 +5,45 @@ 

  from urllib.parse import urljoin

  import urllib

  import simplejson

+ import re

+ import datetime

+ 

def get_td_dates(url):
    """
    Parse the infobox of a testday wiki page and return the testday dates
    [start, end]. On any parsing failure, return the sentinel pair
    (datetime.datetime(1900, 1, 1, 0, 0), datetime.datetime(1900, 1, 1, 0, 0)).
    """
    # Single definition of the failure sentinel instead of repeating it at
    # every early return.
    fallback = (datetime.datetime(1900, 1, 1, 0, 0),
                datetime.datetime(1900, 1, 1, 0, 0))

    url = url.replace("http://", "https://")
    page = __strip_url(url)
    w = fedora.client.wiki.Wiki()
    params = {'action': 'parse', 'format': 'json', 'prop': 'wikitext', 'page': page}
    result = w.send_request('api.php', req_params=params)

    # Clean up api returned data for further processing through regex
    try:
        result = result["parse"]["wikitext"]["*"]
    except KeyError:
        return fallback
    result = result.replace("\n", "")
    result = result.replace("'", "")

    # Infobox contains a string like '| date = 2020-09-21 | time' or
    # '| date = 2020-09-21 to 2020-09-22 | time'.
    # We're getting the content from between '| date = ' and ' | time'.
    match = re.search(r'date = (.*)\| time', result)
    if match is None:
        return fallback
    dates = match.group(1)

    # Ranges are written either as "A to B" or "A - B"; a single date has
    # neither separator and split() yields a one-element list.
    if " to " in dates:
        dates = dates.split(" to ")
    else:
        dates = dates.split(" - ")
    try:
        dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates]
    except ValueError:
        return fallback

    # Single-day testdays: start == end.
    if len(dates) == 1:
        dates.append(dates[0])
    return dates

+ 

+ 

  

  def __strip_url(url):

      for s in ["https://fedoraproject.org/wiki/", fedora.client.wiki.Wiki().base_url]:
@@ -70,6 +109,7 @@ 

  

  if __name__ == '__main__':

      import pprint

-     page = 'User:Jskladan/Sandbox:TestdayAppTemplate'

+     import ipdb; ipdb.set_trace()

+     page = 'Test_Day:Fedora_33_CoreOS_2020-11-06'

      print("Fetching metadata from %s" % page)

      pprint.pprint(get_page_section(page, 'TestdayApp Metadata'))

file modified
+5 -1
@@ -29,16 +29,20 @@ 

      name = db.Column(db.Text)

      metadata_url = db.Column(db.Text)

      testday_url = db.Column(db.Text)

+     testday_start = db.Column(db.DateTime)

+     testday_end = db.Column(db.DateTime)

      resultsdb_job_id = db.Column(db.Integer)

      resultsdb_job_uuid = db.Column(db.String(36))

      created_at = db.Column(db.DateTime, default = datetime.datetime.utcnow)

  

      categories = db.relation('Category', backref='event', order_by='Category.id')

  

    def __init__(self, name, metadata_url, testday_url=None, testday_start=None, testday_end=None, resultsdb_job_uuid=None):
        """Create an Event.

        Args:
            name: display name of the testday event.
            metadata_url: wiki URL the event metadata is parsed from.
            testday_url: public wiki URL of the testday page, if any.
            testday_start: start datetime of the testday (may be None).
            testday_end: end datetime of the testday (may be None).
            resultsdb_job_uuid: UUID of the associated ResultsDB job, if any.
        """
        self.name = name
        self.metadata_url = metadata_url
        self.testday_url = testday_url
        self.testday_start = testday_start
        self.testday_end = testday_end
        self.resultsdb_job_uuid = resultsdb_job_uuid

  

  class Category(db.Model):

This will be split into multiple Pull Requests.

In the first one, the aim is to add functions to parse testday date ranges from wiki, update db models, wire the parsing into testday creation, and wire up code to update past testdays (neither is tested yet).

And finally, I'll change the UI (with other tweaks) to distinguish active and past testdays.

rebased onto 8c2c42d

3 years ago

1 new commit added

  • Process past events
3 years ago

2 new commits added

  • Process past events
  • Parse and store Event dates
3 years ago

I don't see how this could work, if you are on the 1e0fb82777af revision, the Event table does not have the start/end columns yet.

If you want to retrospectively fill the dates, do it as a part of the DB schema upgrade.

Ah, nevermind, the current_rev is filled before the upgrade... I'd still rather see the misc.testdays_process_dates() call in the migration script, though

Since you are using the method out of the wiki.py scope, please remove the __ prefix.

How about start, end = ... instead? May not be the best option, given how you use the data later on, just pointing to the option of "better readability".

Also, since the testdays_process_dates() is only used here (to actually "fill in the missing dates"), consider renaming it to something more indicative of what it does.

As I already mentioned elsewhere, since this is only really ever used to "fill in the data for old testdays", consider moving this code into the migration script instead. Would make way more sense than the current solution.

In general, rather than returning None, consider raising a custom exception instead. That IMO makes more sense in the context of what the code does, and the code "working around" the "it either returns None or a tuple with the dates" concept would also get nicer.

I don't really like this. I understand the "saving characters" motivation, but adding an arbitrary attribute to the class is not the best solution to the problem at hand.

I'd rather see something in the line of:

def __init__(self, name, metadata_url, testday_url=None, testday_start=None, testday_end=None, resultsdb_job_uuid=None):
    self.testday_start = testday_start or datetime.datetime(1900, 1, 1, 0, 0)
    self.testday_end = testday_end or datetime.datetime(1900, 1, 1, 0, 0)

Not a huge fan of the blanket except statement. Ideally just catch what is necessary, if that's not possible, then explain the reasoning in a comment, and at least log the exception, so one could investigate.

1 new commit added

  • Feedback
3 years ago

absolute nitpick: (None, None)

Might be worth explaining the intent here

1 new commit added

  • Fixup
3 years ago

Once you explain the regex better (e.g. in->out data), and maybe address the nitpick, feel free to merge. THX!

1 new commit added

  • Nits
3 years ago

1 new commit added

  • Code comments
3 years ago

1 new commit added

  • Polish
3 years ago

1 new commit added

  • Simplify
3 years ago

1 new commit added

  • Final, ultimate form
3 years ago

1 new commit added

  • µNits
3 years ago

rebased onto 503f10b

3 years ago

Pull-Request has been merged by frantisekz

3 years ago