#16 Parse and store Event dates
Merged 3 years ago by frantisekz. Opened 3 years ago by frantisekz.

@@ -0,0 +1,83 @@ 

+ """Event Dates

+ 

+ Revision ID: 31a71e8c7dfa

+ Revises: 1e0fb82777af

+ Create Date: 2020-11-19 09:50:46.323162

+ 

+ """

+ 

+ import requests

+ import re

+ import datetime

+ 

+ from sqlalchemy.orm import sessionmaker

+ 

+ from testdays.models.testday import Event

+ from testdays.lib import wiki

+ 

+ Session = sessionmaker()

+ 

+ # revision identifiers, used by Alembic.

+ revision = '31a71e8c7dfa'

+ down_revision = '1e0fb82777af'

+ branch_labels = None

+ depends_on = None

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

def process_past_event_dates():
    """Backfill testday_start/testday_end for every pre-existing Event.

    Runs inside the Alembic migration: binds a session to the migration
    connection, walks all events (newest first), resolves each event's wiki
    URL (following redirects and normalizing it) and stores the parsed
    start/end dates. Events whose dates cannot be parsed keep the sentinel
    default returned by wiki.get_td_dates() (1900-01-01).
    """
    connection = op.get_bind()
    session = Session(bind=connection)
    events = session.query(Event).order_by(Event.created_at.desc())
    for event in events:
        print("Processing TD %s" % event.name)

        # Some testday urls are redirects; follow them to reach the real page.
        # Eg. http://fedoraproject.org/wiki/Test_Day:F33_SwapOnZRAM
        # to
        # https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM?rd=Test_Day:F33_SwapOnZRAM
        r = requests.get(event.testday_url)

        # Urls after redirects contain an unnecessary "?rd=something" parameter,
        # our code would crash on that, so purge it.
        # Eg. https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM?rd=Test_Day:F33_SwapOnZRAM
        # to https://fedoraproject.org/wiki/Test_Day:2020-07-06_Swap_on_ZRAM
        to_clean = re.findall(r'\?rd.*', r.url)
        if len(to_clean) == 1:
            # Reuse the match found above instead of re-running the regex.
            cleaned_url = r.url.replace(to_clean[0], "")
        else:
            cleaned_url = r.url

        # Some more cleaning
        # https://fedoraproject.org/w/index.php?title=Test_Day:2018-04-11_Cloud-Atomic_Testday
        # to
        # https://fedoraproject.org/wiki/Test_Day:2018-04-11_Cloud-Atomic_Testday
        cleaned_url = cleaned_url.replace("fedoraproject.org/w/index.php?title=", "fedoraproject.org/wiki/")

        # Parse dates
        start, end = wiki.get_td_dates(cleaned_url)

        # wiki.get_td_dates() signals a parse failure with the 1900-01-01 sentinel.
        if start == datetime.datetime(1900, 1, 1, 0, 0):
            print("Could not parse event's date, using default")

        event.testday_start = start
        event.testday_end = end
        # Commit per event so an interrupted migration keeps already-processed rows.
        session.commit()

+ 

def upgrade():
    """Add testday_start/testday_end columns to 'event' and backfill them."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('event', sa.Column('testday_end', sa.DateTime(), nullable=True))
    op.add_column('event', sa.Column('testday_start', sa.DateTime(), nullable=True))
    # ### end Alembic commands ###

    # Process data changes
    # Fill the new columns for events created before this migration.
    process_past_event_dates()

+ 

+ 

+ 

def downgrade():
    """Drop the testday_start/testday_end columns added by upgrade()."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column('event', 'testday_start')
    op.drop_column('event', 'testday_end')
    # ### end Alembic commands ###

@@ -88,7 +88,8 @@ 

      if td_url.startswith("https"):

          td_url = td_url.replace("https", "http", 1)

  

-     event = Event(td_name, url, td_url)

+     start, end = wiki.get_td_dates(td_url)

+     event = Event(td_name, url, td_url, start, end)

  

      # Testcase Sections

      category = None
@@ -172,6 +173,11 @@ 

  

                  event.id = old_event.id

                  event.resultsdb_job_uuid = old_event.resultsdb_job_uuid

+                 # Update start/end

+                 start, end = wiki.get_td_dates(event.testday_url)

+                 event.testday_start = start

+                 event.testday_end = end

+ 

                  db.session.merge(event)

                  db.session.commit()

                  flash('Event was successfully updated')

file modified
+41 -1
@@ -5,6 +5,45 @@ 

  from urllib.parse import urljoin

  import urllib

  import simplejson

+ import re

+ import datetime

+ 

def get_td_dates(url):
    """
    Parse the infobox of a testday wiki page and return the testday dates
    [start, end]. On any parsing failure, return the sentinel pair
    (datetime.datetime(1900, 1, 1, 0, 0), datetime.datetime(1900, 1, 1, 0, 0)).
    """
    # Single definition of the failure sentinel instead of repeating it at
    # every early return.
    fallback = (datetime.datetime(1900, 1, 1, 0, 0),
                datetime.datetime(1900, 1, 1, 0, 0))

    url = url.replace("http://", "https://")
    page = __strip_url(url)
    w = fedora.client.wiki.Wiki()
    params = {'action': 'parse', 'format': 'json', 'prop': 'wikitext', 'page': page}
    result = w.send_request('api.php', req_params=params)

    # Clean up api returned data for further processing through regex
    try:
        result = result["parse"]["wikitext"]["*"]
    except KeyError:
        return fallback
    result = result.replace("\n", "")
    result = result.replace("'", "")

    # Infobox contains a string like '| date = 2020-09-21 | time' or
    # '| date = 2020-09-21 to 2020-09-22 | time'.
    # We're getting the content from between '| date = ' and ' | time'.
    match = re.search(r'date = (.*)\| time', result)
    if match is None:
        return fallback
    dates = match.group(1)

    # Ranges are written either as "A to B" or "A - B"; a single date has
    # neither separator and split() yields a one-element list.
    if " to " in dates:
        dates = dates.split(" to ")
    else:
        dates = dates.split(" - ")
    try:
        dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates]
    except ValueError:
        return fallback

    # Single-day testdays: start == end.
    if len(dates) == 1:
        dates.append(dates[0])
    return dates

+ 

+ 

  

  def __strip_url(url):

      for s in ["https://fedoraproject.org/wiki/", fedora.client.wiki.Wiki().base_url]:
@@ -70,6 +109,7 @@ 

  

  if __name__ == '__main__':

      import pprint

-     page = 'User:Jskladan/Sandbox:TestdayAppTemplate'

+     import ipdb; ipdb.set_trace()

+     page = 'Test_Day:Fedora_33_CoreOS_2020-11-06'

      print("Fetching metadata from %s" % page)

      pprint.pprint(get_page_section(page, 'TestdayApp Metadata'))

file modified
+5 -1
@@ -29,16 +29,20 @@ 

      name = db.Column(db.Text)

      metadata_url = db.Column(db.Text)

      testday_url = db.Column(db.Text)

+     testday_start = db.Column(db.DateTime)

+     testday_end = db.Column(db.DateTime)

      resultsdb_job_id = db.Column(db.Integer)

      resultsdb_job_uuid = db.Column(db.String(36))

      created_at = db.Column(db.DateTime, default = datetime.datetime.utcnow)

  

      categories = db.relation('Category', backref='event', order_by='Category.id')

  

    def __init__(self, name, metadata_url, testday_url=None, testday_start=None, testday_end=None, resultsdb_job_uuid=None):
        """Create an Event.

        Args:
            name: display name of the testday event.
            metadata_url: wiki URL the event metadata is parsed from.
            testday_url: public wiki URL of the testday page, if any.
            testday_start: start datetime of the testday (may be None).
            testday_end: end datetime of the testday (may be None).
            resultsdb_job_uuid: UUID of the associated ResultsDB job, if any.
        """
        self.name = name
        self.metadata_url = metadata_url
        self.testday_url = testday_url
        self.testday_start = testday_start
        self.testday_end = testday_end
        self.resultsdb_job_uuid = resultsdb_job_uuid

  

  class Category(db.Model):

This will be split into multiple Pull Requests.

In the first one, the aim is to add functions to parse testday date ranges from wiki, update db models, wire the parsing into testday creation, and wire up code to update past testdays (neither is tested yet).

And finally, I'll change the UI (with other tweaks) to distinguish active and past testdays.

rebased onto 8c2c42d

3 years ago

1 new commit added

  • Process past events
3 years ago

2 new commits added

  • Process past events
  • Parse and store Event dates
3 years ago

I don't see how this could work, if you are on the 1e0fb82777af revision, the Event table does not have the start/end columns yet.

If you want to retrospectively fill the dates, do it as a part of the DB schema upgrade.

Ah, nevermind, the current_rev is filled before the upgrade... I'd still rather see the misc.testdays_process_dates() call in the migration script, though

Since you are using the method out of the wiki.py scope, please remove the __ prefix.

How about start, end = ... instead? May not be the best option, given how you use the data later on, just pointing to the option of "better readability".

Also, since the testdays_process_dates() is only used here (to actually "fill in the missing dates"), consider renaming it to something more indicative of what it does.

As I already mentioned elsewhere, since this is only really ever used to "fill in the data for old testdays", consider moving this code into the migration script instead. Would make way more sense than the current solution.

In general, rather than returning None, consider raising a custom exception instead. That IMO makes more sense in the context of what the code does, and the code "working around" the "it either returns None or a tuple with the dates" concept would also get nicer.

I don't really like this. I understand the "saving characters" motivation, but adding an arbitrary attribute to the class is not the best solution to the problem at hand.

I'd rather see something in the line of:

def __init__(self, name, metadata_url, testday_url=None, testday_start=None, testday_end=None, resultsdb_job_uuid=None):
    self.testday_start = testday_start or datetime.datetime(1900, 1, 1, 0, 0)
    self.testday_end = testday_end or datetime.datetime(1900, 1, 1, 0, 0)

Not a huge fan of the blanket except statement. Ideally just catch what is necessary, if that's not possible, then explain the reasoning in a comment, and at least log the exception, so one could investigate.

1 new commit added

  • Feedback
3 years ago

absolute nitpick: (None, None)

Might be worth explaining the intent here

1 new commit added

  • Fixup
3 years ago

Once you explain the regex better (e.g. in->out data), and maybe address the nitpick, feel free to merge. THX!

1 new commit added

  • Nits
3 years ago

1 new commit added

  • Code comments
3 years ago

1 new commit added

  • Polish
3 years ago

1 new commit added

  • Simplify
3 years ago

1 new commit added

  • Final, ultimate form
3 years ago

1 new commit added

  • µNits
3 years ago

rebased onto 503f10b

3 years ago

Pull-Request has been merged by frantisekz

3 years ago