From 8936e0497bba79522317e02d74c2c4cb5b944ad4 Mon Sep 17 00:00:00 2001 From: Matthew Miller Date: Jun 19 2020 17:23:50 +0000 Subject: rename --- diff --git a/README.md b/README.md new file mode 100644 index 0000000..b71e838 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +No dinosaurs were harmed in the making of this script. + +All slicing is completely nondestructive. + +Going back in time and hunting dinosaurs is known to not work out. + +We would never. \ No newline at end of file diff --git a/brontosaurus-extractor.py b/brontosaurus-extractor.py deleted file mode 100755 index 2cf8bc0..0000000 --- a/brontosaurus-extractor.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/python3 - -import os -import sqlite3 -import pprint - -import pandas -import numpy as np -import matplotlib as m -m.use("Agg") -import matplotlib.pyplot as plt -import matplotlib.dates as dates - -m.style.use('seaborn-whitegrid') -m.rcParams['font.size'] = 12 -m.rcParams['font.family'] = 'Overpass' -m.rcParams['legend.frameon'] = False -figsize=(16, 9) -dpi=300 - - -DATAFILE = 'db/demo-data.db' -MIN_THRESHOLD = 200 - -DATABASE = sqlite3.connect(DATAFILE,detect_types=sqlite3.PARSE_DECLTYPES) - - -def get_fedora_versions(): - """Returns a list of Fedora versions present in the dataset. Leaves out versions where the total number of entries is less than global MIN_THRESHOLD""" - c = DATABASE.cursor() - c.execute("""SELECT os_version,SUM(count) FROM countme_totals WHERE - os_name='Fedora' AND - repo_tag='updates-released-f'||os_version AND - os_arch=repo_arch - GROUP BY os_version""") - l = [] - for version,count in c.fetchall(): - if count >= MIN_THRESHOLD: - l.append(version) - return l - -def get_query_for(column): - """Returns a query string for the proper column.""" - # Constructing SQL query strings is Considered Bad, but here we are only - # using four pre-set values (count, countme, os_variant, os_arch) and therefore - # It's Fine -- What Could Go Wrong? - assert column == 'total' or column == 'countme' or column == 'os_variant' or column == 'os_arch' - - rawstring = "SELECT weeknum,SUM(count) AS sum FROM countme_totals WHERE os_name='Fedora' AND os_version=:version AND repo_tag='updates-released-f'||os_version {} GROUP BY weeknum ORDER BY weeknum" - - if column == 'total': - querystring = rawstring.format("") - else: - querystring = rawstring.format("AND " + column + "=:match") - - - return querystring - - -def get_conditional_total(version, selector, match): - """Returns a dataframe with weeknum as index and the total results for the given fedora version condition""" - - df = pandas.read_sql_query(get_query_for(selector),params={"version":version,"match":match},con=DATABASE,index_col='weeknum'); - df.fillna(value=0,inplace=True) - - if match == None: - df.rename(columns={"sum": selector},inplace=True) - else: - df.rename(columns={"sum": selector + "=" + match},inplace=True) - - return df.astype('int') - - - -fedora_dataframes = {} - -for version in get_fedora_versions(): - - # start by inserting the total - fedora_dataframes[version] = get_conditional_total(version,'total',None) - - # now add the four countme possibilities as their own columns - for countme in ['1','2','3','4']: - fedora_dataframes[version] = fedora_dataframes[version].join(get_conditional_total(version,'countme',countme),on='weeknum').fillna(value=0).astype('int') - - print("Fedora {0}:".format(version)) - pprint.pprint(fedora_dataframes[version]) - print("-----------------------------") - - -DATABASE.close() - \ No newline at end of file diff --git a/brontosaurus-slicer.py b/brontosaurus-slicer.py new file mode 100755 index 0000000..2cf8bc0 --- /dev/null +++ b/brontosaurus-slicer.py @@ -0,0 +1,92 @@ +#!/usr/bin/python3 + +import os +import sqlite3 +import pprint + +import pandas +import numpy as np +import matplotlib as m +m.use("Agg") +import matplotlib.pyplot as plt +import matplotlib.dates as dates + +m.style.use('seaborn-whitegrid') +m.rcParams['font.size'] = 12 +m.rcParams['font.family'] = 'Overpass' +m.rcParams['legend.frameon'] = False +figsize=(16, 9) +dpi=300 + + +DATAFILE = 'db/demo-data.db' +MIN_THRESHOLD = 200 + +DATABASE = sqlite3.connect(DATAFILE,detect_types=sqlite3.PARSE_DECLTYPES) + + +def get_fedora_versions(): + """Returns a list of Fedora versions present in the dataset. Leaves out versions where the total number of entries is less than global MIN_THRESHOLD""" + c = DATABASE.cursor() + c.execute("""SELECT os_version,SUM(count) FROM countme_totals WHERE + os_name='Fedora' AND + repo_tag='updates-released-f'||os_version AND + os_arch=repo_arch + GROUP BY os_version""") + l = [] + for version,count in c.fetchall(): + if count >= MIN_THRESHOLD: + l.append(version) + return l + +def get_query_for(column): + """Returns a query string for the proper column.""" + # Constructing SQL query strings is Considered Bad, but here we are only + # using four pre-set values (count, countme, os_variant, os_arch) and therefore + # It's Fine -- What Could Go Wrong? + assert column == 'total' or column == 'countme' or column == 'os_variant' or column == 'os_arch' + + rawstring = "SELECT weeknum,SUM(count) AS sum FROM countme_totals WHERE os_name='Fedora' AND os_version=:version AND repo_tag='updates-released-f'||os_version {} GROUP BY weeknum ORDER BY weeknum" + + if column == 'total': + querystring = rawstring.format("") + else: + querystring = rawstring.format("AND " + column + "=:match") + + + return querystring + + +def get_conditional_total(version, selector, match): + """Returns a dataframe with weeknum as index and the total results for the given fedora version condition""" + + df = pandas.read_sql_query(get_query_for(selector),params={"version":version,"match":match},con=DATABASE,index_col='weeknum'); + df.fillna(value=0,inplace=True) + + if match == None: + df.rename(columns={"sum": selector},inplace=True) + else: + df.rename(columns={"sum": selector + "=" + match},inplace=True) + + return df.astype('int') + + + +fedora_dataframes = {} + +for version in get_fedora_versions(): + + # start by inserting the total + fedora_dataframes[version] = get_conditional_total(version,'total',None) + + # now add the four countme possibilities as their own columns + for countme in ['1','2','3','4']: + fedora_dataframes[version] = fedora_dataframes[version].join(get_conditional_total(version,'countme',countme),on='weeknum').fillna(value=0).astype('int') + + print("Fedora {0}:".format(version)) + pprint.pprint(fedora_dataframes[version]) + print("-----------------------------") + + +DATABASE.close() + \ No newline at end of file