#10 Added basic code-style validation
Merged 3 years ago by mattdm. Opened 3 years ago by josseline.
josseline/fedora-contributor-trends basic-validation  into  main

file added
+3
@@ -0,0 +1,3 @@ 

+ [flake8]

+ max-line-length = 120

+ exclude = .git, __pycache__, env, venv, .env, .venv, ENV, env.bak, venv.bak

file modified
+1 -1
@@ -1,1 +1,1 @@ 

- config = {'endpoints': {}} 

\ No newline at end of file

+ config = {"endpoints": {}}

file modified
+341 -212
@@ -1,305 +1,434 @@ 

  #!/usr/bin/python3

  import os

- import pandas

+ 

  import matplotlib as m

+ import matplotlib.pyplot as plt

+ import pandas

+ 

  m.use("Agg")

- import matplotlib.pyplot as plt 

- m.rcParams['font.size'] = 12

- m.rcParams['font.family'] = 'Overpass'

- m.rcParams['legend.frameon'] = False

+ 

+ m.rcParams["font.size"] = 12

+ m.rcParams["font.family"] = "Overpass"

+ m.rcParams["legend.frameon"] = False

  

  try:

-     os.makedirs('./images')

+     os.makedirs("./images")

  except OSError:

      pass

  

- datagit=pandas.read_csv("data/org.fedoraproject.prod.git.receive.bucketed-activity.csv",parse_dates=[0])

- datagit.set_index('weekstart',inplace=True)

- 

- graph=datagit[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ datagit = pandas.read_csv(

+     "data/org.fedoraproject.prod.git.receive.bucketed-activity.csv", parse_dates=[0]

+ )

+ datagit.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datagit[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Making Changes to Packages Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/git.user.count.svg',dpi=300)

+ plt.suptitle("Number of Contributors Making Changes to Packages Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/git.user.count.svg", dpi=300)

  

  #############################################

  

- datagit['msgstotal']=datagit[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datagit['msgs1%']=100*datagit['msgs1']/datagit['msgstotal']

- datagit['msgs9%']=100*datagit['msgs9']/datagit['msgstotal']

- datagit['msgs40%']=100*datagit['msgs40']/datagit['msgstotal']

- datagit['msgsrest%']=100*datagit['msgsrest']/datagit['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datagit[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Package Changes Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/git.activity.share.svg',dpi=300)

+ datagit["msgstotal"] = datagit[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datagit["msgs1%"] = 100 * datagit["msgs1"] / datagit["msgstotal"]

+ datagit["msgs9%"] = 100 * datagit["msgs9"] / datagit["msgstotal"]

+ datagit["msgs40%"] = 100 * datagit["msgs40"] / datagit["msgstotal"]

+ datagit["msgsrest%"] = 100 * datagit["msgsrest"] / datagit["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datagit[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Package Changes Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/git.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datagit[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datagit[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Packaging Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/git.newusers.svg',dpi=300)

+ # plt.suptitle("New Packaging Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/git.newusers.svg',dpi=300)

  

  #############################################

  

- #datagit['newuseractions%']=100*datagit['newuseractions']/datagit['msgstotal']

- #datagit['monthuseractions%']=100*datagit['monthuseractions']/datagit['msgstotal']

- #datagit['yearuseractions%']=100*datagit['yearuseractions']/datagit['msgstotal']

- #datagit['olderuseractions%']=100*datagit['olderuseractions']/datagit['msgstotal']

- 

- 

+ # datagit['newuseractions%']=100*datagit['newuseractions']/datagit['msgstotal']

+ # datagit['monthuseractions%']=100*datagit['monthuseractions']/datagit['msgstotal']

+ # datagit['yearuseractions%']=100*datagit['yearuseractions']/datagit['msgstotal']

+ # datagit['olderuseractions%']=100*datagit['olderuseractions']/datagit['msgstotal']

  

  

- #m.rcParams['legend.frameon'] = True

- #graph=datagit[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datagit[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"})

+ # .plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Package Changes Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Package Changes Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/git.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/git.activity.length.svg',dpi=300)

  #

  ################################################################################################################

  ################################################################################################################

  

- databodhi=pandas.read_csv("data/org.fedoraproject.prod.bodhi.update.comment.bucketed-activity.csv",parse_dates=[0])

- databodhi.set_index('weekstart',inplace=True)

- 

- graph=databodhi[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ databodhi = pandas.read_csv(

+     "data/org.fedoraproject.prod.bodhi.update.comment.bucketed-activity.csv",

+     parse_dates=[0],

+ )

+ databodhi.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     databodhi[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Providing Feedback on Package Updates Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/bodhi.user.count.svg',dpi=300)

+ plt.suptitle(

+     "Number of Contributors Providing Feedback on Package Updates Each Week",

+     fontsize=24,

+ )

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/bodhi.user.count.svg", dpi=300)

  

  #############################################

  

- databodhi['msgstotal']=databodhi[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- databodhi['msgs1%']=100*databodhi['msgs1']/databodhi['msgstotal']

- databodhi['msgs9%']=100*databodhi['msgs9']/databodhi['msgstotal']

- databodhi['msgs40%']=100*databodhi['msgs40']/databodhi['msgstotal']

- databodhi['msgsrest%']=100*databodhi['msgsrest']/databodhi['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=databodhi[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Update Feedback Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/bodhi.activity.share.svg',dpi=300)

+ databodhi["msgstotal"] = databodhi[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ databodhi["msgs1%"] = 100 * databodhi["msgs1"] / databodhi["msgstotal"]

+ databodhi["msgs9%"] = 100 * databodhi["msgs9"] / databodhi["msgstotal"]

+ databodhi["msgs40%"] = 100 * databodhi["msgs40"] / databodhi["msgstotal"]

+ databodhi["msgsrest%"] = 100 * databodhi["msgsrest"] / databodhi["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     databodhi[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Update Feedback Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/bodhi.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=databodhi[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=databodhi[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Update Testing Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/bodhi.newusers.svg',dpi=300)

+ # plt.suptitle("New Update Testing Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/bodhi.newusers.svg',dpi=300)

  

  #############################################

  

- #databodhi['newuseractions%']=100*databodhi['newuseractions']/databodhi['msgstotal']

- #databodhi['monthuseractions%']=100*databodhi['monthuseractions']/databodhi['msgstotal']

- #databodhi['yearuseractions%']=100*databodhi['yearuseractions']/databodhi['msgstotal']

- #databodhi['olderuseractions%']=100*databodhi['olderuseractions']/databodhi['msgstotal']

+ # databodhi['newuseractions%']=100*databodhi['newuseractions']/databodhi['msgstotal']

+ # databodhi['monthuseractions%']=100*databodhi['monthuseractions']/databodhi['msgstotal']

+ # databodhi['yearuseractions%']=100*databodhi['yearuseractions']/databodhi['msgstotal']

+ # databodhi['olderuseractions%']=100*databodhi['olderuseractions']/databodhi['msgstotal']

  

  

- 

- 

- #m.rcParams['legend.frameon'] = True

- #graph=databodhi[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=databodhi[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Update Feedback Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Update Feedback Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/bodhi.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/bodhi.activity.length.svg',dpi=300)

  

  

  ################################################################################################################

  ################################################################################################################

  

- datawiki=pandas.read_csv("data/org.fedoraproject.prod.wiki.article.edit.bucketed-activity.csv",parse_dates=[0])

- datawiki.set_index('weekstart',inplace=True)

- 

- graph=datawiki[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ datawiki = pandas.read_csv(

+     "data/org.fedoraproject.prod.wiki.article.edit.bucketed-activity.csv",

+     parse_dates=[0],

+ )

+ datawiki.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datawiki[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Wiki Editors Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/wiki.user.count.svg',dpi=300)

+ plt.suptitle("Number of Wiki Editors Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/wiki.user.count.svg", dpi=300)

  

  #############################################

  

- datawiki['msgstotal']=datawiki[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datawiki['msgs1%']=100*datawiki['msgs1']/datawiki['msgstotal']

- datawiki['msgs9%']=100*datawiki['msgs9']/datawiki['msgstotal']

- datawiki['msgs40%']=100*datawiki['msgs40']/datawiki['msgstotal']

- datawiki['msgsrest%']=100*datawiki['msgsrest']/datawiki['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datawiki[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Wiki Edits Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/wiki.activity.share.svg',dpi=300)

+ datawiki["msgstotal"] = datawiki[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datawiki["msgs1%"] = 100 * datawiki["msgs1"] / datawiki["msgstotal"]

+ datawiki["msgs9%"] = 100 * datawiki["msgs9"] / datawiki["msgstotal"]

+ datawiki["msgs40%"] = 100 * datawiki["msgs40"] / datawiki["msgstotal"]

+ datawiki["msgsrest%"] = 100 * datawiki["msgsrest"] / datawiki["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datawiki[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Wiki Edits Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/wiki.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.newusers.svg',dpi=300)

+ # plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.newusers.svg',dpi=300)

  ###############################################

  

- #graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.newusers.svg',dpi=300)

+ # plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.newusers.svg',dpi=300)

  

  #############################################

  

- #datawiki['newuseractions%']=100*datawiki['newuseractions']/datawiki['msgstotal']

- #datawiki['monthuseractions%']=100*datawiki['monthuseractions']/datawiki['msgstotal']

- #datawiki['yearuseractions%']=100*datawiki['yearuseractions']/datawiki['msgstotal']

- #datawiki['olderuseractions%']=100*datawiki['olderuseractions']/datawiki['msgstotal']

- 

+ # datawiki['newuseractions%']=100*datawiki['newuseractions']/datawiki['msgstotal']

+ # datawiki['monthuseractions%']=100*datawiki['monthuseractions']/datawiki['msgstotal']

+ # datawiki['yearuseractions%']=100*datawiki['yearuseractions']/datawiki['msgstotal']

+ # datawiki['olderuseractions%']=100*datawiki['olderuseractions']/datawiki['msgstotal']

  

  

- 

- #m.rcParams['legend.frameon'] = True

- #graph=datawiki[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datawiki[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Wiki Edits Each Week By Time Since Editor's First Edit",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Wiki Edits Each Week By Time Since Editor's First Edit",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.activity.length.svg',dpi=300)

  

  ###############################################

  ###############################################

- datapagure=pandas.read_csv("data/io.pagure.prod.pagure.git.receive.bucketed-activity.csv",parse_dates=[0])

- datapagure.set_index('weekstart',inplace=True)

- 

- graph=datapagure[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,25,5))

- #graph.legend(ncol=4)

+ datapagure = pandas.read_csv(

+     "data/io.pagure.prod.pagure.git.receive.bucketed-activity.csv", parse_dates=[0]

+ )

+ datapagure.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datapagure[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 25, 5),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Making Commits to Pagure Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/pagure.user.count.svg',dpi=300)

+ plt.suptitle("Number of Contributors Making Commits to Pagure Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/pagure.user.count.svg", dpi=300)

  

  #############################################

  

- datapagure['msgstotal']=datapagure[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datapagure['msgs1%']=100*datapagure['msgs1']/datapagure['msgstotal']

- datapagure['msgs9%']=100*datapagure['msgs9']/datapagure['msgstotal']

- datapagure['msgs40%']=100*datapagure['msgs40']/datapagure['msgstotal']

- datapagure['msgsrest%']=100*datapagure['msgsrest']/datapagure['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datapagure[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Pagure Commits Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/pagure.activity.share.svg',dpi=300)

+ datapagure["msgstotal"] = datapagure[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datapagure["msgs1%"] = 100 * datapagure["msgs1"] / datapagure["msgstotal"]

+ datapagure["msgs9%"] = 100 * datapagure["msgs9"] / datapagure["msgstotal"]

+ datapagure["msgs40%"] = 100 * datapagure["msgs40"] / datapagure["msgstotal"]

+ datapagure["msgsrest%"] = 100 * datapagure["msgsrest"] / datapagure["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datapagure[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Pagure Commits Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/pagure.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datapagure[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datapagure[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Pagure Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/pagure.newusers.svg',dpi=300)

+ # plt.suptitle("New Pagure Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/pagure.newusers.svg',dpi=300)

  

  #############################################

  

- #datapagure['newuseractions%']=100*datapagure['newuseractions']/datapagure['msgstotal']

- #datapagure['monthuseractions%']=100*datapagure['monthuseractions']/datapagure['msgstotal']

- #datapagure['yearuseractions%']=100*datapagure['yearuseractions']/datapagure['msgstotal']

- #datapagure['olderuseractions%']=100*datapagure['olderuseractions']/datapagure['msgstotal']

- 

- 

+ # datapagure['newuseractions%']=100*datapagure['newuseractions']/datapagure['msgstotal']

+ # datapagure['monthuseractions%']=100*datapagure['monthuseractions']/datapagure['msgstotal']

+ # datapagure['yearuseractions%']=100*datapagure['yearuseractions']/datapagure['msgstotal']

+ # datapagure['olderuseractions%']=100*datapagure['olderuseractions']/datapagure['msgstotal']

  

  

- #m.rcParams['legend.frameon'] = True

- #graph=datapagure[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datapagure[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Pagure Commits Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Pagure Commits Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/pagure.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/pagure.activity.length.svg',dpi=300)

file modified
+72 -33
@@ -1,51 +1,90 @@ 

  #!/usr/bin/python3

  import os

- import pandas

+ 

  import matplotlib as m

+ import matplotlib.pyplot as plt

+ import pandas

+ 

  m.use("Agg")

- import matplotlib.pyplot as plt 

- m.rcParams['font.size'] = 12

- m.rcParams['font.family'] = 'Overpass'

- m.rcParams['legend.frameon'] = True

+ 

+ m.rcParams["font.size"] = 12

+ m.rcParams["font.family"] = "Overpass"

+ m.rcParams["legend.frameon"] = True

  

  try:

-     os.makedirs('./images')

+     os.makedirs("./images")

  except OSError:

      pass

  

- data=pandas.read_csv("data/contributor-count.csv",parse_dates=[0])

- data.set_index('weekstart',inplace=True)

- 

+ data = pandas.read_csv("data/contributor-count.csv", parse_dates=[0])

+ data.set_index("weekstart", inplace=True)

  

- graph=data[['oldactive','midactive','newactive']].rename(columns={"oldactive": "Old School","midactive":"Intermediate","newactive":"New Contributors"}).plot.area(figsize=(16, 9),

-                                                               color=[ '#ff420e','#ffd320',  '#579d1c' ], # '#004586'

-                                                               grid=True,stacked=True ,yticks=range(0,451,25))

- data[['rawcount']].rename(columns={"rawcount": "All Contributors\nincluding less active"}).plot(figsize=(16, 9),

-                                                               ax=graph ,yticks=range(0,426,25))

-                                                               

- graph.xaxis.grid(True, which='minor', linestyle='-', linewidth=0.25)

- graph.yaxis.grid(True, which='major', linestyle='-', linewidth=0.25)

  

- plt.suptitle("Fedora Contributors by Week",fontsize=24)

- graph.set_title("Stacked graph of contributors with measured activity each week — and at least four weeks total in the last year.\n“Old school” contributors have been active for longer than two years; new contributors, less than one.\nBlue line shows all contributors active this week regardless of amount of other activity.",fontsize=12)

- graph.set_xlabel('')

+ graph = (

+     data[["oldactive", "midactive", "newactive"]]

+     .rename(

+         columns={

+             "oldactive": "Old School",

+             "midactive": "Intermediate",

+             "newactive": "New Contributors",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#ff420e", "#ffd320", "#579d1c"],  # '#004586'

+         grid=True,

+         stacked=True,

+         yticks=range(0, 451, 25),

+     )

+ )

+ data[["rawcount"]].rename(

+     columns={"rawcount": "All Contributors\nincluding less active"}

+ ).plot(figsize=(16, 9), ax=graph, yticks=range(0, 426, 25))

  

- fig=graph.get_figure()

- fig.savefig('images/active-contributors-by-week.svg',dpi=300)

+ graph.xaxis.grid(True, which="minor", linestyle="-", linewidth=0.25)

+ graph.yaxis.grid(True, which="major", linestyle="-", linewidth=0.25)

  

+ plt.suptitle("Fedora Contributors by Week", fontsize=24)

+ graph.set_title(

+     "Stacked graph of contributors with measured activity each week — and at least four weeks total in the last year.\n"

+     "“Old school” contributors have been active for longer than two years; new contributors, less than one.\n"

+     "Blue line shows all contributors active this week regardless of amount of other activity.",

+     fontsize=12,

+ )

+ graph.set_xlabel("")

  

+ fig = graph.get_figure()

+ fig.savefig("images/active-contributors-by-week.svg", dpi=300)

  

  

- graph=data[['oldcore','midcore','newcore']].rename(columns={"oldcore": "Old School","midcore":"Intermediate","newcore":"New Contributors"}).plot.area(figsize=(16, 9),

-                                                               color=[  '#ff420e', '#ffd320', '#579d1c' ], # '#004586'

-                                                               grid=True,stacked=True ,yticks=range(0,101,25))

-                                                               

- graph.xaxis.grid(True, which='minor', linestyle='-', linewidth=0.25)

- graph.yaxis.grid(True, which='major', linestyle='-', linewidth=0.25)

+ graph = (

+     data[["oldcore", "midcore", "newcore"]]

+     .rename(

+         columns={

+             "oldcore": "Old School",

+             "midcore": "Intermediate",

+             "newcore": "New Contributors",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#ff420e", "#ffd320", "#579d1c"],  # '#004586'

+         grid=True,

+         stacked=True,

+         yticks=range(0, 101, 25),

+     )

+ )

  

- plt.suptitle("Core Fedora Contributors by Week",fontsize=24)

- graph.set_title("Stacked graph of contributors with measured activity this week — and at least four weeks total in the last year.\nOld school contributors have been active for longer than two years; new contributors, less than one.\n“Core” means part of the set doing about ⅔s of all actions over the past year.",fontsize=12)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/active-core-contributors-by-week.svg',dpi=300)

+ graph.xaxis.grid(True, which="minor", linestyle="-", linewidth=0.25)

+ graph.yaxis.grid(True, which="major", linestyle="-", linewidth=0.25)

  

+ plt.suptitle("Core Fedora Contributors by Week", fontsize=24)

+ graph.set_title(

+     "Stacked graph of contributors with measured activity this week — and at least four weeks total in the last year.\n"

+     "Old school contributors have been active for longer than two years; new contributors, less than one.\n"

+     "“Core” means part of the set doing about ⅔s of all actions over the past year.",

+     fontsize=12,

+ )

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/active-core-contributors-by-week.svg", dpi=300)

file modified
+160 -134
@@ -10,7 +10,7 @@ 

  # BUT if --csv (or --csvh, for csv with header) is given, it gives the number for _that week only_

  

  firstseen = collections.OrderedDict()

- lastseen  = collections.OrderedDict()

+ lastseen = collections.OrderedDict()

  actioncount = collections.defaultdict(int)

  weeksactive = collections.defaultdict(int)

  oldschoolornew = {}
@@ -18,157 +18,183 @@ 

  

  

  n = len(sys.argv[1:])

- csvoutput=False

+ csvoutput = False

  if n == 0:

-   reportweek = int((datetime.datetime.now()-datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")).days/7)-1

+     reportweek = (

+         int(

+             (

+                 datetime.datetime.now()

+                 - datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")

+             ).days

+             / 7

+         )

+         - 1

+     )

  elif n == 1:

-    reportweek=int(sys.argv[1])

+     reportweek = int(sys.argv[1])

  elif sys.argv[1] == "--csv":

-    reportweek=int(sys.argv[2])

-    csvoutput=True

-    csvheader=False

+     reportweek = int(sys.argv[2])

+     csvoutput = True

+     csvheader = False

  elif sys.argv[1] == "--csvh":

-    reportweek=int(sys.argv[2])

-    csvoutput=True

-    csvheader=True

+     reportweek = int(sys.argv[2])

+     csvoutput = True

+     csvheader = True

  else:

-   sys.exit(1)

+     sys.exit(1)

  

  

- 

- reporttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d") + datetime.timedelta(days=reportweek*7+6)

+ reporttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d") + datetime.timedelta(

+     days=reportweek * 7 + 6

+ )

  # 52 weeks is precise enough for metrics gathering :)

  lastyear = reporttime - datetime.timedelta(364)

  twoyears = reporttime - datetime.timedelta(728)

-    

- weeks = range(reportweek-51,reportweek+1)

  

- datasources = ( "org.fedoraproject.prod.bodhi.update.comment",

-                 "org.fedoraproject.prod.git.receive",

-                 "org.fedoraproject.prod.irc.karma",

-                 "org.fedoraproject.prod.wiki.article.edit",

-                 "org.fedoraproject.prod.infragit.receive"

-                 )

+ weeks = range(reportweek - 51, reportweek + 1)

+ 

+ datasources = (

+     "org.fedoraproject.prod.bodhi.update.comment",

+     "org.fedoraproject.prod.git.receive",

+     "org.fedoraproject.prod.irc.karma",

+     "org.fedoraproject.prod.wiki.article.edit",

+     "org.fedoraproject.prod.infragit.receive",

+ )

  

  for datasource in datasources:

-   for week in weeks:

-     try:

-       datafragment=pandas.read_csv("data/weekly/{}.userdata.{:05}.csv".format(datasource,week),parse_dates=[2,3])

-     except FileNotFoundError:

-       # ignore missing data.... probably should errror on _everything_ missing (FIXME)

-       continue

-       

-     for index, row in datafragment.iterrows():

-       user=row['user']

- 

-       totalactions += row['actions']

-       actioncount[user]+=row['actions']

- 

-       if not user in weeksactive:

-         weeksactive[user]=set()

-       weeksactive[user].add(week)

- 

-       if not user in firstseen:

-         firstseen[user]=row['firstseen']

-       else:

-         if row['firstseen'] < firstseen[user]:

-           firstseen[user]=row['firstseen']

- 

-       if not user in lastseen:

-         lastseen[user]=row['lastseen']

-       else:

-         if row['lastseen'] < lastseen[user]:

-           lastseen[user]=row['lastseen']

-           

-       if firstseen[user] < twoyears:

-         oldschoolornew[user]="old-school"

-       elif firstseen[user] >= lastyear:

-         oldschoolornew[user]="new contributor"

-       else:

-         oldschoolornew[user]=""

- 

- rawcount=0

- oldcount=0

- midcount=0

- newcount=0

- allactive=0

- 

- accumulator=0

- topusers=[]

+     for week in weeks:

+         try:

+             datafragment = pandas.read_csv(

+                 "data/weekly/{}.userdata.{:05}.csv".format(datasource, week),

+                 parse_dates=[2, 3],

+             )

+         except FileNotFoundError:

+             # ignore missing data.... probably should errror on _everything_ missing (FIXME)

+             continue

+ 

+         for index, row in datafragment.iterrows():

+             user = row["user"]

+ 

+             totalactions += row["actions"]

+             actioncount[user] += row["actions"]

+ 

+             if user not in weeksactive:

+                 weeksactive[user] = set()

+             weeksactive[user].add(week)

+ 

+             if user not in firstseen:

+                 firstseen[user] = row["firstseen"]

+             else:

+                 if row["firstseen"] < firstseen[user]:

+                     firstseen[user] = row["firstseen"]

+ 

+             if user not in lastseen:

+                 lastseen[user] = row["lastseen"]

+             else:

+                 if row["lastseen"] < lastseen[user]:

+                     lastseen[user] = row["lastseen"]

+ 

+             if firstseen[user] < twoyears:

+                 oldschoolornew[user] = "old-school"

+             elif firstseen[user] >= lastyear:

+                 oldschoolornew[user] = "new contributor"

+             else:

+                 oldschoolornew[user] = ""

+ 

+ rawcount = 0

+ oldcount = 0

+ midcount = 0

+ newcount = 0

+ allactive = 0

+ 

+ accumulator = 0

+ topusers = []

  for user in sorted(actioncount, key=actioncount.get, reverse=True):

-   accumulator+=actioncount[user]

-   topusers.append(user)

-   #print("{:20} {}".format(user,oldschoolornew[user]))

-   if accumulator>totalactions*2.0/3:

-     break

-   

- newcore=0  

- midcore=0

- oldcore=0

+     accumulator += actioncount[user]

+     topusers.append(user)

+     # print("{:20} {}".format(user,oldschoolornew[user]))

+     if accumulator > totalactions * 2.0 / 3:

+         break

+ 

+ newcore = 0

+ midcore = 0

+ oldcore = 0

  

  for user in oldschoolornew:

  

-   # in csv mode, only report on activity *this* week

-   if csvoutput and not reportweek in weeksactive[user]:

-     continue

- 

-   rawcount+=1

- 

-   # only count users who are active

-   # at least 4 distinct weeks in the past year

-   if len(weeksactive[user]) < 4:

-     continue

- 

-     

-   allactive+=1

-   if oldschoolornew[user] == "old-school":

-     oldcount+=1

-     if user in topusers:

-       oldcore+=1

-   elif oldschoolornew[user] == "new contributor":

-     newcount+=1

-     if user in topusers:

-       newcore+=1

-   else:

-     midcount+=1

-     if user in topusers:

-       midcore+=1

-   

+     # in csv mode, only report on activity *this* week

+     if csvoutput and reportweek not in weeksactive[user]:

+         continue

+ 

+     rawcount += 1

+ 

+     # only count users who are active

+     # at least 4 distinct weeks in the past year

+     if len(weeksactive[user]) < 4:

+         continue

+ 

+     allactive += 1

+     if oldschoolornew[user] == "old-school":

+         oldcount += 1

+         if user in topusers:

+             oldcore += 1

+     elif oldschoolornew[user] == "new contributor":

+         newcount += 1

+         if user in topusers:

+             newcore += 1

+     else:

+         midcount += 1

+         if user in topusers:

+             midcore += 1

+ 

  

  if csvoutput:

-   if csvheader:

-     print("weekstart,rawcount,oldactive,midactive,newactive,oldcore,midcore,newcore")

-   print("{0:%Y-%m-%d}".format(reporttime),rawcount,

-         oldcount,midcount,newcount,

-         oldcore,midcore,newcore,

-         sep=",")

-   sys.exit(0)

- 

-       

- print ("Report for year ending {:%Y-%m-%d} (through week #{}):".format(reporttime,reportweek))

- print ("")

- print ("Raw total contributors:        {:>5}".format(rawcount))   

- print ("Total active contributors:     {:>5}".format(allactive))   

- print ("Core contributors (⅔ actions)  {:>5}".format(len(topusers)))

- print ("")

- print ("Old-school contributors:       {:>5}".format(oldcount))

- print ("Intermediate contributors:     {:>5}".format(midcount))

- print ("New contributors this year:    {:>5}".format(newcount))

- print ("")

- print ("Old core contributors:         {:>5}".format(oldcore))

- print ("Intermediate core contributors:{:>5}".format(midcore))

- print ("New core contributors:         {:>5}".format(newcore))

- print ("\n")

- print ("This report is an aggregate of dist-git, bodhi karma, wiki edits,")

- print ("infra git, and irc cookies. It doesn't measure all Fedora activity.")

- print ("")

- print ("Active means at least four separate weeks of activity.")

- print ("Core means part of the set doing about ⅔s of all actions.")

- print ("Old-school contributors started at least two years (104 weeks) ago.")

- print ("New contributors are new in the past 52 weeks.")

- print ("Note that by this metric, \"mattdm\" is not a core contributor.")

- 

- print ("\n-------------------------------------------\n")

+     if csvheader:

+         print(

+             "weekstart,rawcount,oldactive,midactive,newactive,oldcore,midcore,newcore"

+         )

+     print(

+         "{0:%Y-%m-%d}".format(reporttime),

+         rawcount,

+         oldcount,

+         midcount,

+         newcount,

+         oldcore,

+         midcore,

+         newcore,

+         sep=",",

+     )

+     sys.exit(0)

+ 

+ 

+ print(

+     "Report for year ending {:%Y-%m-%d} (through week #{}):".format(

+         reporttime, reportweek

+     )

+ )

+ print("")

+ print("Raw total contributors:        {:>5}".format(rawcount))

+ print("Total active contributors:     {:>5}".format(allactive))

+ print("Core contributors (⅔ actions)  {:>5}".format(len(topusers)))

+ print("")

+ print("Old-school contributors:       {:>5}".format(oldcount))

+ print("Intermediate contributors:     {:>5}".format(midcount))

+ print("New contributors this year:    {:>5}".format(newcount))

+ print("")

+ print("Old core contributors:         {:>5}".format(oldcore))

+ print("Intermediate core contributors:{:>5}".format(midcore))

+ print("New core contributors:         {:>5}".format(newcore))

+ print("\n")

+ print("This report is an aggregate of dist-git, bodhi karma, wiki edits,")

+ print("infra git, and irc cookies. It doesn't measure all Fedora activity.")

+ print("")

+ print("Active means at least four separate weeks of activity.")

+ print("Core means part of the set doing about ⅔s of all actions.")

+ print("Old-school contributors started at least two years (104 weeks) ago.")

+ print("New contributors are new in the past 52 weeks.")

+ print('Note that by this metric, "mattdm" is not a core contributor.')

+ 

+ print("\n-------------------------------------------\n")

  

  for user in topusers:

-   print("{:20} {}".format(user,oldschoolornew[user]))

+     print("{:20} {}".format(user, oldschoolornew[user]))

file modified
+6 -7
@@ -1,6 +1,7 @@ 

  import requests

  

- url = 'https://apps.fedoraproject.org/datagrepper/raw'

+ url = "https://apps.fedoraproject.org/datagrepper/raw"

+ 

  

  def grep(tries=0, **kwargs):

      response = requests.get(url, params=kwargs)
@@ -11,15 +12,15 @@ 

              yield item

  

      data = response.json()

-     pages = data['pages']

+     pages = data["pages"]

  

-     for message in data['raw_messages']:

+     for message in data["raw_messages"]:

          yield message

  

      for page in range(1, pages):

          for attempt in range(20):

              try:

-                 kwargs['page'] = page

+                 kwargs["page"] = page

                  response = requests.get(url, params=kwargs)

                  try:

                      data = response.json()
@@ -35,7 +36,5 @@ 

                  break

          else:

              raise ValueError("Ran out of retries")

-         for message in data.get('raw_messages', []):

+         for message in data.get("raw_messages", []):

              yield message

- 

- 

file modified
+245 -177
@@ -4,9 +4,10 @@ 

  #

  # output: a CSV file with fields:

  #

- # date, msgs1, msgs9, msgs40, msgsrest, users1, users9, users40, userrest, newusers, actionsnew, actionsmonth, actionsyear, actionsolder, newspammers, spamactions, botactions, relengactions

+ # date, msgs1, msgs9, msgs40, msgsrest, users1, users9, users40, userrest, newusers, actionsnew, actionsmonth,

+ # actionsyear, actionsolder, newspammers, spamactions, botactions, relengactions

  #

- # where and 1, 9, 40, rest correspond to activity from the cohort of 

+ # where 1, 9, 40, rest correspond to activity from the cohort of

  # quarter is a sliding 13-week window) and users is the count of users in

  # that cohort that week while msgs is overall work. display the user count
@@ -18,202 +19,214 @@ 

  #

  # todo: create those graphs here in addition to CSV

  

- import utils

- 

- import fedmsg.meta

- import fedmsg.config

- config = fedmsg.config.load_config(filenames=['fedmsgconfig.py'])

- fedmsg.meta.make_processors(**config)

  

- 

- 

- import time

+ import collections

  import datetime

  import logging

  import os

+ import pickle

+ import re

  import sys

+ import time

  

- import string

- import re

+ import fedmsg.meta

  

- import collections

- import pprint

+ import utils

  

- import pickle

+ config = fedmsg.config.load_config(filenames=["fedmsgconfig.py"])

+ fedmsg.meta.make_processors(**config)

  

- #logging.basicConfig(level=logging.DEBUG)

+ # logging.basicConfig(level=logging.DEBUG)

  logging.basicConfig(level=logging.ERROR)

  

  

  class TimeoutError(Exception):

      """too much timeout"""

+ 

      pass

  

+ 

  class InvalidDiscriminantError(Exception):

      """invalid discriminant"""

+ 

      pass

  

  

- spammers = [line.rstrip('\n') for line in open('badpeople.list')]

- bots     = [line.rstrip('\n') for line in open('bots.list')]

+ spammers = [line.rstrip("\n") for line in open("badpeople.list")]

+ bots = [line.rstrip("\n") for line in open("bots.list")]

  

  epoch = datetime.datetime.utcfromtimestamp(0)

  

- ipaddrre = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

+ ipaddrre = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

  

  discriminant = sys.argv[-1]

- if __file__.split('/')[-1] in discriminant:

+ if __file__.split("/")[-1] in discriminant:

      print("usage: '$ ./weekly-user-activity.py TOPIC'")

      sys.exit(1)

-     

- if not re.match("^[a-z\.]*$", discriminant):

+ 

+ if not re.match(r"^[a-z\.]*$", discriminant):

      print("bad discriminant")

      sys.exit(2)

  

- 

  print("operating with discriminant", discriminant)

  

  verboten = [

-     'org.fedoraproject.prod.buildsys.rpm.sign',

-     'org.fedoraproject.prod.buildsys.repo.init',

-     'org.fedoraproject.prod.buildsys.tag',

-     'org.fedoraproject.prod.buildsys.untag',

+     "org.fedoraproject.prod.buildsys.rpm.sign",

+     "org.fedoraproject.prod.buildsys.repo.init",

+     "org.fedoraproject.prod.buildsys.tag",

+     "org.fedoraproject.prod.buildsys.untag",

  ]

  

  try:

-     os.makedirs('./data')

+     os.makedirs("./data")

  except OSError:

      pass

  try:

-     os.makedirs('./data/weekly')

+     os.makedirs("./data/weekly")

  except OSError:

      pass

  try:

-     os.makedirs('./cache')

+     os.makedirs("./cache")

  except OSError:

      pass

-     

-    

- weeknum=0

+ 

+ weeknum = 0

  # the year in which fedmesg starts.

  starttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")

  

+ WeekActions = collections.namedtuple(

+     "WeekActions", ["week", "useractions", "newusers", "actionsbyage", "nonhuman"]

+ )

  

- 

- WeekActions = collections.namedtuple('WeekActions',['week','useractions','newusers','actionsbyage','nonhuman'])

- 

- firstseen={}

- lastseen={}

+ firstseen = {}

+ lastseen = {}

  

  # 13 weeks = 1 quarter (rolling)

- ring        = collections.deque(maxlen=13)

+ ring = collections.deque(maxlen=13)

  

- with open('data/%s.bucketed-activity.csv' % (discriminant), 'w') as bucketcsv:

-     bucketcsv.write("weekstart,msgs1,msgs9,msgs40,msgsrest,users1,users9,users40,userrest,newusercount,newuseractions,monthuseractions,yearuseractions,olderuseractions,newspammers,spamactions,botactions,relengactions\n")

+ with open("data/%s.bucketed-activity.csv" % (discriminant), "w") as bucketcsv:

+     bucketcsv.write(

+         "weekstart,msgs1,msgs9,msgs40,msgsrest,users1,users9,users40,userrest,newusercount,newuseractions,"

+         "monthuseractions,yearuseractions,olderuseractions,newspammers,spamactions,botactions,relengactions\n"

+     )

      bucketcsv.flush()

-     

-     while starttime < datetime.datetime.now() + datetime.timedelta(42): # weeks in the future because see below

-         endtime   = starttime + datetime.timedelta(7)

-         weekinfo  = WeekActions(starttime, collections.Counter(), collections.Counter(), collections.Counter(), collections.Counter())

-         weekbreakdown=collections.Counter()

  

-         print("Working on %s / %s" % (discriminant, starttime.strftime("%Y-%m-%d")),)

+     while starttime < datetime.datetime.now() + datetime.timedelta(

+             42

+     ):  # weeks in the future because see below

+         endtime = starttime + datetime.timedelta(7)

+         weekinfo = WeekActions(

+             starttime,

+             collections.Counter(),

+             collections.Counter(),

+             collections.Counter(),

+             collections.Counter(),

+         )

+         weekbreakdown = collections.Counter()

+ 

+         print(

+             "Working on %s / %s" % (discriminant, starttime.strftime("%Y-%m-%d")),

+         )

+ 

+         msgcachefile = (

+                 "cache/" + discriminant + "." + starttime.strftime("%Y-%m-%d") + ".pickle"

+         )

  

-         msgcachefile = "cache/" + discriminant + "." + starttime.strftime("%Y-%m-%d") + ".pickle"

-         

          if os.path.exists(msgcachefile):

  

-           with open(msgcachefile,"r") as msgcache:

-             [firstseen,lastseen,weekinfo,weekbreakdown]=pickle.load(msgcache)

-             print("(cached)")

+             with open(msgcachefile, "r") as msgcache:

+                 [firstseen, lastseen, weekinfo, weekbreakdown] = pickle.load(msgcache)

+                 print("(cached)")

  

          else:

-         

-           for attempt in range(10):

-               try:

-                   messages = utils.grep(

-                       rows_per_page=100,

-                       meta='usernames',

-                       start=int((starttime-epoch).total_seconds()),

-                       end=int((endtime - epoch).total_seconds()),

-                       order='asc',  # Start at the beginning, end at now.

-                       topic=discriminant,

-                       # Cut this stuff out, because its just so spammy.

-                       not_user=['anonymous','koschei'],

-                       not_topic=verboten,

-                   )

-               except IOError:

-                   print("Retrying.")

-                   time.sleep(5)

-               else:

-                   break

-           else:

-               raise TimeoutError("too much timeout")

- 

-           for i, msg in enumerate(messages):

-               # sanity check

-               if msg['topic'] in verboten:

-                   raise InvalidDiscriminantError("hell")

- 

-               for user in msg['meta']['usernames']:

-                  if user == 'releng':

-                      weekinfo.nonhuman['relengactions'] +=1

-                      continue

-                  if user in bots:

-                      weekinfo.nonhuman['botactions'] +=1

-                      continue

-                  if user in spammers:

-                      weekinfo.nonhuman['spamactions'] +=1

-                      if not user in firstseen:

-                          firstseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

-                          weekinfo.nonhuman['newspammers'] +=1

-                      continue

-                  if '@' in user:

-                      # some msgs put email for anon users

-                      continue

-                  if ipaddrre.match(user):

-                      # some msgs (wiki) put ip addr for anon users

-                      continue

-                   

-                  weekinfo.useractions[user] += 1

-                  weekbreakdown[user] += 1

-                  

-                  if not user in firstseen:

-                     firstseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

-                      

-                  if (starttime - firstseen[user]).days < 7:

-                      weekinfo.actionsbyage['new'] += 1

-                  elif (starttime - firstseen[user]).days < 31:

-                      weekinfo.actionsbyage['month'] += 1

-                  elif (starttime - firstseen[user]).days < 365:

-                      weekinfo.actionsbyage['year'] += 1

-                  else:

-                      weekinfo.actionsbyage['older'] += 1

-                  

-                  lastseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

- 

-               

-               if i % 50 == 0:

-                   sys.stdout.write(".")

-                   sys.stdout.flush()

-            

-           print()      

-           #pprint.pprint(dict(weekinfo.useractions))

-          

-           # don't cache the current week (may not be comlete), and definitely

-           # don't cache the future weeks (certainly not complete)

-           if endtime < (datetime.datetime.now() - datetime.timedelta(1)) :

-               sys.stdout.write("Saving... ")

-               sys.stdout.flush()

-               with open(msgcachefile+".temp","wb") as msgcache:

-                   pickle.dump((firstseen,lastseen,weekinfo,weekbreakdown),msgcache)

-               os.rename(msgcachefile+".temp",msgcachefile)

-               print("saved.")

  

+             for attempt in range(10):

+                 try:

+                     messages = utils.grep(

+                         rows_per_page=100,

+                         meta="usernames",

+                         start=int((starttime - epoch).total_seconds()),

+                         end=int((endtime - epoch).total_seconds()),

+                         order="asc",  # Start at the beginning, end at now.

+                         topic=discriminant,

+                         # Cut this stuff out, because its just so spammy.

+                         not_user=["anonymous", "koschei"],

+                         not_topic=verboten,

+                     )

+                 except IOError:

+                     print("Retrying.")

+                     time.sleep(5)

+                 else:

+                     break

+             else:

+                 raise TimeoutError("too much timeout")

+ 

+             for i, msg in enumerate(messages):

+                 # sanity check

+                 if msg["topic"] in verboten:

+                     raise InvalidDiscriminantError("hell")

+ 

+                 for user in msg["meta"]["usernames"]:

+                     if user == "releng":

+                         weekinfo.nonhuman["relengactions"] += 1

+                         continue

+                     if user in bots:

+                         weekinfo.nonhuman["botactions"] += 1

+                         continue

+                     if user in spammers:

+                         weekinfo.nonhuman["spamactions"] += 1

+                         if user not in firstseen:

+                             firstseen[user] = datetime.datetime.fromtimestamp(

+                                 msg["timestamp"]

+                             )

+                             weekinfo.nonhuman["newspammers"] += 1

+                         continue

+                     if "@" in user:

+                         # some msgs put email for anon users

+                         continue

+                     if ipaddrre.match(user):

+                         # some msgs (wiki) put ip addr for anon users

+                         continue

+ 

+                     weekinfo.useractions[user] += 1

+                     weekbreakdown[user] += 1

+ 

+                     if user not in firstseen:

+                         firstseen[user] = datetime.datetime.fromtimestamp(

+                             msg["timestamp"]

+                         )

+ 

+                     if (starttime - firstseen[user]).days < 7:

+                         weekinfo.actionsbyage["new"] += 1

+                     elif (starttime - firstseen[user]).days < 31:

+                         weekinfo.actionsbyage["month"] += 1

+                     elif (starttime - firstseen[user]).days < 365:

+                         weekinfo.actionsbyage["year"] += 1

+                     else:

+                         weekinfo.actionsbyage["older"] += 1

+ 

+                     lastseen[user] = datetime.datetime.fromtimestamp(msg["timestamp"])

+ 

+                 if i % 50 == 0:

+                     sys.stdout.write(".")

+                     sys.stdout.flush()

+ 

+             print()

+             # pprint.pprint(dict(weekinfo.useractions))

+ 

+             # don't cache the current week (may not be comlete), and definitely

+             # don't cache the future weeks (certainly not complete)

+             if endtime < (datetime.datetime.now() - datetime.timedelta(1)):

+                 sys.stdout.write("Saving... ")

+                 sys.stdout.flush()

+                 with open(msgcachefile + ".temp", "wb") as msgcache:

+                     pickle.dump(

+                         (firstseen, lastseen, weekinfo, weekbreakdown), msgcache

+                     )

+                 os.rename(msgcachefile + ".temp", msgcachefile)

+                 print("saved.")

  

          ring.append(weekinfo)

-         

-          

  

          # okay, so, bear with me here. Comments are for explaining confusing

          # conceptual things in code, right? okay, hold on to your seats.
@@ -223,62 +236,117 @@ 

          # gonna write lines from 6 weeks earlier, because finally we have the

          # needed info. so, we jump back 6 weeks (42 days) from starttime.

          # this is the same as jumping back 7 elements in the deque (if it's that deep)

-         

-         if len(ring)>6: 

+ 

+         if len(ring) > 6:

  

              # first, we're bucketing all the users by percent of activity

-             usertotals=collections.Counter()

+             usertotals = collections.Counter()

              for week in ring:

                  usertotals += week.useractions

              userrank = {}

              userbucket = {}

-             i=len(usertotals)+1

-             for name in sorted(usertotals,key=usertotals.get):

-                userrank[name]=i

-                i-=1

-                if i<len(usertotals)*0.01: # top 1%

-                   userbucket[name]=1

-                elif i<len(usertotals)*0.10: # next 9% (otherwise top 10%)

-                   userbucket[name]=2

-                elif i<len(usertotals)*0.50: # next 40%

-                   userbucket[name]=3

-                else:                        # the bottom half

-                   userbucket[name]=4           

- 

-             workweek=ring[len(ring)-7] # jump back same amount into the deque

+             i = len(usertotals) + 1

+             for name in sorted(usertotals, key=usertotals.get):

+                 userrank[name] = i

+                 i -= 1

+                 if i < len(usertotals) * 0.01:  # top 1%

+                     userbucket[name] = 1

+                 elif i < len(usertotals) * 0.10:  # next 9% (otherwise top 10%)

+                     userbucket[name] = 2

+                 elif i < len(usertotals) * 0.50:  # next 40%

+                     userbucket[name] = 3

+                 else:  # the bottom half

+                     userbucket[name] = 4

+ 

+             workweek = ring[len(ring) - 7]  # jump back same amount into the deque

  

              bucketscores = {}

-             bucketscores[1]=0

-             bucketscores[2]=0

-             bucketscores[3]=0

-             bucketscores[4]=0

+             bucketscores[1] = 0

+             bucketscores[2] = 0

+             bucketscores[3] = 0

+             bucketscores[4] = 0

              bucketcount = {}

-             bucketcount[1]=0

-             bucketcount[2]=0

-             bucketcount[3]=0

-             bucketcount[4]=0

+             bucketcount[1] = 0

+             bucketcount[2] = 0

+             bucketcount[3] = 0

+             bucketcount[4] = 0

  

              for username in workweek.useractions.keys():

-                 bucketscores[userbucket[username]] +=  workweek.useractions[username]

-                 bucketcount[userbucket[username]]  +=  1

-               

-             print("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d" % (workweek.week.strftime('%Y-%m-%d'), bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4], bucketcount[1], bucketcount[2], bucketcount[3], bucketcount[4],workweek.newusers['count'],workweek.actionsbyage['new'],workweek.actionsbyage['month'],workweek.actionsbyage['year'],workweek.actionsbyage['older'],workweek.nonhuman['newspammers,'],workweek.nonhuman['spamactions,'], workweek.nonhuman['botactions'], workweek.nonhuman['relengactions']))

- 

-             if any((bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4])):

-                 bucketcsv.write("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n" % (workweek.week.strftime('%Y-%m-%d'), bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4], bucketcount[1], bucketcount[2], bucketcount[3], bucketcount[4],workweek.newusers['count'],workweek.actionsbyage['new'],workweek.actionsbyage['month'],workweek.actionsbyage['year'],workweek.actionsbyage['older'],workweek.nonhuman['newspammers,'],workweek.nonhuman['spamactions,'], workweek.nonhuman['botactions'], workweek.nonhuman['relengactions']))

+                 bucketscores[userbucket[username]] += workweek.useractions[username]

+                 bucketcount[userbucket[username]] += 1

+ 

+             print(

+                 "%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d"

+                 % (

+                     workweek.week.strftime("%Y-%m-%d"),

+                     bucketscores[1],

+                     bucketscores[2],

+                     bucketscores[3],

+                     bucketscores[4],

+                     bucketcount[1],

+                     bucketcount[2],

+                     bucketcount[3],

+                     bucketcount[4],

+                     workweek.newusers["count"],

+                     workweek.actionsbyage["new"],

+                     workweek.actionsbyage["month"],

+                     workweek.actionsbyage["year"],

+                     workweek.actionsbyage["older"],

+                     workweek.nonhuman["newspammers,"],

+                     workweek.nonhuman["spamactions,"],

+                     workweek.nonhuman["botactions"],

+                     workweek.nonhuman["relengactions"],

+                 )

+             )

+ 

+             if any(

+                     (bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4])

+             ):

+                 bucketcsv.write(

+                     "%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n"

+                     % (

+                         workweek.week.strftime("%Y-%m-%d"),

+                         bucketscores[1],

+                         bucketscores[2],

+                         bucketscores[3],

+                         bucketscores[4],

+                         bucketcount[1],

+                         bucketcount[2],

+                         bucketcount[3],

+                         bucketcount[4],

+                         workweek.newusers["count"],

+                         workweek.actionsbyage["new"],

+                         workweek.actionsbyage["month"],

+                         workweek.actionsbyage["year"],

+                         workweek.actionsbyage["older"],

+                         workweek.nonhuman["newspammers,"],

+                         workweek.nonhuman["spamactions,"],

+                         workweek.nonhuman["botactions"],

+                         workweek.nonhuman["relengactions"],

+                     )

+                 )

                  bucketcsv.flush()

  

- 

          # unless we're in the future, save the weekly userdata csv

          if starttime < datetime.datetime.now():

-             with open('data/weekly/%s.userdata.%05d.csv' % (discriminant,weeknum), 'w') as weekcsv:

-                 weekcsv.write("%s,%s,%s,%s\n" % ("user","actions","firstseen","lastseen"))

+             with open(

+                     "data/weekly/%s.userdata.%05d.csv" % (discriminant, weeknum), "w"

+             ) as weekcsv:

+                 weekcsv.write(

+                     "%s,%s,%s,%s\n" % ("user", "actions", "firstseen", "lastseen")

+                 )

                  for user in sorted(weekbreakdown, key=weekbreakdown.get, reverse=True):

-                     weekcsv.write("%s,%s,%s,%s\n" % (user,weekbreakdown[user],firstseen[user].strftime('%Y-%m-%d'),lastseen[user].strftime('%Y-%m-%d')))

-             print('Wrote data/weekly/%s.userdata.%05d.csv' % (discriminant,weeknum))

+                     weekcsv.write(

+                         "%s,%s,%s,%s\n"

+                         % (

+                             user,

+                             weekbreakdown[user],

+                             firstseen[user].strftime("%Y-%m-%d"),

+                             lastseen[user].strftime("%Y-%m-%d"),

+                         )

+                     )

+             print("Wrote data/weekly/%s.userdata.%05d.csv" % (discriminant, weeknum))

  

          # and loop around

-         starttime=endtime

-         weeknum+=1

- 

-              

\ No newline at end of file

+         starttime = endtime

+         weeknum += 1

Resolves #1

  • Fixed all the code-style errors, according to flake8 output with the default settings.
  • Added new flake8 config file, exclude possibles env/ directories.

I think flake8 should be added to the requirements.txt file, which was introduced in PR #9.

Great beautification work, and yes, flake8 needs to be added to requirements.txt — but first, we need to merge this one.

+1 for this merge

Pull-Request has been merged by mattdm

3 years ago