#10 Added basic code-style validation
Merged 3 years ago by mattdm. Opened 3 years ago by josseline.
josseline/fedora-contributor-trends basic-validation  into  main

file added
+3
@@ -0,0 +1,3 @@ 

+ [flake8]

+ max-line-length = 120

+ exclude = .git, __pycache__, env, venv, .env, .venv, ENV, env.bak, venv.bak

file modified
+1 -1
@@ -1,1 +1,1 @@ 

- config = {'endpoints': {}} 

\ No newline at end of file

+ config = {"endpoints": {}}

file modified
+341 -212
@@ -1,305 +1,434 @@ 

  #!/usr/bin/python3

  import os

- import pandas

+ 

  import matplotlib as m

+ import matplotlib.pyplot as plt

+ import pandas

+ 

  m.use("Agg")

- import matplotlib.pyplot as plt 

- m.rcParams['font.size'] = 12

- m.rcParams['font.family'] = 'Overpass'

- m.rcParams['legend.frameon'] = False

+ 

+ m.rcParams["font.size"] = 12

+ m.rcParams["font.family"] = "Overpass"

+ m.rcParams["legend.frameon"] = False

  

  try:

-     os.makedirs('./images')

+     os.makedirs("./images")

  except OSError:

      pass

  

- datagit=pandas.read_csv("data/org.fedoraproject.prod.git.receive.bucketed-activity.csv",parse_dates=[0])

- datagit.set_index('weekstart',inplace=True)

- 

- graph=datagit[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ datagit = pandas.read_csv(

+     "data/org.fedoraproject.prod.git.receive.bucketed-activity.csv", parse_dates=[0]

+ )

+ datagit.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datagit[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Making Changes to Packages Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/git.user.count.svg',dpi=300)

+ plt.suptitle("Number of Contributors Making Changes to Packages Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/git.user.count.svg", dpi=300)

  

  #############################################

  

- datagit['msgstotal']=datagit[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datagit['msgs1%']=100*datagit['msgs1']/datagit['msgstotal']

- datagit['msgs9%']=100*datagit['msgs9']/datagit['msgstotal']

- datagit['msgs40%']=100*datagit['msgs40']/datagit['msgstotal']

- datagit['msgsrest%']=100*datagit['msgsrest']/datagit['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datagit[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Package Changes Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/git.activity.share.svg',dpi=300)

+ datagit["msgstotal"] = datagit[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datagit["msgs1%"] = 100 * datagit["msgs1"] / datagit["msgstotal"]

+ datagit["msgs9%"] = 100 * datagit["msgs9"] / datagit["msgstotal"]

+ datagit["msgs40%"] = 100 * datagit["msgs40"] / datagit["msgstotal"]

+ datagit["msgsrest%"] = 100 * datagit["msgsrest"] / datagit["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datagit[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Package Changes Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/git.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datagit[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datagit[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Packaging Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/git.newusers.svg',dpi=300)

+ # plt.suptitle("New Packaging Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/git.newusers.svg',dpi=300)

  

  #############################################

  

- #datagit['newuseractions%']=100*datagit['newuseractions']/datagit['msgstotal']

- #datagit['monthuseractions%']=100*datagit['monthuseractions']/datagit['msgstotal']

- #datagit['yearuseractions%']=100*datagit['yearuseractions']/datagit['msgstotal']

- #datagit['olderuseractions%']=100*datagit['olderuseractions']/datagit['msgstotal']

- 

- 

+ # datagit['newuseractions%']=100*datagit['newuseractions']/datagit['msgstotal']

+ # datagit['monthuseractions%']=100*datagit['monthuseractions']/datagit['msgstotal']

+ # datagit['yearuseractions%']=100*datagit['yearuseractions']/datagit['msgstotal']

+ # datagit['olderuseractions%']=100*datagit['olderuseractions']/datagit['msgstotal']

  

  

- #m.rcParams['legend.frameon'] = True

- #graph=datagit[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datagit[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"})

+ # .plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Package Changes Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Package Changes Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/git.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/git.activity.length.svg',dpi=300)

  #

  ################################################################################################################

  ################################################################################################################

  

- databodhi=pandas.read_csv("data/org.fedoraproject.prod.bodhi.update.comment.bucketed-activity.csv",parse_dates=[0])

- databodhi.set_index('weekstart',inplace=True)

- 

- graph=databodhi[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ databodhi = pandas.read_csv(

+     "data/org.fedoraproject.prod.bodhi.update.comment.bucketed-activity.csv",

+     parse_dates=[0],

+ )

+ databodhi.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     databodhi[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Providing Feedback on Package Updates Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/bodhi.user.count.svg',dpi=300)

+ plt.suptitle(

+     "Number of Contributors Providing Feedback on Package Updates Each Week",

+     fontsize=24,

+ )

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/bodhi.user.count.svg", dpi=300)

  

  #############################################

  

- databodhi['msgstotal']=databodhi[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- databodhi['msgs1%']=100*databodhi['msgs1']/databodhi['msgstotal']

- databodhi['msgs9%']=100*databodhi['msgs9']/databodhi['msgstotal']

- databodhi['msgs40%']=100*databodhi['msgs40']/databodhi['msgstotal']

- databodhi['msgsrest%']=100*databodhi['msgsrest']/databodhi['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=databodhi[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Update Feedback Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/bodhi.activity.share.svg',dpi=300)

+ databodhi["msgstotal"] = databodhi[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ databodhi["msgs1%"] = 100 * databodhi["msgs1"] / databodhi["msgstotal"]

+ databodhi["msgs9%"] = 100 * databodhi["msgs9"] / databodhi["msgstotal"]

+ databodhi["msgs40%"] = 100 * databodhi["msgs40"] / databodhi["msgstotal"]

+ databodhi["msgsrest%"] = 100 * databodhi["msgsrest"] / databodhi["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     databodhi[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Update Feedback Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/bodhi.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=databodhi[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=databodhi[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Update Testing Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/bodhi.newusers.svg',dpi=300)

+ # plt.suptitle("New Update Testing Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/bodhi.newusers.svg',dpi=300)

  

  #############################################

  

- #databodhi['newuseractions%']=100*databodhi['newuseractions']/databodhi['msgstotal']

- #databodhi['monthuseractions%']=100*databodhi['monthuseractions']/databodhi['msgstotal']

- #databodhi['yearuseractions%']=100*databodhi['yearuseractions']/databodhi['msgstotal']

- #databodhi['olderuseractions%']=100*databodhi['olderuseractions']/databodhi['msgstotal']

+ # databodhi['newuseractions%']=100*databodhi['newuseractions']/databodhi['msgstotal']

+ # databodhi['monthuseractions%']=100*databodhi['monthuseractions']/databodhi['msgstotal']

+ # databodhi['yearuseractions%']=100*databodhi['yearuseractions']/databodhi['msgstotal']

+ # databodhi['olderuseractions%']=100*databodhi['olderuseractions']/databodhi['msgstotal']

  

  

- 

- 

- #m.rcParams['legend.frameon'] = True

- #graph=databodhi[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=databodhi[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Update Feedback Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Update Feedback Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/bodhi.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/bodhi.activity.length.svg',dpi=300)

  

  

  ################################################################################################################

  ################################################################################################################

  

- datawiki=pandas.read_csv("data/org.fedoraproject.prod.wiki.article.edit.bucketed-activity.csv",parse_dates=[0])

- datawiki.set_index('weekstart',inplace=True)

- 

- graph=datawiki[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,301,25))

- #graph.legend(ncol=4)

+ datawiki = pandas.read_csv(

+     "data/org.fedoraproject.prod.wiki.article.edit.bucketed-activity.csv",

+     parse_dates=[0],

+ )

+ datawiki.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datawiki[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 301, 25),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Wiki Editors Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/wiki.user.count.svg',dpi=300)

+ plt.suptitle("Number of Wiki Editors Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/wiki.user.count.svg", dpi=300)

  

  #############################################

  

- datawiki['msgstotal']=datawiki[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datawiki['msgs1%']=100*datawiki['msgs1']/datawiki['msgstotal']

- datawiki['msgs9%']=100*datawiki['msgs9']/datawiki['msgstotal']

- datawiki['msgs40%']=100*datawiki['msgs40']/datawiki['msgstotal']

- datawiki['msgsrest%']=100*datawiki['msgsrest']/datawiki['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datawiki[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Wiki Edits Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/wiki.activity.share.svg',dpi=300)

+ datawiki["msgstotal"] = datawiki[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datawiki["msgs1%"] = 100 * datawiki["msgs1"] / datawiki["msgstotal"]

+ datawiki["msgs9%"] = 100 * datawiki["msgs9"] / datawiki["msgstotal"]

+ datawiki["msgs40%"] = 100 * datawiki["msgs40"] / datawiki["msgstotal"]

+ datawiki["msgsrest%"] = 100 * datawiki["msgsrest"] / datawiki["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datawiki[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Wiki Edits Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/wiki.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.newusers.svg',dpi=300)

+ # plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.newusers.svg',dpi=300)

  ###############################################

  

- #graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datawiki[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.newusers.svg',dpi=300)

+ # plt.suptitle("New Wiki Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.newusers.svg',dpi=300)

  

  #############################################

  

- #datawiki['newuseractions%']=100*datawiki['newuseractions']/datawiki['msgstotal']

- #datawiki['monthuseractions%']=100*datawiki['monthuseractions']/datawiki['msgstotal']

- #datawiki['yearuseractions%']=100*datawiki['yearuseractions']/datawiki['msgstotal']

- #datawiki['olderuseractions%']=100*datawiki['olderuseractions']/datawiki['msgstotal']

- 

+ # datawiki['newuseractions%']=100*datawiki['newuseractions']/datawiki['msgstotal']

+ # datawiki['monthuseractions%']=100*datawiki['monthuseractions']/datawiki['msgstotal']

+ # datawiki['yearuseractions%']=100*datawiki['yearuseractions']/datawiki['msgstotal']

+ # datawiki['olderuseractions%']=100*datawiki['olderuseractions']/datawiki['msgstotal']

  

  

- 

- #m.rcParams['legend.frameon'] = True

- #graph=datawiki[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datawiki[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Wiki Edits Each Week By Time Since Editor's First Edit",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Wiki Edits Each Week By Time Since Editor's First Edit",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/wiki.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/wiki.activity.length.svg',dpi=300)

  

  ###############################################

  ###############################################

- datapagure=pandas.read_csv("data/io.pagure.prod.pagure.git.receive.bucketed-activity.csv",parse_dates=[0])

- datapagure.set_index('weekstart',inplace=True)

- 

- graph=datapagure[['users1','users9','users40','userrest']].rename(columns={"users1": "Top 1%","users9":"Top 9%","users40":"Top 40%","userrest":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,yticks=range(0,25,5))

- #graph.legend(ncol=4)

+ datapagure = pandas.read_csv(

+     "data/io.pagure.prod.pagure.git.receive.bucketed-activity.csv", parse_dates=[0]

+ )

+ datapagure.set_index("weekstart", inplace=True)

+ 

+ graph = (

+     datapagure[["users1", "users9", "users40", "userrest"]]

+     .rename(

+         columns={

+             "users1": "Top 1%",

+             "users9": "Top 9%",

+             "users40": "Top 40%",

+             "userrest": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         yticks=range(0, 25, 5),

+     )

+ )

+ # graph.legend(ncol=4)

  # totally abusing this.

- plt.suptitle("Number of Contributors Making Commits to Pagure Each Week",fontsize=24)

- graph.set_title("Grouped by Quarterly Activity Level of Each Contributor",fontsize=16)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/pagure.user.count.svg',dpi=300)

+ plt.suptitle("Number of Contributors Making Commits to Pagure Each Week", fontsize=24)

+ graph.set_title("Grouped by Quarterly Activity Level of Each Contributor", fontsize=16)

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/pagure.user.count.svg", dpi=300)

  

  #############################################

  

- datapagure['msgstotal']=datapagure[['msgs1','msgs9','msgs40','msgsrest']].sum(1)

- datapagure['msgs1%']=100*datapagure['msgs1']/datapagure['msgstotal']

- datapagure['msgs9%']=100*datapagure['msgs9']/datapagure['msgstotal']

- datapagure['msgs40%']=100*datapagure['msgs40']/datapagure['msgstotal']

- datapagure['msgsrest%']=100*datapagure['msgsrest']/datapagure['msgstotal']

- 

- 

- 

- 

- m.rcParams['legend.frameon'] = True

- graph=datapagure[['msgs1%','msgs9%','msgs40%','msgsrest%']].rename(columns={"msgs1%": "Top 1%","msgs9%":"Top 9%","msgs40%":"Top 40%","msgsrest%":"Remaining 50%"}).plot.area(figsize=(16, 9),

-                                                               color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

-                                                               grid=True,ylim=(0,100))

- plt.suptitle("Percent of Pagure Commits Each Week From Each Activity Level Group",fontsize=24)

- graph.set_title("",fontsize=16)

- graph.set_xlabel('')

- 

- fig=graph.get_figure()

- fig.savefig('images/pagure.activity.share.svg',dpi=300)

+ datapagure["msgstotal"] = datapagure[["msgs1", "msgs9", "msgs40", "msgsrest"]].sum(1)

+ datapagure["msgs1%"] = 100 * datapagure["msgs1"] / datapagure["msgstotal"]

+ datapagure["msgs9%"] = 100 * datapagure["msgs9"] / datapagure["msgstotal"]

+ datapagure["msgs40%"] = 100 * datapagure["msgs40"] / datapagure["msgstotal"]

+ datapagure["msgsrest%"] = 100 * datapagure["msgsrest"] / datapagure["msgstotal"]

+ 

+ 

+ m.rcParams["legend.frameon"] = True

+ graph = (

+     datapagure[["msgs1%", "msgs9%", "msgs40%", "msgsrest%"]]

+     .rename(

+         columns={

+             "msgs1%": "Top 1%",

+             "msgs9%": "Top 9%",

+             "msgs40%": "Top 40%",

+             "msgsrest%": "Remaining 50%",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#579d1c", "#ffd320", "#ff420e", "#004586"],

+         grid=True,

+         ylim=(0, 100),

+     )

+ )

+ plt.suptitle(

+     "Percent of Pagure Commits Each Week From Each Activity Level Group", fontsize=24

+ )

+ graph.set_title("", fontsize=16)

+ graph.set_xlabel("")

+ 

+ fig = graph.get_figure()

+ fig.savefig("images/pagure.activity.share.svg", dpi=300)

  

  ###############################################

  

- #graph=datapagure[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

+ # graph=datapagure[['newusercount']].rename(columns={"newusercount": "New Users"}).plot.area(figsize=(16, 9),

  #                                                              color='#579d1c',

  #                                                              grid=True,legend=False)

- #plt.suptitle("New Pagure Contributor Count Per Week",fontsize=24)

- #graph.set_title('')

- #graph.set_xlabel('')

- #fig=graph.get_figure()

- #fig.savefig('images/pagure.newusers.svg',dpi=300)

+ # plt.suptitle("New Pagure Contributor Count Per Week",fontsize=24)

+ # graph.set_title('')

+ # graph.set_xlabel('')

+ # fig=graph.get_figure()

+ # fig.savefig('images/pagure.newusers.svg',dpi=300)

  

  #############################################

  

- #datapagure['newuseractions%']=100*datapagure['newuseractions']/datapagure['msgstotal']

- #datapagure['monthuseractions%']=100*datapagure['monthuseractions']/datapagure['msgstotal']

- #datapagure['yearuseractions%']=100*datapagure['yearuseractions']/datapagure['msgstotal']

- #datapagure['olderuseractions%']=100*datapagure['olderuseractions']/datapagure['msgstotal']

- 

- 

+ # datapagure['newuseractions%']=100*datapagure['newuseractions']/datapagure['msgstotal']

+ # datapagure['monthuseractions%']=100*datapagure['monthuseractions']/datapagure['msgstotal']

+ # datapagure['yearuseractions%']=100*datapagure['yearuseractions']/datapagure['msgstotal']

+ # datapagure['olderuseractions%']=100*datapagure['olderuseractions']/datapagure['msgstotal']

  

  

- #m.rcParams['legend.frameon'] = True

- #graph=datapagure[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:].rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month","yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

+ # m.rcParams['legend.frameon'] = True

+ # graph=datapagure[['newuseractions%','monthuseractions%','yearuseractions%','olderuseractions%']][42:]

+ # .rename(columns={"newuseractions%": "New This Week","monthuseractions%":"New This Month",

+ # "yearuseractions%":"New This Year","olderuseractions%":"Old School"}).plot.area(figsize=(16, 9),

  #                                                              color=['#579d1c','#ffd320', '#ff420e', '#004586' ],

  #                                                              grid=True,ylim=(0,100))

- #plt.suptitle("Percent of Pagure Commits Each Week By Time Since Packager's First Action",fontsize=24)

- #graph.set_title("",fontsize=16)

- #graph.set_xlabel('')

+ # plt.suptitle("Percent of Pagure Commits Each Week By Time Since Packager's First Action",fontsize=24)

+ # graph.set_title("",fontsize=16)

+ # graph.set_xlabel('')

  #

- #fig=graph.get_figure()

- #fig.savefig('images/pagure.activity.length.svg',dpi=300)

+ # fig=graph.get_figure()

+ # fig.savefig('images/pagure.activity.length.svg',dpi=300)

file modified
+72 -33
@@ -1,51 +1,90 @@ 

  #!/usr/bin/python3

  import os

- import pandas

+ 

  import matplotlib as m

+ import matplotlib.pyplot as plt

+ import pandas

+ 

  m.use("Agg")

- import matplotlib.pyplot as plt 

- m.rcParams['font.size'] = 12

- m.rcParams['font.family'] = 'Overpass'

- m.rcParams['legend.frameon'] = True

+ 

+ m.rcParams["font.size"] = 12

+ m.rcParams["font.family"] = "Overpass"

+ m.rcParams["legend.frameon"] = True

  

  try:

-     os.makedirs('./images')

+     os.makedirs("./images")

  except OSError:

      pass

  

- data=pandas.read_csv("data/contributor-count.csv",parse_dates=[0])

- data.set_index('weekstart',inplace=True)

- 

+ data = pandas.read_csv("data/contributor-count.csv", parse_dates=[0])

+ data.set_index("weekstart", inplace=True)

  

- graph=data[['oldactive','midactive','newactive']].rename(columns={"oldactive": "Old School","midactive":"Intermediate","newactive":"New Contributors"}).plot.area(figsize=(16, 9),

-                                                               color=[ '#ff420e','#ffd320',  '#579d1c' ], # '#004586'

-                                                               grid=True,stacked=True ,yticks=range(0,451,25))

- data[['rawcount']].rename(columns={"rawcount": "All Contributors\nincluding less active"}).plot(figsize=(16, 9),

-                                                               ax=graph ,yticks=range(0,426,25))

-                                                               

- graph.xaxis.grid(True, which='minor', linestyle='-', linewidth=0.25)

- graph.yaxis.grid(True, which='major', linestyle='-', linewidth=0.25)

  

- plt.suptitle("Fedora Contributors by Week",fontsize=24)

- graph.set_title("Stacked graph of contributors with measured activity each week — and at least four weeks total in the last year.\n“Old school” contributors have been active for longer than two years; new contributors, less than one.\nBlue line shows all contributors active this week regardless of amount of other activity.",fontsize=12)

- graph.set_xlabel('')

+ graph = (

+     data[["oldactive", "midactive", "newactive"]]

+     .rename(

+         columns={

+             "oldactive": "Old School",

+             "midactive": "Intermediate",

+             "newactive": "New Contributors",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#ff420e", "#ffd320", "#579d1c"],  # '#004586'

+         grid=True,

+         stacked=True,

+         yticks=range(0, 451, 25),

+     )

+ )

+ data[["rawcount"]].rename(

+     columns={"rawcount": "All Contributors\nincluding less active"}

+ ).plot(figsize=(16, 9), ax=graph, yticks=range(0, 426, 25))

  

- fig=graph.get_figure()

- fig.savefig('images/active-contributors-by-week.svg',dpi=300)

+ graph.xaxis.grid(True, which="minor", linestyle="-", linewidth=0.25)

+ graph.yaxis.grid(True, which="major", linestyle="-", linewidth=0.25)

  

+ plt.suptitle("Fedora Contributors by Week", fontsize=24)

+ graph.set_title(

+     "Stacked graph of contributors with measured activity each week — and at least four weeks total in the last year.\n"

+     "“Old school” contributors have been active for longer than two years; new contributors, less than one.\n"

+     "Blue line shows all contributors active this week regardless of amount of other activity.",

+     fontsize=12,

+ )

+ graph.set_xlabel("")

  

+ fig = graph.get_figure()

+ fig.savefig("images/active-contributors-by-week.svg", dpi=300)

  

  

- graph=data[['oldcore','midcore','newcore']].rename(columns={"oldcore": "Old School","midcore":"Intermediate","newcore":"New Contributors"}).plot.area(figsize=(16, 9),

-                                                               color=[  '#ff420e', '#ffd320', '#579d1c' ], # '#004586'

-                                                               grid=True,stacked=True ,yticks=range(0,101,25))

-                                                               

- graph.xaxis.grid(True, which='minor', linestyle='-', linewidth=0.25)

- graph.yaxis.grid(True, which='major', linestyle='-', linewidth=0.25)

+ graph = (

+     data[["oldcore", "midcore", "newcore"]]

+     .rename(

+         columns={

+             "oldcore": "Old School",

+             "midcore": "Intermediate",

+             "newcore": "New Contributors",

+         }

+     )

+     .plot.area(

+         figsize=(16, 9),

+         color=["#ff420e", "#ffd320", "#579d1c"],  # '#004586'

+         grid=True,

+         stacked=True,

+         yticks=range(0, 101, 25),

+     )

+ )

  

- plt.suptitle("Core Fedora Contributors by Week",fontsize=24)

- graph.set_title("Stacked graph of contributors with measured activity this week — and at least four weeks total in the last year.\nOld school contributors have been active for longer than two years; new contributors, less than one.\n“Core” means part of the set doing about ⅔s of all actions over the past year.",fontsize=12)

- graph.set_xlabel('')

- fig=graph.get_figure()

- fig.savefig('images/active-core-contributors-by-week.svg',dpi=300)

+ graph.xaxis.grid(True, which="minor", linestyle="-", linewidth=0.25)

+ graph.yaxis.grid(True, which="major", linestyle="-", linewidth=0.25)

  

+ plt.suptitle("Core Fedora Contributors by Week", fontsize=24)

+ graph.set_title(

+     "Stacked graph of contributors with measured activity this week — and at least four weeks total in the last year.\n"

+     "Old school contributors have been active for longer than two years; new contributors, less than one.\n"

+     "“Core” means part of the set doing about ⅔s of all actions over the past year.",

+     fontsize=12,

+ )

+ graph.set_xlabel("")

+ fig = graph.get_figure()

+ fig.savefig("images/active-core-contributors-by-week.svg", dpi=300)

file modified
+160 -134
@@ -10,7 +10,7 @@ 

  # BUT if --csv (or --csvh, for csv with header) is given, it gives the number for _that week only_

  

  firstseen = collections.OrderedDict()

- lastseen  = collections.OrderedDict()

+ lastseen = collections.OrderedDict()

  actioncount = collections.defaultdict(int)

  weeksactive = collections.defaultdict(int)

  oldschoolornew = {}
@@ -18,157 +18,183 @@ 

  

  

  n = len(sys.argv[1:])

- csvoutput=False

+ csvoutput = False

  if n == 0:

-   reportweek = int((datetime.datetime.now()-datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")).days/7)-1

+     reportweek = (

+         int(

+             (

+                 datetime.datetime.now()

+                 - datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")

+             ).days

+             / 7

+         )

+         - 1

+     )

  elif n == 1:

-    reportweek=int(sys.argv[1])

+     reportweek = int(sys.argv[1])

  elif sys.argv[1] == "--csv":

-    reportweek=int(sys.argv[2])

-    csvoutput=True

-    csvheader=False

+     reportweek = int(sys.argv[2])

+     csvoutput = True

+     csvheader = False

  elif sys.argv[1] == "--csvh":

-    reportweek=int(sys.argv[2])

-    csvoutput=True

-    csvheader=True

+     reportweek = int(sys.argv[2])

+     csvoutput = True

+     csvheader = True

  else:

-   sys.exit(1)

+     sys.exit(1)

  

  

- 

- reporttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d") + datetime.timedelta(days=reportweek*7+6)

+ reporttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d") + datetime.timedelta(

+     days=reportweek * 7 + 6

+ )

  # 52 weeks is precise enough for metrics gathering :)

  lastyear = reporttime - datetime.timedelta(364)

  twoyears = reporttime - datetime.timedelta(728)

-    

- weeks = range(reportweek-51,reportweek+1)

  

- datasources = ( "org.fedoraproject.prod.bodhi.update.comment",

-                 "org.fedoraproject.prod.git.receive",

-                 "org.fedoraproject.prod.irc.karma",

-                 "org.fedoraproject.prod.wiki.article.edit",

-                 "org.fedoraproject.prod.infragit.receive"

-                 )

+ weeks = range(reportweek - 51, reportweek + 1)

+ 

+ datasources = (

+     "org.fedoraproject.prod.bodhi.update.comment",

+     "org.fedoraproject.prod.git.receive",

+     "org.fedoraproject.prod.irc.karma",

+     "org.fedoraproject.prod.wiki.article.edit",

+     "org.fedoraproject.prod.infragit.receive",

+ )

  

  for datasource in datasources:

-   for week in weeks:

-     try:

-       datafragment=pandas.read_csv("data/weekly/{}.userdata.{:05}.csv".format(datasource,week),parse_dates=[2,3])

-     except FileNotFoundError:

-       # ignore missing data.... probably should errror on _everything_ missing (FIXME)

-       continue

-       

-     for index, row in datafragment.iterrows():

-       user=row['user']

- 

-       totalactions += row['actions']

-       actioncount[user]+=row['actions']

- 

-       if not user in weeksactive:

-         weeksactive[user]=set()

-       weeksactive[user].add(week)

- 

-       if not user in firstseen:

-         firstseen[user]=row['firstseen']

-       else:

-         if row['firstseen'] < firstseen[user]:

-           firstseen[user]=row['firstseen']

- 

-       if not user in lastseen:

-         lastseen[user]=row['lastseen']

-       else:

-         if row['lastseen'] < lastseen[user]:

-           lastseen[user]=row['lastseen']

-           

-       if firstseen[user] < twoyears:

-         oldschoolornew[user]="old-school"

-       elif firstseen[user] >= lastyear:

-         oldschoolornew[user]="new contributor"

-       else:

-         oldschoolornew[user]=""

- 

- rawcount=0

- oldcount=0

- midcount=0

- newcount=0

- allactive=0

- 

- accumulator=0

- topusers=[]

+     for week in weeks:

+         try:

+             datafragment = pandas.read_csv(

+                 "data/weekly/{}.userdata.{:05}.csv".format(datasource, week),

+                 parse_dates=[2, 3],

+             )

+         except FileNotFoundError:

+             # ignore missing data.... probably should errror on _everything_ missing (FIXME)

+             continue

+ 

+         for index, row in datafragment.iterrows():

+             user = row["user"]

+ 

+             totalactions += row["actions"]

+             actioncount[user] += row["actions"]

+ 

+             if user not in weeksactive:

+                 weeksactive[user] = set()

+             weeksactive[user].add(week)

+ 

+             if user not in firstseen:

+                 firstseen[user] = row["firstseen"]

+             else:

+                 if row["firstseen"] < firstseen[user]:

+                     firstseen[user] = row["firstseen"]

+ 

+             if user not in lastseen:

+                 lastseen[user] = row["lastseen"]

+             else:

+                 if row["lastseen"] < lastseen[user]:

+                     lastseen[user] = row["lastseen"]

+ 

+             if firstseen[user] < twoyears:

+                 oldschoolornew[user] = "old-school"

+             elif firstseen[user] >= lastyear:

+                 oldschoolornew[user] = "new contributor"

+             else:

+                 oldschoolornew[user] = ""

+ 

+ rawcount = 0

+ oldcount = 0

+ midcount = 0

+ newcount = 0

+ allactive = 0

+ 

+ accumulator = 0

+ topusers = []

  for user in sorted(actioncount, key=actioncount.get, reverse=True):

-   accumulator+=actioncount[user]

-   topusers.append(user)

-   #print("{:20} {}".format(user,oldschoolornew[user]))

-   if accumulator>totalactions*2.0/3:

-     break

-   

- newcore=0  

- midcore=0

- oldcore=0

+     accumulator += actioncount[user]

+     topusers.append(user)

+     # print("{:20} {}".format(user,oldschoolornew[user]))

+     if accumulator > totalactions * 2.0 / 3:

+         break

+ 

+ newcore = 0

+ midcore = 0

+ oldcore = 0

  

  for user in oldschoolornew:

  

-   # in csv mode, only report on activity *this* week

-   if csvoutput and not reportweek in weeksactive[user]:

-     continue

- 

-   rawcount+=1

- 

-   # only count users who are active

-   # at least 4 distinct weeks in the past year

-   if len(weeksactive[user]) < 4:

-     continue

- 

-     

-   allactive+=1

-   if oldschoolornew[user] == "old-school":

-     oldcount+=1

-     if user in topusers:

-       oldcore+=1

-   elif oldschoolornew[user] == "new contributor":

-     newcount+=1

-     if user in topusers:

-       newcore+=1

-   else:

-     midcount+=1

-     if user in topusers:

-       midcore+=1

-   

+     # in csv mode, only report on activity *this* week

+     if csvoutput and reportweek not in weeksactive[user]:

+         continue

+ 

+     rawcount += 1

+ 

+     # only count users who are active

+     # at least 4 distinct weeks in the past year

+     if len(weeksactive[user]) < 4:

+         continue

+ 

+     allactive += 1

+     if oldschoolornew[user] == "old-school":

+         oldcount += 1

+         if user in topusers:

+             oldcore += 1

+     elif oldschoolornew[user] == "new contributor":

+         newcount += 1

+         if user in topusers:

+             newcore += 1

+     else:

+         midcount += 1

+         if user in topusers:

+             midcore += 1

+ 

  

  if csvoutput:

-   if csvheader:

-     print("weekstart,rawcount,oldactive,midactive,newactive,oldcore,midcore,newcore")

-   print("{0:%Y-%m-%d}".format(reporttime),rawcount,

-         oldcount,midcount,newcount,

-         oldcore,midcore,newcore,

-         sep=",")

-   sys.exit(0)

- 

-       

- print ("Report for year ending {:%Y-%m-%d} (through week #{}):".format(reporttime,reportweek))

- print ("")

- print ("Raw total contributors:        {:>5}".format(rawcount))   

- print ("Total active contributors:     {:>5}".format(allactive))   

- print ("Core contributors (⅔ actions)  {:>5}".format(len(topusers)))

- print ("")

- print ("Old-school contributors:       {:>5}".format(oldcount))

- print ("Intermediate contributors:     {:>5}".format(midcount))

- print ("New contributors this year:    {:>5}".format(newcount))

- print ("")

- print ("Old core contributors:         {:>5}".format(oldcore))

- print ("Intermediate core contributors:{:>5}".format(midcore))

- print ("New core contributors:         {:>5}".format(newcore))

- print ("\n")

- print ("This report is an aggregate of dist-git, bodhi karma, wiki edits,")

- print ("infra git, and irc cookies. It doesn't measure all Fedora activity.")

- print ("")

- print ("Active means at least four separate weeks of activity.")

- print ("Core means part of the set doing about ⅔s of all actions.")

- print ("Old-school contributors started at least two years (104 weeks) ago.")

- print ("New contributors are new in the past 52 weeks.")

- print ("Note that by this metric, \"mattdm\" is not a core contributor.")

- 

- print ("\n-------------------------------------------\n")

+     if csvheader:

+         print(

+             "weekstart,rawcount,oldactive,midactive,newactive,oldcore,midcore,newcore"

+         )

+     print(

+         "{0:%Y-%m-%d}".format(reporttime),

+         rawcount,

+         oldcount,

+         midcount,

+         newcount,

+         oldcore,

+         midcore,

+         newcore,

+         sep=",",

+     )

+     sys.exit(0)

+ 

+ 

+ print(

+     "Report for year ending {:%Y-%m-%d} (through week #{}):".format(

+         reporttime, reportweek

+     )

+ )

+ print("")

+ print("Raw total contributors:        {:>5}".format(rawcount))

+ print("Total active contributors:     {:>5}".format(allactive))

+ print("Core contributors (⅔ actions)  {:>5}".format(len(topusers)))

+ print("")

+ print("Old-school contributors:       {:>5}".format(oldcount))

+ print("Intermediate contributors:     {:>5}".format(midcount))

+ print("New contributors this year:    {:>5}".format(newcount))

+ print("")

+ print("Old core contributors:         {:>5}".format(oldcore))

+ print("Intermediate core contributors:{:>5}".format(midcore))

+ print("New core contributors:         {:>5}".format(newcore))

+ print("\n")

+ print("This report is an aggregate of dist-git, bodhi karma, wiki edits,")

+ print("infra git, and irc cookies. It doesn't measure all Fedora activity.")

+ print("")

+ print("Active means at least four separate weeks of activity.")

+ print("Core means part of the set doing about ⅔s of all actions.")

+ print("Old-school contributors started at least two years (104 weeks) ago.")

+ print("New contributors are new in the past 52 weeks.")

+ print('Note that by this metric, "mattdm" is not a core contributor.')

+ 

+ print("\n-------------------------------------------\n")

  

  for user in topusers:

-   print("{:20} {}".format(user,oldschoolornew[user]))

+     print("{:20} {}".format(user, oldschoolornew[user]))

file modified
+6 -7
@@ -1,6 +1,7 @@ 

  import requests

  

- url = 'https://apps.fedoraproject.org/datagrepper/raw'

+ url = "https://apps.fedoraproject.org/datagrepper/raw"

+ 

  

  def grep(tries=0, **kwargs):

      response = requests.get(url, params=kwargs)
@@ -11,15 +12,15 @@ 

              yield item

  

      data = response.json()

-     pages = data['pages']

+     pages = data["pages"]

  

-     for message in data['raw_messages']:

+     for message in data["raw_messages"]:

          yield message

  

      for page in range(1, pages):

          for attempt in range(20):

              try:

-                 kwargs['page'] = page

+                 kwargs["page"] = page

                  response = requests.get(url, params=kwargs)

                  try:

                      data = response.json()
@@ -35,7 +36,5 @@ 

                  break

          else:

              raise ValueError("Ran out of retries")

-         for message in data.get('raw_messages', []):

+         for message in data.get("raw_messages", []):

              yield message

- 

- 

file modified
+245 -177
@@ -4,9 +4,10 @@ 

  #

  # output: a CSV file with fields:

  #

- # date, msgs1, msgs9, msgs40, msgsrest, users1, users9, users40, userrest, newusers, actionsnew, actionsmonth, actionsyear, actionsolder, newspammers, spamactions, botactions, relengactions

+ # date, msgs1, msgs9, msgs40, msgsrest, users1, users9, users40, userrest, newusers, actionsnew, actionsmonth,

+ # actionsyear, actionsolder, newspammers, spamactions, botactions, relengactions

  #

- # where and 1, 9, 40, rest correspond to activity from the cohort of 

+ # where 1, 9, 40, rest correspond to activity from the cohort of

  # quarter is a sliding 13-week window) and users is the count of users in

  # that cohort that week while msgs is overall work. display the user count
@@ -18,202 +19,214 @@ 

  #

  # todo: create those graphs here in addition to CSV

  

- import utils

- 

- import fedmsg.meta

- import fedmsg.config

- config = fedmsg.config.load_config(filenames=['fedmsgconfig.py'])

- fedmsg.meta.make_processors(**config)

  

- 

- 

- import time

+ import collections

  import datetime

  import logging

  import os

+ import pickle

+ import re

  import sys

+ import time

  

- import string

- import re

+ import fedmsg.meta

  

- import collections

- import pprint

+ import utils

  

- import pickle

+ config = fedmsg.config.load_config(filenames=["fedmsgconfig.py"])

+ fedmsg.meta.make_processors(**config)

  

- #logging.basicConfig(level=logging.DEBUG)

+ # logging.basicConfig(level=logging.DEBUG)

  logging.basicConfig(level=logging.ERROR)

  

  

  class TimeoutError(Exception):

      """too much timeout"""

+ 

      pass

  

+ 

  class InvalidDiscriminantError(Exception):

      """invalid discriminant"""

+ 

      pass

  

  

- spammers = [line.rstrip('\n') for line in open('badpeople.list')]

- bots     = [line.rstrip('\n') for line in open('bots.list')]

+ spammers = [line.rstrip("\n") for line in open("badpeople.list")]

+ bots = [line.rstrip("\n") for line in open("bots.list")]

  

  epoch = datetime.datetime.utcfromtimestamp(0)

  

- ipaddrre = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

+ ipaddrre = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

  

  discriminant = sys.argv[-1]

- if __file__.split('/')[-1] in discriminant:

+ if __file__.split("/")[-1] in discriminant:

      print("usage: '$ ./weekly-user-activity.py TOPIC'")

      sys.exit(1)

-     

- if not re.match("^[a-z\.]*$", discriminant):

+ 

+ if not re.match(r"^[a-z\.]*$", discriminant):

      print("bad discriminant")

      sys.exit(2)

  

- 

  print("operating with discriminant", discriminant)

  

  verboten = [

-     'org.fedoraproject.prod.buildsys.rpm.sign',

-     'org.fedoraproject.prod.buildsys.repo.init',

-     'org.fedoraproject.prod.buildsys.tag',

-     'org.fedoraproject.prod.buildsys.untag',

+     "org.fedoraproject.prod.buildsys.rpm.sign",

+     "org.fedoraproject.prod.buildsys.repo.init",

+     "org.fedoraproject.prod.buildsys.tag",

+     "org.fedoraproject.prod.buildsys.untag",

  ]

  

  try:

-     os.makedirs('./data')

+     os.makedirs("./data")

  except OSError:

      pass

  try:

-     os.makedirs('./data/weekly')

+     os.makedirs("./data/weekly")

  except OSError:

      pass

  try:

-     os.makedirs('./cache')

+     os.makedirs("./cache")

  except OSError:

      pass

-     

-    

- weeknum=0

+ 

+ weeknum = 0

  # the year in which fedmesg starts.

  starttime = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")

  

+ WeekActions = collections.namedtuple(

+     "WeekActions", ["week", "useractions", "newusers", "actionsbyage", "nonhuman"]

+ )

  

- 

- WeekActions = collections.namedtuple('WeekActions',['week','useractions','newusers','actionsbyage','nonhuman'])

- 

- firstseen={}

- lastseen={}

+ firstseen = {}

+ lastseen = {}

  

  # 13 weeks = 1 quarter (rolling)

- ring        = collections.deque(maxlen=13)

+ ring = collections.deque(maxlen=13)

  

- with open('data/%s.bucketed-activity.csv' % (discriminant), 'w') as bucketcsv:

-     bucketcsv.write("weekstart,msgs1,msgs9,msgs40,msgsrest,users1,users9,users40,userrest,newusercount,newuseractions,monthuseractions,yearuseractions,olderuseractions,newspammers,spamactions,botactions,relengactions\n")

+ with open("data/%s.bucketed-activity.csv" % (discriminant), "w") as bucketcsv:

+     bucketcsv.write(

+         "weekstart,msgs1,msgs9,msgs40,msgsrest,users1,users9,users40,userrest,newusercount,newuseractions,"

+         "monthuseractions,yearuseractions,olderuseractions,newspammers,spamactions,botactions,relengactions\n"

+     )

      bucketcsv.flush()

-     

-     while starttime < datetime.datetime.now() + datetime.timedelta(42): # weeks in the future because see below

-         endtime   = starttime + datetime.timedelta(7)

-         weekinfo  = WeekActions(starttime, collections.Counter(), collections.Counter(), collections.Counter(), collections.Counter())

-         weekbreakdown=collections.Counter()

  

-         print("Working on %s / %s" % (discriminant, starttime.strftime("%Y-%m-%d")),)

+     while starttime < datetime.datetime.now() + datetime.timedelta(

+             42

+     ):  # weeks in the future because see below

+         endtime = starttime + datetime.timedelta(7)

+         weekinfo = WeekActions(

+             starttime,

+             collections.Counter(),

+             collections.Counter(),

+             collections.Counter(),

+             collections.Counter(),

+         )

+         weekbreakdown = collections.Counter()

+ 

+         print(

+             "Working on %s / %s" % (discriminant, starttime.strftime("%Y-%m-%d")),

+         )

+ 

+         msgcachefile = (

+                 "cache/" + discriminant + "." + starttime.strftime("%Y-%m-%d") + ".pickle"

+         )

  

-         msgcachefile = "cache/" + discriminant + "." + starttime.strftime("%Y-%m-%d") + ".pickle"

-         

          if os.path.exists(msgcachefile):

  

-           with open(msgcachefile,"r") as msgcache:

-             [firstseen,lastseen,weekinfo,weekbreakdown]=pickle.load(msgcache)

-             print("(cached)")

+             with open(msgcachefile, "r") as msgcache:

+                 [firstseen, lastseen, weekinfo, weekbreakdown] = pickle.load(msgcache)

+                 print("(cached)")

  

          else:

-         

-           for attempt in range(10):

-               try:

-                   messages = utils.grep(

-                       rows_per_page=100,

-                       meta='usernames',

-                       start=int((starttime-epoch).total_seconds()),

-                       end=int((endtime - epoch).total_seconds()),

-                       order='asc',  # Start at the beginning, end at now.

-                       topic=discriminant,

-                       # Cut this stuff out, because its just so spammy.

-                       not_user=['anonymous','koschei'],

-                       not_topic=verboten,

-                   )

-               except IOError:

-                   print("Retrying.")

-                   time.sleep(5)

-               else:

-                   break

-           else:

-               raise TimeoutError("too much timeout")

- 

-           for i, msg in enumerate(messages):

-               # sanity check

-               if msg['topic'] in verboten:

-                   raise InvalidDiscriminantError("hell")

- 

-               for user in msg['meta']['usernames']:

-                  if user == 'releng':

-                      weekinfo.nonhuman['relengactions'] +=1

-                      continue

-                  if user in bots:

-                      weekinfo.nonhuman['botactions'] +=1

-                      continue

-                  if user in spammers:

-                      weekinfo.nonhuman['spamactions'] +=1

-                      if not user in firstseen:

-                          firstseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

-                          weekinfo.nonhuman['newspammers'] +=1

-                      continue

-                  if '@' in user:

-                      # some msgs put email for anon users

-                      continue

-                  if ipaddrre.match(user):

-                      # some msgs (wiki) put ip addr for anon users

-                      continue

-                   

-                  weekinfo.useractions[user] += 1

-                  weekbreakdown[user] += 1

-                  

-                  if not user in firstseen:

-                     firstseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

-                      

-                  if (starttime - firstseen[user]).days < 7:

-                      weekinfo.actionsbyage['new'] += 1

-                  elif (starttime - firstseen[user]).days < 31:

-                      weekinfo.actionsbyage['month'] += 1

-                  elif (starttime - firstseen[user]).days < 365:

-                      weekinfo.actionsbyage['year'] += 1

-                  else:

-                      weekinfo.actionsbyage['older'] += 1

-                  

-                  lastseen[user]=datetime.datetime.fromtimestamp(msg['timestamp'])

- 

-               

-               if i % 50 == 0:

-                   sys.stdout.write(".")

-                   sys.stdout.flush()

-            

-           print()      

-           #pprint.pprint(dict(weekinfo.useractions))

-          

-           # don't cache the current week (may not be comlete), and definitely

-           # don't cache the future weeks (certainly not complete)

-           if endtime < (datetime.datetime.now() - datetime.timedelta(1)) :

-               sys.stdout.write("Saving... ")

-               sys.stdout.flush()

-               with open(msgcachefile+".temp","wb") as msgcache:

-                   pickle.dump((firstseen,lastseen,weekinfo,weekbreakdown),msgcache)

-               os.rename(msgcachefile+".temp",msgcachefile)

-               print("saved.")

  

+             for attempt in range(10):

+                 try:

+                     messages = utils.grep(

+                         rows_per_page=100,

+                         meta="usernames",

+                         start=int((starttime - epoch).total_seconds()),

+                         end=int((endtime - epoch).total_seconds()),

+                         order="asc",  # Start at the beginning, end at now.

+                         topic=discriminant,

+                         # Cut this stuff out, because its just so spammy.

+                         not_user=["anonymous", "koschei"],

+                         not_topic=verboten,

+                     )

+                 except IOError:

+                     print("Retrying.")

+                     time.sleep(5)

+                 else:

+                     break

+             else:

+                 raise TimeoutError("too much timeout")

+ 

+             for i, msg in enumerate(messages):

+                 # sanity check

+                 if msg["topic"] in verboten:

+                     raise InvalidDiscriminantError("hell")

+ 

+                 for user in msg["meta"]["usernames"]:

+                     if user == "releng":

+                         weekinfo.nonhuman["relengactions"] += 1

+                         continue

+                     if user in bots:

+                         weekinfo.nonhuman["botactions"] += 1

+                         continue

+                     if user in spammers:

+                         weekinfo.nonhuman["spamactions"] += 1

+                         if user not in firstseen:

+                             firstseen[user] = datetime.datetime.fromtimestamp(

+                                 msg["timestamp"]

+                             )

+                             weekinfo.nonhuman["newspammers"] += 1

+                         continue

+                     if "@" in user:

+                         # some msgs put email for anon users

+                         continue

+                     if ipaddrre.match(user):

+                         # some msgs (wiki) put ip addr for anon users

+                         continue

+ 

+                     weekinfo.useractions[user] += 1

+                     weekbreakdown[user] += 1

+ 

+                     if user not in firstseen:

+                         firstseen[user] = datetime.datetime.fromtimestamp(

+                             msg["timestamp"]

+                         )

+ 

+                     if (starttime - firstseen[user]).days < 7:

+                         weekinfo.actionsbyage["new"] += 1

+                     elif (starttime - firstseen[user]).days < 31:

+                         weekinfo.actionsbyage["month"] += 1

+                     elif (starttime - firstseen[user]).days < 365:

+                         weekinfo.actionsbyage["year"] += 1

+                     else:

+                         weekinfo.actionsbyage["older"] += 1

+ 

+                     lastseen[user] = datetime.datetime.fromtimestamp(msg["timestamp"])

+ 

+                 if i % 50 == 0:

+                     sys.stdout.write(".")

+                     sys.stdout.flush()

+ 

+             print()

+             # pprint.pprint(dict(weekinfo.useractions))

+ 

+             # don't cache the current week (may not be comlete), and definitely

+             # don't cache the future weeks (certainly not complete)

+             if endtime < (datetime.datetime.now() - datetime.timedelta(1)):

+                 sys.stdout.write("Saving... ")

+                 sys.stdout.flush()

+                 with open(msgcachefile + ".temp", "wb") as msgcache:

+                     pickle.dump(

+                         (firstseen, lastseen, weekinfo, weekbreakdown), msgcache

+                     )

+                 os.rename(msgcachefile + ".temp", msgcachefile)

+                 print("saved.")

  

          ring.append(weekinfo)

-         

-          

  

          # okay, so, bear with me here. Comments are for explaining confusing

          # conceptual things in code, right? okay, hold on to your seats.
@@ -223,62 +236,117 @@ 

          # gonna write lines from 6 weeks earlier, because finally we have the

          # needed info. so, we jump back 6 weeks (42 days) from starttime.

          # this is the same as jumping back 7 elements in the deque (if it's that deep)

-         

-         if len(ring)>6: 

+ 

+         if len(ring) > 6:

  

              # first, we're bucketing all the users by percent of activity

-             usertotals=collections.Counter()

+             usertotals = collections.Counter()

              for week in ring:

                  usertotals += week.useractions

              userrank = {}

              userbucket = {}

-             i=len(usertotals)+1

-             for name in sorted(usertotals,key=usertotals.get):

-                userrank[name]=i

-                i-=1

-                if i<len(usertotals)*0.01: # top 1%

-                   userbucket[name]=1

-                elif i<len(usertotals)*0.10: # next 9% (otherwise top 10%)

-                   userbucket[name]=2

-                elif i<len(usertotals)*0.50: # next 40%

-                   userbucket[name]=3

-                else:                        # the bottom half

-                   userbucket[name]=4           

- 

-             workweek=ring[len(ring)-7] # jump back same amount into the deque

+             i = len(usertotals) + 1

+             for name in sorted(usertotals, key=usertotals.get):

+                 userrank[name] = i

+                 i -= 1

+                 if i < len(usertotals) * 0.01:  # top 1%

+                     userbucket[name] = 1

+                 elif i < len(usertotals) * 0.10:  # next 9% (otherwise top 10%)

+                     userbucket[name] = 2

+                 elif i < len(usertotals) * 0.50:  # next 40%

+                     userbucket[name] = 3

+                 else:  # the bottom half

+                     userbucket[name] = 4

+ 

+             workweek = ring[len(ring) - 7]  # jump back same amount into the deque

  

              bucketscores = {}

-             bucketscores[1]=0

-             bucketscores[2]=0

-             bucketscores[3]=0

-             bucketscores[4]=0

+             bucketscores[1] = 0

+             bucketscores[2] = 0

+             bucketscores[3] = 0

+             bucketscores[4] = 0

              bucketcount = {}

-             bucketcount[1]=0

-             bucketcount[2]=0

-             bucketcount[3]=0

-             bucketcount[4]=0

+             bucketcount[1] = 0

+             bucketcount[2] = 0

+             bucketcount[3] = 0

+             bucketcount[4] = 0

  

              for username in workweek.useractions.keys():

-                 bucketscores[userbucket[username]] +=  workweek.useractions[username]

-                 bucketcount[userbucket[username]]  +=  1

-               

-             print("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d" % (workweek.week.strftime('%Y-%m-%d'), bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4], bucketcount[1], bucketcount[2], bucketcount[3], bucketcount[4],workweek.newusers['count'],workweek.actionsbyage['new'],workweek.actionsbyage['month'],workweek.actionsbyage['year'],workweek.actionsbyage['older'],workweek.nonhuman['newspammers,'],workweek.nonhuman['spamactions,'], workweek.nonhuman['botactions'], workweek.nonhuman['relengactions']))

- 

-             if any((bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4])):

-                 bucketcsv.write("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n" % (workweek.week.strftime('%Y-%m-%d'), bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4], bucketcount[1], bucketcount[2], bucketcount[3], bucketcount[4],workweek.newusers['count'],workweek.actionsbyage['new'],workweek.actionsbyage['month'],workweek.actionsbyage['year'],workweek.actionsbyage['older'],workweek.nonhuman['newspammers,'],workweek.nonhuman['spamactions,'], workweek.nonhuman['botactions'], workweek.nonhuman['relengactions']))

+                 bucketscores[userbucket[username]] += workweek.useractions[username]

+                 bucketcount[userbucket[username]] += 1

+ 

+             print(

+                 "%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d"

+                 % (

+                     workweek.week.strftime("%Y-%m-%d"),

+                     bucketscores[1],

+                     bucketscores[2],

+                     bucketscores[3],

+                     bucketscores[4],

+                     bucketcount[1],

+                     bucketcount[2],

+                     bucketcount[3],

+                     bucketcount[4],

+                     workweek.newusers["count"],

+                     workweek.actionsbyage["new"],

+                     workweek.actionsbyage["month"],

+                     workweek.actionsbyage["year"],

+                     workweek.actionsbyage["older"],

+                     workweek.nonhuman["newspammers,"],

+                     workweek.nonhuman["spamactions,"],

+                     workweek.nonhuman["botactions"],

+                     workweek.nonhuman["relengactions"],

+                 )

+             )

+ 

+             if any(

+                     (bucketscores[1], bucketscores[2], bucketscores[3], bucketscores[4])

+             ):

+                 bucketcsv.write(

+                     "%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n"

+                     % (

+                         workweek.week.strftime("%Y-%m-%d"),

+                         bucketscores[1],

+                         bucketscores[2],

+                         bucketscores[3],

+                         bucketscores[4],

+                         bucketcount[1],

+                         bucketcount[2],

+                         bucketcount[3],

+                         bucketcount[4],

+                         workweek.newusers["count"],

+                         workweek.actionsbyage["new"],

+                         workweek.actionsbyage["month"],

+                         workweek.actionsbyage["year"],

+                         workweek.actionsbyage["older"],

+                         workweek.nonhuman["newspammers,"],

+                         workweek.nonhuman["spamactions,"],

+                         workweek.nonhuman["botactions"],

+                         workweek.nonhuman["relengactions"],

+                     )

+                 )

                  bucketcsv.flush()

  

- 

          # unless we're in the future, save the weekly userdata csv

          if starttime < datetime.datetime.now():

-             with open('data/weekly/%s.userdata.%05d.csv' % (discriminant,weeknum), 'w') as weekcsv:

-                 weekcsv.write("%s,%s,%s,%s\n" % ("user","actions","firstseen","lastseen"))

+             with open(

+                     "data/weekly/%s.userdata.%05d.csv" % (discriminant, weeknum), "w"

+             ) as weekcsv:

+                 weekcsv.write(

+                     "%s,%s,%s,%s\n" % ("user", "actions", "firstseen", "lastseen")

+                 )

                  for user in sorted(weekbreakdown, key=weekbreakdown.get, reverse=True):

-                     weekcsv.write("%s,%s,%s,%s\n" % (user,weekbreakdown[user],firstseen[user].strftime('%Y-%m-%d'),lastseen[user].strftime('%Y-%m-%d')))

-             print('Wrote data/weekly/%s.userdata.%05d.csv' % (discriminant,weeknum))

+                     weekcsv.write(

+                         "%s,%s,%s,%s\n"

+                         % (

+                             user,

+                             weekbreakdown[user],

+                             firstseen[user].strftime("%Y-%m-%d"),

+                             lastseen[user].strftime("%Y-%m-%d"),

+                         )

+                     )

+             print("Wrote data/weekly/%s.userdata.%05d.csv" % (discriminant, weeknum))

  

          # and loop around

-         starttime=endtime

-         weeknum+=1

- 

-              

\ No newline at end of file

+         starttime = endtime

+         weeknum += 1

Resolves #1

  • Fixed all the code-style errors, according to flake8 output with the default settings.
  • Added new flake8 config file, exclude possibles env/ directories.

I think flake8 should be added to the requirements.txt file, which was introduced in PR #9.

Great beautification work, and yes, flake8 needs to be added to requirements.txt — but first, we need to merge this one.

+1 for this merge

Pull-Request has been merged by mattdm

3 years ago