#!/usr/bin/env python3

###################
#
# wp-count_authors.py
#
#   Copyright 2019 by Ben Cotton. Licensed under the GNU General Public
#      License, version 3: https://www.gnu.org/licenses/gpl-3.0.en.html
#
# This script polls the WordPress API to get a count of posts by author, since
# the WordPress API doesn't provide this directly. The main purpose of this is
# to make it easier to award author badges.
#
###################

import argparse
import json
import requests
import sys

# How many entries per page do we want? WordPress limits this to 100
WP_PER_PAGE = 100
# URL endpoints
WP_POSTS_API_URL = '/wp-json/wp/v2/posts'
WP_USERS_API_URL = '/wp-json/wp/v2/users'


def wp_get(url, page):
    '''
    Get the output from the WordPress API
    Return the total page count and the response

    url is the API endpoint path, which is appended to options.host, and
    page is the page number to request. The date filters in
    options.after_date and options.before_date are sent along when set.

    If the request fails, the server answers with an HTTP error status, or
    the reply lacks the X-WP-TotalPages header or a JSON body, the problem
    is reported and the script exits with a non-zero status.

    We have to specify a non-default User-Agent because WP Engine gives
    HTTP 403 to requests using "python-requests/*"
    '''
    api_params = {'per_page': WP_PER_PAGE, 'page': page}
    if options.after_date:
        api_params['after'] = options.after_date
    if options.before_date:
        api_params['before'] = options.before_date

    if options.verbose:
        # Parenthesized so the whole URL is formatted into the message;
        # '%' binds tighter than '+'.
        sys.stderr.write("Calling %s\n" % (options.host + url))

    try:
        response = requests.get(
            options.host + url,
            params=api_params,
            headers={'User-Agent': 'wp-count_authors.py'})
        # Surface HTTP errors (e.g. 403) directly instead of tripping over
        # the missing pagination header below.
        response.raise_for_status()
        total_pages = int(response.headers['X-WP-TotalPages'])
        payload = response.json()
    except (requests.RequestException, KeyError, ValueError) as e:
        # There is nothing useful to return at this point, so stop here
        # rather than falling through to an unbound-variable error.
        sys.exit("Well heck! %s" % e)

    if options.verbose:
        sys.stderr.write("\tProcessing page %i of %i\n" % (page, total_pages))

    return (total_pages, payload)

# Command-line options. Parenthesized calls continue across lines on their
# own, so no backslashes are needed.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--host",
    default="https://communityblog.fedoraproject.org",
    help="WordPress host to query",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="Enable verbose output",
)
# "store" is argparse's default action, so the date options only need a dest
parser.add_argument(
    "-a", "--after",
    dest="after_date",
    help="Only posts after specified date",
)
parser.add_argument(
    "-b", "--before",
    dest="before_date",
    help="Only posts before specified date",
)
# store_true already defaults to False
parser.add_argument(
    "-n",
    dest="sortbycount",
    action="store_true",
    help="Sort output numerically instead of alphabetically",
)
options = parser.parse_args()

# The posts API wants a full ISO 8601 timestamp, which is tedious to type.
# When the user gave only a date (no 'T' separator), append midnight.
for date_option in ('after_date', 'before_date'):
    date_value = getattr(options, date_option)
    if date_value and 'T' not in date_value:
        setattr(options, date_option, date_value + 'T00:00:00')

def _wp_get_all(url):
    '''
    Yield every item from a paginated WordPress API endpoint

    The total page count is only known once a response has arrived, so the
    first page is always requested; the loop stops after the last page the
    server reports.
    '''
    page = 1
    while True:
        (total_pages, items) = wp_get(url, page)
        yield from items
        if page >= total_pages:
            break
        page += 1


# Get the posts and tally them by author ID
print("Getting all posts")
post_count = {}
for post in _wp_get_all(WP_POSTS_API_URL):
    post_count[post['author']] = post_count.get(post['author'], 0) + 1

# Get the author list as a mapping of author ID to slug
print("Getting all authors")
authors = {}
for author in _wp_get_all(WP_USERS_API_URL):
    authors[author['id']] = author['slug']

# Translate author IDs into slugs, keeping only authors that have posts
print("Mapping the post counts to FAS IDs")
posts_authored = {}
for author_id, slug in authors.items():
    if author_id in post_count:
        posts_authored[slug] = post_count[author_id]
    elif options.verbose:
        sys.stderr.write("Skipping %s because they have no articles\n" % slug)
        
# Print one "count<TAB>author" line per author below a separator
print("+---+---+---+---+---+---+---+")
if options.sortbycount:
    # Highest post count first
    sorted_authors = sorted(posts_authored.items(), key=lambda x: x[1], reverse=True)
else:
    # Alphabetical by author name, which is what the -n help text says the
    # default is; the dict alone only preserves the order the API returned.
    sorted_authors = sorted(posts_authored.items())

for fasid, num_posts in sorted_authors:
    print(f"{num_posts}\t{fasid}")