#!/usr/bin/env python3
###################
#
# wp-count_authors.py
#
# Copyright 2019 by Ben Cotton. Licensed under the GNU General Public
# License, version 3: https://www.gnu.org/licenses/gpl-3.0.en.html
#
# This script polls the WordPress API to get a count of posts by author, since
# the WordPress API doesn't provide this directly. The main purpose of this is
# to make it easier to award author badges.
#
###################
import argparse
import json
import requests
import sys
# Page size requested from the API. WordPress limits this to 100.
WP_PER_PAGE = 100

# REST endpoint paths; wp_get() appends them to the --host value.
WP_POSTS_API_URL = "/wp-json/wp/v2/posts"
WP_USERS_API_URL = "/wp-json/wp/v2/users"
def wp_get(url, page):
    '''
    Fetch one page of results from the WordPress API.

    url  -- endpoint path, appended to options.host
    page -- page number to request

    Return a tuple (total_pages, data): total_pages is the integer value of
    the X-WP-TotalPages response header and data is the decoded JSON body.

    The --after/--before filters are added to the query when they are set.

    We have to specify a non-default User-Agent because WP Engine gives
    HTTP 403 to requests using "python-requests/*"

    If the request fails, the server answers with an HTTP error status, the
    page-count header is missing or not a number, or the body is not valid
    JSON, the script exits with an error message instead of carrying on
    with undefined results.
    '''
    api_params = {'per_page': WP_PER_PAGE, 'page': page}
    if options.after_date:
        api_params['after'] = options.after_date
    if options.before_date:
        api_params['before'] = options.before_date

    # Build the URL once. Formatting "..." % options.host + url would apply
    # the % to the host alone and glue the path on after the newline.
    full_url = options.host + url
    if options.verbose:
        sys.stderr.write("Calling %s\n" % full_url)

    try:
        response = requests.get(full_url,
                                params=api_params,
                                headers={'User-Agent': 'wp-count_authors.py'})
        # Surface 4xx/5xx answers as themselves rather than as a confusing
        # "missing header" error further down.
        response.raise_for_status()
        total_pages = int(response.headers['X-WP-TotalPages'])
        data = response.json()
    except (requests.RequestException, KeyError, ValueError) as e:
        # There is nothing sensible to return here, so stop with the real
        # cause; sys.exit() prints the message to stderr and exits non-zero.
        sys.exit("Well heck! %s" % e)

    if options.verbose:
        sys.stderr.write("\tProcessing page %i of %i\n" % (page, total_pages))
    return (total_pages, data)
# Command-line interface. The parsed result is kept in the module-level
# "options" object, which the rest of the script reads directly.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--host",
    default="https://communityblog.fedoraproject.org",
    help="WordPress host to query",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="Enable verbose output",
)
parser.add_argument(
    "-a", "--after",
    dest="after_date",
    help="Only posts after specified date",
)
parser.add_argument(
    "-b", "--before",
    dest="before_date",
    help="Only posts before specified date",
)
parser.add_argument(
    "-n",
    dest="sortbycount",
    action="store_true",
    help="Sort output numerically instead of alphabetically",
)
options = parser.parse_args()
# The posts API requires a full ISO 8601 string, but that's annoying.
# Let's add the time of day unless the user specified one.
for _date_opt in ("after_date", "before_date"):
    _date_value = getattr(options, _date_opt)
    # Leave unset options alone, and trust any value that already has a 'T'
    if _date_value and 'T' not in _date_value:
        setattr(options, _date_opt, _date_value + 'T00:00:00')
# Walk every page of the posts endpoint, tallying posts per author ID
print("Getting all posts")
post_count = {}
# The real page count is only known once the first response is in, so start
# with a placeholder that lets the first request go ahead.
page, total_pages = 1, 2
while page <= total_pages:
    total_pages, api_response = wp_get(WP_POSTS_API_URL, page)
    for post in api_response:
        author_id = post['author']
        post_count[author_id] = post_count.get(author_id, 0) + 1
    page += 1
# Walk every page of the users endpoint, building an ID -> slug lookup
print("Getting all authors")
authors = {}
# Placeholder page count until the first response reports the real one
page, total_pages = 1, 2
while page <= total_pages:
    total_pages, api_response = wp_get(WP_USERS_API_URL, page)
    authors.update((user['id'], user['slug']) for user in api_response)
    page += 1
# Re-key the post counts by author slug, leaving out authors with no posts
print("Mapping the post counts to FAS IDs")
posts_authored = {}
for author_id, slug in authors.items():
    if author_id in post_count:
        posts_authored[slug] = post_count[author_id]
    elif options.verbose:
        sys.stderr.write("Skipping %s because they have no articles\n" % slug)
# Visual separator before the results
print("+---+---+---+---+---+---+---+")
if options.sortbycount:
    # -n: highest post count first
    sorted_authors = sorted(posts_authored.items(), key=lambda x: x[1], reverse=True)
else:
    # Default: alphabetical by author slug, as the -n help text promises.
    # Without an explicit sort the order would be whatever order the API
    # happened to return the users in.
    sorted_authors = sorted(posts_authored.items())
# One line per author: post count, a tab, then the author slug
for fasid, num_posts in sorted_authors:
    print(f"{num_posts}\t{fasid}")