WordPress Words Written Python Script

Terminal

Here’s the Python script I wrote that calculates how many words and articles I’ve written on this website. I run it at the end of each year, change the dates, and use the output for my yearly stats posts. No idea if it’s of any interest to anyone, but if you run a WordPress blog it should be pretty much plug-and-play: just change the URL, author ID, and dates.

import requests
from bs4 import BeautifulSoup
import re

# --- Settings to edit before running -----------------------------------
# REST endpoint that lists posts; replace YOURWORDPRESSURL with the site's host.
base_url = "https://YOURWORDPRESSURL/wp-json/wp/v2/posts"
# Only posts by this author ID are requested.
author_id = 1
# Date window sent to the API as its "after" / "before" query parameters.
start_date, end_date = "2002-01-01T00:00:00", "2024-12-31T23:59:59"
# Number of posts requested per page.
per_page = 100

# --- Running state updated by the fetch loop ---------------------------
# Both tallies start at zero and grow as each post is processed.
total_word_count = total_articles = 0
# Page number of the next request (the loop starts at page 1).
page = 1

def clean_content(content):
	"""Return the text of an HTML string with markup and bare URLs removed.

	Args:
		content: HTML markup as a string (the caller passes a post's
			rendered content).

	Returns:
		The text extracted from the markup, with every substring that
		starts with ``http://`` or ``https://`` and runs to the next
		whitespace character deleted.
	"""
	# Parse the markup and keep only its text, discarding all tags.
	text_only = BeautifulSoup(content, "html.parser").get_text()

	# Links written out as plain text would otherwise be counted as words.
	link_pattern = re.compile(r'http[s]?://\S+')
	return link_pattern.sub('', text_only)

print("Starting to fetch posts...")

# Seconds to wait for the server on each request. Without a timeout,
# requests.get() can block forever if the site stops responding.
request_timeout = 30

# Becomes True only when pagination ends normally, so that totals produced
# after an error can be flagged as incomplete instead of passing as final.
fetch_completed = False

# Fetch posts one page at a time until the API reports there are no more.
while True:
	params = {
		"author": author_id,
		"after": start_date,
		"before": end_date,
		"per_page": per_page,
		"page": page,
	}
	try:
		response = requests.get(base_url, params=params, timeout=request_timeout)
	except requests.RequestException as exc:
		# Connection failures, DNS errors, timeouts, etc.
		print(f"Error: request for page {page} failed - {exc}")
		break

	# A 400 is the normal "past the last page" signal only when the API says
	# so explicitly; any other 400 (bad date, bad author, ...) is a real error
	# and must not be reported as a clean finish.
	if response.status_code == 400:
		try:
			error_code = response.json().get("code")
		except (ValueError, AttributeError):
			error_code = None
		if error_code == "rest_post_invalid_page_number":
			print("No more posts to fetch.")
			fetch_completed = True
		else:
			print(f"Error: {response.status_code} - {response.text}")
		break

	if response.status_code != 200:
		print(f"Error: {response.status_code} - {response.text}")
		break

	try:
		posts = response.json()
	except ValueError:
		print(f"Error: page {page} did not return valid JSON.")
		break

	# Break if no posts on this page
	if not posts:
		print(f"Completed fetching posts. Total pages processed: {page - 1}")
		fetch_completed = True
		break

	print(f"Fetching page {page} with up to {per_page} posts...")

	# Count words and articles
	for post in posts:
		total_articles += 1
		content = post.get("content", {}).get("rendered", "")
		cleaned_content = clean_content(content)
		total_word_count += len(cleaned_content.split())

	# The API advertises the number of pages in a response header; stopping
	# on the last page avoids one extra request that would only return a 400.
	total_pages = response.headers.get("X-WP-TotalPages", "")
	if total_pages.isdigit() and page >= int(total_pages):
		print(f"Completed fetching posts. Total pages processed: {page}")
		fetch_completed = True
		break

	page += 1

# Counting feedback
if not fetch_completed:
	print("Warning: fetching stopped early; the totals below are incomplete.")
print(f"Counting complete. Processed {total_articles:,} articles.")

# Format word count and article count with commas
formatted_word_count = f"{total_word_count:,}"
formatted_articles = f"{total_articles:,}"

# Output results
print(f"Total articles published: {formatted_articles}")
print(f"Total words written: {formatted_word_count}")