Here’s the Python script I wrote that calculates how many words and articles I’ve written on this website. I run this at the end of the year, change the dates, and use it for my yearly stats posts. No idea if it’s of any interest to anyone, but if you run a WordPress blog it should be pretty plug-and-play: just change the URL, author ID, and dates.
import requests
from bs4 import BeautifulSoup
import re
# --- Settings: edit these before running ---------------------------------
# Posts endpoint of the WordPress site being tallied.
base_url = "https://YOURWORDPRESSURL/wp-json/wp/v2/posts"
# Sent as the "author" query parameter.
author_id = 1
# Date window, sent as the "after" / "before" query parameters.
start_date, end_date = "2002-01-01T00:00:00", "2024-12-31T23:59:59"
# Sent as the "per_page" query parameter.
per_page = 100

# --- Running state, updated while pages are fetched ----------------------
page = 1
total_word_count = total_articles = 0
def clean_content(content):
    """Return the text of an HTML fragment with tags and bare URLs removed.

    Args:
        content: HTML markup as a string. A falsy value (``""`` or ``None``)
            is treated as "no content".

    Returns:
        The text extracted by BeautifulSoup's ``get_text()`` with every
        ``http://`` / ``https://`` run of non-whitespace characters deleted.
        Returns ``""`` when ``content`` is falsy.
    """
    # Nothing to parse; also avoids handing None to the HTML parser.
    if not content:
        return ""
    # Step 1: Remove HTML tags
    soup = BeautifulSoup(content, "html.parser")
    plain_text = soup.get_text()
    # Step 2: Remove plain-text URLs so a pasted link is not counted as a word
    url_pattern = r'https?://\S+'
    return re.sub(url_pattern, '', plain_text)
print("Starting to fetch posts...")
# Set when the loop stops on a failure, so the summary can flag partial totals.
fetch_failed = False
# Fetch posts one page at a time until the API has no more pages.
while True:
    params = {
        "author": author_id,
        "after": start_date,
        "before": end_date,
        "per_page": per_page,
        "page": page,
    }
    try:
        # Without a timeout requests can wait forever on a stalled server.
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: request for page {page} failed - {exc}")
        fetch_failed = True
        break
    # Handle errors
    if response.status_code == 400:
        # A 400 can mean "page past the end" or a genuinely bad parameter;
        # only the former is a normal end of pagination.
        try:
            error_body = response.json()
        except ValueError:
            error_body = None
        error_code = error_body.get("code") if isinstance(error_body, dict) else None
        if error_code == "rest_post_invalid_page_number":
            print("No more posts to fetch.")
            break
    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        fetch_failed = True
        break
    try:
        posts = response.json()
    except ValueError:
        # e.g. an HTML maintenance or cache page served with status 200.
        print(f"Error: page {page} did not return valid JSON.")
        fetch_failed = True
        break
    # Break if no posts on this page
    if not posts:
        print(f"Completed fetching posts. Total pages processed: {page - 1}")
        break
    print(f"Fetching page {page} with up to {per_page} posts...")
    # Count words and articles
    for post in posts:
        total_articles += 1
        content = post.get("content", {}).get("rendered", "")
        total_word_count += len(clean_content(content).split())
    # If the server reports the page count, stop on the last page instead of
    # issuing one more request just to receive an error.
    total_pages = response.headers.get("X-WP-TotalPages", "")
    if total_pages.isdecimal() and page >= int(total_pages):
        print(f"Completed fetching posts. Total pages processed: {page}")
        break
    page += 1
# Counting feedback
if fetch_failed:
    print("Warning: fetching stopped early because of an error; totals below are incomplete.")
print(f"Counting complete. Processed {total_articles:,} articles.")
# Format word count and article count with commas
formatted_word_count = f"{total_word_count:,}"
formatted_articles = f"{total_articles:,}"
# Output results
print(f"Total articles published: {formatted_articles}")
print(f"Total words written: {formatted_word_count}")