A couple of people asked how to see their stats after I talked about it on the Dabs post yesterday. Here's the script, updated to be usable by people who aren't Python nerds.

And if you're not a programmer, maybe wait until some other folks have looked over the code to make sure it's not doing anything funky. I've tested this and didn't put anything malicious in there, but it's still good practice.

For comparison, National Novel Writing Month entails writing 50,000 words at an average rate of 1,667/day.

"""
This script displays a wordcount for all your comments & posts on Hexbear

If you don't know how to run a Python script, you can paste the entire thing
into an online compiler instead of running it on your own computer, since
running unvetted scripts from internet strangers is a bad idea.

STEP 1) Go here: https://www.programiz.com/python-programming/online-compiler/
STEP 2) Copy-paste this entire script into the box
STEP 3) Replace "liberal" with your username. Do not remove quotation marks
STEP 4) Click the Run button
"""


USERNAME = "liberal"


# If you're not a programmer, just ignore the rest of the file


import json
from datetime import datetime
from string import punctuation
from urllib.request import Request, urlopen as fetch
from urllib.parse import urlparse, urlencode, urlunparse
from urllib.error import HTTPError


# Tolerate usernames pasted with a leading "/u/" or "u/" prefix:
# drop one leading slash first, then a leading "u/".
USERNAME = USERNAME[1:] if USERNAME.startswith('/') else USERNAME
USERNAME = USERNAME[2:] if USERNAME.startswith('u/') else USERNAME


def word_list(s: str) -> list:
    """Return the whitespace-separated words of s after deleting ASCII punctuation.

    Deliberately naive: Markdown syntax can yield odd tokens, and quoted
    text is counted like everything else.
    """
    # Mapping a code point to None tells str.translate to delete that character
    strip_table = {ord(symbol): None for symbol in punctuation}
    return s.translate(strip_table).split()


def fetch_json(url: str):
    """Fetch url and parse the response body as JSON.

    Returns a dict with two keys:
      'success': True if the request completed without an HTTPError
      'data':    the parsed JSON on success; on an HTTP error, the parsed
                 JSON error body if it is valid JSON, otherwise
                 {'error': <body text>} (or {'error': 'unknown error'}
                 when the body is empty)

    Only HTTPError is handled here; other failures (e.g. URLError when the
    host is unreachable, or invalid JSON in a successful response) propagate
    to the caller.
    """
    request = Request(url, headers={
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Content-Type': 'application/json'
    })
    try:
        response = fetch(request)
    except HTTPError as error:
        # The error object wraps the response body; read it, then release it
        try:
            message = error.read()
        finally:
            error.close()

        if not message:
            data = {'error': 'unknown error'}
        elif message.startswith(b'{'):
            try:
                data = json.loads(message)
            except ValueError:
                # Looked like JSON but was not; report the raw text instead of crashing
                data = {'error': message.decode('utf-8', errors='replace')}
        else:
            # errors='replace' so a non-UTF-8 error page cannot raise here
            data = {'error': message.decode('utf-8', errors='replace')}
        return {'success': False, 'data': data}
    else:
        # Close the response once the body has been parsed
        with response:
            data = json.load(response)
        return {'success': True, 'data': data}


def build_url(*, username: str):
    """Return the hexbear.net API URL that requests the given user's details."""
    site = urlparse('https://hexbear.net/')
    params = urlencode({
        'username': username,
        'sort': 'New',
        'limit': 30000,
        # Passed as a string: the bool False would be encoded as "False"
        'saved_only': 'false',
    })
    # Keep the site's scheme/host, swap in the API path and query string
    return urlunparse((
        site.scheme,
        site.netloc,
        'api/v1/user',
        site.params,
        params,
        site.fragment,
    ))


def main():
    """Fetch USERNAME's comments and posts and print word-count statistics.

    Prints the total and unique word counts since the account's creation
    date, plus the average words per day. If the request fails, prints the
    error reported by fetch_json and returns without printing statistics.
    """
    url = build_url(username=USERNAME)
    response = fetch_json(url)
    success, data = response['success'], response['data']
    if not success:
        # A JSON error body is passed through as-is and may lack an 'error'
        # key; fall back to showing the whole payload rather than raising.
        reason = data.get('error', data) if isinstance(data, dict) else data
        print('An error occured: {}'.format(reason))
        return

    # `or ''` guards against null text fields; otherwise a null body would be
    # formatted as the literal word "None" and counted.
    # NOTE(review): assumes the API can return null for content/body — confirm.
    comments = [c['content'] or '' for c in data['comments']]
    posts = [f"{p['name']} {p.get('body') or ''}" for p in data['posts']]
    activities = comments + posts
    words = [word for activity in activities for word in word_list(activity)]

    total_count = len(words)
    unique_count = len(set(words))
    # Only the leading YYYY-MM-DD portion of the timestamp is used
    account_created = data['user']['published'][:10]
    account_created = datetime.strptime(account_created, '%Y-%m-%d').date()
    days_lapsed = (datetime.now().date() - account_created).days
    # An account created today has 0 elapsed days; count it as one day so the
    # average below cannot divide by zero.
    days_lapsed = max(days_lapsed, 1)
    print(f'/u/{USERNAME} has written {total_count:,} ({unique_count:,} unique) words since {account_created}.')
    print(f"That's an average of {total_count // days_lapsed:,} per day.")


# Run the report only when this file is executed as a script, not when imported
if __name__ == '__main__':
    main()
      • marxisthayaca [he/him,they/them]
        ·
        3 年前

        I post a lot of book club announcements and those are a lot of stuff to write. Technically I can't claim all the credit for a lot of that.

        • KermitTheFraud [they/them]
          hexagon
          ·
          3 年前

          That’s fair. I was originally going to try to exclude quoted text but I don’t think it would have been worth the time honestly