"""Print a short, dash-separated name next to every commenter URI.

The script walks each sub-directory of the configured comments directory,
loads that entry's comment collection, and groups the comments by the
author's URI.  It then prints one line per URI: the shortened name
followed by the URI itself.
"""
# NOTE: the original import order is preserved on purpose; project modules
# are imported before some stdlib ones and may rely on that ordering.
from config import config
import model
import os
from comment_tools import safeparsename
import urlparse
import md5
import sys


def shorten(uri):
    """Return a dash-joined short name built from the pieces of *uri*.

    The name consists of the scheme, the host name, and every
    "/"-separated piece of the path after the first one (for an absolute
    path the first piece is the empty string before the leading slash).

    No hash of the URI is included, so URIs that differ only in their
    query string or fragment shorten to the same name.

    Raises:
        TypeError: if the URI has no host name (``urlsplit`` reports it as
            ``None``, which ``str.join`` rejects).
    """
    # Break apart the URI according to RFC 3986.
    parts = urlparse.urlsplit(uri)

    simple_name = [parts.scheme, parts.hostname]
    simple_name.extend(parts.path.split("/")[1:])

    # Return the list concatenated with dashes.
    return "-".join(simple_name)


outdir = config.get('dir', 'commentsbyauthor')
comments = config.get('dir', 'comments')

# Maps each commenter URI to a record holding:
#   "names":    the set of aliases used with that URI
#   "comments": a list of (entry, comment id) tuples
by_uri = {}

for entry in os.listdir(comments):
    # Only directories are treated as entries holding comments.
    if not os.path.isdir(os.path.join(comments, entry)):
        continue

    coll = model.comments(entry)
    if not coll:
        continue

    for comment_id in coll.id_list():
        member = coll.get(comment_id)
        author = safeparsename(member.get('author', ''))

        uri = author['uri']
        if not uri:
            # Comments without an author URI cannot be grouped.
            continue

        record = by_uri.setdefault(uri, {"names": set(), "comments": []})
        record['names'].add(author['name'])
        record['comments'].append((entry, comment_id))

for uri in by_uri:
    try:
        # Build the whole line first so a failure never leaves a
        # half-written line on stdout.
        line = "%s %s" % (shorten(uri), uri)
        print(line)
    except (TypeError, ValueError, UnicodeError) as err:
        # Best effort: a URI that cannot be shortened or printed is
        # skipped, but the reason is reported instead of being hidden.
        sys.stderr.write("skipping %r: %r\n" % (uri, err))