import subprocess
from html.parser import HTMLParser
from os.path import abspath, dirname
from urllib.request import urlopen
class WiktionaryLatestVersionParser(HTMLParser):
def __init__(self, current_version, *args, **kwargs):
self.latest_version = current_version
super().__init__(*args, **kwargs)
def handle_starttag(self, tag, attrs):
if tag != 'a':
return
href = dict(attrs)['href'][0:-1]
if href == 'latest':
return
self.latest_version = max(self.latest_version, href)
def nix_prefetch_url(url, algo='sha256'):
"""Prefetches the content of the given URL."""
print(f'nix-prefetch-url {url}')
out = subprocess.check_output(['nix-prefetch-url', '--type', algo, url])
return out.rstrip()
current_version = subprocess.check_output([
'nix', 'eval', '--raw',
'-f', dirname(abspath(__file__)) + '/../../../..',
'dictdDBs.wiktionary.version',
])
parser = WiktionaryLatestVersionParser(current_version)
with urlopen('https://dumps.wikimedia.org/enwiktionary/') as resp:
parser.feed(resp.read())
print(parser.latest_version)