#!/usr/bin/env bash
#
# Prints one `<lang> = "<hash>";` line per tessdata language file, where the
# hash is whatever `nix-prefetch-url` reports for that file's raw GitHub URL.
#
# Usage:
#   ./fetch-language-hashes <tessdataRev> [<language code>…]
#
# Fetches all languages if no language codes are given.
#
# Example:
#   ./fetch-language-hashes 4.0.0 eng spa
#
# Output:
#   eng = "0iy0...";
#   spa = "15kw...";
#
# Exit status:
#   0 on success; non-zero if arguments are missing, the revision page cannot
#   be fetched, no language files are found, or a prefetch fails.

set -euo pipefail

readonly repoUrl=https://github.com/tesseract-ocr/tessdata

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if (( $# < 1 )); then
  die "Usage: ${0##*/} <tessdataRev> [<language code>…]"
fi

tessdataRev=$1
shift

if (( $# > 0 )); then
  # Keep the codes as an array so each argument stays exactly one word.
  langCodes=("$@")
else
  # No codes given: scrape the repository listing for *.traineddata names.
  # -f makes curl fail on HTTP errors, which is how a bad revision shows up.
  repoPage=$(curl -fs "$repoUrl/tree/$tessdataRev") \
    || die "Invalid tessdataRev: $tessdataRev"

  # Extract the path component preceding ".traineddata". The same file name
  # can appear more than once in the page, so de-duplicate while sorting.
  mapfile -t langCodes < <(
    grep -oP '(?<=/)[^/]+?(?=\.traineddata)' <<<"$repoPage" | sort -u
  )

  # An empty result means the scrape failed; do not report silent success.
  (( ${#langCodes[@]} > 0 )) \
    || die "No language files found for tessdataRev: $tessdataRev"
fi

for lang in "${langCodes[@]}"; do
  url=$repoUrl/raw/$tessdataRev/$lang.traineddata
  # stderr of nix-prefetch-url is discarded so that only the hash on stdout
  # is captured; failures are reported explicitly instead.
  hash=$(nix-prefetch-url "$url" 2>/dev/null) \
    || die "Failed to prefetch: $url"
  printf '%s = "%s";\n' "$lang" "$hash"
done