#!/usr/bin/env bash # # fetch-shape-corpus.sh - Download open-licensed font corpus for glyph shape DB # # This script downloads fonts from the manifest file and copies their LICENSE # files to build/font-licenses/. The script is idempotent - it skips downloads # for fonts that are already present. # # Usage: bash scripts/fetch-shape-corpus.sh # set -euo pipefail # Colors for output readonly GREEN='\033[0;32m' readonly YELLOW='\033[1;33m' readonly NC='\033[0m' # No Color log_info() { echo -e "${GREEN}[INFO]${NC} $1" } log_skip() { echo -e "${YELLOW}[SKIP]${NC} $1" } # Function to download a font # Usage: download_font download_font() { local family_name="$1" local url="$2" local target_file="$3" local family_slug="$4" local license_id="$5" # Create temp directory for download local temp_dir temp_dir=$(mktemp -d) trap "rm -rf ${temp_dir}" RETURN local filename filename=$(basename "${url}") # Download to temp log_info " Fetching ${filename}..." if ! curl -fsSL "${url}" -o "${temp_dir}/${filename}"; then echo " Error: Failed to download ${url}" return 1 fi local downloaded_file="${temp_dir}/${filename}" local target_path="${CORPUS_DIR}/${target_file}" # Handle different file types case "${filename}" in *.zip) # Unzip and find target font unzip -q "${downloaded_file}" -d "${temp_dir}/extracted" find_and_copy_font "${temp_dir}/extracted" "${target_file}" "${target_path}" extract_license_from_archive "${temp_dir}/extracted" "${family_slug}" "${family_name}" "${url}" "${license_id}" ;; *.tar.gz|*.tgz) # Extract tar.gz and find target font mkdir -p "${temp_dir}/extracted" tar -xzf "${downloaded_file}" -C "${temp_dir}/extracted" find_and_copy_font "${temp_dir}/extracted" "${target_file}" "${target_path}" extract_license_from_archive "${temp_dir}/extracted" "${family_slug}" "${family_name}" "${url}" "${license_id}" ;; *.ttf|*.otf) # Direct font file - just copy mkdir -p "$(dirname "${target_path}")" cp "${downloaded_file}" "${target_path}" log_info " Installed: ${target_file}" # For direct downloads, we can't extract LICENSE from the archive # Create a placeholder license file with download URL cat > "${LICENSE_DIR}/${family_slug}.txt" < "${LICENSE_DIR}/${family_slug}.txt" < "${LICENSE_DIR}/${family_slug}.txt" <