okh-tool 2.4.2

A CLI tool for working with Open Know-How (OKH) data files. Its main functions are validating these files and converting them between the different formats.
#!/usr/bin/env bash
# SPDX-FileCopyrightText: 2022 Robin Vobruba <hoijui.quaero@gmail.com>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# See the output of "$0 -h" for details.

# Exit immediately on each error and unset variable;
# see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -Eeuo pipefail
#set -Eeu

# Symlink-resolved location of this script;
# the project paths below are derived from it.
script_path="$(readlink -f "${BASH_SOURCE[0]}")"
script_dir="$(dirname "$script_path")"
script_name="$(basename "$script_path")"

# initial default values
APP_NAME="OKH LOSH-v1 Crawled Data Validator"
# https://github.com/OpenKnowHow/okh-search/blob/master/projects_okhs.csv
CRAWLED_DATA_CLONE_URL="https://gitlab.opensourceecology.de/verein/projekte/losh-rdf.git"
proj_root_dir="$script_dir/.."
build_dir="$proj_root_dir/target"
local_repo_dir="$build_dir/losh-crawled"
local_repo_val_rep_dir="$build_dir/losh-crawled-validation-report"
# Tool lookup order:
# 1. OKH_TOOL as supplied through the environment,
# 2. an 'okh-tool' found on PATH,
# 3. a file named 'okh-tool' directly inside "$build_dir/release".
# 'command -v' is a shell builtin, so - unlike the external 'which' -
# it is always available and stays silent when nothing is found.
OKH_TOOL="${OKH_TOOL:-$(command -v okh-tool || true)}"
okh_tool="${OKH_TOOL:-$(find "$build_dir/release" -maxdepth 1 -type f -name okh-tool || true)}"
# Stage switches; they are executed as commands further down
# (e.g. 'if $fetch_data'), so they have to be 'true' or 'false'.
fetch_data=false
convert=true
clean=false
validate=true

# Prints the usage help to stdout.
# Globals: APP_NAME (read), script_name (read)
function print_help() {

	# printf re-uses the format for every argument,
	# so each argument becomes one output line ("" yields a blank line).
	printf '%s\n' \
		"$APP_NAME - Locally clones the YAML and RDF/Turtle data" \
		"crawled within the context of LOSH, cleans and validates it." \
		"" \
		"Usage:" \
		"  $script_name [OPTION...]" \
		"Options:" \
		"  -h, --help          Print this usage help and exit" \
		"Examples:" \
		"  $script_name"
}

# read command-line args
# '-h'/'--help' prints the usage and exits;
# every other argument is collected and restored as a positional parameter.
POSITIONAL=()
while [[ $# -gt 0 ]]
do
	arg="$1"
	shift # $2 -> $1, $3 -> $2, ...

	case "$arg" in
		-h|--help)
			print_help
			exit 0
			;;
		*) # non-/unknown option
			POSITIONAL+=("$arg") # save it in an array for later
			;;
	esac
done
# restore positional parameters
# NOTE The '${array[@]+...}' guard expands to nothing for an empty array.
#      A bare "${POSITIONAL[@]}" is treated as an unset variable
#      by bash versions before 4.4 while 'set -u' is active,
#      which aborted the script whenever it was run without arguments.
set -- ${POSITIONAL[@]+"${POSITIONAL[@]}"}

# Returns a checksum for a supplied file
# Arguments: $1 - path of the file to generate the checksum for
# Outputs:   the first field of the 'shasum --text' output on stdout,
#            or "0" if $1 is not an existing regular file
function cs() {
	# 'local' keeps this from overwriting a caller's variable named 'file'
	local file="$1"
	if [ -f "$file" ]
	then
		# shasum prints "<checksum>  <file-name>"; keep only the checksum
		shasum --text "$file" | sed -e 's/ .*$//'
	else
		echo "0"
	fi
}

# Decodes a URL-encoded string ('+' -> ' ', '%XX' -> the byte 0xXX)
# and prints the result to stdout.
# All arguments are joined into a single string before decoding.
# From:
# https://stackoverflow.com/a/37840948/586229
function url_decode() {
	# Step 1: every '+' stands for a space
	local plus_decoded="${*//+/ }"
	# Step 2: turn each '%' into '\x', and let 'echo -e' expand these hex escapes
	echo -e "${plus_decoded//%/\\x}"
}

# Continue only if $okh_tool names an existing regular file;
# in that case report the tool (and its version) that is going to be used.
if [[ -n "$okh_tool" && -f "$okh_tool" ]]
then
	echo "INFO: Using $("$okh_tool" --version) at '$okh_tool'."
else
	>&2 echo "ERROR: No valid path to the okh-tool could be figured out. please set OKH_TOOL accordingly!"
	exit 1
fi

# Create the build dir, the parent dir of the local clone,
# and the validation report dir, if they do not exist yet.
for required_dir in \
	"$build_dir" \
	"$(dirname "$local_repo_dir")" \
	"$local_repo_val_rep_dir"
do
	mkdir -p "$required_dir"
done

# Stage: fetch - creates the local clone of the crawled data,
# or brings an already existing one up to date.
if $fetch_data
then
	printf '\n%s\n%s\n' \
		"################################################################################" \
		"Fetch crawled data ..."
	if ! [ -e "$local_repo_dir" ]
	then
		# No local copy yet -> initial clone
		git clone "$CRAWLED_DATA_CLONE_URL" "$local_repo_dir"
	else
		# Local copy present -> fetch, then rebase onto the remote main branch
		git -C "$local_repo_dir" fetch
		git -C "$local_repo_dir" rebase origin/main
	fi
fi

# Stage: convert - runs the 'filter-yaml' script located next to this script
# on the 'RDF' directory of the local clone of the crawled data.
if $convert
then
	echo
	echo "################################################################################"
	echo "Converting OKH-LOSH-v1-krawler YAML to OKH-LOSH-v1 (official specification conformant) TOML files ..."
	echo "(This is mostly just a format conversion, with some additional filtering and adhering to the spec)"
	# Use the configured clone location, instead of repeating its path,
	# so that this stage keeps working if $local_repo_dir gets changed.
	"$script_dir/filter-yaml" "$local_repo_dir/RDF/"
fi

# Stage: clean - copies each OKH v1 YAML file found below $yamls_orig_dir
# into $yamls_clean_dir (flat, under its base name),
# applying a set of hard-coded textual fixes on the way,
# and appending defaults for a missing 'version' or 'licensor' entry.
if $clean
then
	# This stage relies on these two directories being supplied from outside
	# (e.g. through the environment). Fail with an explicit message,
	# rather than with a bare "unbound variable" error caused by 'set -u'.
	: "${yamls_orig_dir:?must be set to the directory containing the original OKH v1 YAML files}"
	: "${yamls_clean_dir:?must be set to the directory to write the cleaned OKH v1 YAML files to}"
	# The redirect into "$yaml_clean_path" below fails if this dir is missing.
	mkdir -p "$yamls_clean_dir"

	echo
	echo "################################################################################"
	echo "\"Cleaning\" OKH v1 YAML files ..."
	# 'IFS=' prevents 'read' from trimming leading/trailing white-space off the paths
	find "$yamls_orig_dir" -type f -iregex ".*\.[yY][aA]?[mM][lL]" | while IFS= read -r yaml_orig_path
	do
		# Base name of the file (everything after the last '/')
		yaml_file="${yaml_orig_path##*/}"
		yaml_clean_path="$yamls_clean_dir/$yaml_file"

		# Files matching one of these patterns are left out entirely
		if [[ "$yaml_file" =~ .*BadenLab____Hyperspectral-scanner.* ]] \
			|| [[ "$yaml_file" =~ .*_php_title_Composite_Materials_Resin_Mixing.*.yml ]] \
			|| [[ "$yaml_file" =~ .*____jbon____Ball-Machine____master____okh-ballSortingMachine_yml.yml ]] \
			|| [[ "$yaml_file" =~ .*title_Hexayurt____Book.yml ]]
		then
			echo
			echo "Skipped cleaning of '$yaml_file'."
			continue
		fi

		echo
		echo "Cleaning '$yaml_file' ..."
		# HACKY fixes
		sed \
			-e 's|FALSE|false|g' \
			-e 's|TRUE|true|g' \
			-e 's|^licensor: Field Ready|licensor:\n  name: Field Ready|g' \
			-e 's|^development-stage: \[value\]||' \
			-e 's|flase|false|g' \
			-e 's|^%Open know-how manifest 0.1$||g' \
			-e 's|^---$||g' \
			-e 's|: @Du33Jerry|: "@Du33Jerry"|g' \
			-e 's|name: $|name: ANONYMOUS|g' \
			-e 's| bchow(at)seas(dot)upenn(dot)edu| bchow@seas.upenn.edu|g' \
			-e 's| j\.bonvoisin\[at\]bath\.ac\.uk| j.bonvoisin@bath.ac.uk|g' \
			-e 's|^<|#<|g' \
			-e 's|^ in <b|# in <b|g' \
			-e 's|^title: \([^>|]\)|title: >\n  \1|g' \
			-e 's|^description: \([^>|]\)|description: >\n  \1|g' \
			-e 's|: [.][/]|: |' \
			-e 's|\s*# required .*$||' \
			-e 's|: CC |: CC-|' \
			-e 's|: CERN OHL v1.2|: CERN-OHL-1.2|' \
			-e 's|: CERN$|: CERN-OHL-1.1|' \
			"$yaml_orig_path" \
			> "$yaml_clean_path"
		# Append a placeholder version, if the file does not specify one
		if ! grep -q "^version: " < "$yaml_clean_path"
		then
			{
				echo
				echo "version: UNVERSIONED"
				echo
			} >> "$yaml_clean_path"
		fi
		# Append a placeholder licensor, if the file does not specify one
		if ! grep -q "^licensor:" < "$yaml_clean_path"
		then
			{
				echo
				echo "licensor:"
				echo "  name: ANONYMOUS"
				echo
			} >> "$yaml_clean_path"
		fi
		echo "Done cleaning '$yaml_file'."
	done
fi

# Stage: validate - runs the okh-tool validation recursively
# over the local clone of the crawled data,
# redirecting the tool's stdout into the report log file.
if $validate
then
	echo
	echo "################################################################################"
	echo "Validating OKH LOSH TOML files ..."
	validation_args=(
		val
		--okh-version losh
		--continue
		--recursive
		"$local_repo_dir"
	)
	report_file="$local_repo_val_rep_dir/report_log.txt"
	"$okh_tool" "${validation_args[@]}" > "$report_file"
	echo "Done validating OKH LOSH TOML files."
fi