# Abort on errors, on use of unset variables and on failures anywhere in a
# pipeline; -E makes ERR traps apply inside functions and subshells too.
set -Eeuo pipefail

# Location of this script; every project path below is derived from it.
script_path="$(readlink -f "${BASH_SOURCE[0]}")"
script_dir="$(dirname "$script_path")"
script_name="$(basename "$script_path")"

APP_NAME="OKH LOSH-v1 Crawled Data Validator"
CRAWLED_DATA_CLONE_URL="https://gitlab.opensourceecology.de/verein/projekte/losh-rdf.git"

# Directory layout: everything generated ends up below the build directory.
proj_root_dir="$script_dir/.."
build_dir="$proj_root_dir/target"
local_repo_dir="$build_dir/losh-crawled"
local_repo_val_rep_dir="$build_dir/losh-crawled-validation-report"

# Resolve the okh-tool binary, in order of precedence:
#   1. the OKH_TOOL environment variable,
#   2. an `okh-tool` found on PATH,
#   3. a release build inside the build directory.
# `command -v` is a shell builtin, so the PATH lookup also works on systems
# that do not ship the external `which` utility.
OKH_TOOL="${OKH_TOOL:-$(command -v okh-tool || true)}"
# A missing release directory is expected here (nothing built yet), so the
# resulting find error is silenced; the emptiness of okh_tool is checked later.
okh_tool="${OKH_TOOL:-$(find "$build_dir/release" -maxdepth 1 -type f -name okh-tool 2>/dev/null || true)}"

# Stage switches; each one enables the corresponding section further down.
fetch_data=false
convert=true
clean=false
validate=true
# Writes the usage text of this script to stdout.
# Globals: APP_NAME (read), script_name (read)
print_help() {
  # One argument per output line; the empty argument yields the blank line.
  printf '%s\n' \
    "$APP_NAME - Locally clones the YAML and RDF/Turtle data" \
    "crawled within the context of LOSH, cleans and validates it." \
    "" \
    "Usage:" \
    " $script_name [OPTION...]" \
    "Options:" \
    " -h, --help Print this usage help and exit" \
    "Examples:" \
    " $script_name"
}
# Command line parsing: -h/--help prints the usage text and exits, every
# other argument is collected and afterwards restored as the positional
# parameters of the script.
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  arg="$1"
  shift
  case "$arg" in
    -h|--help)
      print_help
      exit 0
      ;;
    *)
      POSITIONAL+=("$arg")
      ;;
  esac
done
# Under `set -u`, bash versions before 4.4 treat "${array[@]}" of an EMPTY
# array as an unbound variable and abort. The `+` guard expands to nothing in
# that case and to the properly quoted elements otherwise.
set -- ${POSITIONAL[@]+"${POSITIONAL[@]}"}
# Prints a checksum for a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the hash column of `shasum --text` (everything from the first
#            space on is stripped), or the literal "0" if $1 is not a
#            regular file
function cs() {
  # Keep the path local so calling cs never overwrites a global `file`.
  local file="$1"
  if [ -f "$file" ]; then
    shasum --text "$file" | sed -e 's/ .*$//'
  else
    echo "0"
  fi
}
# Decodes a URL-encoded (percent-encoded) string.
# Arguments: all arguments are joined into the string to decode
# Outputs:   the decoded string on stdout, followed by a newline
function url_decode() {
  # '+' encodes a space.
  local encoded="${*//+/ }"
  # Turn every "%HH" into "\xHH" and let printf's %b expand those escapes.
  # printf (unlike `echo -e`) never mistakes the data for an option, so
  # inputs such as "-n" are printed instead of being swallowed.
  printf '%b\n' "${encoded//%/\\x}"
}
# Continue only when okh_tool names an existing regular file; in that case
# report the tool's version and location, otherwise fail with exit code 1.
if [[ -n "$okh_tool" && -f "$okh_tool" ]]; then
  echo "INFO: Using $("$okh_tool" --version) at '$okh_tool'."
else
  >&2 echo "ERROR: No valid path to the okh-tool could be figured out. please set OKH_TOOL accordingly!"
  exit 1
fi
# Make sure the build directory, the parent directory of the local clone and
# the validation report directory exist (created in this order).
for needed_dir in \
  "$build_dir" \
  "$(dirname "$local_repo_dir")" \
  "$local_repo_val_rep_dir"; do
  mkdir -p "$needed_dir"
done
# Stage "fetch" (enabled via fetch_data): clone the crawled data on first
# use, otherwise fetch and rebase the existing clone onto origin/main.
if $fetch_data; then
  echo
  echo "################################################################################"
  echo "Fetch crawled data ..."
  if ! [ -e "$local_repo_dir" ]; then
    git clone "$CRAWLED_DATA_CLONE_URL" "$local_repo_dir"
  else
    git -C "$local_repo_dir" fetch
    git -C "$local_repo_dir" rebase origin/main
  fi
fi
# Stage "convert" (enabled via convert): hand the RDF directory of the local
# clone to the filter-yaml helper that sits next to this script.
if $convert; then
  echo
  echo "################################################################################"
  echo "Converting OKH-LOSH-v1-krawler YAML to OKH-LOSH-v1 (official specsification conformant) TOML files ..."
  echo "(This is mostly just a format conversion, with some additional filtering and adhering to the spec)"
  # Use local_repo_dir (the clone location also used by the fetch and
  # validate stages) instead of repeating its path, so the stages cannot
  # drift apart when the clone location changes.
  "$script_dir/filter-yaml" "$local_repo_dir/RDF/"
fi
# Stage "clean" (enabled via clean): writes a cleaned copy of every YAML file
# found under $yamls_orig_dir into $yamls_clean_dir, applying a fixed list of
# sed substitutions and appending default `version` / `licensor` entries
# where those keys are missing.
# NOTE(review): neither yamls_orig_dir nor yamls_clean_dir is assigned in the
# visible part of this script; with `set -u` active this stage would abort
# as soon as `clean` is switched to true -- confirm where they should come from.
if $clean
then
echo
echo "################################################################################"
echo "\"Cleaning\" OKH v1 YAML files ..."
# Select *.yml / *.yaml files in any letter case; each path is fed to the
# loop on the right-hand side of the pipeline.
find "$yamls_orig_dir" -type f -iregex ".*\.[yY][aA]?[mM][lL]" | while read -r yaml_orig_path
do
# File name without its directory part (longest leading "*/" removed).
yaml_file="${yaml_orig_path/#*\/}"
yaml_clean_path="$yamls_clean_dir/$yaml_file"
# Files whose names match this hard-coded list are skipped entirely, i.e. no
# cleaned copy is written for them.
if [[ "$yaml_file" =~ .*BadenLab____Hyperspectral-scanner.* ]] \
|| [[ "$yaml_file" =~ .*_php_title_Composite_Materials_Resin_Mixing.*.yml ]] \
|| [[ "$yaml_file" =~ .*____jbon____Ball-Machine____master____okh-ballSortingMachine_yml.yml ]] \
|| [[ "$yaml_file" =~ .*title_Hexayurt____Book.yml ]]
then
echo
echo "Skipped cleaning of '$yaml_file'."
continue
fi
echo
echo "Cleaning '$yaml_file' ..."
# Apply the textual fix-ups to the original file and store the result as the
# cleaned copy.
# NOTE(review): `\n` inside replacements and `\s` look like GNU sed
# extensions -- confirm the sed in use supports them.
sed \
-e 's|FALSE|false|g' \
-e 's|TRUE|true|g' \
-e 's|^licensor: Field Ready|licensor:\n name: Field Ready|g' \
-e 's|^development-stage: \[value\]||' \
-e 's|flase|false|g' \
-e 's|^%Open know-how manifest 0.1$||g' \
-e 's|^---$||g' \
-e 's|: @Du33Jerry|: "@Du33Jerry"|g' \
-e 's|name: $|name: ANONYMOUS|g' \
-e 's| bchow(at)seas(dot)upenn(dot)edu| bchow@seas.upenn.edu|g' \
-e 's| j\.bonvoisin\[at\]bath\.ac\.uk| j.bonvoisin@bath.ac.uk|g' \
-e 's|^<|#<|g' \
-e 's|^ in <b|# in <b|g' \
-e 's|^title: \([^>|]\)|title: >\n \1|g' \
-e 's|^description: \([^>|]\)|description: >\n \1|g' \
-e 's|: [.][/]|: |' \
-e 's|\s*# required .*$||' \
-e 's|: CC |: CC-|' \
-e 's|: CERN OHL v1.2|: CERN-OHL-1.2|' \
-e 's|: CERN$|: CERN-OHL-1.1|' \
"$yaml_orig_path" \
> "$yaml_clean_path"
# Append a placeholder version when no line starts with "version: ".
if ! grep -q "^version: " < "$yaml_clean_path"
then
{
echo
echo "version: UNVERSIONED"
echo
} >> "$yaml_clean_path"
fi
# Append an anonymous licensor when no line starts with "licensor:".
if ! grep -q "^licensor:" < "$yaml_clean_path"
then
{
echo
echo "licensor:"
echo " name: ANONYMOUS"
echo
} >> "$yaml_clean_path"
fi
echo "Done cleaning '$yaml_file'."
done
fi
# Stage "validate" (enabled via validate): run okh-tool recursively over the
# local clone and store what it prints on stdout as the report log.
if $validate; then
  echo
  echo "################################################################################"
  echo "Validating OKH LOSH TOML files ..."
  validation_args=(
    val
    --okh-version losh
    --continue
    --recursive
    "$local_repo_dir"
  )
  "$okh_tool" "${validation_args[@]}" > "$local_repo_val_rep_dir/report_log.txt"
  echo "Done validating OKH LOSH TOML files."
fi