use strict;
use POSIX;
use IO::Handle;
use Carp;
use TOML::Parser;
use Data::Dumper;
use Time::HiRes;
use Digest::MD5 qw(md5_hex);
use File::Compare;
our $max_rate = 2;
open DEBUG, ">&STDERR" or die $!;
autoflush DEBUG 1;
sub run_curl {
my ($datalog, $output, $url, @xopts) = @_;
my @curl = (qw(curl -Ssf -L --proto-redir -all), @xopts);
push @curl, '-o', "$output.tmp", $url;
our $last_curl;
$last_curl //= 0.;
my $now = Time::HiRes::time;
my $delay = 1./$max_rate - ($now - $last_curl);
Time::HiRes::sleep $delay if $delay > 0;
$last_curl = $now;
$!=$?=0; my $r = system @curl; die "curl failed ($? $!): @curl" if $r;
my $logtime = strftime "%F %T UTC", gmtime time;
print $datalog "$logtime: downloaded into $output from $url\n"
or die $!;
rename "$output.tmp", "$output" or die "install $output: $!";
}
sub cfg_lookup_1 ($@) {
my ($dict, @keys) = @_;
foreach my $k (@keys) {
confess unless ref $dict eq 'HASH';
$dict = $dict->{$k};
return undef if !defined $dict;
}
return $dict;
}
sub mk_cfg_lookup ($$@) {
my ($groups, $thisgroup, @keysprefix) = @_;
return sub {
my $cgroup = $thisgroup;
for (;;) {
my $got = cfg_lookup_1($cgroup, @keysprefix, @_);
return $got if defined $got;
my $inherit = cfg_lookup_1($cgroup, qw(inherit));
return undef unless $inherit;
$cgroup = $groups->{$inherit};
confess "$inherit".Dumper($groups,$inherit)."?" unless $cgroup;
}
}
}
sub cfg_affixes ($$$) {
my ($cfg, $keybase, $middle) = @_;
return
($cfg->("${keybase}_prefix") // '')
.$middle
.($cfg->("${keybase}_suffix") // '');
}
sub method_none { return sub { } }
sub methodlic_none { undef }
sub methodisoffline_none { 0 }
sub methodlic_wikimedia ($) {
my ($scraper) = @_;
my $spdx = ${ \ $scraper->('spdx') };
return <<END.($spdx =~ m{/} ? <<END : '').<<END;
These files were all obtained from
${ \ $scraper->('site-title') }
They are all available under at least, and distributed here under,
$spdx
END
(deepending on the file - consult the source file for details)
END
as well as possibly other licences. There is NO WARRANTY.
See <file>.download-log for the original URL and download timestamp.
The wikitext of the File: page on the wiki is in <file>.wikitext, and
contains the authorship and derivation information, as well as
information about any alternative licence terms.
[ This LICENCE file was generated by media-scraper and should not
be manually edited ]
END
}
sub method_wikimedia ($$$) {
my ($scraper, $methname) = @_;
return sub {
my ($lbase, $ldest, $rstem) = @_;
my $rfilename = cfg_affixes $scraper, 'filename', $rstem;
my $url = cfg_affixes $scraper, 'url', $rfilename;
my $wt = "$lbase.wikitext";
my $datalog = new IO::File "$lbase.download-log", '>>' or die $!;
print $datalog "\n" or die $!;
run_curl $datalog, $wt, $url;
open WT, "$wt" or die $!;
my (@lics) = @{ $scraper->('licences') };
s/\W/\\$&/g foreach @lics;
s/\\\*$/.*/g foreach @lics;
my $lic1_re = '(?:'.(join '|', @lics).')';
my $ok;
while (<WT>) {
s/\s+$//;
if (m{^ \{\{ ($lic1_re) \}\} $}xi ||
m{^ \{\{ self\| (?:[^{}]*\|)? ($lic1_re) (?:\|[^{}]*)? \}\} $}xi) {
print "licence=$1 ";
$ok = 1;
last;
}
}
if (!$ok) {
die "\nfile $wt from $url no appropriate licence $lic1_re";
}
my $hash_prefix = '';
if ($scraper->('data_url_hashprefix')) {
md5_hex($rfilename) =~ m{^((.).)} or die;
$hash_prefix .= "$2/$1/";
}
my $data_url = cfg_affixes $scraper, 'data_url', $hash_prefix.$rfilename;
run_curl $datalog, $ldest, $data_url;
close $datalog or die $!;
};
}
sub methodisoffline_wikimedia { 0 }
sub method_cards_oxymoron {
my ($scraper, $methname) = @_;
return sub {
my ($lbase, $ldest, $rstem) = @_;
my $lgif = $lbase;
$lgif =~ m{/card-oxymoron-(\w+)-(\w+)$} or die "$lbase ?";
my $basename = $`;
$lgif = "$basename/cards/src/\l$1$2.gif";
if (stat $lgif) {
} elsif ($! != ENOENT) {
die "$lgif $!";
} else {
print STDERR "\nbuilding $basename...\n";
$!=$?=0; system "$basename/build" and die "$! $?";
}
$!=$?=0; system qw(convert), $lgif, $ldest and die "$! $?";
}
}
sub methodlic_cards_oxymoron { undef }
sub methodisoffline_cards_oxymoron ($$) { 1 }
our $offline;
while (@ARGV && $ARGV[0] =~ m/^-/) {
$_ = shift @ARGV;
last if m/^-$/;
if (m/^--offline$/) {
$offline = 1;
} else {
die "bad option: \`$_'";
}
}
my $input = $ARGV[0] // die;
$input =~ m/\.toml$/ or die "$input ?";
my $basename = $`;
mkdir $basename or $!==EEXIST or die "mkdir $basename: $!";
my $parser = TOML::Parser->new();
my $libinfo = $parser->parse_file($input);
my $groups = $libinfo->{group};
my $scraper = sub { $libinfo->{scraper}{$_[0]} };
my $method = $scraper->('method');
$method =~ s/-/_/g;
my $method_fn = ${*::}{"method_$method"};
my $methodlic_fn = ${*::}{"methodlic_$method"};
my $licpath = "$basename/LICENCE";
my $methodisoffline_fn = ${*::}{"methodisoffline_$method"};
my $method_lictext = $methodlic_fn->($scraper);
if (defined $method_lictext) {
my $licfile = new IO::File "$licpath.tmp", '>' or die $!;
print $licfile <<END, $method_lictext, <<END or die $!;
SPDX-License-Identifier: ${ \ $scraper->('spdx') }
(applies to the contents of this directory unless otherwise stated)
END
The download was done by media-scraper, controlled by $input.
END
close $licfile or die $!;
}
my $makepath = "$basename/files.make";
my $makefile = new IO::File "$makepath.tmp", '>' or die $!;
foreach my $groupname (sort keys %$groups) {
my $group_dict = $groups->{$groupname};
my $gcfg = mk_cfg_lookup($groups, $group_dict);
my $method_impl = $method_fn->($scraper, $method);
foreach (split(/\n/, $gcfg->('files'))) {
s/^\s+//;
next if m/^\#/ || m/^$/ || m/^\:/;
m/^(\S+)\s+(\S+)/ or die "bad line in files: \`$_'";
my $lministem = $1;
my $rministem = $2;
my $lstem = cfg_affixes $gcfg, 'item', $lministem;
my $rstem = cfg_affixes $gcfg, 'stem', $rministem;
my $lbase = "$basename/$lstem";
my $lupstream = "$lbase.svg";
my $lprocessed = "$lbase.usvg";
print DEBUG "file $lstem ";
my $process_colour = sub ($$) {
my ($linput, $lprocessed) = @_;
print $makefile <<END or die $!;
LIBRARY_FILES += $lprocessed
LIBRARY_FILE_INPUTS += $lprocessed:$linput
$lprocessed: $linput $licpath $input
\$(LIBRARY_PROCESS_SVG)
END
};
my $process_colours = sub {
my $colours = $gcfg->('colours');
if (!keys %$colours) {
$process_colour->($lupstream, $lprocessed, "");
return;
}
foreach my $colour (sort keys %$colours) {
my $cspec = $colours->{$colour};
my $abbrev = $cspec->{abbrev} or confess;
my $ncoloured = $lupstream;
$ncoloured =~ s/\.svg$/.coloured$&/;
my $outfile = $lprocessed;
$ncoloured =~ s/_c/$abbrev/ or confess "$outfile ?";
$outfile =~ s/_c/$abbrev/ or confess "$outfile ?";
my $coloured = $lupstream;
my $ci = 0;
my $cfp;
my $emitmap = sub {
my ($from, $to) = @_;
confess if $from =~ m/.\W|[^\w#]/
|| $to =~ m/.\W|[^\w#]/;
$coloured = $ncoloured;
if (!defined $cfp) {
$cfp ="\$@.$ci.tmp";
print $makefile <<END or die $!;
LIBRARY_CLEAN += $ncoloured
$ncoloured: $lupstream \$(MAKEFILE_DEP) \$(USVG_DEP) $makepath
\$(USVG_CMD) - -c <\$< >$cfp
END
}
$ci++;
my $nfp = "\$@.$ci.tmp";
print $makefile <<END or die $!;
\$(RECOLOUR_SVG) -f '$from' -t '$to' $cfp >$nfp
END
$cfp = $nfp;
};
my %map = %{ $cspec->{map} // { } };
while (keys %map) {
my $from = (sort keys %map)[0];
my @maybe_cycle = ();
my $cycle;
for (;;) {
push @maybe_cycle, $from;
my $to = $map{$from};
if (!exists $map{$to}) {
last;
}
$from = $to;
if (grep { $_ eq $to } @maybe_cycle) {
$cycle = $to;
last;
}
}
my $emit_most = sub ($) {
my ($end) = @_;
$end //= $#maybe_cycle;
foreach my $i (@maybe_cycle[0..$end]) {
$emitmap->($i, $map{$i});
}
};
if (defined $cycle) {
my $temp = 'abcbfb'; my $aside = $maybe_cycle[-1];
$emitmap->($aside, $temp);
$emit_most->($#maybe_cycle-1);
$emitmap->($temp, $map{$aside});
print $makefile <<END or die $!;
# cycle: @maybe_cycle
END
} else {
$emit_most->();
}
delete $map{$_} foreach @maybe_cycle;
}
if ($ci) {
print $makefile <<END or die $!;
test -s $cfp
mv -f $cfp \$@
END
}
$process_colour->($coloured, $outfile);
}
};
$process_colours->();
if (! $methodisoffline_fn->($basename, $lupstream)) {
if (stat $lupstream) {
print DEBUG "already.\n";
next;
}
die "$lupstream $!" unless $!==ENOENT;
if ($offline) {
print DEBUG "missing.\n";
warn "offline but $lupstream missing\n";
next;
}
}
$method_impl->($lbase, $lupstream, $rstem);
print DEBUG "done.\n";
}
}
close $makefile or die $!;
if (defined($method_lictext)) {
my $cmp = compare("$licpath.tmp", $licpath);
die if $cmp < 0;
if ($cmp) {
rename "$licpath.tmp", $licpath or die $!;
} else {
remove "$licpath.tmp";
}
}
rename "$makepath.tmp", $makepath or die $!;