X-Git-Url: https://git.madduck.net/code/myrepos.git/blobdiff_plain/af9a9d12caffaccb04e0e989a5b71e18370953f4..60545177be81428ab485ff3bc1eb93895194c683:/webcheckout diff --git a/webcheckout b/webcheckout index 33bccb9..6df790d 100755 --- a/webcheckout +++ b/webcheckout @@ -1,14 +1,76 @@ #!/usr/bin/perl + +=head1 NAME + +webcheckout - check out repositories referenced on a web page + +=head1 SYNOPSIS + +B<webcheckout> [options] url [destdir] + +=head1 DESCRIPTION + +B<webcheckout> downloads an url and parses it, looking for version control +repositories referenced by the page. It checks out each repository into +a subdirectory of the current directory, using whatever VCS program is +appropriate for that repository (git, svn, etc). + +The information about the repositories is embedded in the web page using +the rel=vcs microformat, which is documented at +<http://kitenet.net/~joey/rfc/rel-vcs/>. + +If the optional destdir parameter is specified, VCS programs will be asked +to check out repositories into that directory. If there are multiple +repositories to check out, each will be checked out into a separate +subdirectory of the destdir. + +=head1 OPTIONS + +=over 4 + +=item -a, --auth + +Prefer authenticated repositories. By default, webcheckout will use +anonymous repositories when possible. If you have an account that +allows you to use authenticated repositories, you might want to use this +option. + +=item -n + +Do not actually check anything out, just print out the commands that would +be run to check out the repositories. + +=item -q + +Quiet mode. Do not print out the commands being run. (The VCS commands +may still be noisy however.) + +=back + +=head1 AUTHOR + +Copyright 2009 Joey Hess + +Licensed under the GNU GPL version 2 or higher. + +This program is included in mr + +=cut + use LWP::Simple; use HTML::Parser; +use Getopt::Long; use warnings; use strict; +# What to download. +my $url; + # Controls whether to print what is being done. -my $verbose=1; +my $quiet=0; # Controls whether to actually check anything out.
-my $noact=1; +my $noact=0; # Controls whether to perfer repos that use authentication. my $want_auth=0; @@ -35,9 +97,28 @@ my @anon_urls=( qr/^http:\/\//i, # generally the worst transport ); +sub getopts { + Getopt::Long::Configure("bundling", "no_permute"); + my $result=GetOptions( + "q|quiet" => \$quiet, + "n|noact" => \$noact, + "a|auth", => \$want_auth, + ); + if (! $result || @ARGV < 1) { + die "usage: webcheckout [options] url [destdir]\n"; + } + + $url=shift @ARGV; + $destdir=shift @ARGV; + + if ($noact) { + $quiet=0; + } +} + sub doit { my @args=grep { defined } @_; - print join(" ", @args)."\n" if $verbose; + print join(" ", @args)."\n" unless $quiet; return 0 if $noact; return system(@args); } @@ -46,23 +127,26 @@ sub doit { sub better { my ($a, $b)=@_; - my $firstanon=$b; + my @anon; foreach my $r (@anon_urls) { if ($a->{href} =~ /$r/) { - $firstanon=$a; - last; + push @anon, $a; } elsif ($b->{href} =~ /$r/) { - $firstanon=$b; - last; + push @anon, $b; } } if ($want_auth) { - return $firstanon != $a; + # Whichever is authed is better. + return 1 if ! @anon || ! grep { $_ eq $a } @anon; + return 0 if ! grep { $_ eq $b } @anon; + # Neither is authed, so the better anon method wins. + return $anon[0] == $a; } else { - return $firstanon == $a; + # Better anon method wins. + return @anon && $anon[0] == $a; } } @@ -71,21 +155,26 @@ sub better { sub dedup { my %seenhref; my %bytitle; + my @others; foreach my $repo (@_) { if (exists $repo->{title} && - length $repo->{title} && - exists $bytitle{$repo->{title}}) { - my $other=$bytitle{$repo->{title}}; - next unless better($repo, $other); - delete $bytitle{$other->{title}} + length $repo->{title}) { + if (exists $bytitle{$repo->{title}}) { + my $other=$bytitle{$repo->{title}}; + next unless better($repo, $other); + delete $bytitle{$other->{title}} + } + + if (! $seenhref{$repo->{href}}++) { + $bytitle{$repo->{title}}=$repo; + } } - - if (! 
$seenhref{$repo->{href}}++) { - $bytitle{$repo->{title}}=$repo; + else { + push @others, $repo; } } - return values %bytitle; + return values %bytitle, @others; } sub parse { @@ -108,10 +197,7 @@ sub parse { return @ret; } -my $url=shift; -if (! defined $url) { - die "usage: webcheckout url\n"; -} +getopts(); my $page=get($url); if (! defined $page) { @@ -123,6 +209,15 @@ if (! @repos) { die "no repositories found on $url\n"; } +if (defined $destdir && @repos > 1) { + # create subdirs of $destdir for the multiple repos + if (! $noact) { + mkdir($destdir); + chdir($destdir) || die "failed to chdir to $destdir: $!"; + } + $destdir=undef; +} + my $errors=0; foreach my $repo (@repos) { my $handler=$handlers{$repo->{type}};