+#!/usr/bin/perl
+use LWP::Simple;
+use HTML::Parser;
+use warnings;
+use strict;
+
+# Controls whether to print what is being done.
+my $verbose=1;
+
+# Controls whether to actually check anything out.
+my $noact=1;
+
+# Controls whether to perfer repos that use authentication.
+my $want_auth=0;
+
+# Controls where to check out to. If not set, the vcs is allowed to
+# decide.
+my $destdir;
+
+# how to perform checkouts
+my %handlers=(
+ git => sub { doit("git", "clone", shift, $destdir) },
+ svn => sub { doit("svn", "checkout", shift, $destdir) },
+ bzr => sub { doit("bzr", "branch", shift, $destdir) },
+);
+
+# Regexps matching urls that are used for anonymous
+# repository checkouts. The order is significant:
+# urls matching earlier in the list are preferred over
+# those matching later.
+my @anon_urls=(
+ qr/^git:\/\//i,
+ qr/^bzr:\/\//i,
+ qr/^svn:\/\//i,
+ qr/^http:\/\//i, # generally the worst transport
+);
+
+sub doit {
+ my @args=grep { defined } @_;
+ print join(" ", @args)."\n" if $verbose;
+ return 0 if $noact;
+ return system(@args);
+}
+
+# Is repo a better than repo b?
+sub better {
+ my ($a, $b)=@_;
+
+ my $firstanon=$b;
+ foreach my $r (@anon_urls) {
+ if ($a->{href} =~ /$r/) {
+ $firstanon=$a;
+ last;
+ }
+ elsif ($b->{href} =~ /$r/) {
+ $firstanon=$b;
+ last;
+ }
+ }
+
+ if ($want_auth) {
+ return $firstanon != $a;
+ }
+ else {
+ return $firstanon == $a;
+ }
+}
+
+# Eliminate duplicate repositories from list.
+# Duplicate repositories have the same title, or the same href.
+sub dedup {
+ my %seenhref;
+ my %bytitle;
+ foreach my $repo (@_) {
+ if (exists $repo->{title} &&
+ length $repo->{title} &&
+ exists $bytitle{$repo->{title}}) {
+ my $other=$bytitle{$repo->{title}};
+ next unless better($repo, $other);
+ delete $bytitle{$other->{title}}
+ }
+
+ if (! $seenhref{$repo->{href}}++) {
+ $bytitle{$repo->{title}}=$repo;
+ }
+ }
+
+ return values %bytitle;
+}
+
+sub parse {
+ my $page=shift;
+
+ my @ret;
+ my $parser=HTML::Parser->new(api_version => 3);
+ $parser->handler(start => sub {
+ my $tagname=shift;
+ my $attr=shift;
+ return if lc $tagname ne 'link';
+ return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs';
+ return if ! exists $attr->{href} || ! length $attr->{href};
+ return if ! exists $attr->{type} || ! length $attr->{type};
+ push @ret, $attr;
+ }, "tagname, attr");
+ $parser->parse($page);
+ $parser->eof;
+
+ return @ret;
+}
+
+my $url=shift;
+if (! defined $url) {
+ die "usage: webcheckout url\n";
+}
+
+my $page=get($url);
+if (! defined $page) {
+ die "failed to download $url\n";
+}
+
+my @repos=dedup(parse($page));
+if (! @repos) {
+ die "no repositories found on $url\n";
+}
+
+my $errors=0;
+foreach my $repo (@repos) {
+ my $handler=$handlers{$repo->{type}};
+ if ($handler) {
+ if ($handler->($repo->{href}) != 0) {
+ print STDERR "failed to checkout ".$repo->{href}."\n";
+ $errors++;
+ }
+ }
+ else {
+ print STDERR "unknown repository type ".$repo->{type}.
+ " for ".$repo->{href}."\n";
+ $errors++;
+ }
+}
+exit($errors > 0);
+
+#use Data::Dumper;
+#print Dumper(\@repos);