#!/usr/bin/perl use LWP::Simple; use HTML::Parser; use warnings; use strict; # Controls whether to print what is being done. my $verbose=1; # Controls whether to actually check anything out. my $noact=1; # Controls whether to perfer repos that use authentication. my $want_auth=0; # Controls where to check out to. If not set, the vcs is allowed to # decide. my $destdir; # how to perform checkouts my %handlers=( git => sub { doit("git", "clone", shift, $destdir) }, svn => sub { doit("svn", "checkout", shift, $destdir) }, bzr => sub { doit("bzr", "branch", shift, $destdir) }, ); # Regexps matching urls that are used for anonymous # repository checkouts. The order is significant: # urls matching earlier in the list are preferred over # those matching later. my @anon_urls=( qr/^git:\/\//i, qr/^bzr:\/\//i, qr/^svn:\/\//i, qr/^http:\/\//i, # generally the worst transport ); sub doit { my @args=grep { defined } @_; print join(" ", @args)."\n" if $verbose; return 0 if $noact; return system(@args); } # Is repo a better than repo b? sub better { my ($a, $b)=@_; my $firstanon=$b; foreach my $r (@anon_urls) { if ($a->{href} =~ /$r/) { $firstanon=$a; last; } elsif ($b->{href} =~ /$r/) { $firstanon=$b; last; } } if ($want_auth) { return $firstanon != $a; } else { return $firstanon == $a; } } # Eliminate duplicate repositories from list. # Duplicate repositories have the same title, or the same href. sub dedup { my %seenhref; my %bytitle; foreach my $repo (@_) { if (exists $repo->{title} && length $repo->{title} && exists $bytitle{$repo->{title}}) { my $other=$bytitle{$repo->{title}}; next unless better($repo, $other); delete $bytitle{$other->{title}} } if (! $seenhref{$repo->{href}}++) { $bytitle{$repo->{title}}=$repo; } } return values %bytitle; } sub parse { my $page=shift; my @ret; my $parser=HTML::Parser->new(api_version => 3); $parser->handler(start => sub { my $tagname=shift; my $attr=shift; return if lc $tagname ne 'link'; return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs'; return if ! exists $attr->{href} || ! length $attr->{href}; return if ! exists $attr->{type} || ! length $attr->{type}; push @ret, $attr; }, "tagname, attr"); $parser->parse($page); $parser->eof; return @ret; } my $url=shift; if (! defined $url) { die "usage: webcheckout url\n"; } my $page=get($url); if (! defined $page) { die "failed to download $url\n"; } my @repos=dedup(parse($page)); if (! @repos) { die "no repositories found on $url\n"; } my $errors=0; foreach my $repo (@repos) { my $handler=$handlers{$repo->{type}}; if ($handler) { if ($handler->($repo->{href}) != 0) { print STDERR "failed to checkout ".$repo->{href}."\n"; $errors++; } } else { print STDERR "unknown repository type ".$repo->{type}. " for ".$repo->{href}."\n"; $errors++; } } exit($errors > 0); #use Data::Dumper; #print Dumper(\@repos);