X-Git-Url: https://git.madduck.net/code/myrepos.git/blobdiff_plain/7cbacd59bf538bac9e5868a005be162b265e44d6..HEAD:/webcheckout diff --git a/webcheckout b/webcheckout index 4841fee..e48d9de 100755 --- a/webcheckout +++ b/webcheckout @@ -16,7 +16,7 @@ a subdirectory of the current directory, using whatever VCS program is appropriate for that repository (git, svn, etc). The information about the repositories is embedded in the web page using -the rel=vcs microformat, which is documented at +the rel=vcs-* microformat, which is documented at . If the optional destdir parameter is specified, VCS programs will be asked @@ -35,25 +35,33 @@ anonymous repositories when possible. If you have an account that allows you to use authenticated repositories, you might want to use this option. -=item -n +=item --no-act, -n Do not actually check anything out, just print out the commands that would be run to check out the repositories. -=item -q +=item --quiet, -q Quiet mode. Do not print out the commands being run. (The VCS commands may still be noisy however.) =back +=head1 PREREQUISITES + +To use this program you will need lots of VCS programs installed, +obviously. It also depends on the perl LWP and HTML::Parser modules. + +If the perl URI module is installed, webcheckout can heuristically guess +what you mean by partial URLs, such as "kitenet.net/~joey"' + =head1 AUTHOR Copyright 2009 Joey Hess Licensed under the GNU GPL version 2 or higher. -This program is included in mr +This program is included in myrepos =cut @@ -75,7 +83,7 @@ my $noact=0; # Controls whether to perfer repos that use authentication. my $want_auth=0; -# Controls where to check out to. If not set, the vcs is allowed to +# Controls where to check out to. If not set, the VCS is allowed to # decide. my $destdir; @@ -111,6 +119,11 @@ sub getopts { $url=shift @ARGV; $destdir=shift @ARGV; + eval q{use URI::Heuristic}; + if (! $@) { + $url=URI::Heuristic::uf_uristr($url); + } + if ($noact) { $quiet=0; } @@ -127,23 +140,26 @@ sub doit { sub better { my ($a, $b)=@_; - my $firstanon=$b; + my @anon; foreach my $r (@anon_urls) { if ($a->{href} =~ /$r/) { - $firstanon=$a; - last; + push @anon, $a; } elsif ($b->{href} =~ /$r/) { - $firstanon=$b; - last; + push @anon, $b; } } if ($want_auth) { - return $firstanon != $a; + # Whichever is authed is better. + return 1 if ! @anon || ! grep { $_ eq $a } @anon; + return 0 if ! grep { $_ eq $b } @anon; + # Neither is authed, so the better anon method wins. + return $anon[0] == $a; } else { - return $firstanon == $a; + # Better anon method wins. + return @anon && $anon[0] == $a; } } @@ -179,15 +195,38 @@ sub parse { my @ret; my $parser=HTML::Parser->new(api_version => 3); + my $abody=undef; + my $aref=undef; $parser->handler(start => sub { my $tagname=shift; my $attr=shift; - return if lc $tagname ne 'link'; - return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs'; + return if ! exists $attr->{href} || ! length $attr->{href}; - return if ! exists $attr->{type} || ! length $attr->{type}; + return if ! exists $attr->{rel} || $attr->{rel} !~ /^vcs-(.+)/i; + $attr->{type}=lc($1); + + # need to collect the body of the tag if there is no title + if ($tagname eq "a" && ! exists $attr->{title}) { + $abody=""; + $aref=$attr; + } + push @ret, $attr; }, "tagname, attr"); + $parser->handler(text => sub { + if (defined $aref) { + $abody.=join(" ", @_); + } + }, "text"); + $parser->handler(end => sub { + my $tagname=shift; + if ($tagname eq "a" && defined $aref) { + $aref->{title}=$abody; + $aref=undef; + $abody=undef; + } + }, "tagname"); + $parser->report_tags(qw{link a}); $parser->parse($page); $parser->eof; @@ -206,6 +245,10 @@ if (! @repos) { die "no repositories found on $url\n"; } +#use Data::Dumper; +#print Dumper(\@repos); +#exit; + if (defined $destdir && @repos > 1) { # create subdirs of $destdir for the multiple repos if (! $noact) { @@ -231,6 +274,3 @@ foreach my $repo (@repos) { } } exit($errors > 0); - -#use Data::Dumper; -#print Dumper(\@repos);