Add support for skip = lazy, a mode where mr only operates on repositories that are...

[code/myrepos.git] / webcheckout
diff --git a/webcheckout b/webcheckout

index 19d46208e94965a0b657a05ff7516ef878bd5163..0c93eefcd8d10e41d9ba649438a353debe91f6b8 100755 (executable)
--- a/webcheckout
+++ b/webcheckout
@@ -2,7 +2,7 @@
  
  =head1 NAME
  
  
  =head1 NAME
  
-debcheckout - check out repositories referenced on a web page
+webcheckout - check out repositories referenced on a web page
  
  =head1 SYNOPSIS
  
  
  =head1 SYNOPSIS
  
@@ -16,7 +16,7 @@ a subdirectory of the current directory, using whatever VCS program is
  appropriate for that repository (git, svn, etc).
  
  The information about the repositories is embedded in the web page using
  appropriate for that repository (git, svn, etc).
  
  The information about the repositories is embedded in the web page using
-the rel=vcs microformat, which is documented at
+the rel=vcs-* microformat, which is documented at
  <http://kitenet.net/~joey/rfc/rel-vcs/>.
  
  If the optional destdir parameter is specified, VCS programs will be asked
  <http://kitenet.net/~joey/rfc/rel-vcs/>.
  
  If the optional destdir parameter is specified, VCS programs will be asked
@@ -35,18 +35,26 @@ anonymous repositories when possible. If you have an account that
  allows you to use authenticated repositories, you might want to use this
  option.
  
  allows you to use authenticated repositories, you might want to use this
  option.
  
-=item -n
+=item --no-act, -n
  
  Do not actually check anything out, just print out the commands that would
  be run to check out the repositories.
  
  
  Do not actually check anything out, just print out the commands that would
  be run to check out the repositories.
  
-=item -q
+=item --quiet, -q
  
  Quiet mode. Do not print out the commands being run. (The VCS commands
  may still be noisy however.)
  
  =back
  
  
  Quiet mode. Do not print out the commands being run. (The VCS commands
  may still be noisy however.)
  
  =back
  
+=head1 PREREQUISITES
+
+To use this program you will need lots of VCS programs installed,
+obviously. It also depends on the perl LWP and HTML::Parser modules.
+
+If the perl URI module is installed, webcheckout can heuristically guess
+what you mean by partial URLs, such as "kitenet.net/~joey".
+
  =head1 AUTHOR
  
  Copyright 2009 Joey Hess <joey@kitenet.net>
  =head1 AUTHOR
  
  Copyright 2009 Joey Hess <joey@kitenet.net>
@@ -75,7 +83,7 @@ my $noact=0;
  # Controls whether to prefer repos that use authentication.
  my $want_auth=0;
  
  # Controls whether to prefer repos that use authentication.
  my $want_auth=0;
  
-# Controls where to check out to. If not set, the vcs is allowed to
+# Controls where to check out to. If not set, the VCS is allowed to
  # decide.
  my $destdir;
  
  # decide.
  my $destdir;
  
@@ -111,6 +119,11 @@ sub getopts {
         $url=shift @ARGV;
         $destdir=shift @ARGV;
  
         $url=shift @ARGV;
         $destdir=shift @ARGV;
  
+       eval q{use URI::Heuristic};
+       if (! $@) {
+               $url=URI::Heuristic::uf_uristr($url);
+       }
+
         if ($noact) {
                 $quiet=0;
         }
         if ($noact) {
                 $quiet=0;
         }
@@ -127,23 +140,26 @@ sub doit {
  sub better {
         my ($a, $b)=@_;
  
  sub better {
         my ($a, $b)=@_;
  
-       my $firstanon=$b;
+       my @anon;
         foreach my $r (@anon_urls) {
                 if ($a->{href} =~ /$r/) {
         foreach my $r (@anon_urls) {
                 if ($a->{href} =~ /$r/) {
-                       $firstanon=$a;
-                       last;
+                       push @anon, $a;
                 }
                 elsif ($b->{href} =~ /$r/) {
                 }
                 elsif ($b->{href} =~ /$r/) {
-                       $firstanon=$b;
-                       last;
+                       push @anon, $b;
                 }
         }
  
         if ($want_auth) {
                 }
         }
  
         if ($want_auth) {
-               return $firstanon != $a;
+               # Whichever is authed is better.
+               return 1 if ! @anon || ! grep { $_ eq $a } @anon;
+               return 0 if ! grep { $_ eq $b } @anon;
+               # Neither is authed, so the better anon method wins.
+               return $anon[0] == $a;
         }
         else {
         }
         else {
-               return $firstanon == $a;
+               # Better anon method wins.
+               return @anon && $anon[0] == $a;
         }
  }
  
         }
  }
  
@@ -152,21 +168,26 @@ sub better {
  sub dedup {
         my %seenhref;
         my %bytitle;
  sub dedup {
         my %seenhref;
         my %bytitle;
+       my @others;
         foreach my $repo (@_) {
                 if (exists $repo->{title} &&
         foreach my $repo (@_) {
                 if (exists $repo->{title} &&
-                   length $repo->{title} &&
-                   exists $bytitle{$repo->{title}}) {
-                       my $other=$bytitle{$repo->{title}};
-                       next unless better($repo, $other);
-                       delete $bytitle{$other->{title}}
+                   length $repo->{title}) {
+                       if (exists $bytitle{$repo->{title}}) {
+                               my $other=$bytitle{$repo->{title}};
+                               next unless better($repo, $other);
+                               delete $bytitle{$other->{title}}
+                       }
+
+                       if (! $seenhref{$repo->{href}}++) {
+                               $bytitle{$repo->{title}}=$repo;
+                       }
                 }
                 }
-
-               if (! $seenhref{$repo->{href}}++) {
-                       $bytitle{$repo->{title}}=$repo;
+               else {
+                       push @others, $repo;
                 }
         }
  
                 }
         }
  
-       return values %bytitle;
+       return values %bytitle, @others;
  }
  
  sub parse {
  }
  
  sub parse {
@@ -174,15 +195,38 @@ sub parse {
  
         my @ret;
         my $parser=HTML::Parser->new(api_version => 3);
  
         my @ret;
         my $parser=HTML::Parser->new(api_version => 3);
+       my $abody=undef;
+       my $aref=undef;
         $parser->handler(start => sub {
                 my $tagname=shift;
                 my $attr=shift;
         $parser->handler(start => sub {
                 my $tagname=shift;
                 my $attr=shift;
-               return if lc $tagname ne 'link';
-               return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs';
+
                 return if ! exists $attr->{href} || ! length $attr->{href};
                 return if ! exists $attr->{href} || ! length $attr->{href};
-               return if ! exists $attr->{type} || ! length $attr->{type};
+               return if ! exists $attr->{rel} || $attr->{rel} !~ /^vcs-(.+)/i;
+               $attr->{type}=lc($1);
+
+               # need to collect the body of the <a> tag if there is no title
+               if ($tagname eq "a" && ! exists $attr->{title}) {
+                       $abody="";
+                       $aref=$attr;
+               }
+
                 push @ret, $attr;
         }, "tagname, attr");
                 push @ret, $attr;
         }, "tagname, attr");
+       $parser->handler(text => sub {
+               if (defined $aref) {
+                       $abody.=join(" ", @_);
+               }
+       }, "text");
+       $parser->handler(end => sub {
+               my $tagname=shift;
+               if ($tagname eq "a" && defined $aref) {
+                       $aref->{title}=$abody;
+                       $aref=undef;
+                       $abody=undef;
+               }
+       }, "tagname");
+       $parser->report_tags(qw{link a});
         $parser->parse($page);
         $parser->eof;
  
         $parser->parse($page);
         $parser->eof;
  
@@ -190,7 +234,6 @@ sub parse {
  }
  
  getopts();
  }
  
  getopts();
-print "$url\n";
  
  my $page=get($url);
  if (! defined $page) {
  
  my $page=get($url);
  if (! defined $page) {
@@ -202,10 +245,16 @@ if (! @repos) {
         die "no repositories found on $url\n";
  }
  
         die "no repositories found on $url\n";
  }
  
+#use Data::Dumper;
+#print Dumper(\@repos);
+#exit;
+
  if (defined $destdir && @repos > 1) {
         # create subdirs of $destdir for the multiple repos
  if (defined $destdir && @repos > 1) {
         # create subdirs of $destdir for the multiple repos
-       mkdir($destdir);
-       chdir($destdir) || die "failed to chdir to $destdir: $!";
+       if (! $noact) {
+               mkdir($destdir);
+               chdir($destdir) || die "failed to chdir to $destdir: $!";
+       }
         $destdir=undef;
  }
  
         $destdir=undef;
  }
  
@@ -225,6 +274,3 @@ foreach my $repo (@repos) {
         }
  }
  exit($errors > 0);
         }
  }
  exit($errors > 0);
-
-#use Data::Dumper;
-#print Dumper(\@repos);