X-Git-Url: https://git.madduck.net/code/myrepos.git/blobdiff_plain/4d25de5c4ddfe4ad14823b550a0903e594207774..83b06b2499053e1c03aa7b85b199a439dc9a396d:/webcheckout
diff --git a/webcheckout b/webcheckout
index 19d4620..e48d9de 100755
--- a/webcheckout
+++ b/webcheckout
@@ -2,7 +2,7 @@
=head1 NAME
-debcheckout - check out repositories referenced on a web page
+webcheckout - check out repositories referenced on a web page
=head1 SYNOPSIS
@@ -16,7 +16,7 @@ a subdirectory of the current directory, using whatever VCS program is
appropriate for that repository (git, svn, etc).
The information about the repositories is embedded in the web page using
-the rel=vcs microformat, which is documented at
+the rel=vcs-* microformat, which is documented at
.
If the optional destdir parameter is specified, VCS programs will be asked
@@ -35,25 +35,33 @@ anonymous repositories when possible. If you have an account that
allows you to use authenticated repositories, you might want to use this
option.
-=item -n
+=item --no-act, -n
Do not actually check anything out, just print out the commands that would
be run to check out the repositories.
-=item -q
+=item --quiet, -q
Quiet mode. Do not print out the commands being run. (The VCS commands
may still be noisy however.)
=back
+=head1 PREREQUISITES
+
+To use this program you will need lots of VCS programs installed,
+obviously. It also depends on the perl LWP and HTML::Parser modules.
+
+If the perl URI module is installed, webcheckout can heuristically guess
+what you mean by partial URLs, such as "kitenet.net/~joey"'
+
=head1 AUTHOR
Copyright 2009 Joey Hess
Licensed under the GNU GPL version 2 or higher.
-This program is included in mr
+This program is included in myrepos
=cut
@@ -75,7 +83,7 @@ my $noact=0;
# Controls whether to perfer repos that use authentication.
my $want_auth=0;
-# Controls where to check out to. If not set, the vcs is allowed to
+# Controls where to check out to. If not set, the VCS is allowed to
# decide.
my $destdir;
@@ -111,6 +119,11 @@ sub getopts {
$url=shift @ARGV;
$destdir=shift @ARGV;
+ eval q{use URI::Heuristic};
+ if (! $@) {
+ $url=URI::Heuristic::uf_uristr($url);
+ }
+
if ($noact) {
$quiet=0;
}
@@ -127,23 +140,26 @@ sub doit {
sub better {
my ($a, $b)=@_;
- my $firstanon=$b;
+ my @anon;
foreach my $r (@anon_urls) {
if ($a->{href} =~ /$r/) {
- $firstanon=$a;
- last;
+ push @anon, $a;
}
elsif ($b->{href} =~ /$r/) {
- $firstanon=$b;
- last;
+ push @anon, $b;
}
}
if ($want_auth) {
- return $firstanon != $a;
+ # Whichever is authed is better.
+ return 1 if ! @anon || ! grep { $_ eq $a } @anon;
+ return 0 if ! grep { $_ eq $b } @anon;
+ # Neither is authed, so the better anon method wins.
+ return $anon[0] == $a;
}
else {
- return $firstanon == $a;
+ # Better anon method wins.
+ return @anon && $anon[0] == $a;
}
}
@@ -152,21 +168,26 @@ sub better {
sub dedup {
my %seenhref;
my %bytitle;
+ my @others;
foreach my $repo (@_) {
if (exists $repo->{title} &&
- length $repo->{title} &&
- exists $bytitle{$repo->{title}}) {
- my $other=$bytitle{$repo->{title}};
- next unless better($repo, $other);
- delete $bytitle{$other->{title}}
+ length $repo->{title}) {
+ if (exists $bytitle{$repo->{title}}) {
+ my $other=$bytitle{$repo->{title}};
+ next unless better($repo, $other);
+ delete $bytitle{$other->{title}}
+ }
+
+ if (! $seenhref{$repo->{href}}++) {
+ $bytitle{$repo->{title}}=$repo;
+ }
}
-
- if (! $seenhref{$repo->{href}}++) {
- $bytitle{$repo->{title}}=$repo;
+ else {
+ push @others, $repo;
}
}
- return values %bytitle;
+ return values %bytitle, @others;
}
sub parse {
@@ -174,15 +195,38 @@ sub parse {
my @ret;
my $parser=HTML::Parser->new(api_version => 3);
+ my $abody=undef;
+ my $aref=undef;
$parser->handler(start => sub {
my $tagname=shift;
my $attr=shift;
- return if lc $tagname ne 'link';
- return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs';
+
return if ! exists $attr->{href} || ! length $attr->{href};
- return if ! exists $attr->{type} || ! length $attr->{type};
+ return if ! exists $attr->{rel} || $attr->{rel} !~ /^vcs-(.+)/i;
+ $attr->{type}=lc($1);
+
+ # need to collect the body of the tag if there is no title
+ if ($tagname eq "a" && ! exists $attr->{title}) {
+ $abody="";
+ $aref=$attr;
+ }
+
push @ret, $attr;
}, "tagname, attr");
+ $parser->handler(text => sub {
+ if (defined $aref) {
+ $abody.=join(" ", @_);
+ }
+ }, "text");
+ $parser->handler(end => sub {
+ my $tagname=shift;
+ if ($tagname eq "a" && defined $aref) {
+ $aref->{title}=$abody;
+ $aref=undef;
+ $abody=undef;
+ }
+ }, "tagname");
+ $parser->report_tags(qw{link a});
$parser->parse($page);
$parser->eof;
@@ -190,7 +234,6 @@ sub parse {
}
getopts();
-print "$url\n";
my $page=get($url);
if (! defined $page) {
@@ -202,10 +245,16 @@ if (! @repos) {
die "no repositories found on $url\n";
}
+#use Data::Dumper;
+#print Dumper(\@repos);
+#exit;
+
if (defined $destdir && @repos > 1) {
# create subdirs of $destdir for the multiple repos
- mkdir($destdir);
- chdir($destdir) || die "failed to chdir to $destdir: $!";
+ if (! $noact) {
+ mkdir($destdir);
+ chdir($destdir) || die "failed to chdir to $destdir: $!";
+ }
$destdir=undef;
}
@@ -225,6 +274,3 @@ foreach my $repo (@repos) {
}
}
exit($errors > 0);
-
-#use Data::Dumper;
-#print Dumper(\@repos);