X-Git-Url: https://git.madduck.net/code/myrepos.git/blobdiff_plain/60545177be81428ab485ff3bc1eb93895194c683..2c77b1b9a86f35ebd68757aec8f5432e38b63f6d:/webcheckout
diff --git a/webcheckout b/webcheckout
index 6df790d..0c93eef 100755
--- a/webcheckout
+++ b/webcheckout
@@ -16,7 +16,7 @@ a subdirectory of the current directory, using whatever VCS program is
 appropriate for that repository (git, svn, etc).
 
 The information about the repositories is embedded in the web page using
-the rel=vcs microformat, which is documented at
+the rel=vcs-* microformat, which is documented at
 .
 
 If the optional destdir parameter is specified, VCS programs will be asked
@@ -35,18 +35,26 @@ anonymous repositories when possible. If you have an account that
 allows you to use authenticated repositories, you might want to use this
 option.
 
-=item -n
+=item --no-act, -n
 
 Do not actually check anything out, just print out the commands that would
 be run to check out the repositories.
 
-=item -q
+=item --quiet, -q
 
 Quiet mode. Do not print out the commands being run. (The VCS commands
 may still be noisy however.)
 
 =back
 
+=head1 PREREQUISITES
+
+To use this program you will need lots of VCS programs installed,
+obviously. It also depends on the perl LWP and HTML::Parser modules.
+
+If the perl URI module is installed, webcheckout can heuristically guess
+what you mean by partial URLs, such as "kitenet.net/~joey"'
+
 =head1 AUTHOR
 
 Copyright 2009 Joey Hess 
@@ -75,7 +83,7 @@ my $noact=0;
 # Controls whether to perfer repos that use authentication.
 my $want_auth=0;
 
-# Controls where to check out to. If not set, the vcs is allowed to
+# Controls where to check out to. If not set, the VCS is allowed to
 # decide.
 my $destdir;
 
@@ -111,6 +119,11 @@ sub getopts {
 	$url=shift @ARGV;
 	$destdir=shift @ARGV;
 
+	eval q{use URI::Heuristic};
+	if (! $@) {
+		$url=URI::Heuristic::uf_uristr($url);
+	}
+
 	if ($noact) {
 		$quiet=0;
 	}
@@ -182,15 +195,38 @@ sub parse {
 
 	my @ret;
 	my $parser=HTML::Parser->new(api_version => 3);
+	my $abody=undef;
+	my $aref=undef;
 	$parser->handler(start => sub {
 		my $tagname=shift;
 		my $attr=shift;
-		return if lc $tagname ne 'link';
-		return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs';
+
 		return if ! exists $attr->{href} || ! length $attr->{href};
-		return if ! exists $attr->{type} || ! length $attr->{type};
+		return if ! exists $attr->{rel} || $attr->{rel} !~ /^vcs-(.+)/i;
+		$attr->{type}=lc($1);
+
+		# need to collect the body of the  tag if there is no title
+		if ($tagname eq "a" && ! exists $attr->{title}) {
+			$abody="";
+			$aref=$attr;
+		}
+
 		push @ret, $attr;
 	}, "tagname, attr");
+	$parser->handler(text => sub {
+		if (defined $aref) {
+			$abody.=join(" ", @_);
+		}
+	}, "text");
+	$parser->handler(end => sub {
+		my $tagname=shift;
+		if ($tagname eq "a" && defined $aref) {
+			$aref->{title}=$abody;
+			$aref=undef;
+			$abody=undef;
+		}
+	}, "tagname");
+	$parser->report_tags(qw{link a});
 	$parser->parse($page);
 	$parser->eof;
 
@@ -209,6 +245,10 @@ if (! @repos) {
 	die "no repositories found on $url\n";
 }
 
+#use Data::Dumper;
+#print Dumper(\@repos);
+#exit;
+
 if (defined $destdir && @repos > 1) {
 	# create subdirs of $destdir for the multiple repos
 	if (! $noact) {
@@ -234,6 +274,3 @@ foreach my $repo (@repos) {
 	}
 }
 exit($errors > 0);
-
-#use Data::Dumper;
-#print Dumper(\@repos);