From af9a9d12caffaccb04e0e989a5b71e18370953f4 Mon Sep 17 00:00:00 2001
From: Joey Hess <joey@gnu.kitenet.net>
Date: Tue, 6 Jan 2009 20:59:58 -0500
Subject: [PATCH] Add webcheckout command. See
 http://kitenet.net/~joey/rfc/rel-vcs/

---
 debian/changelog |   6 ++
 webcheckout      | 144 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100755 webcheckout

diff --git a/debian/changelog b/debian/changelog
index 6628030..a613e50 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mr (0.36) UNRELEASED; urgency=low
+
+  * Add webcheckout command. See http://kitenet.net/~joey/rfc/rel-vcs/
+
+ -- Joey Hess <joeyh@debian.org>  Tue, 06 Jan 2009 20:59:20 -0500
+
 mr (0.35) unstable; urgency=low
 
   * Warn if an include command fails nonzero. Closes: #495306
diff --git a/webcheckout b/webcheckout
new file mode 100755
index 0000000..33bccb9
--- /dev/null
+++ b/webcheckout
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+use LWP::Simple;
+use HTML::Parser;
+use warnings;
+use strict;
+
+# Controls whether to print what is being done.
+my $verbose=1;
+
+# Controls whether to actually check anything out.
+my $noact=1;
+
+# Controls whether to perfer repos that use authentication.
+my $want_auth=0;
+
+# Controls where to check out to. If not set, the vcs is allowed to
+# decide.
+my $destdir;
+
+# how to perform checkouts
+my %handlers=(
+	git => sub { doit("git", "clone", shift, $destdir) },
+	svn => sub { doit("svn", "checkout", shift, $destdir) },
+	bzr => sub { doit("bzr", "branch", shift, $destdir) },
+);
+
+# Regexps matching urls that are used for anonymous
+# repository checkouts. The order is significant:
+# urls matching earlier in the list are preferred over
+# those matching later.
+my @anon_urls=(
+	qr/^git:\/\//i,
+	qr/^bzr:\/\//i,
+	qr/^svn:\/\//i,
+	qr/^http:\/\//i, # generally the worst transport
+);
+
+sub doit {
+	my @args=grep { defined } @_;
+	print join(" ", @args)."\n" if $verbose;
+	return 0 if $noact;
+	return system(@args);
+}
+
+# Is repo a better than repo b?
+sub better {
+	my ($a, $b)=@_;
+
+	my $firstanon=$b;
+	foreach my $r (@anon_urls) {
+		if ($a->{href} =~ /$r/) {
+			$firstanon=$a;
+			last;
+		}
+		elsif ($b->{href} =~ /$r/) {
+			$firstanon=$b;
+			last;
+		}
+	}
+
+	if ($want_auth) {
+		return $firstanon != $a;
+	}
+	else {
+		return $firstanon == $a;
+	}
+}
+
+# Eliminate duplicate repositories from list.
+# Duplicate repositories have the same title, or the same href.
+sub dedup {
+	my %seenhref;
+	my %bytitle;
+	foreach my $repo (@_) {
+		if (exists $repo->{title} &&
+		    length $repo->{title} &&
+		    exists $bytitle{$repo->{title}}) {
+			my $other=$bytitle{$repo->{title}};
+			next unless better($repo, $other);
+			delete $bytitle{$other->{title}}
+		}
+
+		if (! $seenhref{$repo->{href}}++) {
+			$bytitle{$repo->{title}}=$repo;
+		}
+	}
+
+	return values %bytitle;
+}
+
+sub parse {
+	my $page=shift;
+
+	my @ret;
+	my $parser=HTML::Parser->new(api_version => 3);
+	$parser->handler(start => sub {
+		my $tagname=shift;
+		my $attr=shift;
+		return if lc $tagname ne 'link';
+		return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs';
+		return if ! exists $attr->{href} || ! length $attr->{href};
+		return if ! exists $attr->{type} || ! length $attr->{type};
+		push @ret, $attr;
+	}, "tagname, attr");
+	$parser->parse($page);
+	$parser->eof;
+
+	return @ret;
+}
+
+my $url=shift;
+if (! defined $url) {
+	die "usage: webcheckout url\n";
+}
+
+my $page=get($url);
+if (! defined $page) {
+	die "failed to download $url\n";
+}
+
+my @repos=dedup(parse($page));
+if (! @repos) {
+	die "no repositories found on $url\n";
+}
+
+my $errors=0;
+foreach my $repo (@repos) {
+	my $handler=$handlers{$repo->{type}};
+	if ($handler) {
+		if ($handler->($repo->{href}) != 0) {
+			print STDERR "failed to checkout ".$repo->{href}."\n";
+			$errors++;
+		}
+	}
+	else {
+		print STDERR "unknown repository type ".$repo->{type}.
+			" for ".$repo->{href}."\n";
+		$errors++;
+	}
+}
+exit($errors > 0);
+
+#use Data::Dumper;
+#print Dumper(\@repos);
-- 
2.39.5