From af9a9d12caffaccb04e0e989a5b71e18370953f4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 6 Jan 2009 20:59:58 -0500 Subject: [PATCH] Add webcheckout command. See http://kitenet.net/~joey/rfc/rel-vcs/ --- debian/changelog | 6 ++ webcheckout | 144 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100755 webcheckout diff --git a/debian/changelog b/debian/changelog index 6628030..a613e50 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mr (0.36) UNRELEASED; urgency=low + + * Add webcheckout command. See http://kitenet.net/~joey/rfc/rel-vcs/ + + -- Joey Hess Tue, 06 Jan 2009 20:59:20 -0500 + mr (0.35) unstable; urgency=low * Warn if an include command fails nonzero. Closes: #495306 diff --git a/webcheckout b/webcheckout new file mode 100755 index 0000000..33bccb9 --- /dev/null +++ b/webcheckout @@ -0,0 +1,144 @@ +#!/usr/bin/perl +use LWP::Simple; +use HTML::Parser; +use warnings; +use strict; + +# Controls whether to print what is being done. +my $verbose=1; + +# Controls whether to actually check anything out. +my $noact=1; + +# Controls whether to perfer repos that use authentication. +my $want_auth=0; + +# Controls where to check out to. If not set, the vcs is allowed to +# decide. +my $destdir; + +# how to perform checkouts +my %handlers=( + git => sub { doit("git", "clone", shift, $destdir) }, + svn => sub { doit("svn", "checkout", shift, $destdir) }, + bzr => sub { doit("bzr", "branch", shift, $destdir) }, +); + +# Regexps matching urls that are used for anonymous +# repository checkouts. The order is significant: +# urls matching earlier in the list are preferred over +# those matching later. +my @anon_urls=( + qr/^git:\/\//i, + qr/^bzr:\/\//i, + qr/^svn:\/\//i, + qr/^http:\/\//i, # generally the worst transport +); + +sub doit { + my @args=grep { defined } @_; + print join(" ", @args)."\n" if $verbose; + return 0 if $noact; + return system(@args); +} + +# Is repo a better than repo b? +sub better { + my ($a, $b)=@_; + + my $firstanon=$b; + foreach my $r (@anon_urls) { + if ($a->{href} =~ /$r/) { + $firstanon=$a; + last; + } + elsif ($b->{href} =~ /$r/) { + $firstanon=$b; + last; + } + } + + if ($want_auth) { + return $firstanon != $a; + } + else { + return $firstanon == $a; + } +} + +# Eliminate duplicate repositories from list. +# Duplicate repositories have the same title, or the same href. +sub dedup { + my %seenhref; + my %bytitle; + foreach my $repo (@_) { + if (exists $repo->{title} && + length $repo->{title} && + exists $bytitle{$repo->{title}}) { + my $other=$bytitle{$repo->{title}}; + next unless better($repo, $other); + delete $bytitle{$other->{title}} + } + + if (! $seenhref{$repo->{href}}++) { + $bytitle{$repo->{title}}=$repo; + } + } + + return values %bytitle; +} + +sub parse { + my $page=shift; + + my @ret; + my $parser=HTML::Parser->new(api_version => 3); + $parser->handler(start => sub { + my $tagname=shift; + my $attr=shift; + return if lc $tagname ne 'link'; + return if ! exists $attr->{rel} || lc $attr->{rel} ne 'vcs'; + return if ! exists $attr->{href} || ! length $attr->{href}; + return if ! exists $attr->{type} || ! length $attr->{type}; + push @ret, $attr; + }, "tagname, attr"); + $parser->parse($page); + $parser->eof; + + return @ret; +} + +my $url=shift; +if (! defined $url) { + die "usage: webcheckout url\n"; +} + +my $page=get($url); +if (! defined $page) { + die "failed to download $url\n"; +} + +my @repos=dedup(parse($page)); +if (! @repos) { + die "no repositories found on $url\n"; +} + +my $errors=0; +foreach my $repo (@repos) { + my $handler=$handlers{$repo->{type}}; + if ($handler) { + if ($handler->($repo->{href}) != 0) { + print STDERR "failed to checkout ".$repo->{href}."\n"; + $errors++; + } + } + else { + print STDERR "unknown repository type ".$repo->{type}. + " for ".$repo->{href}."\n"; + $errors++; + } +} +exit($errors > 0); + +#use Data::Dumper; +#print Dumper(\@repos); -- 2.39.2