#!/usr/bin/perl -w # # pbase.pl # # Recursively download image galleries from PBase. # # 2006-03-16 Fredrik Roubert # use strict; use Fcntl; use Errno 'EEXIST'; use File::Spec; use LWP::ConnCache; use LWP::UserAgent; use HTTP::Response; use HTTP::Headers; use constant BASE => 'http://www.pbase.com/'; use constant EURL => 'Non-existent gallery specified in URL'; sub get_url ($); sub get_dir ($$); sub get_image ($$$); sub parse_dir ($$$); sub parse_image ($$); sub download ($$); if ($#ARGV == -1) { print STDERR "Usage: $0 [URL...]\n"; exit 1; } my ($cc, $ua); $cc = LWP::ConnCache->new; $ua = LWP::UserAgent->new(conn_cache => $cc, env_proxy => 1); foreach my $arg (@ARGV) { if ($arg =~ m{^(?i:@{[BASE]})?([^/]+)(?:/([^/]*)/?)?$}) { get_dir $1, defined $2 && $2 ne '' ? $2 : 'root'; next; } if ($arg =~ m{^(?i:@{[BASE]})?([^/]+)/image/(\d+)$}) { get_image $1, '.', $2; next; } print STDERR "[$arg] @{[EURL]}\n"; } sub get_url ($) { my ($url) = splice @_; my ($r); $r = $ua->get($url); return $r if $r->is_success && $r->content !~ m{\b@{[EURL]}\b}; print STDERR "[$_[0]] @{[$r->is_success ? EURL : $r->status_line]}\n"; return undef; } sub get_dir ($$) { my ($root, $dir) = splice @_; my ($r); $r = get_url BASE . $root . '/' . $dir; parse_dir $root, $dir, $r->content if defined $r; } sub get_image ($$$) { my ($root, $dir, $image) = splice @_; my ($r); $r = get_url BASE . $root . '/image/' . $image . '/original'; parse_image $dir, $r->content if defined $r; } sub parse_dir ($$$) { my ($root, $dir, $html) = splice @_; my ($begin, @image, @dir); $begin = 0; while ($html =~ m{]*?\bhref="(.+?)"}gi) { my $url = $1; if ($url =~ m{\bview=tree$}) { $begin = 1; next; } next unless $begin; if ($url =~ m{/$root/image/(\d+)$}) { push @image, $1; next; } if ($url =~ m{/$root/([^/&]+)$}) { push @dir, $1 unless $1 eq 'profile'; next; } } map { get_dir $root, $_ } @dir; if (@image) { unless (-d $dir || mkdir $dir) { print STDERR "$dir: $!\n"; return; } map { get_image $root, $dir, $_ } @image; } } sub parse_image ($$) { my ($dir, $html) = splice @_; my ($url, $path); $html =~ m{]*?\bclass="display"[^>]*?\bsrc="(.+?)"}i; $url = $1; $url =~ m{/([^/]+)$}; $path = File::Spec->catfile($dir, $1); download $url, $path; } sub download ($$) { my ($url, $path) = splice @_; my ($r, $n, $tmp); $n = 0; $tmp = $path; until (sysopen FILE, $tmp, O_WRONLY|O_CREAT|O_EXCL|O_BINARY) { unless ($!{EEXIST}) { print STDERR "$tmp: $!\n"; return; } $tmp = $path . '.' . ++ $n; } $path = $tmp; print "`$url' -> `$path'\n"; $r = get_url $url; if (defined $r) { syswrite FILE, $r->content; $r = $ua->head($url) unless $r->last_modified; utime time, $r->last_modified, $path; } close FILE; }