#!/usr/bin/perl
use strict;
use warnings;
## solis.pl, a script to mirror NSO/SOLIS quicklook photospheric vector magnetic fields
## and create a simple HTML display.
#
# Changed $spacedirpath to wherever and create a dir ./solis/ within. Then just run the script
# with no arguments. The output HTML path and name is in $SOLISmovieHTML and defaults to solis.html
# within $spacedirpath. I have it set up as a cronjob to run once per day. Since it is set by
# by default to use the quicklook page, the filling factor images are ignored.
use LWP::UserAgent;
use URI::URL;
use HTML::LinkExtor;
#use Image::Magick;
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Opera/9.80 (X11\; Linux x86_64\; U\; en) Presto/2.9.168 Version/11.50");
my @ns_headers = (
'Accept' => 'image/gif, image/x-xbitmap, image/jpeg,
image/pjpeg, image/png, */*',
'Accept-Charset' => 'iso-8859-1,*,utf-8',
'Accept-Language' => 'en-US',
);
my $spacedirpath = 'home/superkuh/www/spaceweather';
my $solisdir = 'solis';
my $useragent = "--user-agent=\"Opera/9.80 (X11\; Linux x86_64\; U\; en) Presto/2.9.168 Version/11.50\"";
my $SOLISmovieHTML = "/$spacedirpath/solis.html";
# window.open('http://solis.nso.edu/vsm/betadatapub/QLARBetaView.php?fname=/2011/09/svsm_v01_11302_201109291850.jpg&width=600', 'WIN', 'width=1206, height=1000, toolbar=no, menubar=yes, status=no, scrollbars=yes')
my @activeregions;
# Verified
# @activeregions = httpextract('http://solis.nso.edu/vsm/betadatapub/BetaPage2.php?type=6302v', 'window\.open\(\'(.+)\', \'WIN\',');
# Quicklook
@activeregions = httpextract('http://solis.nso.edu/vsm/betadatapub/QLBetaPage.php', 'window\.open\(\'(.+)\', \'WIN\',');
print scalar(@activeregions) . " active regions found.\n";
print "$_\n" foreach @activeregions;
print "\n\n";
my %regions;
foreach my $url (@activeregions) {
my @images;
#
VSM Active Region NOAA AR 11302 Date 9/29/2011 |
# VSM Active Region NOAA AR (\d+) Date \d{1,2}/\d{1,2}/\d{4} <\/th>
my ($AR) = httpextract("$url",' | VSM Active Region NOAA AR (\d+) Date \d{1,2}/\d{1,2}/\d{4} <\/th>');
print "AR $AR\n";
@images = httpextractimglinks($url);
#$regions{$AR} = \@images;
$regions{$AR} = {
'images' => \@images,
'url' => $url
}
}
print "Mirroring and Generating SOLIS AR Pages... \n";
open (SOLISHTML, ">$SOLISmovieHTML") or die "can't make $SOLISmovieHTML.\n$!";
print SOLISHTML "Photosphere Vector Magnetic Field by NSO/SOLIS, SOLIS Active Regions\n";
foreach my $region (keys %regions) {
if (-d "/$spacedirpath/$solisdir/$region") {
print "dir for $region exists.\n";
} else {
mkdir "/$spacedirpath/$solisdir/$region";
print "Created dir /$spacedirpath/$solisdir/$region/\n";
}
my @existingimages;
my %imagecheck;
my $previmagedate;
@existingimages = $spacedirpath/$solisdir/$region/*.jpg>;
%imagecheck = map { makekeyfor($_) => 1 } @existingimages;
if ($existingimages[0]) {
$existingimages[0] =~ /_(\d{12})\.jpg$/;
$previmagedate = $1;
} else {
$previmagedate = "";
}
#print "\n\nTESTING: @{$regions{$region}}[1]\n\n";
#print "\n\nTESTING: image1: @{$regions{$region}->{'images'}}[1] , url: $regions{$region}{'url'}\n\n";
@{$regions{$region}->{'images'}}[1] =~ m#_(\d{12})\.jpg$#;
my $imagedate = $1;
my $datadir = "/$spacedirpath/$solisdir/$region";
if ($previmagedate ne $imagedate) {
if ($previmagedate) {
%imagecheck = ();
`rm $datadir/*`;
print "\n\nDeleting old files in $datadir!\n\n";
} else {
}
}
print "duplicate images: " . scalar(keys %imagecheck) . "\n";
print SOLISHTML "SOLIS AR $region\n";
my $imagecount = 1;
foreach my $imgurl (@{$regions{$region}->{'images'}}) {
next if $imgurl =~ /logo_med.jpg/;
next if $imgurl =~ /_v10_/;
print "$imgurl \n";
$imgurl =~ m#/\d\d/(.+)$#;
my $imagename = $1;
#$imagename =~ /_(\d{12})\.jpg$/;
#my $imagedate = $1;
#$ua->default_header('Referer' => "$imgurl");
print SOLISHTML "";
print SOLISHTML "";
if (0 == $imagecount % 2) {
print SOLISHTML " \n";
}
if ($imagecheck{$imagename}) {
print "exists, skipping $imagename.\n";
} else {
my $response = $ua->get($imgurl, @ns_headers);
my $result = $response->status_line;
my $content = $response->content();
print ("$imgurl - $result\n");
if ($result =~ /200/) {
open (FILE, ">/$spacedirpath/$solisdir/$region/$imagename") or warn "Could not open output!: $!";
binmode (FILE);
print FILE $content;
close FILE;
print system "convert -strip -scale 50% -quality 75 /$spacedirpath/$solisdir/$region/$imagename /$spacedirpath/$solisdir/$region/$imagename";
} else {
warn "could not download file $imgurl, not including in html.\n";
}
}
}
}
close SOLISHTML;
sub httpextract {
my ($url, $regex) = @_;
my @urls;
print "Gathering URLs...";
my $response = $ua->get("$url", @ns_headers);
if ($response->is_success) {
my $html = $response->decoded_content;
my @htmluh = split(/\n/, $html);
foreach my $line (@htmluh) {
# window\.open\('(.+)', 'WIN',
if ($line =~ m#$regex#) {
my $match = $1;
push(@urls, $match);
}
}
}
return @urls;
}
sub httpextractimglinks {
my ($url) = @_;
print "Extracting image URLs... $url\n";
my @images;
my $p = HTML::LinkExtor->new(sub {my($tag, %attr) = @_;return if $tag ne 'img';push(@images, values %attr);});
#my $p = HTML::LinkExtor->new(\&callback);
my $res = $ua->request(HTTP::Request->new(GET => $url),
sub {$p->parse($_[0])});
my $base = $res->base;
@images = map { $_ = url($_, $base)->abs; } @images;
#print join("\n", @images), "\n";
#print "\# " . scalar(@images) . " images in \@images\n\n";
my @uhwtf = @images;
return @uhwtf;
}
sub makekeyfor {
my $filepath = shift;
# /home/superkuh/www/spaceweather/solis/11305/svsm_v02_11305_201109291850.jpg
$filepath =~ m#\d{5}/(.+\.jpg)$#;
my $name = $1;
#print "keyname: $name\n";
return $name;
}
|