# # JPEGinfo.pm # # 2001-10-16 Fredrik Roubert # package JPEGinfo; require 5; use strict; use FileHandle; use IO::Seekable; =head1 NAME JPEGinfo - reads and edits JPEG file meta data =head1 SYNOPSIS =head2 JPEGinfo::read use JPEGinfo; use FileHandle; my ($fh, %info); $fh = new FileHandle "test.jpg", "r" or die; %info = JPEGinfo::read $fh; $fh->close; foreach my $key (keys %info) { print "$key => $info{$key}\n"; } =head2 JPEGinfo::edit use JPEGinfo; use FileHandle; my ($src, $dst, %info); $src = new FileHandle "src.jpg", "r" or die; $dst = new FileHandle "dst.jpg", "w" or die; %info = JPEGinfo::edit $src, $dst, "This is a new comment."; $src->close; $dst->close; foreach my $key (keys %info) { print "$key => $info{$key}\n"; } =head1 DESCRIPTION The JPEGinfo::read() function takes a FileHandle argument, and returns a hash containing the file meta data, or error messages. The JPEGinfo::edit() funktions takes two FileHandle arguments (source and destination respectively), the new comment as a string, and an optional regular expression. If an existing comment in the file matches this regular expression, the existing comment will be replaced. The function returns a hash that might contain error messages. =head1 AUTHOR Copyright (C) 2001 Fredrik Roubert EFE. All rights reserved. This package is free software and is provided "as is" without express or implied warranty. It may be used, redistributed and/or modified under the terms of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html). =cut use constant M_SOF0 => 0xc0; # Start Of Frame N use constant M_SOF1 => 0xc1; # N indicates which compression process use constant M_SOF2 => 0xc2; # Only SOF0-SOF2 are now in common use use constant M_SOF3 => 0xc3; # NB: codes C4 and CC are NOT SOF markers use constant M_SOF5 => 0xc5; use constant M_SOF6 => 0xc6; use constant M_SOF7 => 0xc7; use constant M_SOF9 => 0xc9; use constant M_SOF10 => 0xca; use constant M_SOF11 => 0xcb; use constant M_SOF13 => 0xcd; use constant M_SOF14 => 0xce; use constant M_SOF15 => 0xcf; use constant M_SOI => 0xd8; # Start Of Image (beginning of datastream) use constant M_EOI => 0xd9; # End Of Image (end of datastream) use constant M_SOS => 0xda; # Start Of Scan (begins compressed data) use constant M_APP1 => 0xe1; # Exif block use constant M_APP12 => 0xec; # Some cameras store meta data here use constant M_COM => 0xfe; # Comment use constant FMT_BYTE => 0x01; # An 8-bit unsigned integer use constant FMT_ASCII => 0x02; # An 8-bit byte with 7-bit ASCII code use constant FMT_SHORT => 0x03; # A 16-bit (2-byte) unsigned integer use constant FMT_LONG => 0x04; # A 32-bit (4-byte) unsigned integer use constant FMT_RATIONAL => 0x05; # Two LONGs use constant FMT_UNDEFINED => 0x07; # An 8-bit byte use constant FMT_SLONG => 0x09; # A 32-bit (4-byte) signed integer use constant FMT_SRATIONAL => 0x0a; # Two SLONGs use constant TAG_DATETIME => 0x0132; use constant TAG_DATETIMEORIGINAL => 0x9003; use constant TAG_DATETIMEDIGITIZED => 0x9004; use constant TAG_EXIF_OFFSET => 0x8769; use constant TAG_INTEROP_OFFSET => 0xa005; use constant ESOF => 'Bogus SOF marker length'; use constant EMARKER => 'Erroneous JPEG marker length'; use constant ESOI => 'Expected SOI marker first'; use constant EGARBAGE => 'Garbage data found in JPEG file'; use constant ENOJPEG => 'Not a JPEG file'; use constant EEOF => 'Premature EOF in JPEG file'; use constant EREAD => 'Read error'; use constant ESOS => 'SOS without prior SOFn'; use constant EWRITE => 'Write error'; use constant EFORMAT => 'Illegal format code in Exif directory'; use constant EBYTEORDER => 'Invalid byte order marker in Exif block'; use constant EHEADER => 'Malformed header in Exif block'; sub read ($;$); sub edit ($$$;$); sub read_1_byte ($\%); sub read_2_bytes ($\%); sub read_string ($$\%); sub read_length ($\%); sub write_2_bytes ($$\%); sub next_marker ($\%); sub write_marker ($$\%); sub process_SOFn ($\%); sub process_COM ($\%); sub write_COM ($$\%); sub skip_variable ($\%); sub copy_variable ($$\%); sub process_APP12 ($\%); sub get_Exif_short ($$$); sub get_Exif_long ($$$); sub process_Exif ($\%); sub process_Exif_dir ($$$\%); sub parse_Exif_date ($$$\%); sub read ($;$) { my ($fh, $metadata) = splice @_, 0; my (%info, $marker); return %info unless defined ($marker = read_1_byte $fh, %info); unless ($marker == 0xff) { $info{'error'} = ENOJPEG; return %info; } return %info unless defined ($marker = read_1_byte $fh, %info); unless ($marker == M_SOI) { $info{'error'} = ESOI; return %info; } for (;;) { return %info unless defined ($marker = next_marker $fh, %info); if ( $marker == M_SOF0 || $marker == M_SOF1 || $marker == M_SOF2 || $marker == M_SOF3 || $marker == M_SOF5 || $marker == M_SOF6 || $marker == M_SOF7 || $marker == M_SOF9 || $marker == M_SOF10 || $marker == M_SOF11 || $marker == M_SOF13 || $marker == M_SOF14 || $marker == M_SOF15) { return %info unless defined (process_SOFn $fh, %info); } elsif ($marker == M_SOS || $marker == M_EOI) { return %info; } elsif ($marker == M_COM) { return %info unless defined (process_COM $fh, %info); } elsif ($marker == M_APP1 && $metadata) { return %info unless defined (process_Exif $fh, %info); } elsif ($marker == M_APP12 && $metadata) { return %info unless defined (process_APP12 $fh, %info); } else { return %info unless defined (skip_variable $fh, %info); } } } sub edit ($$$;$) { my ($src, $dst, $com, $re) = splice @_, 0; my (%info, $marker, $length, $buf); return %info unless defined ($marker = read_1_byte $src, %info); unless ($marker == 0xff) { $info{'error'} = ENOJPEG; return %info; } return %info unless defined ($marker = read_1_byte $src, %info); unless ($marker == M_SOI) { $info{'error'} = ESOI; return %info; } return %info unless defined write_marker M_SOI, $dst, %info; for (;;) { return %info unless defined ($marker = next_marker $src, %info); if ( $marker == M_SOF0 || $marker == M_SOF1 || $marker == M_SOF2 || $marker == M_SOF3 || $marker == M_SOF5 || $marker == M_SOF6 || $marker == M_SOF7 || $marker == M_SOF9 || $marker == M_SOF10 || $marker == M_SOF11 || $marker == M_SOF13 || $marker == M_SOF14 || $marker == M_SOF15) { last; } elsif ($marker == M_SOS) { $info{'error'} = ESOS; return %info; } elsif ($marker == M_EOI) { last; } elsif ($marker == M_COM && defined $re) { my (%tmp, $com); unless (defined process_COM $src, %tmp) { $info{'error'} = $tmp{'error'}; return %info; } $com = $tmp{'COM'}[0]; unless ($com =~ m/$re/s) { return %info unless defined write_COM $com, $dst, %info; } } else { return %info unless defined write_marker $marker, $dst, %info; return %info unless defined copy_variable $src, $dst, %info; } } if (defined $com && $com ne '') { return %info unless defined write_COM $com, $dst, %info; } return %info unless defined write_marker $marker, $dst, %info; for (;;) { $length = CORE::read $src, $buf, 65536; unless (defined $length) { $info{'error'} = EREAD; return %info; } last if $length == 0; unless (print $dst $buf) { $info{'error'} = EWRITE; return %info; } } return %info; } sub read_1_byte ($\%) { my $c; unless (defined ($c = $_[0]->getc)) { ${$_[1]}{'error'} = EEOF; return undef; } return ord $c; } sub read_2_bytes ($\%) { my ($c1, $c2); unless (defined ($c1 = $_[0]->getc) && defined ($c2 = $_[0]->getc)) { ${$_[1]}{'error'} = EEOF; return undef; } return ((ord $c1) << 8) | ord $c2; } sub read_string ($$\%) { my $tmp; unless ($_[0] == CORE::read $_[1], $tmp, $_[0]) { ${$_[2]}{'error'} = EEOF; return undef; } return $tmp; } sub read_length ($\%) { my $length; return undef unless defined ($length = read_2_bytes $_[0], %{$_[1]}); unless ($length >= 2) { ${$_[1]}{'error'} = EMARKER; return undef; } return $length - 2; } sub write_2_bytes ($$\%) { my ($x, $fh) = splice @_, 0, 2; unless (print $fh (chr (($x >> 8) & 0xff), chr ($x & 0xff))) { ${$_[0]}{'error'} = EWRITE; return undef; } return 1; } sub next_marker ($\%) { my $fh = shift; my ($c, $discarded_bytes); $discarded_bytes = 0; return undef unless defined ($c = read_1_byte $fh, %{$_[0]}); while ($c != 0xff) { $discarded_bytes ++; return undef unless defined ($c = read_1_byte $fh, %{$_[0]}); } do { return undef unless defined ($c = read_1_byte $fh, %{$_[0]}); } while ($c == 0xff); if ($discarded_bytes > 0) { ${$_[0]}{'warning'} = EGARBAGE; } return $c; } sub write_marker ($$\%) { my ($marker, $fh) = splice @_, 0, 2; unless (print $fh chr 0xff, chr $marker) { ${$_[0]}{'error'} = EWRITE; return undef; } return 1; } sub process_SOFn ($\%) { my $fh = shift; my ($length, $image_height, $image_width, $data_precision, $num_components); return undef unless defined ($length = read_length $fh, %{$_[0]}); return undef unless defined ($data_precision = read_1_byte $fh, %{$_[0]}); return undef unless defined ($image_height = read_2_bytes $fh, %{$_[0]}); ${$_[0]}{'Height'} = $image_height; return undef unless defined ($image_width = read_2_bytes $fh, %{$_[0]}); ${$_[0]}{'Width'} = $image_width; return undef unless defined ($num_components = read_1_byte $fh, %{$_[0]}); unless ($length == 6 + $num_components * 3) { ${$_[0]}{'error'} = ESOF; return undef; } unless (seek $fh, $num_components * 3, SEEK_CUR) { ${$_[0]}{'error'} = EEOF; return undef; } return 1; } sub process_COM ($\%) { my $fh = shift; my ($length, $tmp); return undef unless defined ($length = read_length $fh, %{$_[0]}); return undef unless defined ($tmp = read_string $length, $fh, %{$_[0]}); push @{${$_[0]}{'COM'}}, $tmp; return 1; } sub write_COM ($$\%) { my ($com, $fh) = splice @_, 0, 2; return undef unless defined write_marker M_COM, $fh, %{$_[0]}; return undef unless defined write_2_bytes 2 + length $com, $fh, %{$_[0]}; unless (print $fh $com) { ${$_[0]}{'error'} = EWRITE; return undef; } return 1; } sub skip_variable ($\%) { my $fh = shift; my $length; return undef unless defined ($length = read_length $fh, %{$_[0]}); unless (seek $fh, $length, SEEK_CUR) { ${$_[0]}{'error'} = EEOF; return undef; } return 1; } sub copy_variable ($$\%) { my ($src, $dst) = splice @_, 0, 2; my ($length, $buf); return undef unless defined ($length = read_length $src, %{$_[0]}); return undef unless defined write_2_bytes $length + 2, $dst, %{$_[0]}; return undef unless defined ($buf = read_string $length, $src, %{$_[0]}); unless (print $dst $buf) { ${$_[0]}{'error'} = EWRITE; return undef; } return 1; } sub process_APP12 ($\%) { my $fh = shift; my (%tmp, $com); unless (defined process_COM $fh, %tmp) { ${$_[0]}{'error'} = $tmp{'error'}; return undef; } $com = $tmp{'COM'}[0]; # # Used at least by some Olympus cameras. # if ($com =~ m/^TimeDate=(\d+)/m) { ${$_[0]}{'APP12'} = { 'TimeDate' => [ localtime $1 ] }; } return 1; } sub get_Exif_short ($$$) { return unpack $_[0] ? 'n' : 'v', substr $_[1], $_[2], 2; } sub get_Exif_long ($$$) { return unpack $_[0] ? 'N' : 'V', substr $_[1], $_[2], 4; } sub process_Exif ($\%) { my $fh = shift; my (%exif, $length, $block, $tmp, $endian); return undef unless defined ($length = read_length $fh, %{$_[0]}); return undef unless defined ($block = read_string $length, $fh, %{$_[0]}); return 1 unless $block =~ m/^Exif\0\0/; $tmp = substr $block, 6, 2; if ($tmp eq 'II') { $endian = 0; } elsif ($tmp eq 'MM') { $endian = 1; } else { ${$_[0]}{'warning'} = EBYTEORDER; return 1; } unless ((0x2a == get_Exif_short $endian, $block, 8) && (0x08 == get_Exif_long $endian, $block, 10)) { ${$_[0]}{'warning'} = EHEADER; return 1; } process_Exif_dir 14, $endian, $block, %{$_[0]}; return 1; } sub process_Exif_dir ($$$\%) { my ($start, $endian, $block) = splice @_, 0, 3; my ($entries, $subdir); my %bytes = ( FMT_BYTE + 0 => 1, FMT_ASCII + 0 => 1, FMT_SHORT + 0 => 2, FMT_LONG + 0 => 4, FMT_RATIONAL + 0 => 8, FMT_UNDEFINED + 0 => 1, FMT_SLONG + 0 => 4, FMT_SRATIONAL + 0 => 8, ); $entries = get_Exif_short $endian, $block, $start; for (my $i = 0; $i < $entries; $i ++) { my ($tag, $format, $components, $bytes, $entry, $value); $entry = $start + 2 + $i * 12; $tag = get_Exif_short $endian, $block, $entry; $format = get_Exif_short $endian, $block, $entry + 2; $components = get_Exif_long $endian, $block, $entry + 4; unless (defined $bytes{$format}) { ${$_[0]}{'warning'} = EFORMAT; next; } $bytes = $components * $bytes{$format}; $value = $bytes <= 4 ? $entry + 8 : 6 + get_Exif_long $endian, $block, $entry + 8; if ($tag == TAG_DATETIME) { parse_Exif_date 'DateTime', $block, $value, %{$_[0]}; } elsif ($tag == TAG_DATETIMEORIGINAL) { parse_Exif_date 'DateTimeOriginal', $block, $value, %{$_[0]}; } elsif ($tag == TAG_DATETIMEDIGITIZED) { parse_Exif_date 'DateTimeDigitized', $block, $value, %{$_[0]}; } elsif ($tag == TAG_EXIF_OFFSET || $tag == TAG_INTEROP_OFFSET) { $subdir = get_Exif_long $endian, $block, $value; process_Exif_dir $subdir + 6, $endian, $block, %{$_[0]}; } } $subdir = get_Exif_long $endian, $block, $start + 2 + $entries * 12; if ($subdir) { process_Exif_dir $subdir + 6, $endian, $block, %{$_[0]}; } } sub parse_Exif_date ($$$\%) { if (substr($_[1], $_[2], 19) =~ m/^(\d{4}):(\d{2}):(\d{2}) (\d{2}):(\d{2}):(\d{2})$/ && $1 > 1900 && $2 >= 1 && $2 <= 12 && $3 >= 1 && $3 <= 31 && $4 >= 0 && $4 <= 23 && $5 >= 0 && $5 <= 59 && $6 >= 0 && $6 <= 59) { ${$_[3]}{'Exif'}{$_[0]} = [ $6, $5, $4, $3, $2 - 1, $1 - 1900 ]; } } 1; # successful loading of module