Index: /status
===================================================================
--- /status (revision 424)
+++ /status (revision 425)
@@ -10,4 +10,5 @@
 grabber         ninemsn             0.05
 grabber         yahoo7web           0.04
+grabber         ten_website         0.01
 reconciler      reconciler_mk2      0.18
 postprocessor   imdb_augment_data   0.06
Index: /grabbers/ten_website.conf
===================================================================
--- /grabbers/ten_website.conf (revision 425)
+++ /grabbers/ten_website.conf (revision 425)
@@ -0,0 +1,16 @@
+$config = {
+            'max_reliable_days' => 7,
+            'channels' => 'TEN',
+            'option_anon_socks' => '',          # left empty: the grabber's --anonsocks option was removed
+            'option_days_offset' => '--offset',
+            'option_offset_eats_days' => 1,     # presumably --days also counts the --offset days; confirm against the framework
+            'regions' => '',
+            'option_ready' => '--version',
+            'desc' => 'guide data for TEN only from http://www.ten.com.au',
+            'max_days' => 7,                    # the grabber itself clamps --days to 7
+            'max_runtime' => '60',              # units not stated here (minutes?) - TODO confirm
+            'category' => 1,
+            'quality' => 3,
+            'cache' => 1,
+            'option_days' => '--days'
+          };
Index: /grabbers/ten_website
===================================================================
--- /grabbers/ten_website (revision 418)
+++ /grabbers/ten_website (revision 425)
@@ -32,5 +32,4 @@
 my $channels, my $opt_channels;
 my $data_cache;
-my $writer;
 my $ua;
 my $conn_cache;
@@ -38,5 +37,6 @@
 my $d;
 my $opt;
-my %charset;
+my %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } );
+
 
 #
@@ -65,6 +65,6 @@
 	'warper'	=> \$opt->{warper},
 	'lang=s'	=> \$opt->{lang},
+	'no-hdtv-flags'	=> \$opt->{no_hdtv_flags},
 	'obfuscate'	=> \$opt->{obfuscate},
-	'anonsocks=s'	=> \$opt->{anon_socks},
 
 	'ocr-learn-mode' => \$opt->{ocr_learn_mode},
@@ -86,14 +86,21 @@
 $opt->{days} = 7 if ($opt->{days} > 7); # limit to a max of 7 days
 
+# check XMLTV version for HDTV compatibility (0.5.43 and older get the HDTV flags disabled)
+my @xmltv_version = (map { /^(\d+)/ ? $1 : 0 } (split(/\./,$XMLTV::VERSION), 0, 0, 0))[0..2];	# numeric parts, zero-padded to three
+if ((($xmltv_version[0] <=> 0) || ($xmltv_version[1] <=> 5) || ($xmltv_version[2] <=> 43)) <= 0) {	# most-significant part decides first
+	&log("XMLTV version ".$XMLTV::VERSION." too old to support HDTV flags. Disabling HDTV flags.");
+	$opt->{no_hdtv_flags} = 1;
+	$stats{disabled_hdtv_flag}++;
+}
+
 #
 # go go go!
 #
 
-&log(sprintf "going to grab %d days%s of data into %s (%s%s%s%s%s)",
+&log(sprintf "going to grab %d days%s of data into %s (%s%s%s%s)",
 	$opt->{days},
 	(defined $opt->{offset} ? " (skipping first $opt->{offset} days)" : ""),
 	$opt->{outputfile},
 	(defined $opt->{fast} ? "with haste" : "slowly"),
-	(defined $opt->{anon_socks} ? ", via multiple endpoints" : ""),
 	(defined $opt->{warper} ? ", anonymously" : ""),
 	(defined $opt->{no_details} ? ", without details" : ", with details"),
@@ -114,13 +121,13 @@
 &set_ua;
 &setup_charset;
-&setup_socks if (defined $opt->{anon_socks});
 
 &set_region;
-&start_writing_xmltv;
 
 &get_summary_pages;
+&get_detail_pages unless (defined $opt->{no_details});
+
+&write_xmltv;
 
 &write_cache unless (defined $opt->{no_cache});
-$writer->end();
 
 &print_stats;
@@ -141,4 +148,5 @@
 	--no-cache		don't use a cache to optimize (reduce) number of web queries
 	--no-details		don't fetch detailed descriptions (default: do)
+	--no-hdtv-flags		don't mark HD programs as being in HDTV (default: do)
 	--cache-file=file	where to store cache (default "$opt->{cache_file}")
 	--fast			don't run slow - get data as quick as you can - not recommended
@@ -154,4 +162,5 @@
 
 	--ocr-learn-mode	put $progname into OCR learning mode to learn the text
+
 EOF
 ;
@@ -168,10 +177,4 @@
 		local (@ARGV, $/) = ($opt->{cache_file});
 		no warnings 'all'; eval <>; die "$@" if $@;
-
-		my $cache_items = 0;
-		foreach (keys %{$data_cache}) {
-			$cache_items++;
-		}
-		&log("$cache_items programmes loaded from cache.");
 	} else {
 		printf "WARNING: no programme cache $opt->{cache_file} - have to fetch all details\n";
@@ -193,11 +196,18 @@
 	} else {
 		# cleanup old entries from cache
-		for my $cache_key (keys %{$data_cache}) {
-			my ($starttime, @rest) = split(/:/,$cache_key);
-			if ($starttime < (time-86400)) {
-				delete $data_cache->{$cache_key};
+		for my $k (keys %{($data_cache->{id_cache})}) {
+			if ($data_cache->{id_cache}->{$k}->{last_used} < (time-(86400*14))) {
+				delete $data_cache->{id_cache}->{$k};
 				$stats{expired_from_cache}++;
 			}
 		}
+
+		for my $k (keys %{($data_cache->{detail_cache})}) {
+			if ($data_cache->{detail_cache}->{$k}->{last_used} < (time-(86400*14))) {
+				delete $data_cache->{detail_cache}->{$k};
+				$stats{expired_from_cache}++;
+			}
+		}
+
 		print F Data::Dumper->Dump([$data_cache], ["data_cache"]);
 		close F;
@@ -224,4 +234,6 @@
 	if ($reqtype eq "GET") {
 		$request = HTTP::Request->new(GET => $url);
+	} elsif ($reqtype eq "HEAD") {
+		$request = HTTP::Request->new(HEAD => $url);
 	} elsif ($reqtype eq "POST") {
 		$request = HTTP::Request->new(POST => $url);
@@ -273,5 +285,4 @@
 
 		my $sleep_for = 60;
-		$sleep_for = 10 if (defined $opt->{anon_socks});
 
 		&log("attempt $attempts of $retrycount failed to fetch $url, sleeping for $sleep_for secs: $status");
@@ -291,8 +302,6 @@
 	$stats{http_successful_requests}++;
 
-	if ((!defined $opt->{fast}) && (!defined $opt->{anon_socks})) {
-		my $sleeptimer = int(rand(6)) + 17;  # sleep anywhere from 17 to 23 seconds
-		$stats{slept_for} += $sleeptimer;
-		sleep $sleeptimer;
+	if ($reqtype eq "HEAD") {
+		return $response->header("Content-Length");
 	}
 
@@ -328,7 +337,4 @@
 # leading/trailing spaces in strings, translations of html stuff etc
 #   -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au
-
-my %amp;
-BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) }
 
 sub cleanup {
@@ -346,6 +352,8 @@
 ##############################################################################
 
-sub start_writing_xmltv
-{
+sub write_xmltv
+{
+	my $writer;
+
 	my %writer_args = ( encoding => 'ISO-8859-1' );
 	if ($opt->{outputfile}) {
@@ -360,10 +368,27 @@
 	      'generator-info-name' => "$progname $version"} );
 
-	for my $channel (sort keys %{$channels}) {
-		$writer->write_channel( {
-			'display-name' => [[ $channel, $opt->{lang} ]],
-			'id' => $channels->{$channel}
-			} );
-	}
+	$writer->write_channel( {
+		'display-name' => [[ "TEN", $opt->{lang} ]], 'id' => $channels->{TEN} } );
+
+	foreach my $prog (@{($d->{progs})}) {
+		# convert epoch starttime into XMLTV starttime
+		next if (!defined $prog->{starttime});
+		$prog->{start} = strftime "%Y%m%d%H%M", localtime($prog->{starttime});
+		delete $prog->{starttime};
+
+		# convert epoch stoptime into XMLTV stoptime
+		next if (!defined $prog->{stoptime});
+		$prog->{stop} = strftime "%Y%m%d%H%M", localtime($prog->{stoptime});
+		delete $prog->{stoptime};
+
+		delete $prog->{details};
+		delete $prog->{id};
+
+		&cleanup($prog);
+		printf "DEBUG: programme xmltv: ".Dumper($prog) if (defined $opt->{debug});
+		$writer->write_programme($prog);
+	}
+
+	$writer->end();
 }
 
@@ -464,4 +489,6 @@
 	my $postvars = "__VIEWSTATE=".urlify($opt->{viewstate})."&new_site_id=".$reg."&_ctl1.x=0&_ctl1.y=0";
 	$data = &get_url($url, 5, undef, "POST", $postvars);
+
+	$stats{programmes} = 0 if (!defined $stats{programmes});
 }
 
@@ -488,5 +515,5 @@
 		$timeattr[0] = 0; # zero sec
 		$timeattr[1] = 0; # zero min
-		$timeattr[2] = 6; # 6am
+		$timeattr[2] = 0; # midnight
 		my $day_start = mktime(@timeattr);
 
@@ -501,4 +528,5 @@
 {
 	my ($day_start, $day_num) = @_;
+	my %seen_prog;
 
 	my $url = "http://www.ten.com.au/programGuide.aspx?section=programGuide";
@@ -508,5 +536,5 @@
 	$postvars .= "&storeDate=".urlify($opt->{storedate}) if (defined $opt->{storedate});
 
-#	&log("parse_summary_page debug: day_start $day_start day_num $day_num POST $postvars") if (defined $opt->{debug});
+	&log("parse_summary_page debug: day_start $day_start day_num $day_num POST $postvars") if (defined $opt->{debug} && $opt->{debug} > 2);
 
 	my $data = &get_url($url, 5, undef, "POST", $postvars);
@@ -534,8 +562,8 @@
 	}
 
-	$stats{programmes} = 0 if (!defined $stats{programmes});
 	my $progs_in_day = 0;
 
-	for my $tree_pg ($tree->look_down('_tag' => 'tr')) {
+	my @tree_rows = $tree->look_down('_tag' => 'tr');
+	foreach my $tree_pg (@tree_rows) {
 		my $yellow_row_count = 0;
 		my $prog_bg;
@@ -548,32 +576,54 @@
 		foreach my $prog_td ($tree_pg->look_down('_tag' => 'td')) {
 			my $prog_td_class = $prog_td->attr('class');
-			if ((defined $prog_td_class) && ($prog_td_class eq "pgimgcell")) {
+			if ((defined $prog_td_class) && ($prog_td_class eq "pgimgcell") && (!defined $prog_bg)) {
 				if (my $style_tag = $prog_td->attr('style')) {
-					$prog_bg = $1 if ($style_tag =~ /^background:url\((.*)\) /);
-				}
-
-				if (my $img_tag = $prog_td->look_down('_tag' => 'img', 'class' => 'pgimg')) {
+					if ($style_tag =~ /^background:url\((.*)\) /) {
+						$prog_bg = $1;
+						&log("got prog_bg '$prog_bg'") if (defined $opt->{debug} && $opt->{debug} > 1);
+					}
+				}
+
+				if ((!defined $prog_fg) && (my $img_tag = $prog_td->look_down('_tag' => 'img', 'class' => 'pgimg'))) {
 					$prog_fg = $img_tag->attr('src');
-				}
-
-				if (my $link_tag = $prog_td->look_down('_tag' => 'a', 'href' => '#')) {
+					&log("got prog_fg '$prog_fg'") if (defined $opt->{debug} && $opt->{debug} > 1);
+				}
+
+				if ((!defined $prog_details) && (my $link_tag = $prog_td->look_down('_tag' => 'a', 'href' => '#'))) {
 					my $link_url = $link_tag->attr('onClick');
-					$prog_details = $1 if ($link_url =~ /^window\.open\('(.+?)'/);
+					if ($link_url =~ /^window\.open\('(.+?)'/) {
+						$prog_details = $1;
+						&log("got prog_details '$prog_details'") if (defined $opt->{debug} && $opt->{debug} > 1);
+					}
 				}
 			} elsif ((defined $prog_td_class) && ($prog_td_class eq "yellow")) {
 				$yellow_row_count++;
 				if ($yellow_row_count == 1) {		# HD flag
-					$prog_hd = 1 if ($prog_td->as_text() =~ /HD/);
+					if ($prog_td->as_text() =~ /HD/) {
+						$prog_hd = 1;
+						&log("prog is in HD") if (defined $opt->{debug} && $opt->{debug} > 1);
+					} else {
+						$prog_hd = 0;
+					}
 				} elsif ($yellow_row_count == 2) {	# CC flag
-					$prog_cc = 1 if ($prog_td->as_text() =~ /Y/);
+					if ($prog_td->as_text() =~ /Y/) {
+						$prog_cc = 1;
+						&log("prog has closed captions") if (defined $opt->{debug} && $opt->{debug} > 1);
+					} else {
+						$prog_cc = 0;
+					}
 				} elsif ($yellow_row_count == 3) {	# rating
 					$prog_rating = $prog_td->as_text();
-				}
-			}
-		}
+					&log("prog has rating '$prog_rating'") if (defined $opt->{debug} && $opt->{debug} > 1);
+				}
+			}
+		}
+
+		&log("finished evaluating row") if (defined $opt->{debug} && $opt->{debug} > 1);
 
 		if ((defined $prog_fg) && (defined $prog_bg) && (defined $prog_details)) {
+			next if (defined $seen_prog{$prog_details});
+			$seen_prog{$prog_details}++;
+
 			$progs_in_day++;
-			$stats{programmes}++;
 			&parse_one_summary_prog($day_start, $day_num, $progs_in_day, $prog_fg, $prog_bg, $prog_details, $prog_rating, $prog_hd, $prog_cc);
 		}
@@ -582,4 +632,11 @@
 	&log("WARNING: Only $progs_in_day programmes seen on day $day_num in '$url' (POST $postvars). ".
 	  "Data may be bad.") if ($progs_in_day < 10);
+
+	unless (defined $opt->{fast}) {
+		my $sleep_for = 20 + int(rand(5));
+		&log(" .. found $progs_in_day programmes on day $day_num, sleeping for $sleep_for seconds.");
+		sleep $sleep_for;
+		$stats{slept_for} += $sleep_for;
+	}
 }
 
@@ -590,4 +647,5 @@
 {
 	my ($day_start, $day_num, $progs_in_day, $prog_fg, $prog_bg, $prog_details, $prog_rating, $prog_hd, $prog_cc) = @_;
+	my $prog;
 
 	my $id;
@@ -596,25 +654,151 @@
 	my $s;
 
-	if (!defined $data_cache->{$id}) {
-		my $fg_gif_image = &get_url("http://www.ten.com.au".$prog_fg,3);
-		my $bg_gif_image = &get_url("http://www.ten.com.au".$prog_bg,3);
-
-		my $fg_image = GD::Image->newFromGifData($fg_gif_image);
-		my $bg_image = GD::Image->newFromPngData($bg_gif_image);
-
-		$bg_image->copyMerge($fg_image, 0, 0, 0, 0, $fg_image->width, $fg_image->height, 100);
-
-		# remove underline
-		my $white = $bg_image->colorExact(255,255,255);
-		$bg_image->filledRectangle(0, 14, $fg_image->width, $fg_image->height, $white);
-
-		$s = &parse_characters($id, $bg_image,($day_num == 1 ? 6 : 14));
-		$data_cache->{$id}->{ocr_text} = $s;
+	if (!defined $data_cache->{id_cache}->{$id}) {
+		$s = &ocr_image($id, "http://www.ten.com.au".$prog_fg, "http://www.ten.com.au".$prog_bg, 3, ($day_num == 1 ? 6 : 14), 0);
+		$data_cache->{id_cache}->{$id}->{ocr_text} = $s;
 	} else {
-		$s = $data_cache->{$id}->{ocr_text};
-	}
-
-	&log("parse_one_summary_prog: d".$day_num."p".$progs_in_day." ".$prog_details.": ".$s) if (defined $opt->{debug});
-
+		$s = $data_cache->{id_cache}->{$id}->{ocr_text};
+		$stats{used_cached_items}++;
+	}
+	$data_cache->{id_cache}->{$id}->{last_used} = time;
+
+	if ($s =~ /\s*(\d+):(\d+)\s*(A|P)M\s+(.*)$/) {
+		my $prog_hr = $1;
+		$prog_hr = 0 if ($prog_hr == 12);
+
+		$prog->{starttime} = (($prog_hr * 60) + $2) * 60; # seconds
+		$prog->{starttime} += (60*60*12) if (lc($3) eq "p");
+
+		$prog->{title} = [[ $4, $opt->{lang} ]];
+	} elsif ($s =~ /\s*(\d+)\.(\d+)\.(\d+)\s+(\d+):(\d+)\s*(A|P)M\s+(.*)$/) {
+		my $prog_hr = $4;
+		$prog_hr = 0 if ($prog_hr == 12);
+
+		$prog->{starttime} = (($prog_hr * 60) + $5) * 60; # seconds
+		$prog->{starttime} += (60*60*12) if (lc($6) eq "p");
+
+		$prog->{title} = [[ $7, $opt->{lang} ]];
+	}
+
+	if (!defined $prog->{title}) {
+		&log("could not parse progname from OCR string '$s'. Format changed?");
+		$stats{unparsable_name}++;
+		return;
+	}
+	if (!defined $prog->{starttime}) {
+		&log("could not parse start time from OCR string '$s'. Format changed?");
+		$stats{unparsable_time}++;
+		return;
+	}
+
+	if ($prog->{starttime} < (12*60*60)) {
+		$prog->{starttime} += (24*60*60) if (defined $d->{seen_pm}->[$day_num]);
+	} else {
+		$d->{seen_pm}->[$day_num] = 1 if (!defined $d->{seen_pm}->[$day_num]);
+	}
+	$prog->{starttime} += $day_start;
+
+	$prog->{id} = $id;
+	$prog->{details} = $prog_details;
+	$prog->{channel} = $channels->{TEN};
+
+	if ((defined $prog_rating) && ($prog_rating ne "")) {
+		my @ratings;
+		push(@ratings, [$prog_rating, 'ABA', undef]);
+		$prog->{rating} = [ @ratings ];
+	}
+
+	$prog->{subtitles} = [ { 'type' => 'teletext' } ] if ($prog_cc);
+	if ($prog_hd) {
+		$prog->{video}->{aspect} = "16:9";	# widescreen
+		$prog->{video}->{quality} = "HDTV" unless (defined $opt->{no_hdtv_flags});
+	}
+
+	push(@{($d->{progs})},$prog);
+
+	if ($stats{programmes} > 0) {
+		# set previous stoptime based on this starttime
+		$d->{progs}->[($stats{programmes}-1)]->{stoptime} = $prog->{starttime};
+	}
+	$stats{programmes}++;
+
+	&log("parse_one_summary_prog: d".$day_num."p".$progs_in_day." ".$prog_details.": start:".$prog->{starttime}." name:".$prog->{title}->[0]->[0]) if (defined $opt->{debug});
+
+}
+
+##############################################################################
+
+sub get_detail_pages
+{
+	# Adds desc/category/sub-title to every programme in $d->{progs}, fetching a
+	# detail page only when $data_cache->{detail_cache} has no entry for it yet.
+	my $prog_count = 0;
+	$stats{used_detailed_cache} = 0;
+	&log("fetching up to ".$stats{programmes}." detail pages...");
+
+	foreach my $prog (@{($d->{progs})}) {
+		$prog_count++;
+		my $details = $prog->{details};
+		# decide hit/miss up front: the fetch branch below creates the cache entry
+		my $was_in_cache = (defined $data_cache->{detail_cache}->{$details}) ? 1 : 0;
+		if (($prog_count % 10) == 1) {
+			&log(" .. at programme ".$prog_count." of ".$stats{programmes}." (".$stats{used_detailed_cache}." from cache)");
+		}
+
+		if ($was_in_cache) {
+			$stats{used_detailed_cache}++;
+		} else {
+			my $url = "http://www.ten.com.au/".$details;
+			my $data = &get_url($url,5);
+			# declared unconditionally: 'my $x = ... if COND' has undefined behaviour
+			my $tree = (defined $data) ? HTML::TreeBuilder->new_from_content($data) : undef;
+			if ((!$data) || (!$tree)) {
+				&log("url '$url' doesn't seem to contain any valid details. Has the format changed?");
+				$stats{bad_detail_response}++;
+				return;	# abandons the remaining programmes too
+			}
+			# parse description from first 'class=info' table cell
+			my $prog_desc = $tree->look_down('_tag' => 'td', 'class' => 'info', 'style' => 'text-align:justify;');
+			if (defined $prog_desc) {
+				$data_cache->{detail_cache}->{$details}->{desc} = [[ $prog_desc->as_text(), $opt->{lang} ]];
+				&log("got desc '".$prog_desc->as_text()."'") if (defined $opt->{debug} && $opt->{debug} > 1);
+			}
+			$tree->delete;	# free the parse tree; only $data is used from here on
+
+			# a HEAD request makes get_url hand back the Content-Length header
+			my $genre_group = &get_url("http://www.ten.com.au/pgutil/epfront.ashx?cd=2&id=".$prog->{id},3,undef,"HEAD");
+			if ((defined $genre_group) && ($genre_group ne "")) {
+				if (defined $d->{gset}->{$genre_group}) {
+					$data_cache->{detail_cache}->{$details}->{category} = [[ translate_category($d->{gset}->{$genre_group}), $opt->{lang} ]];
+				} else {
+					$data_cache->{detail_cache}->{$details}->{category} = [[ $genre_group, $opt->{lang} ]];
+					&log("unknown genre group '$genre_group' for prog '".$prog->{title}->[0]->[0]."'");
+					$stats{unknown_genre}++;
+				}
+			}
+
+			if ($data =~ /EPISODE:/) {
+				my $s = &ocr_image($prog->{id}, "http://www.ten.com.au/pgutil/epfront.ashx?cd=1&id=".$prog->{id}, "http://www.ten.com.au/pgutil/epback.ashx?cd=1&id=".$prog->{id}, 3, 0, 1);
+				if ((defined $s) && ($s ne "")) {
+					$data_cache->{detail_cache}->{$details}->{'sub-title'} = [[ $s, $opt->{lang} ]];
+				}
+			}
+		}
+
+		$data_cache->{detail_cache}->{$details}->{last_used} = time;
+		$prog->{desc} = $data_cache->{detail_cache}->{$details}->{desc}
+		    if (defined $data_cache->{detail_cache}->{$details}->{desc});
+		$prog->{category} = $data_cache->{detail_cache}->{$details}->{category}
+		    if (defined $data_cache->{detail_cache}->{$details}->{category});
+		$prog->{'sub-title'} = $data_cache->{detail_cache}->{$details}->{'sub-title'}
+		    if (defined $data_cache->{detail_cache}->{$details}->{'sub-title'});
+
+		# throttle only after a real fetch; cache hits cost the site nothing
+		unless ((defined $opt->{fast}) || ($was_in_cache)) {
+			my $sleep_for = 3 + int(rand(2));
+			sleep $sleep_for;
+			$stats{slept_for} += $sleep_for;
+		}
+	}
+}
 
@@ -641,5 +825,4 @@
 	}
 
-	# scan right until 
 	my $char_x1 = 0;
 	my $char_x2 = 0;
@@ -685,7 +868,7 @@
 			$charnum++;
 
-			if ((!defined $charset{$md5}) || ($charset{$md5} eq "?")) {
+			if ((!defined $d->{charset}->{$md5}) || ($d->{charset}->{$md5} eq "?")) {
 				if (defined $opt->{ocr_learn_mode}) {
-					$charset{$md5} = "[".$md5."]";
+					$d->{charset}->{$md5} = "[".$md5."]";
 					$s .= "[".$md5."]";
 				} else {
@@ -693,5 +876,5 @@
 				}
 			} else {
-				$s .= $charset{$md5};
+				$s .= $d->{charset}->{$md5};
 			}
 
@@ -708,204 +891,262 @@
 sub setup_charset
 {
-	$charset{"87b7de3dbbeda572e883253803f73a78"}="e";
-	$charset{"caa8c600dd0aecf49f445753963e97b5"}=":";
-	$charset{"46854a6efed48426f1018828cca41ac2"}="o";
-	$charset{"f64db65ec25ba73bdced42fcf01be00a"}="r";
-	$charset{"d0ec43eddec59827259b46c460386ae0"}="l";
-	$charset{"d4fb05e2702c4199a73794b5def96ec2"}="G";
-	$charset{"ca1cbc6861523c4608f19365cac6dde0"}="0";
-	$charset{"a49667c09e9d4be0b595578d51eeb60d"}="M";
-	$charset{"1a9de7fb6f1c93f3ffaa15816549e43a"}="6";
-	$charset{"fbec6375cab7ff5b9d4b4783c7aab13b"}="s";
-	$charset{"b1465cc2781264fff5a55a9e9b3d8064"}="A";
-	$charset{"b61b8c026407890a23276d41125d7e98"}="h";
-	$charset{"8e2a682942360201f924e694dc70fa43"}="T";
-	$charset{"c1e402fec1d35694b1898b1f1dbb16bb"}="w";
-	$charset{"bed2eaba5e16b7246bb1f5b94d44b61c"}="h";
-	$charset{"c0107f886a27e42ce8fd2eca63a5ebcc"}="D";
-	$charset{"8300a291d7dae2e876126878c98af6d1"}="S";
-	$charset{"7edd9f81d7da6577d57da07f93f95b87"}="E";
-	$charset{"ec5d1b2140213fdbbf3c837400b2d3c5"}="e";
-	$charset{"14e3c224bd590504ffa95a1987ac3fbc"}="n";
-	$charset{"55ced8bf6a2a2482f578bc988b60b5ed"}="a";
-	$charset{"51bc70bfed877b2bf7300a5023a88634"}="o";
-	$charset{"5ae0c7cc64eb457ed198ee008fcd52f9"}="d";
-	$charset{"8769704a7c47684c74d841673664f942"}="V";
-	$charset{"763602fa61fe36273a3492f3fbae0ff8"}="t";
-	$charset{"0efb2ccf6c4e8b3084e56da89ad6629b"}="7";
-	$charset{"70dddd8427594526c8fd308b6151d673"}="i";
-	$charset{"75419c36d52e0f29143d4ecf3c5fd2fd"}="W";
-	$charset{"592559bccc3f515e5d2a93622320a1a2"}="m";
-	$charset{"4e4073891b344c07deceee07cd6ba348"}="g";
-	$charset{"9e2f28787475e105da5221e20eb7a137"}="r";
-	$charset{"f7e8dade3df2070be62dd206ef0cc8f1"}="5";
-	$charset{"62982338ad7a6b499056bac67f840d83"}="N";
-	$charset{"97fd0fdcedad187e8bf877adc5d580c5"}="2";
-	$charset{"35f9e067a546f3ae0057065223fe4c33"}="3";
-	$charset{"97f15e1d2ad1cb232147d0b6f01c8022"}="O";
-	$charset{"4c52033ce6a724d184d9c8d23a960d6e"}="P";
-	$charset{"9fbd3153eb8e55a0a1f453ee33e6bafd"}="i";
-	$charset{"e7a5cbb21f17f35f2f141e63f37a45fb"}="c";
-	$charset{"f7e8dade3df2070be62dd206ef0cc8f1"}="5";
-	$charset{"646db1d6726727e809ed1eb7ea11f545"}="8";
-	$charset{"c5ed676c18b62bc6885e34bf527e66af"}="Y";
-	$charset{"99bea8c75f15219ca16a7229b3938665"}="u";
-	$charset{"a9ea989899145834e84daf0abc5964f0"}="!";
-	$charset{"8113592ffa186852672d458f5bd86135"}="k";
-	$charset{"fec8880342772dd7e83ca9ffeed0e216"}="l";
-	$charset{"185a57d42d98c6cbd85135d9e8295501"}="D";
-	$charset{"606bf5428471cfd5de3434374c281334"}="y";
-	$charset{"50692dc12cde0fae151d9a0c2563c81d"}="J";
-	$charset{"f3045893d14fbb5f20e215a38617aee4"}="0";
-	$charset{"cf78a362c08ef3b9284ade8113e670d7"}="R";
-	$charset{"0de173cf09ded97fff935aa24f7f8bfe"}="z";
-	$charset{"cf63706b1f8eaa1c9120e1f9794918c3"}="T";
-	$charset{"f9f2e0d23af08cb6fbeacb686992f633"}="v";
-	$charset{"c1777c45a7d53a5d557c5da145bea080"}="'";
-	$charset{"88f4902f74cf89846318c96003466835"}="p";
-	$charset{"2b821839a93b75e470d04a5e2c1971b3"}="J";
-	$charset{"120cfb2dcf74d7900dc22d44bea9db09"}="H";
-	$charset{"298b488eb21a879c4cf9007c05283a15"}="s";
-	$charset{"08021ebe5ef72c0ed41b438fd794e71e"}="tt";
-	$charset{"b24415f6bccb3a9ad482156a524dbf1e"}="y";
-	$charset{"6c27fb8ed1d2d451785d957138ca0902"}="u";
-	$charset{"5a6e6307a1b18b409618616556a327e5"}="E";
-	$charset{"8d4430c7857a01d4805b4666c54fe114"}="b";
-	$charset{"cf9c23550ff1fde3b19b593966fdd391"}="S";
-	$charset{"97986e54d74ef7047eebc1169134564f"}="B";
-	$charset{"561fda757040c25038687752394d39a8"}="M";
-	$charset{"8015f8d4c3d6574c9ec73b412ece2013"}="L";
-	$charset{"7fdc4d50db244ad00f11d7c362f10b8f"}="9";
-	$charset{"73f0455d71b4156ab2bbebb7fac004ca"}="4";
-	$charset{"401ccf9844fe6399f13597cb458abedb"}="a";
-	$charset{"28de7104f0f94e161104c407071a5e91"}="m";
-	$charset{"cc282e429660787afc4a292a6e35cb2a"}="F";
-	$charset{"449bb458f502dbb10cf71673d1bd7ac4"}="5";
-	$charset{"eb6c72d1cb3b32bfcf646e2c5dafc4d2"}="N";
-	$charset{"aa364cab095bc5f46f855c9772619f5e"}="1";
-	$charset{"6aadacaa0e0b622fe755be8615f67f87"}="2";
-	$charset{"0f87f473885da54c2a7c886ae92f0ddd"}="R";
-	$charset{"17d10978ffc796cc024c68afa3fb463c"}="I";
-	$charset{"70b21817f2611845e464f8b551c73b71"}="1";
-	$charset{"f5a215139fdc4921b4fad687e0899fdc"}="H";
-	$charset{"8b9e1cc11d23773ca68afaea3064902a"}="A";
-	$charset{"6adf28b9140e9b236394bd6956638630"}="9";
-	$charset{"0d2eeea7b20edb640d5556ea8528ba67"}="K";
-	$charset{"3503cdc59df22be3b6242db35cfe3482"}="f";
-	$charset{"b5856240a388696d55ea99fad53166ce"}="W";
-	$charset{"4b8e08032dde00ced51e8435820be5e1"}="n";
-	$charset{"38b1c7da79cbbac219c590129f40cca1"}="k";
-	$charset{"1697c04376dac187f028f240cb0ccc9a"}="C";
-	$charset{"2e03a06a91a1993a5c6e15b43784e5c3"}="3";
-	$charset{"840e43645d65217fd0d57914321db2bf"}=":";
-	$charset{"5e871ec322ade9e74d44285c3ddad972"}="L";
-	$charset{"e3bcb0065109e004bc6b18b1403fb810"}="rt";
-	$charset{"9f32b9cd5083733eead4380bb6551ac5"}="B";
-	$charset{"148cafcb02f1a203866f583dbdb253af"}="&";
-	$charset{"519cc9d317d1a6db113c0da6e5560e71"}="d";
-	$charset{"40ec9716cfe72fe54201dae866e70ec5"}="V";
-	$charset{"ace17452c10518e97caba9493898c910"}="U";
-	$charset{"d244b3a33602a55c1ee8cf9c570dced9"}="-";
-	$charset{"25ee9123a9fdb7c164b29dfaa50d10b7"}="6";
-	$charset{"a87bc5bc8b3e5df44df2e2405561dd83"}=".";
-	$charset{"f6e64e873007d53c7bf7873d639f4678"}=".";
-	$charset{"de8b17aa3cf358a1e8b9496dd99e20f1"}="7";
-	$charset{"21e73997781a1af8c506eded30c6143f"}="4";
-	$charset{"4518bf9cb085588761164be21442aa5d"}="F";
-	$charset{"c38b4e845130be00f1a27a023241a500"}="!";
-	$charset{"f5a9cba4badf510bbde66e1012647c8c"}="O";
-	$charset{"c67d0abf9dd1bf2352613c243de4649b"}="P";
-	$charset{"453b59cf0cb2813958d5518fc668639c"}="Z";
-	$charset{"3dad6dcdedabfbb99ef2067f38d6bd67"}="B";
-	$charset{"b2da7f7ca8c9be23ca445a7df954a4f2"}="8";
-	$charset{"8a3bf2c9eb10c811e50c91759e6e57cc"}="G";
-	$charset{"f9e0333c0725c22b198bc0c3a7aa4a51"}="x";
-	$charset{"61ea6df7256f910d1cb031979d7d1eda"}="C";
-	$charset{"588b076556aa1b58810fe1f97fa77371"}="Y";
-	$charset{"8a3bf2c9eb10c811e50c91759e6e57cc"}="G";
-	$charset{"61ea6df7256f910d1cb031979d7d1eda"}="C";
-}
-
-##############################################################################
-
-sub setup_socks
-{
-	use LWP::Protocol::http;
-	my $orig_new_socket = \&LWP::Protocol::http::_new_socket;
-
-	# override LWP::Protocol::http's _new_socket method with our own
-	local($^W) = 0;
-	*LWP::Protocol::http::_new_socket = \&socks_new_socket;
-
-	# test that it works
-	&log("configured to use Tor, testing that it works by connecting to www.google.com ...");
-	my $data = &get_url("http://www.google.com/",10);
-	if (($data) && ($data =~ /Google/i)) {
-		&log("success.  Tor appears to be working!");
-		return;
-	}
-
-	&log("ERROR: Could not connect to www.google.com via Tor, disabling Tor.");
-	&log("       DATA FETCHING WILL BE VERY SLOW.");
-	&log("       DISABLING DETAILS-FETCHING BECAUSE OF THIS - SIGNIFICANTLY LOWER DATA QUALITY!!");
-
-	$opt->{no_details} = 1;
-	delete $opt->{anon_socks};
-	$stats{fallback_to_non_tor}++;
-
-	*LWP::Protocol::http::_new_socket = $orig_new_socket;
-}
-
-##############################################################################
-# our own SOCKS4Aified version of LWP::Protocol::http::_new_socket
-
-sub socks_new_socket
-{
-	my($self, $host, $port, $timeout) = @_;
-
-	my ($socks_ip,$socks_port) = split(/:/,$opt->{anon_socks});
-	$socks_ip = "127.0.0.1" if (!defined $socks_ip);
-	$socks_port = "9050" if (!defined $socks_port);
-
-	local($^W) = 0;  # IO::Socket::INET can be noisy
-	my $sock = $self->socket_class->new(
-		PeerAddr => $socks_ip,
-		PeerPort => $socks_port,
-		Proto    => 'tcp');
-
-	unless ($sock) {
-		# IO::Socket::INET leaves additional error messages in $@
-		$@ =~ s/^.*?: //;
-		&log("Can't connect to $host:$port ($@)");
-		return undef;
-	}
-
-	# perl 5.005's IO::Socket does not have the blocking method.
-	eval { $sock->blocking(0); };
-
-	# establish connectivity with socks server - SOCKS4A protocol
-	print { $sock } pack("CCnN", 0x04, 0x01, $port, 1) .
-		(pack 'x') .
-		$host . (pack 'x');
-
-	my $received = "";
-	my $timeout_time = time + $timeout;
-	while ($sock->sysread($received, 8) && (length($received) < 8) ) {
-		select(undef, undef, undef, 0.25);
-		last if ($timeout_time < time);
-	}
-
-	if ($timeout_time < time) {
-		&log("Timeout ($timeout) while connecting via SOCKS server");
-		return $sock;
-	}
-
-	my ($null_byte, $req_status, $port_num, $ip_addr) = unpack('CCnN',$received);
-	&log("Connection via SOCKS4A server rejected or failed") if ($req_status == 0x5b);
-	&log("Connection via SOCKS4A server because client is not running identd") if ($req_status == 0x5c);
-	&log("Connection via SOCKS4A server because client's identd could not confirm the user") if ($req_status == 0x5d);
-
-	$sock;
-}
-
-##############################################################################
+	my %charset = qw{
+		87b7de3dbbeda572e883253803f73a78 e caa8c600dd0aecf49f445753963e97b5 : 46854a6efed48426f1018828cca41ac2 o
+		f64db65ec25ba73bdced42fcf01be00a r d0ec43eddec59827259b46c460386ae0 l d4fb05e2702c4199a73794b5def96ec2 G
+		ca1cbc6861523c4608f19365cac6dde0 0 a49667c09e9d4be0b595578d51eeb60d M 1a9de7fb6f1c93f3ffaa15816549e43a 6
+		fbec6375cab7ff5b9d4b4783c7aab13b s b1465cc2781264fff5a55a9e9b3d8064 A b61b8c026407890a23276d41125d7e98 h
+		8e2a682942360201f924e694dc70fa43 T c1e402fec1d35694b1898b1f1dbb16bb w bed2eaba5e16b7246bb1f5b94d44b61c h
+		c0107f886a27e42ce8fd2eca63a5ebcc D 8300a291d7dae2e876126878c98af6d1 S 7edd9f81d7da6577d57da07f93f95b87 E
+		ec5d1b2140213fdbbf3c837400b2d3c5 e 14e3c224bd590504ffa95a1987ac3fbc n 55ced8bf6a2a2482f578bc988b60b5ed a
+		51bc70bfed877b2bf7300a5023a88634 o 5ae0c7cc64eb457ed198ee008fcd52f9 d 8769704a7c47684c74d841673664f942 V
+		763602fa61fe36273a3492f3fbae0ff8 t 0efb2ccf6c4e8b3084e56da89ad6629b 7 70dddd8427594526c8fd308b6151d673 i
+		75419c36d52e0f29143d4ecf3c5fd2fd W 592559bccc3f515e5d2a93622320a1a2 m 4e4073891b344c07deceee07cd6ba348 g
+		9e2f28787475e105da5221e20eb7a137 r f7e8dade3df2070be62dd206ef0cc8f1 5 62982338ad7a6b499056bac67f840d83 N
+		97fd0fdcedad187e8bf877adc5d580c5 2 35f9e067a546f3ae0057065223fe4c33 3 97f15e1d2ad1cb232147d0b6f01c8022 O
+		4c52033ce6a724d184d9c8d23a960d6e P 9fbd3153eb8e55a0a1f453ee33e6bafd i e7a5cbb21f17f35f2f141e63f37a45fb c
+		f7e8dade3df2070be62dd206ef0cc8f1 5 646db1d6726727e809ed1eb7ea11f545 8 c5ed676c18b62bc6885e34bf527e66af Y
+		99bea8c75f15219ca16a7229b3938665 u a9ea989899145834e84daf0abc5964f0 ! 8113592ffa186852672d458f5bd86135 k
+		fec8880342772dd7e83ca9ffeed0e216 l 185a57d42d98c6cbd85135d9e8295501 D 606bf5428471cfd5de3434374c281334 y
+		50692dc12cde0fae151d9a0c2563c81d J f3045893d14fbb5f20e215a38617aee4 0 cf78a362c08ef3b9284ade8113e670d7 R
+		0de173cf09ded97fff935aa24f7f8bfe z cf63706b1f8eaa1c9120e1f9794918c3 T f9f2e0d23af08cb6fbeacb686992f633 v
+		c1777c45a7d53a5d557c5da145bea080 ' 88f4902f74cf89846318c96003466835 p 2b821839a93b75e470d04a5e2c1971b3 J
+		120cfb2dcf74d7900dc22d44bea9db09 H 298b488eb21a879c4cf9007c05283a15 s 08021ebe5ef72c0ed41b438fd794e71e tt
+		b24415f6bccb3a9ad482156a524dbf1e y 6c27fb8ed1d2d451785d957138ca0902 u 5a6e6307a1b18b409618616556a327e5 E
+		8d4430c7857a01d4805b4666c54fe114 b cf9c23550ff1fde3b19b593966fdd391 S 97986e54d74ef7047eebc1169134564f B
+		561fda757040c25038687752394d39a8 M 8015f8d4c3d6574c9ec73b412ece2013 L 7fdc4d50db244ad00f11d7c362f10b8f 9
+		73f0455d71b4156ab2bbebb7fac004ca 4 401ccf9844fe6399f13597cb458abedb a 28de7104f0f94e161104c407071a5e91 m
+		cc282e429660787afc4a292a6e35cb2a F 449bb458f502dbb10cf71673d1bd7ac4 5 eb6c72d1cb3b32bfcf646e2c5dafc4d2 N
+		aa364cab095bc5f46f855c9772619f5e 1 6aadacaa0e0b622fe755be8615f67f87 2 0f87f473885da54c2a7c886ae92f0ddd R
+		17d10978ffc796cc024c68afa3fb463c I 70b21817f2611845e464f8b551c73b71 1 f5a215139fdc4921b4fad687e0899fdc H
+		8b9e1cc11d23773ca68afaea3064902a A 6adf28b9140e9b236394bd6956638630 9 0d2eeea7b20edb640d5556ea8528ba67 K
+		3503cdc59df22be3b6242db35cfe3482 f b5856240a388696d55ea99fad53166ce W 4b8e08032dde00ced51e8435820be5e1 n
+		38b1c7da79cbbac219c590129f40cca1 k 1697c04376dac187f028f240cb0ccc9a C 2e03a06a91a1993a5c6e15b43784e5c3 3
+		840e43645d65217fd0d57914321db2bf : 5e871ec322ade9e74d44285c3ddad972 L e3bcb0065109e004bc6b18b1403fb810 rt
+		9f32b9cd5083733eead4380bb6551ac5 B 148cafcb02f1a203866f583dbdb253af & 519cc9d317d1a6db113c0da6e5560e71 d
+		40ec9716cfe72fe54201dae866e70ec5 V ace17452c10518e97caba9493898c910 U d244b3a33602a55c1ee8cf9c570dced9 -
+		25ee9123a9fdb7c164b29dfaa50d10b7 6 a87bc5bc8b3e5df44df2e2405561dd83 . f6e64e873007d53c7bf7873d639f4678 .
+		de8b17aa3cf358a1e8b9496dd99e20f1 7 21e73997781a1af8c506eded30c6143f 4 4518bf9cb085588761164be21442aa5d F
+		c38b4e845130be00f1a27a023241a500 ! f5a9cba4badf510bbde66e1012647c8c O c67d0abf9dd1bf2352613c243de4649b P
+		453b59cf0cb2813958d5518fc668639c Z 3dad6dcdedabfbb99ef2067f38d6bd67 B b2da7f7ca8c9be23ca445a7df954a4f2 8
+		8a3bf2c9eb10c811e50c91759e6e57cc G f9e0333c0725c22b198bc0c3a7aa4a51 x 61ea6df7256f910d1cb031979d7d1eda C
+		588b076556aa1b58810fe1f97fa77371 Y 8a3bf2c9eb10c811e50c91759e6e57cc G 61ea6df7256f910d1cb031979d7d1eda C
+
+		5892305501d6d7b3c944edcdfac487b0 W cb28d04e3bbe3bfd0bf0086b5b50b50e a d9f38cfa215b61b0baf8d3232ab71e5a c
+		bcfbf5865682d0d691b0ba7ad34b4e5f k 0ff718ec0df83d26df8ef58f27af3e1d y df27299772b1c1fa25bc74e3e0b28519 M
+		e74795b60c312f1fa48d956433cffd67 d e1bcd7c44b8fd705281926db43eae7f2 n c677cf0e0d2124629e224628a01a96fe e
+		df74545eacbaf90dc1206ef81be97bbb s 118dfa4e0e53dbde0a74554c16f4b6e8 A 6ac1a637edb8d167b9b0263b72d30d50 B
+		ab0135e45bdc858357c40d35e2a6d662 l 202c9276948bf52699ef2521988c2ed0 z f65810bb9e22c25d31a442b3ff1ec3e8 i
+		f079edc2a2167e5c3b5a0250130ad3cc g 6ac1a637edb8d167b9b0263b72d30d50 B 238d4f228563b5efcd46fdb0ee0fa367 ttl
+		df74545eacbaf90dc1206ef81be97bbb s acf02f7463a907c98ccfdaf1364e506a ( 761086404df3dd6a879c15722e6b5c72 P
+		f932ec8ddd3f2edd739a715090614687 1 ff8c0771c4dc7c6a1867ada5d47c1446 ) 26e1d6a4efa3a6e7d107a7003924ad9f rt
+		27e56f6930a29f7ccb1f2ed98c2c99be G 168527e83abcaee41f74514b627b651a ra 9ab22051e33a6755e407cc69ea9d02b4 a
+		9ab22051e33a6755e407cc69ea9d02b4 p 0e6e0a842f847b0997de866dcb69fd7d th 98dee45f3aa315b8d6d1c2a83208e158 u
+		90b5c188102f105c0cab2556d27b0788 rd 154690fb8d4578148e1513ab0f921076 P 34065c67fbb12cce0561001cd462d573 a
+		1d58b69f2b50b50daacfb7645a0fdd18 rt 13f9bf707f893bc39e10ce0475e151a7 I 3af52f596fd1c33743a59d7fa816aaa3 ts
+		2e1e8bd83e52ee09bb58297aeb1da158 Th 1e128beba3aff04a49fba2b291603579 Re aa296120499cf1ee8868ec6759895f9b m
+		f74eae3e6c5426b5da01fb1ad236e1a2 Tw faa3822c5ea6489c829cafc96ba86271 o a507f381a52898da1b4c63a3252559ef N
+		0e07a84d610ae1d5f823c02573825438 h 49b35e005120197a73685301f17ddd92 b f66ffe4a80deebe8ffca678d33e33f7b rs
+		2e1e8bd83e52ee09bb58297aeb1da158 Th 28a61bb021be4f7b4d43c3a995207169 re 33f3092e1d836e03bbbe45cf77f46183 S
+		517c4ccbb8292617db5d758e868023a0 M ac8de377a8f7d07007d10ad37eeaa88b r e42460162dfa7d0d9ad67efe32f9505c .
+		7f84b8c690c3b0412a0514e117a04c69 S fd84447f45a91a443e1863fa7a2c830e p 1e92bddfb0b4813630d147a38863543d ri
+		84db1131cd6f3ed6f630e58b879f781f tz 0fd741130b71b082f1eeebda6e2e2811 G a26fbebcec2437f07bad0ad6f6dc2313 o
+		29f53067840a08d6ca5c34834ad14e77 e 673324edd255d182fad9267db821f230 s d3f8a87a788b91db4886c6a4c0e5a82d To
+		11a9bc26a268f7cd5787ccae1a3a7fd6 to a68667571be8a5b2aaf5fd4f4f429d41 D 8d4c375b6b8db04ccee5077e5ba33863 Re
+		ed445642499ca8148938c51518771540 e a1d72e973b08017846fcd70a732b3143 i 70a7a183ec29e18634005ddde569f65d a
+		29dc936fcdb2723b69c638a022135ff2 tch 3961534a0448ed072632dce5dba32d2a e e27d84de85414214f105583f45d406d7 d
+		0dc0ef29925f3ddffb70ce1107ca1b4d ri b46e207278c9048939ff4eb56d1aa847 t 53f78e0dc0417e0f6a455299e15dca0c V
+		ef32aff5c88702eb5ed51c3a6836a583 7 732b43290b91d76547d1e4dd5e85ab8f - ade03db1bcb287d34d4ca9c9bd82c227 r
+		2f49cdc45bf918107fd3001a57d334cc U 96ab55702d9094de2f158ec3a5f1dd00 n a1a6c673257c30fe6b02ed3a5de7acec to
+		222c34badb06b16ff61a3bfdbd2087c5 l b6e528d8cb510fceabfcb1d280e539d9 W bd4a858bb84721b3c83498f9e4e33b20 a
+		222c34badb06b16ff61a3bfdbd2087c5 l a49e3b56b645aa6dc1de7a81898c92ba th b2c89ec08fe126b2e147bc3fceb5b72e S
+		05dd472da0bb30cf7eb463c5eea42aca u ce6488a8ce8ae8a8e81bdc631880780d c 000312319671d8f7f93eb9461828c238 s
+		49ba6d6bfe0d856eb6808ab901bf0ec3 F 207d6b243ade809ae1cad6507711d528 ro 37138974a7027ed973547cce5fba5db7 m
+		c458ef3d193bfddaecd9970d9a57f844 P bd4a858bb84721b3c83498f9e4e33b20 a af722d233b9e8ae897b72d15fd8b5bc4 ti
+		7f76b9fb361c686de8ec1c828c71da4b v 4dcdd7bc37f7b3dae2943ddb8618bbc1 9 8fc445dd8da1ee8f8542ca18a4816109 V
+		5eee84d45d3263e5db81dfcc62d101fa 2 
+		};
+	$d->{charset} = \%charset;
+
+
+	my %gset = qw{491 News 508 Children 531 Entertainment 496 Drama 533 Infotainment 507 Religion};
+	$d->{gset} = \%gset;
+}
+
+##############################################################################
+
+# Merge the foreground GIF over the background PNG and OCR the merged image.
+# $tries is handed to get_url(); $space_width is only used for single-line
+# images; a true $multiline selects parse_multiline_characters() instead.
+# Returns the chosen parser's result; dies if either image fails to decode.
+sub ocr_image
+{
+	my ($id, $fg_url, $bg_url, $tries, $space_width, $multiline) = @_;
+	$multiline = 0 if (!defined $multiline);
+	my $fg_gif_image = &get_url($fg_url, $tries);
+	my $bg_png_image = &get_url($bg_url, $tries);
+	my $fg_image = GD::Image->newFromGifData($fg_gif_image) or die "ocr_image: cannot decode GIF from $fg_url\n";
+	my $bg_image = GD::Image->newFromPngData($bg_png_image) or die "ocr_image: cannot decode PNG from $bg_url\n";
+
+	my ($fg_width, $fg_height) = ($fg_image->width, $fg_image->height);
+	$bg_image->copyMerge($fg_image, 0, 0, 0, 0, $fg_width, $fg_height, 100);
+	return parse_multiline_characters($id, $bg_image) if ($multiline);
+
+	# remove underline (everything from row 14 down).  colorResolve() is used
+	# because colorExact() returns -1 when white is not already in the palette.
+	my $white = $bg_image->colorResolve(255,255,255);
+	$bg_image->filledRectangle(0, 14, $fg_width, $fg_height, $white);
+	return parse_characters($id, $bg_image, $space_width);
+}
+
+##############################################################################
+
+# Recognise the text in a (possibly multi-line) image, one glyph at a time.
+#
+# The palette is reduced to background/ink, the image is cut into text lines
+# at blank pixel rows and each text line into glyphs at blank pixel columns.
+# Every glyph bitmap is MD5-hashed and the hash looked up in $d->{charset}.
+#
+# Parameters:
+#   $imgname - image identifier; not used by this routine
+#   $i       - GD::Image to read; assumed to be palette-based because
+#              getPixel() results are used as colour-table indices
+#
+# Globals: reads $opt->{debug} and $opt->{ocr_learn_mode}; reads and (in
+# learn mode) adds entries to $d->{charset}.
+#
+# Returns the recognised string.  An unknown glyph becomes "?", or "[md5]"
+# when $opt->{ocr_learn_mode} is defined.  A space is inserted where two
+# glyphs are 3 or more pixels apart, and one is appended at the end of every
+# text line once the string is non-empty.
+#
+# NOTE(review): the last pixel row and the last pixel column are never
+# scanned.  Presumably the hashes in $d->{charset} were generated with these
+# same bounds, so confirm before changing them.
+sub parse_multiline_characters
+{
+	my ($imgname,$i) = @_;
+
+	my ($width, $height) = $i->getBounds;
+	# debug levels above 3 trace every scanning decision
+	my $trace = ((defined $opt->{debug}) && ($opt->{debug} > 3));
+	&log("image bounds: x=$width, y=$height") if ($trace);
+
+	# flatten image colours: 1 = background (nearly white), 0 = ink.
+	# colorsTotal is a count, so the highest palette index is one less.
+	my @bgcolour;
+	foreach my $index (0..(($i->colorsTotal || 0) - 1)) {
+		my ($red, $green, $blue) = $i->rgb($index);
+		$bgcolour[$index] = (($red + $green + $blue) > (240*3)) ? 1 : 0;
+	}
+
+	# true if pixel row $y has no ink in columns 0 .. $width-2
+	my $row_is_blank = sub {
+		my ($y) = @_;
+		foreach my $x (0..($width-2)) {
+			return 0 if ($bgcolour[($i->getPixel($x,$y))] == 0);
+		}
+		return 1;
+	};
+
+	# true if pixel column $x has no ink in rows $top .. $bottom-1
+	my $column_is_blank = sub {
+		my ($x, $top, $bottom) = @_;
+		foreach my $y ($top..($bottom-1)) {
+			return 0 if ($bgcolour[($i->getPixel($x,$y))] == 0);
+		}
+		return 1;
+	};
+
+	# right-hand edge (x2) of the previous glyph; not reset between text lines
+	my $last_char_x;
+	# text recognised so far
+	my $s = "";
+	my $char_y2 = 0;
+
+	# each pass of this loop consumes one text line
+	while (1) {
+		# 1. find first non-blank horizontal
+		#    (scanning resumes where the previous text line ended)
+		my $char_y1 = $char_y2;
+		while (($char_y1 < ($height-1)) && $row_is_blank->($char_y1)) {
+			&log("[1] whole-of-line y $char_y1 was blank!") if ($trace);
+			$char_y1++;
+		}
+		if ($char_y1 >= ($height-1)) {
+			&log("[1] reached end of image without finding anymore non-blank y lines. end of image!") if ($trace);
+			last;
+		}
+		&log("[1] non-blank horizontal line found: y1=$char_y1") if ($trace);
+
+		# 2. find first blank horizontal
+		#    (or stop at the last row of the image)
+		$char_y2 = $char_y1;
+		while (($char_y2 < ($height-1)) && !$row_is_blank->($char_y2)) {
+			&log("[2] whole-of-line y $char_y2 was nonblank!") if ($trace);
+			$char_y2++;
+		}
+		&log("[2] blank horizontal line found: y2=$char_y2") if ($trace);
+
+		# each pass of this loop consumes one glyph of the text line
+		my $char_x2 = 0;
+		while (1) {
+			# 3. find first non-blank vertical between char_y1 and char_y2
+			my $char_x1 = $char_x2;
+			while (($char_x1 < ($width-1)) && $column_is_blank->($char_x1, $char_y1, $char_y2)) {
+				&log("[3] whole-of-line x $char_x1 was blank!") if ($trace);
+				$char_x1++;
+			}
+			if ($char_x1 >= ($width-1)) {
+				&log("[3] end of this line (x1 is $char_x1), looking for next line...") if ($trace);
+				# a space follows every text line (skipped while nothing has been recognised)
+				$s .= " " if ($s ne "");
+				last;
+			}
+			&log("[3] non-blank vertical line found: x1=$char_x1") if ($trace);
+
+			# 4. find first blank vertical between char_y1 and char_y2
+			#    (or stop at the last column of the image)
+			$char_x2 = $char_x1;
+			while (($char_x2 < ($width-1)) && !$column_is_blank->($char_x2, $char_y1, $char_y2)) {
+				&log("[4] whole-of-line x $char_x2 wasn't blank!") if ($trace);
+				$char_x2++;
+			}
+			&log("blank vertical line found: x2=$char_x2") if ($trace);
+
+			&log("looking at character between: x1,y1 $char_x1,$char_y1 and x2,y2 $char_x2,$char_y2 ........") if ($trace);
+			# 5. insert spaces
+			#    (a gap of 3 or more columns since the previous glyph becomes a space)
+			if ((defined $last_char_x) && (($char_x1 - $last_char_x) >= 3)) {
+				$s .= " ";
+			}
+			$last_char_x = $char_x2;
+
+			# 6. md5 char
+			#    (hash input: width, height, then every flattened pixel, column by
+			#    column, each packed as a 32-bit big-endian integer)
+			my $str = pack('NN',($char_x2-$char_x1),($char_y2-$char_y1));
+			foreach my $x ($char_x1..($char_x2-1)) {
+				foreach my $y ($char_y1..($char_y2-1)) {
+					$str .= pack('N', $bgcolour[($i->getPixel($x, $y))]);
+				}
+			}
+			my $md5 = Digest::MD5::md5_hex($str);
+
+			# 7. insert char
+			#    (a charset entry of "?" counts as unknown; learn mode records and
+			#    emits a "[md5]" placeholder instead of "?")
+			my $char = $d->{charset}->{$md5};
+			if ((defined $char) && ($char ne "?")) {
+				$s .= $char;
+			} elsif (defined $opt->{ocr_learn_mode}) {
+				$d->{charset}->{$md5} = "[".$md5."]";
+				$s .= "[".$md5."]";
+			} else {
+				$s .= "?";
+			}
+		}
+	}
+
+	&log("multiline ocr got '$s'") if (defined $opt->{debug} && $opt->{debug} > 1);
+	return $s;
+}
