Changeset 96
Legend:
- Unmodified
- Added
- Removed
-
shepherd
r94 r96 43 43 use DateTime::Format::Strptime; 44 44 use POSIX qw(strftime); 45 use Date::Manip; 45 46 use Time::HiRes qw(gettimeofday tv_interval); 46 47 use Algorithm::Diff; 48 use List::Compare; 47 49 48 50 # --------------------------------------------------------------------------- … … 116 118 117 119 print ucfirst($progname) . " v$version\n\n"; 118 #print "Cwd: $CWD.\n";119 120 120 121 # Any options Shepherd doesn't understand, we'll pass to the grabber(s) … … 224 225 my $found_data_percent = 0; 225 226 226 print "\nGrabber stage :\n";227 print "\nGrabber stage.\n"; 227 228 228 229 my $grabber; … … 230 231 while ($grabber = choose_grabber()) 231 232 { 233 234 my $missing_before; 235 232 236 $used_grabbers++; 233 237 … … 247 251 # very little cost in grabbing that extra data, and we can use it in the reconciler 248 252 # to verify that everything looks OK. 249 if ( $components->{$grabber}->{config}->{category}== 1)250 { 251 print "CAT1 grabber: grabbing timeslice.\n" ;253 if (query_config($grabber, 'category') == 1) 254 { 255 print "CAT1 grabber: grabbing timeslice.\n" if ($debug); 252 256 if ($timeslice->{start} != 1) 253 257 { 254 258 $comm .= " " . 255 $components->{$grabber}->{config}->{option_offset}.259 query_config($grabber, 'option_offset') . 256 260 " " . 257 261 ($timeslice->{start} - 1); … … 261 265 if ($timeslice->{start} != 1 262 266 and 263 ! $components->{$grabber}->{config}->{option_offset_eats_days})267 !query_config($grabber, 'option_offset_eats_days')) 264 268 { 265 269 $n -= $timeslice->{start}; 266 270 } 267 271 $comm .= " " . 268 $components->{$grabber}->{config}->{option_days}.272 query_config($grabber, 'option_days') . 269 273 " " . 270 274 $n; … … 279 283 write_file($tmpcf, 'temporary channels', [ $tmpchans ], [ 'channels' ]); 280 284 $comm .= " --channels_file $tmpcf"; 285 286 if (query_config($grabber, 'cache')) 287 { 288 $missing_before = convert_chandays_to_list(detect_missing_data()); 289 } 281 290 } 282 291 else 283 292 { 293 print "CAT2 grabber: grabbing everything.\n" if ($debug); 284 294 $comm .= " --days $days" if ($days); 285 295 $comm .= " --offset $opt->{offset}" if ($opt->{offset}); … … 306 316 307 317 last if ($found_data_percent >= $channel_ok_threshold_percent); 318 319 # Record what we grabbed for cacheable C1 grabbers 320 if ($missing_before) 321 { 322 323 my $missing_after = convert_chandays_to_list(detect_missing_data()); 324 print "Missing before: " . Dumper($missing_before) . "\nMissing after:" . Dumper($missing_after); 325 my $list = List::Compare->new($missing_before, $missing_after); 326 my @grabbed = $list->get_symmetric_difference(); 327 print "Difference: " . Dumper(\@grabbed); 328 record_cached($grabber, @grabbed); 329 write_config_file(); 330 } 308 331 } 309 332 … … 337 360 { 338 361 $gscore->{$_} = 0; 339 # Cache stuff: not enabled yet 340 # if ($components->{$_}->{config}->{category} == 1 341 # and 342 # $components->{$_}->{config}->{cache}) 343 # { 344 # $gscore->{$_ . ' [cache]'} = 0; 345 # } 362 if (query_config($_, 'category') == 1 363 and 364 query_config($_, 'cache')) 365 { 366 $gscore->{$_ . ' [cache]'} = 0; 367 } 346 368 } 347 369 } … … 380 402 else 381 403 { 382 if ( $components->{$grabber}->{config}->{category}== 2)404 if (query_config($grabber, 'category') == 2) 383 405 { 384 406 # We might want to run C1 grabbers multiple times … … 417 439 my $missing_slice = create_missing_slice(); 418 440 441 my $bestdq = 0; 442 419 443 # So! Compare C2 grabbers against the raw missing file, because we'll get 420 444 # everything. But compare C1 grabbers against the timeslice, because we'll … … 426 450 427 451 $hits = 0; 428 429 if ($grabber =~ /(.*) \[cache\]$/)430 { 431 $hits = find_cache_hits($1, $missing);432 $cat = 2;433 $ dq = $components->{$1}->{config}->{quality};452 $cat = query_config($grabber, 'category'); 453 $dq = query_config($grabber, 'quality'); 454 455 if ($grabber =~ /\[cache\]/) 456 { 457 $hits = find_cache_hits($grabber, $missing); 434 458 } 435 459 else 436 460 { 437 my $key = $missing;438 if ($c omponents->{$grabber}->{config}->{category}== 1)461 my $key; 462 if ($cat == 1) 439 463 { 440 464 $key = $missing_slice; 465 print "Grabber $grabber is Category 1: comparing capability to best timeslice.\n" if ($debug); 466 } 467 else 468 { 469 $key = $missing; 470 print "Grabber $grabber is Category 2: comparing capability to all wanted channels and days.\n" if ($debug); 441 471 } 442 472 foreach my $day (sort keys %$key) … … 444 474 my $val = supports_day($grabber, $day); 445 475 next unless ($val); 446 print "Day $day:" ;476 print "Day $day:" if ($debug); 447 477 foreach my $ch (@{$key->{$day}}) 448 478 { … … 455 485 print "\n"; 456 486 $hits = 1 if ($hits > 0 and $hits < 1); 457 458 $cat = $components->{$grabber}->{config}->{category}; 459 unless ($cat) 460 { 461 print "WARNING: Grabber $grabber has no category support ". 462 "in config.\n"; 463 $cat = 1; 464 } 465 466 $dq = $components->{$grabber}->{config}->{quality}; 467 unless ($dq) 468 { 469 print "WARNING: Grabber $grabber has no quality support ". 470 "in config.\n"; 471 $dq = 1; 472 } 473 } 474 } 487 } 488 } 489 475 490 $mult = 1; 476 $mult ++if ($cat == 2);491 $mult += 2 if ($cat == 2); 477 492 $mult *= 2 ** ($dq-1); 493 $score = int($hits * $mult); 478 494 479 495 $score = int($hits * $mult); … … 481 497 $gscore->{$grabber} = $score; 482 498 $total += $score; 483 } 499 500 # Keep track of the best quality of valid grabbers 501 if ($score and $dq > $bestdq) 502 { 503 $bestdq = $dq; 504 } 505 } 506 507 # Eliminate grabbers of data quality 1 if there are any quality 2s 508 # or 3s present. 509 foreach (keys %$gscore) 510 { 511 if ($gscore->{$_} 512 and 513 query_config($_, 'quality') == 1 514 and 515 $bestdq > 1) 516 { 517 $total -= $gscore->{$_}; 518 $gscore->{$_} = 0; 519 print "Zeroing grabber $_ due to low data quality.\n" if ($debug); 520 } 521 } 522 484 523 return $total; 485 524 } … … 489 528 my ($grabber, $ch) = @_; 490 529 491 my $channels_supported = $components->{$grabber}->{config}->{channels};530 my $channels_supported = query_config($grabber, 'channels'); 492 531 unless (defined $channels_supported) 493 532 { … … 508 547 my ($grabber, $day) = @_; 509 548 510 return 0 unless ($day <= $components->{$grabber}->{config}->{max_days});511 return 0.5 if ($day > $components->{$grabber}->{config}->{max_reliable_days});549 return 0 unless ($day <= query_config($grabber, 'max_days')); 550 return 0.5 if ($day > query_config($grabber, 'max_reliable_days')); 512 551 return 1; 513 552 } … … 517 556 my ($grabber, $missing) = @_; 518 557 519 return 5; 558 if ($grabber =~ /(.*) \[cache\]/) 559 { 560 $grabber = $1; 561 } 562 563 return 0 unless ($components->{$grabber}->{cached}); 564 565 my $hits = 0; 566 567 foreach my $day (keys %$missing) 568 { 569 my $date = substr(DateCalc("today", "+ " . ($day - 1) . " days"), 0, 8); 570 foreach my $ch (@{$missing->{$day}}) 571 { 572 $hits++ if (grep(/^$date:$ch$/, @{$components->{$grabber}->{cached}})); 573 } 574 } 575 return $hits; 520 576 } 521 577 … … 525 581 # made more fine-grained if we think grabbers will support that. 526 582 # 583 # TODO: Currently buggy in that it thinks we need 24 hours worth of 584 # data for day 1 when we don't. 585 # 527 586 sub detect_missing_data 528 587 { … … 531 590 my $timeslots_per_day = (24 * 60 * 60) / $timeslot_size; 532 591 592 # print "Channel data:\n" . Dumper($channel_data); 533 593 foreach my $ch (keys %$channels) 534 594 { … … 615 675 return $ret; 616 676 } 677 678 sub record_cached 679 { 680 my ($grabber, @grabbed) = shift; 681 682 print "Recording cache for grabber $grabber.\n" if ($debug); 683 684 my $gcache = $components->{$grabber}->{cached}; 685 $gcache = [ ] unless ($gcache); 686 my @newcache; 687 my $today = strftime("%Y%m%d", localtime); 688 689 # remove old chandays 690 foreach my $chanday (@$gcache) 691 { 692 $chanday =~ /(\d+):(.*)/; 693 if ($1 >= $today) 694 { 695 push (@newcache, $chanday); 696 } 697 } 698 699 # record new chandays 700 foreach (@grabbed) 701 { 702 push (@newcache, $_) unless (grep(/^$_$/, @newcache)); 703 } 704 $components->{$grabber}->{cached} = [ @newcache ]; 705 } 706 707 # Takes a 'missing' hash and returns it as a list like this: 708 # ( "20061018:ABC", "20061018:Seven", ... ) 709 sub convert_chandays_to_list 710 { 711 my $h = shift; 712 713 my @ret; 714 foreach my $day (keys %$h) 715 { 716 my $date = substr(DateCalc("today", "+ " . ($day - 1) . " days"), 0, 8); 717 foreach my $ch (@{$h->{$day}}) 718 { 719 push (@ret, "$date:$ch"); 720 } 721 } 722 @ret = sort @ret; 723 return \@ret; 724 } 725 617 726 618 727 # interpret xmltv data from this grabber/postprocessor … … 1320 1429 } 1321 1430 return @ret; 1431 } 1432 1433 sub query_config 1434 { 1435 my ($grabber, $key) = @_; 1436 1437 if ($grabber =~ /(.*) \[cache\]/) 1438 { 1439 $grabber = $1; 1440 } 1441 return undef unless ($components->{$grabber}); 1442 return $components->{$grabber}->{config}->{$key}; 1322 1443 } 1323 1444
