| 221 | | # Subs: Grabbing |
| 222 | | # ----------------------------------------- |
| 223 | | |
| 224 | | sub grab_data |
| 225 | | { |
| 226 | | my $used_grabbers = 0; |
| 227 | | |
| 228 | | &log("\nGrabber stage.\n"); |
| 229 | | |
| 230 | | &analyze_plugin_data("",1); |
| 231 | | |
| 232 | | while (my $grabber = choose_grabber()) |
| 233 | | { |
| 234 | | $grabber_found_all_data = 0; |
| 235 | | $used_grabbers++; |
| 236 | | |
| 237 | | $components->{$grabber}->{laststatus} = "unknown"; |
| 238 | | |
| 239 | | &log((sprintf "\nSHEPHERD: Using grabber: (%d) %s\n", $used_grabbers, $grabber)); |
| 240 | | |
| 241 | | my $output = "$CWD/grabbers/$grabber/output.xmltv"; |
| 242 | | |
| 243 | | my $comm = "$CWD/grabbers/$grabber/$grabber " . |
| 244 | | "--region $region " . |
| 245 | | "--output $output"; |
| 246 | | |
| 247 | | # Category 1 grabbers (i.e. slow ones) are requested to only fetch the timeslice |
| 248 | | # that we need. Category 2 grabbers are requested to get everything, since there's |
| 249 | | # very little cost in grabbing that extra data, and we can use it in the reconciler |
| 250 | | # to verify that everything looks OK. |
| 251 | | if (query_config($grabber, 'category') == 1) |
| 252 | | { |
| 253 | | &log("$grabber is Category 1: grabbing timeslice.\n") if ($debug); |
| 254 | | |
| 255 | | record_requested_chandays($grabber, $timeslice); |
| 256 | | |
| 257 | | if ($timeslice->{start} != 0) |
| 258 | | { |
| 259 | | $comm .= " " . |
| 260 | | query_config($grabber, 'option_days_offset') . |
| 261 | | " " . |
| 262 | | $timeslice->{start}; |
| 263 | | } |
| 264 | | |
| 265 | | my $n = $timeslice->{stop} + 1; |
| 266 | | if ($timeslice->{start} != 0 |
| 267 | | and |
| 268 | | !query_config($grabber, 'option_offset_eats_days')) |
| 269 | | { |
| 270 | | $n -= $timeslice->{start}; |
| 271 | | } |
| 272 | | $comm .= " " . |
| 273 | | query_config($grabber, 'option_days') . |
| 274 | | " " . |
| 275 | | $n; |
| 276 | | |
| 277 | | # Write a temporary channels file specifying only the channels we want |
| 278 | | my $tmpchans; |
| 279 | | foreach (@{$timeslice->{chans}}) |
| 280 | | { |
| 281 | | $tmpchans->{$_} = $channels->{$_}; |
| 282 | | } |
| 283 | | my $tmpcf = "$CWD/channels.conf.tmp"; |
| 284 | | write_file($tmpcf, 'temporary channels', [ $tmpchans ], [ 'channels' ]); |
| 285 | | $comm .= " --channels_file $tmpcf"; |
| 286 | | } |
| 287 | | else |
| 288 | | { |
| 289 | | &log("$grabber is category 2: grabbing everything.\n") if ($debug); |
| 290 | | $comm .= " --days $days" if ($days); |
| 291 | | $comm .= " --offset $opt->{offset}" if ($opt->{offset}); |
| 292 | | $comm .= " --channels_file $channels_file"; |
| 293 | | } |
| 294 | | $comm .= " --debug" if ($debug); |
| 295 | | $comm .= " @ARGV" if (@ARGV); |
| 296 | | |
| 297 | | my $retval = 0; |
| 298 | | if ((defined $opt->{dontcallgrabbers}) && ($opt->{dontcallgrabbers})) { |
| 299 | | &log("SHEPHERD: not calling grabber because of --dontcallgrabbers option, but will instead use existing $output\n"); |
| 300 | | &log("SHEPHERD: would have called: $comm\n") if ($debug); |
| 301 | | } else { |
| 302 | | &log("SHEPHERD: Excuting command: $comm\n"); |
| 303 | | chdir "$CWD/grabbers/$grabber/"; |
| 304 | | $retval = call_prog($comm); |
| 305 | | chdir $CWD; |
| 306 | | } |
| 307 | | |
| 308 | | if ($retval != 0) { |
| 309 | | &log("grabber returned with non-zero return code $retval: assuming it failed.\n"); |
| 310 | | next; |
| 311 | | } |
| 312 | | |
| 313 | | # soak up the data we just collected |
| 314 | | &soak_up_data($grabber, $output, "grabber"); |
| 315 | | $components->{$grabber}->{laststatus} = $plugin_data->{$grabber}->{laststatus}; |
| 316 | | $components->{$grabber}->{lastdata} = time if ($plugin_data->{$grabber}->{valid}); |
| 317 | | |
| 318 | | # check to see if we have all the data we want |
| 319 | | $grabber_found_all_data = &analyze_plugin_data("analysis of all grabbers so far"); |
| 320 | | |
| 321 | | # Record what we grabbed from cacheable C1 grabbers |
| 322 | | if (query_config($grabber, 'category') == 1 and query_config($grabber, 'cache')) |
| 323 | | { |
| 324 | | my $missing_before = convert_dayhash_to_list($missing); |
| 325 | | my $missing_after = convert_dayhash_to_list(detect_missing_data()); |
| 326 | | my $list = List::Compare->new($missing_before, $missing_after); |
| 327 | | my @grabbed = $list->get_symmetric_difference(); |
| 328 | | &log("Grabbed: " . join (', ', @grabbed) . ".\n") if ($debug); |
| 329 | | record_cached($grabber, @grabbed); |
| 330 | | write_config_file(); |
| 331 | | } |
| 332 | | |
| 333 | | last if ($grabber_found_all_data); |
| 334 | | } |
| 335 | | |
| 336 | | |
| 337 | | if ($used_grabbers == 0) |
| 338 | | { |
| 339 | | &log("No valid grabbers installed/enabled!\n"); |
| 340 | | return; |
| 341 | | } |
| 342 | | |
| 343 | | unless ($grabber_found_all_data) |
| 344 | | { |
| 345 | | &log("SHEPHERD: Ran through ALL grabbers but still missing data!!! :(\n"); |
| 346 | | return; |
| 347 | | } |
| 348 | | } |
| 349 | | |
| 350 | | # ----------------------------------------- |
| 351 | | # Subs: Intelli-random grabber selection |
| 352 | | # ----------------------------------------- |
| 353 | | |
| 354 | | sub choose_grabber |
| 355 | | { |
| 356 | | if (defined $gscore) # Reset score hash |
| 357 | | { |
| 358 | | foreach (keys %$gscore) |
| 359 | | { |
| 360 | | $gscore->{$_} = 0; |
| 361 | | } |
| 362 | | } |
| 363 | | else # Create score hash |
| 364 | | { |
| 365 | | foreach (query_grabbers()) |
| 366 | | { |
| 367 | | unless ($components->{$_}->{disabled}) |
| 368 | | { |
| 369 | | $gscore->{$_} = 0; |
| 370 | | if (query_config($_, 'category') == 1 and query_config($_, 'cache')) |
| 371 | | { |
| 372 | | $gscore->{$_ . ' [cache]'} = 0; |
| 373 | | } |
| 374 | | } |
| 375 | | } |
| 376 | | } |
| 377 | | |
| 378 | | $missing = detect_missing_data(); |
| 379 | | $timeslice = find_best_timeslice(); |
| 380 | | |
| 381 | | if ($debug) |
| 382 | | { |
| 383 | | &log((sprintf "Best timeslice: day%s of channels %s (%d chandays).\n", |
| 384 | | ($timeslice->{start} == $timeslice->{stop} ? |
| 385 | | " $timeslice->{start}" : |
| 386 | | "s $timeslice->{start} - $timeslice->{stop}"), |
| 387 | | join(', ', @{$timeslice->{chans}}), |
| 388 | | $timeslice->{chandays})); |
| 389 | | } |
| 390 | | |
| 391 | | my $total = score_grabbers(); |
| 392 | | |
| 393 | | if ($debug) |
| 394 | | { |
| 395 | | &log("Grabber selection:\n"); |
| 396 | | foreach (sort { $gscore->{$b} <=> $gscore->{$a} } keys %$gscore) |
| 397 | | { |
| 398 | | next if ($_ =~ /\[cache\]/); |
| 399 | | |
| 400 | | my $score = $gscore->{$_}; |
| 401 | | my $cscore = $gscore->{"$_ [cache]"}; |
| 402 | | my $cstr = $cscore ? "(inc. $cscore cache pts)" : ""; |
| 403 | | |
| 404 | | if ($opt->{randomize}) |
| 405 | | { |
| 406 | | &log((sprintf "%15s %6.1f%% %9s %s\n", |
| 407 | | $_, |
| 408 | | ($total ? 100* $score / $total : 0), |
| 409 | | "$score pts", |
| 410 | | $cstr)); |
| 411 | | } |
| 412 | | else |
| 413 | | { |
| 414 | | &log((sprintf "%15s %4s pts %s\n", |
| 415 | | $_, |
| 416 | | $score, |
| 417 | | $cstr)); |
| 418 | | } |
| 419 | | } |
| 420 | | } |
| 421 | | |
| 422 | | return undef unless ($total); |
| 423 | | |
| 424 | | # Select a grabber |
| 425 | | |
| 426 | | # If the user has specified a pref_title_source -- i.e. he is |
| 427 | | # transitioning from a known grabber -- then we make sure it |
| 428 | | # has run at least once, to build the list of title translations. |
| 429 | | if ($pref_title_source) |
| 430 | | { |
| 431 | | my @prefs = split(/,/, $pref_title_source); |
| 432 | | foreach my $grabber (@prefs) |
| 433 | | { |
| 434 | | unless ($components->{$grabber}->{lastdata}) |
| 435 | | { |
| 436 | | &log("Need to build title translation list for transitional grabber $grabber.\n"); |
| 437 | | return select_grabber($grabber, $gscore) if ($gscore->{$grabber}); |
| 438 | | &log("WARNING: Can't run $grabber to build title translation list!\n"); |
| 439 | | } |
| 440 | | } |
| 441 | | } |
| 442 | | |
| 443 | | # Either do it randomly based on scores, or just return the |
| 444 | | # highest-scoring grabber, depending on whether --randomize has |
| 445 | | # been used. |
| 446 | | |
| 447 | | my $r = int(rand($total)); |
| 448 | | my $c = 0; |
| 449 | | my $best; |
| 450 | | |
| 451 | | foreach my $grabber (keys %$gscore) |
| 452 | | { |
| 453 | | next if (!$gscore->{$grabber} or $grabber =~ /\[cache\]/); |
| 454 | | if ($opt->{randomize}) |
| 455 | | { |
| 456 | | if ($r >= $c and $r < ($c + $gscore->{$grabber})) |
| 457 | | { |
| 458 | | return select_grabber($grabber, $gscore); |
| 459 | | } |
| 460 | | $c += $gscore->{$grabber}; |
| 461 | | } |
| 462 | | else |
| 463 | | { |
| 464 | | if (!$best or $gscore->{$grabber} > $gscore->{$best}) |
| 465 | | { |
| 466 | | $best = $grabber; |
| 467 | | } |
| 468 | | } |
| 469 | | } |
| 470 | | |
| 471 | | if ($opt->{randomize} or !$best) |
| 472 | | { |
| 473 | | die "ERROR: failed to choose grabber."; |
| 474 | | } |
| 475 | | return select_grabber($best, $gscore); |
| 476 | | } |
| 477 | | |
| 478 | | sub select_grabber |
| 479 | | { |
| 480 | | my ($grabber, $gscore) = @_; |
| 481 | | |
| 482 | | &log("Selected $grabber.\n") if ($debug); |
| 483 | | if (query_config($grabber, 'category') == 2) |
| 484 | | { |
| 485 | | # We might want to run C1 grabbers multiple times |
| 486 | | # to grab various timeslices, but not C2 grabbers, |
| 487 | | # which should get everything at once. |
| 488 | | delete $gscore->{$grabber}; |
| 489 | | } |
| 490 | | return $grabber; |
| 491 | | } |
| 492 | | |
| 493 | | # Grabbers earn 1 point for each slot or chanday they can fill. |
| 494 | | # This score is multiplied if the grabber: |
| 495 | | # * is a category 2 grabber (i.e. fast/cheap) |
| 496 | | # * is a category 1 grabber that has the data we want in a cache |
| 497 | | # * can supply high-quality data |
| 498 | | # Very low quality grabbers score 0 unless we need them; i.e. they're backups. |
| 499 | | sub score_grabbers |
| 500 | | { |
| 501 | | my ($score, $total, $day, $catbonus, $dqbonus, $mult, $key); |
| 502 | | |
| 503 | | my $bestdq = 0; |
| 504 | | |
| 505 | | # Compare C2 grabbers against the raw missing file, because we'll get |
| 506 | | # everything. But compare C1 grabbers against the timeslice, because we'll |
| 507 | | # only ask them for a slice. This goes for the [cache] and regular C1s. |
| 508 | | foreach my $grabber (keys %$gscore) |
| 509 | | { |
| 510 | | # for each slot, say whether we can fill it or not -- that is, |
| 511 | | # whether we support this channel and this day #. |
| 512 | | |
| 513 | | my $hits = 0; |
| 514 | | my $cat = query_config($grabber, 'category'); |
| 515 | | my $dq = query_config($grabber, 'quality'); |
| 516 | | |
| 517 | | if ($cat == 1) |
| 518 | | { |
| 519 | | $key = cut_down_missing($grabber); |
| 520 | | # &log("Grabber $grabber is Category 1: comparing capability to best timeslice.\n") if ($debug); |
| 521 | | } |
| 522 | | else |
| 523 | | { |
| 524 | | $key = $missing; |
| 525 | | # &log("Grabber $grabber is Category 2: comparing capability to all wanted channels and days.\n") if ($debug); |
| 526 | | } |
| 527 | | |
| 528 | | if ($grabber =~ /\[cache\]/) |
| 529 | | { |
| 530 | | $hits = find_cache_hits($grabber, $key); |
| 531 | | } |
| 532 | | else |
| 533 | | { |
| 534 | | foreach my $day (sort keys %$key) |
| 535 | | { |
| 536 | | my $val = supports_day($grabber, $day); |
| 537 | | next unless ($val); |
| 538 | | # &log("Day $day:") if ($debug); |
| 539 | | foreach my $ch (@{$key->{$day}}) |
| 540 | | { |
| 541 | | if (supports_channel($grabber, $ch, $day)) |
| 542 | | { |
| 543 | | # &log(" $ch") if ($debug); |
| 544 | | $hits += $val; |
| 545 | | } |
| 546 | | } |
| 547 | | # &log("\n") if $debug; |
| 548 | | $hits = 1 if ($hits > 0 and $hits < 1); |
| 549 | | } |
| 550 | | } |
| 551 | | |
| 552 | | my $catbonus = 1; |
| 553 | | $catbonus = 3 if ($cat == 2); |
| 554 | | if ($grabber =~ /\[cache\]/) |
| 555 | | { |
| 556 | | # Bonus is on a sliding scale between 1 and 2 depending on |
| 557 | | # % of required data in cache |
| 558 | | $catbonus += $hits / $timeslice->{chandays}; |
| 559 | | } |
| 560 | | |
| 561 | | my $dqbonus = 2 ** ($dq-1); |
| 562 | | |
| 563 | | my $mult = $dq ** $catbonus; |
| 564 | | |
| 565 | | my $score = int($hits * $mult); |
| 566 | | |
| 567 | | if ($debug) |
| 568 | | { |
| 569 | | my $str = sprintf "Grabber %s can supply %d chandays", |
| 570 | | $grabber, $hits; |
| 571 | | if ($hits) |
| 572 | | { |
| 573 | | $str .= sprintf " at x%.1f (cat: %d, DQ: %d): %d pts", |
| 574 | | $mult, |
| 575 | | $cat, |
| 576 | | $dq, |
| 577 | | $score; |
| 578 | | } |
| 579 | | &log("$str.\n"); |
| 580 | | } |
| 581 | | |
| 582 | | $gscore->{$grabber} += $score; |
| 583 | | $total += $score; |
| 584 | | if ($grabber =~ /\[cache\]/) |
| 585 | | { |
| 586 | | $gscore->{query_name($grabber)} += $score; |
| 587 | | } |
| 588 | | |
| 589 | | if ($score and $dq > $bestdq) |
| 590 | | { |
| 591 | | $bestdq = $dq; |
| 592 | | } |
| 593 | | } |
| 594 | | |
| 595 | | # Eliminate grabbers of data quality 1 if there are any better-quality |
| 596 | | # alternatives when using randomize. |
| 597 | | if ($opt->{randomize}) |
| 598 | | { |
| 599 | | foreach (keys %$gscore) |
| 600 | | { |
| 601 | | if ($gscore->{$_} |
| 602 | | and |
| 603 | | query_config($_, 'quality') == 1 |
| 604 | | and |
| 605 | | $bestdq > 1) |
| 606 | | { |
| 607 | | $total -= $gscore->{$_}; |
| 608 | | $gscore->{$_} = 0; |
| 609 | | &log("Zeroing grabber $_ due to low data quality.\n") if ($debug); |
| 610 | | } |
| 611 | | } |
| 612 | | } |
| 613 | | |
| 614 | | return $total; |
| 615 | | } |
| 616 | | |
| 617 | | # Return 1 if the grabber can provide data for this channel, else 0. |
| 618 | | sub supports_channel |
| 619 | | { |
| 620 | | my ($grabber, $ch, $day) = @_; |
| 621 | | |
| 622 | | my $mdpc = query_config($grabber, 'max_days_per_chan'); |
| 623 | | if ($mdpc) |
| 624 | | { |
| 625 | | if ($mdpc->{$ch}) |
| 626 | | { |
| 627 | | return ($mdpc->{$ch} > $day); |
| 628 | | } |
| 629 | | } |
| 630 | | |
| 631 | | my $channels_supported = query_config($grabber, 'channels'); |
| 632 | | unless (defined $channels_supported) |
| 633 | | { |
| 634 | | &log("WARNING: Grabber $grabber has no channel support " . |
| 635 | | "specified in config.\n"); |
| 636 | | $channels_supported = ''; |
| 637 | | } |
| 638 | | |
| 639 | | return 1 unless ($channels_supported); # Empty string means we support all |
| 640 | | |
| 641 | | $ch =~ s/ /_/g; |
| 642 | | my $match = ($channels_supported =~ /\b$ch\b/); |
| 643 | | my $exceptions = ($channels_supported =~/^-/); |
| 644 | | return ($match != $exceptions); |
| 645 | | } |
| 646 | | |
| 647 | | # Return 0 if the grabber can't provide data for this day, |
| 648 | | # 1 if it can reliably, and 0.5 if it can unreliably. |
| 649 | | # |
| 650 | | # Note that a max_days of 7 means the grabber can retrieve data for |
| 651 | | # today plus 6 days. |
| 652 | | sub supports_day |
| 653 | | { |
| 654 | | my ($grabber, $day) = @_; |
| 655 | | |
| 656 | | return 0 unless ($day < query_config($grabber, 'max_days')); |
| 657 | | return 0.5 if ($day >= query_config($grabber, 'max_reliable_days')); |
| 658 | | return 1; |
| 659 | | } |
| 660 | | |
| 661 | | sub find_cache_hits |
| 662 | | { |
| 663 | | my ($grabber, $key) = @_; |
| 664 | | |
| 665 | | $grabber = query_name($grabber); |
| 666 | | |
| 667 | | return 0 unless ($components->{$grabber}->{cached}); |
| 668 | | |
| 669 | | my $hits = 0; |
| 670 | | |
| 671 | | foreach my $day (keys %$key) |
| 672 | | { |
| 673 | | my $date = substr(DateCalc("today", "+ $day days"), 0, 8); |
| 674 | | foreach my $ch (@{$key->{$day}}) |
| 675 | | { |
| 676 | | $hits++ if (grep(/^$date:$ch$/, @{$components->{$grabber}->{cached}})); |
| 677 | | } |
| 678 | | } |
| 679 | | return $hits; |
| 680 | | } |
| 681 | | |
| 682 | | # Build a dayhash of what channel/day data we're currently missing. |
| 683 | | # I think granularity of one day is good for now; could possibly be |
| 684 | | # made more fine-grained if we think grabbers will support that. |
| 685 | | sub detect_missing_data |
| 686 | | { |
| 687 | | my $m = { }; |
| 688 | | |
| 689 | | my $chandays = 0; |
| 690 | | foreach my $ch (keys %$channels) |
| 691 | | { |
| 692 | | # is this channel missing too much data? |
| 693 | | unless ($channel_data->{$ch}->{analysis}->{data_ok}) { |
| 694 | | # not ok - record which days are bad |
| 695 | | foreach my $day (@{($channel_data->{$ch}->{analysis}->{day})}) { |
| 696 | | push(@{($m->{($day->{num})})}, $ch) unless ($day->{day_ok}); |
| 697 | | } |
| 698 | | } |
| 699 | | } |
| 700 | | |
| 701 | | foreach my $day (keys %$m) |
| 702 | | { |
| 703 | | $m->{$day} = [ sort @{$m->{$day}} ]; |
| 704 | | $chandays += scalar(@{$m->{$day}}) if ($debug); |
| 705 | | } |
| 706 | | |
| 707 | | if ($debug) |
| 708 | | { |
| 709 | | &log("Need data for days " . join(", ", sort keys %$m) . |
| 710 | | " ($chandays chandays).\n"); |
| 711 | | } |
| 712 | | return $m; |
| 713 | | } |
| 714 | | |
| 715 | | # Find the largest timeslice in the current $missing dayhash; i.e. |
| 716 | | # something like "Days 4 - 6 of ABC and SBS." This works by iterating |
| 717 | | # through the days and looking for overlaps where consecutive days |
| 718 | | # want the same channels. |
| 719 | | sub find_best_timeslice |
| 720 | | { |
| 721 | | my ($overlap, $a); |
| 722 | | my $slice = { 'chandays' => 0 }; |
| 723 | | |
| 724 | | foreach my $day (0 .. $days-1) |
| 725 | | { |
| 726 | | consider_slice($slice, $day, $day, @{$missing->{$day}}); |
| 727 | | $overlap = $missing->{$day}; |
| 728 | | foreach my $nextday (($day + 1) .. $days-1) |
| 729 | | { |
| 730 | | last unless ($missing->{$nextday}); |
| 731 | | $a = Algorithm::Diff::LCS($overlap, $missing->{$nextday}); |
| 732 | | last unless ($a and @{$a}); |
| 733 | | consider_slice($slice, $day, $nextday, @{$a}); |
| 734 | | $overlap = $a; |
| 735 | | } |
| 736 | | } |
| 737 | | return $slice; |
| 738 | | } |
| 739 | | |
| 740 | | sub consider_slice |
| 741 | | { |
| 742 | | my ($slice, $startday, $stopday, @chans) = @_; |
| 743 | | |
| 744 | | my $challenger = ($stopday - $startday + 1) * scalar(@chans); |
| 745 | | return unless ($challenger > $slice->{chandays}); |
| 746 | | |
| 747 | | # We have a winner! |
| 748 | | $slice->{start} = $startday; |
| 749 | | $slice->{stop} = $stopday; |
| 750 | | $slice->{chans} = [ @chans ]; |
| 751 | | $slice->{chandays} = $challenger; |
| 752 | | } |
| 753 | | |
| 754 | | # Record what a cacheable C1 grabber has just retrieved for us, |
| 755 | | # so we know next time that this data can be grabbed quickly. |
| 756 | | sub record_cached |
| 757 | | { |
| 758 | | my ($grabber, @grabbed) = @_; |
| 759 | | |
| 760 | | &log("Recording cache for grabber $grabber.\n") if ($debug); |
| 761 | | |
| 762 | | my $gcache = $components->{$grabber}->{cached}; |
| 763 | | $gcache = [ ] unless ($gcache); |
| 764 | | my @newcache; |
| 765 | | my $today = strftime("%Y%m%d", localtime); |
| 766 | | |
| 767 | | # remove old chandays |
| 768 | | foreach my $chanday (@$gcache) |
| 769 | | { |
| 770 | | $chanday =~ /(\d+):(.*)/; |
| 771 | | if ($1 >= $today) |
| 772 | | { |
| 773 | | push (@newcache, $chanday); |
| 774 | | } |
| 775 | | } |
| 776 | | |
| 777 | | # record new chandays |
| 778 | | foreach my $chanday (@grabbed) |
| 779 | | { |
| 780 | | push (@newcache, $chanday) unless (grep(/^$chanday$/, @newcache)); |
| 781 | | } |
| 782 | | $components->{$grabber}->{cached} = [ @newcache ]; |
| 783 | | } |
| 784 | | |
| 785 | | # Takes a dayhash and returns it as a list like this: |
| 786 | | # ( "20061018:ABC", "20061018:Seven", ... ) |
| 787 | | sub convert_dayhash_to_list |
| 788 | | { |
| 789 | | my $h = shift; |
| 790 | | |
| 791 | | my @ret; |
| 792 | | foreach my $day (keys %$h) |
| 793 | | { |
| 794 | | my $date = substr(DateCalc("today", "+ $day days"), 0, 8); |
| 795 | | foreach my $ch (@{$h->{$day}}) |
| 796 | | { |
| 797 | | push (@ret, "$date:$ch"); |
| 798 | | } |
| 799 | | } |
| 800 | | @ret = sort @ret; |
| 801 | | return \@ret; |
| 802 | | } |
| 803 | | |
| 804 | | # If we're about to re-try a grabber, make sure that we're not asking |
| 805 | | # it for the same data. That is, prevent a broken C1 grabber causing |
| 806 | | # an infinite loop. |
| 807 | | sub record_requested_chandays |
| 808 | | { |
| 809 | | my ($grabber, $slice) = @_; |
| 810 | | |
| 811 | | &log("Recording timeslice request; will not request these chandays " . |
| 812 | | "from $grabber again.\n") if ($debug); |
| 813 | | |
| 814 | | my @requested; |
| 815 | | for my $day ($slice->{start} .. $slice->{stop}) |
| 816 | | { |
| 817 | | foreach my $ch (@{$slice->{chans}}) |
| 818 | | { |
| 819 | | push @requested, "$day:$ch"; |
| 820 | | } |
| 821 | | } |
| 822 | | if ($grabbed->{$grabber}) |
| 823 | | { |
| 824 | | push @{$grabbed->{$grabber}}, @requested; |
| 825 | | } |
| 826 | | else |
| 827 | | { |
| 828 | | $grabbed->{$grabber} = [ @requested ]; |
| 829 | | } |
| 830 | | } |
| 831 | | |
| 832 | | # If this grabber has been called previously, remove those chandays |
| 833 | | # from the current request -- we don't want to ask it over and over |
| 834 | | # for a timeslice that it has already failed to provide. |
| 835 | | sub cut_down_missing |
| 836 | | { |
| 837 | | my $grabber = shift; |
| 838 | | |
| 839 | | $grabber = query_name($grabber); |
| 840 | | my $dayhash = {}; |
| 841 | | |
| 842 | | # Take the timeslice and expand it to a dayhash, while pruning |
| 843 | | # any chandays that have previously been requested from this |
| 844 | | # grabber. |
| 845 | | foreach my $day ($timeslice->{start} .. $timeslice->{stop}) |
| 846 | | { |
| 847 | | my @chans; |
| 848 | | foreach my $ch (@{$timeslice->{chans}}) |
| 849 | | { |
| 850 | | unless ($grabbed->{$grabber} and grep(/$day:$ch/, @{$grabbed->{$grabber}})) |
| 851 | | { |
| 852 | | push (@chans, $ch) |
| 853 | | } |
| 854 | | } |
| 855 | | $dayhash->{$day} = [ @chans ] if (@chans); |
| 856 | | } |
| 857 | | |
| 858 | | return $dayhash; |
| 859 | | } |
| 860 | | |
| 861 | | # ----------------------------------------- |
| 862 | | # Subs: Analyzing data |
| 863 | | # ----------------------------------------- |
| 864 | | |
| 865 | | # interpret xmltv data from this grabber/postprocessor |
| 866 | | sub soak_up_data |
| 867 | | { |
| 868 | | my ($plugin, $output, $plugintype) = @_; |
| 869 | | |
| 870 | | if (! -r $output) { |
| 871 | | &log((sprintf "SHEPHERD: Warning: plugin '%s' output file '%s' does not exist\n",$plugin,$output)); |
| 872 | | return; |
| 873 | | } |
| 874 | | |
| 875 | | my $this_plugin = $plugin_data->{$plugin}; |
| 876 | | &log((sprintf "SHEPHERD: Started parsing XMLTV from '%s' in '%s' .. any errors below are from parser:\n",$plugin,$output)); |
| 877 | | eval { $this_plugin->{xmltv} = XMLTV::parsefiles($output); }; |
| 878 | | &log((sprintf "SHEPHERD: Completed XMLTV parsing from '%s'\n",$plugin)); |
| 879 | | |
| 880 | | if (!($this_plugin->{xmltv})) { |
| 881 | | &log("WARNING: Plugin $plugin didn't seem to return any valid XMLTV!\n"); |
| 882 | | return; |
| 883 | | } |
| 884 | | |
| 885 | | $this_plugin->{valid} = 1; |
| 886 | | $this_plugin->{output_filename} = $output; |
| 887 | | |
| 888 | | my $xmltv = $this_plugin->{xmltv}; |
| 889 | | my ($encoding, $credits, $chan, $progs) = @$xmltv; |
| 890 | | $this_plugin->{total_duration} = 0; |
| 891 | | $this_plugin->{programmes} = 0; |
| 892 | | $this_plugin->{progs_with_invalid_date} = 0; # explicitly track unparsable dates |
| 893 | | $this_plugin->{progs_with_unknown_channel} = 0; # explicitly track unknown channels |
| 894 | | |
| 895 | | my $seen_channels_with_data = 0; |
| 896 | | |
| 897 | | # |
| 898 | | # first iterate through all programmes and see if there are any channels we don't know about |
| 899 | | # |
| 900 | | my %chan_xml_list; |
| 901 | | foreach my $ch (sort keys %{$channels}) { |
| 902 | | $chan_xml_list{($channels->{$ch})} = 1; |
| 903 | | } |
| 904 | | foreach my $prog (@$progs) { |
| 905 | | if (!defined $chan_xml_list{($prog->{channel})}) { |
| 906 | | $this_plugin->{progs_with_unknown_channel}++; |
| 907 | | &log((sprintf " - WARNING: plugin '%s' returned data for unknown channel '%s': ignored.\n",$plugin,$prog->{channel})); |
| 908 | | $chan_xml_list{($prog->{channel})} = 1; # so we warn only once |
| 909 | | } |
| 910 | | } |
| 911 | | |
| 912 | | # iterate thru channels |
| 913 | | foreach my $ch (sort keys %{$channels}) { |
| 914 | | my $seen_progs_on_this_channel = 0; |
| 915 | | |
| 916 | | # iterate thru programmes per channel |
| 917 | | foreach my $prog (@$progs) { |
| 918 | | next if ($prog->{channel} ne $channels->{$ch}); |
| 919 | | |
| 920 | | my $t1 = &parse_xmltv_date($prog->{start}); |
| 921 | | my $t2 = &parse_xmltv_date($prog->{stop}); |
| 922 | | |
| 923 | | if (!$t1 || !$t2) { |
| 924 | | &log((sprintf " - WARNING: plugin '%s' returned programme data with invalid timestamp format: \"%s\": can't parse.\n", |
| 925 | | $plugin,(!$t1 ? $prog->{start} : $prog->{stop}))) if (!$this_plugin->{progs_with_invalid_date}); |
| 926 | | $this_plugin->{progs_with_invalid_date}++; |
| 927 | | next; |
| 928 | | } |
| 929 | | |
| 930 | | # store plugin-specific stats |
| 931 | | $this_plugin->{programmes}++; |
| 932 | | $this_plugin->{total_duration} += ($t2 - $t1); |
| 933 | | $seen_progs_on_this_channel++; |
| 934 | | $this_plugin->{earliest_data_seen} = $t1 if (!defined $this_plugin->{earliest_data_seen}); |
| 935 | | $this_plugin->{earliest_data_seen} = $t1 if ($t1 < $this_plugin->{earliest_data_seen}); |
| 936 | | $this_plugin->{latest_data_seen} = $t2 if (!defined $this_plugin->{latest_data_seen}); |
| 937 | | $this_plugin->{latest_data_seen} = $t2 if ($t2 > $this_plugin->{latest_data_seen}); |
| 938 | | |
| 939 | | # store channel-specific stats |
| 940 | | $channel_data->{$ch}->{programmes}++; |
| 941 | | $channel_data->{$ch}->{total_duration} += ($t2 - $t1); |
| 942 | | |
| 943 | | # programme is outside the timeslots we are interested in. |
| 944 | | next if ($t1 > $policy{endtime}); |
| 945 | | next if ($t2 < $policy{starttime}); |
| 946 | | |
| 947 | | # store timeslot info |
| 948 | | my $start_slotnum = 0; |
| 949 | | $start_slotnum = int(($t1 - $policy{starttime}) / $policy{timeslot_size}) |
| 950 | | if ($t1 >= $policy{starttime}); |
| 951 | | |
| 952 | | my $end_slotnum = ($policy{num_timeslots}-1); |
| 953 | | $end_slotnum = int(($t2 - $policy{starttime}) / $policy{timeslot_size}) |
| 954 | | if ($t2 < $policy{endtime}); |
| 955 | | |
| 956 | | # add this programme into the global timeslots table for this channel |
| 957 | | foreach my $slotnum ($start_slotnum..$end_slotnum) { |
| 958 | | $channel_data->{$ch}->{timeslots}[$slotnum]++; |
| 959 | | } |
| 960 | | } |
| 961 | | |
| 962 | | $seen_channels_with_data++ if ($seen_progs_on_this_channel > 0); |
| 963 | | } |
| 964 | | |
| 965 | | # print some stats about what we saw! |
| 966 | | &log((sprintf "SHEPHERD: %s '%s' returned data for %d channels, %d programmes, %dd%02dh%02dm%02ds duration, %s%s\n", |
| 967 | | ucfirst($plugintype), $plugin, $seen_channels_with_data, $this_plugin->{programmes}, |
| 968 | | int($this_plugin->{total_duration} / 86400), # days |
| 969 | | int(($this_plugin->{total_duration} % 86400) / 3600), # hours |
| 970 | | int(($this_plugin->{total_duration} % 3600) / 60), # mins |
| 971 | | int($this_plugin->{total_duration} % 60), # sec |
| 972 | | (defined $this_plugin->{earliest_data_seen} ? (strftime "%a %e %b %H:%M - ", localtime($this_plugin->{earliest_data_seen})) : 'no data'), |
| 973 | | (defined $this_plugin->{latest_data_seen} ? (strftime "%a %e %b %H:%M", localtime($this_plugin->{latest_data_seen})) : ''))); |
| 974 | | |
| 975 | | $this_plugin->{laststatus} = sprintf "%dch/%dpr/%dhrs %s-%s", |
| 976 | | $seen_channels_with_data, $this_plugin->{programmes}, |
| 977 | | int($this_plugin->{total_duration} / 3600), |
| 978 | | (defined $this_plugin->{earliest_data_seen} ? (strftime "%a%d%b", localtime($this_plugin->{earliest_data_seen})) : 'no'), |
| 979 | | (defined $this_plugin->{latest_data_seen} ? (strftime "%a%d%b", localtime($this_plugin->{latest_data_seen})) : 'data'); |
| 980 | | |
| 981 | | $plugin_data->{$plugin} = $this_plugin; |
| 982 | | } |
| 983 | | |
| 984 | | |
| 985 | | # analyze grabber data - do we have all the data we want? |
| 986 | | sub analyze_plugin_data |
| 987 | | { |
| 988 | | my ($analysistype,$quiet) = @_; |
| 989 | | &log("SHEPHERD: $analysistype:\n") unless $quiet; |
| 990 | | |
| 991 | | my $total_channels = 0; |
| 992 | | |
| 993 | | my $overall_data_ok = 1; # until proven otherwise |
| 994 | | |
| 995 | | # iterate across each channel |
| 996 | | foreach my $ch (sort keys %{$channels}) { |
| 997 | | $total_channels++; |
| 998 | | |
| 999 | | my $data; |
| 1000 | | my $lastpol = ""; |
| 1001 | | $data->{data_ok} = 1; # unless proven otherwise |
| 1002 | | $data->{have} = 0; |
| 1003 | | $data->{missing} = 0; |
| 1004 | | |
| 1005 | | for my $slotnum (0..($policy{num_timeslots}-1)) { |
| 1006 | | my $bucket_start_offset = ($slotnum * $policy{timeslot_size}); |
| 1007 | | |
| 1008 | | # work out day number of when this bucket is. |
| 1009 | | # number from 0 onwards. (i.e. today=0). |
| 1010 | | # for a typical 7 day grabber this will actually mean 8 days of data (0-7) |
| 1011 | | # with days 0 and 7 truncated to half-days |
| 1012 | | my $day = int(($bucket_start_offset + $policy{first_bucket_offset}) / 86400); |
| 1013 | | |
| 1014 | | if (!defined $data->{day}->[$day]) { |
| 1015 | | $data->{day}->[$day]->{num} = $day; |
| 1016 | | $data->{day}->[$day]->{have} = 0; |
| 1017 | | $data->{day}->[$day]->{missing} = 0; |
| 1018 | | $data->{day}->[$day]->{missing_peak} = 0; |
| 1019 | | $data->{day}->[$day]->{missing_nonpeak} = 0; |
| 1020 | | $data->{day}->[$day]->{missing_other} = 0; |
| 1021 | | |
| 1022 | | $data->{day}->[$day]->{day_ok} = 1; # until proven otherwise |
| 1023 | | |
| 1024 | | # day changed, dump any 'already_missing' data |
| 1025 | | &dump_already_missing($data); |
| 1026 | | } |
| 1027 | | |
| 1028 | | # we have programming data for this bucket. great! process next bucket |
| 1029 | | if ((defined $channel_data->{$ch}->{timeslots}[$slotnum]) && |
| 1030 | | ($channel_data->{$ch}->{timeslots}[$slotnum] > 0)) { |
| 1031 | | |
| 1032 | | # if we have missing data queued up, push it now |
| 1033 | | &dump_already_missing($data); |
| 1034 | | |
| 1035 | | &dump_already_missing_period($data->{day}->[$day],$lastpol) |
| 1036 | | if ($lastpol ne ""); |
| 1037 | | |
| 1038 | | $data->{day}->[$day]->{have} += $policy{timeslot_size}; |
| 1039 | | $data->{have} += $policy{timeslot_size}; |
| 1040 | | next; |
| 1041 | | } |
| 1042 | | |
| 1043 | | # we don't have programming for this channel for this bucket |
| 1044 | | |
| 1045 | | # some grabbers take HOURS to run. if this bucket (missing data) is for |
| 1046 | | # a time period now in the past, then don't include it |
| 1047 | | next if (($bucket_start_offset + $policy{starttime}) < time); |
| 1048 | | |
| 1049 | | # work out the localtime of when this bucket is |
| 1050 | | my $bucket_seconds_offset = ($bucket_start_offset + $policy{first_bucket_offset}) % 86400; |
| 1051 | | |
| 1052 | | # store details of where we are missing data |
| 1053 | | if (!defined $data->{already_missing}) { |
| 1054 | | $data->{already_missing} = sprintf "#%d/%02d:%02d", |
| 1055 | | $day, |
| 1056 | | int($bucket_seconds_offset / 3600), |
| 1057 | | int(($bucket_seconds_offset % 3600) / 60); |
| 1058 | | } |
| 1059 | | $data->{already_missing_last} = $bucket_seconds_offset + $policy{timeslot_size} - 1; |
| 1060 | | |
| 1061 | | $data->{day}->[$day]->{missing} += $policy{timeslot_size}; |
| 1062 | | $data->{missing} += $policy{timeslot_size}; |
| 1063 | | |
| 1064 | | # work out what policy missing data for this bucket fits into |
| 1065 | | my $pol; |
| 1066 | | if (($bucket_seconds_offset >= $policy{peak_start}) && |
| 1067 | | (($bucket_seconds_offset+$policy{timeslot_size}) <= $policy{peak_stop})) { |
| 1068 | | $pol = "peak"; |
| 1069 | | } elsif (($bucket_seconds_offset >= $policy{nonpeak_start}) && |
| 1070 | | (($bucket_seconds_offset+$policy{timeslot_size}) <= $policy{nonpeak_stop})) { |
| 1071 | | $pol = "nonpeak"; |
| 1072 | | } else { |
| 1073 | | $pol = "other"; |
| 1074 | | } |
| 1075 | | |
| 1076 | | &dump_already_missing_period($data->{day}->[$day],$lastpol) |
| 1077 | | if (($lastpol ne $pol) && ($lastpol ne "")); |
| 1078 | | |
| 1079 | | $lastpol = $pol; |
| 1080 | | |
| 1081 | | $data->{day}->[$day]->{"missing_".$pol} += $policy{timeslot_size}; |
| 1082 | | |
| 1083 | | $data->{day}->[$day]->{"already_missing_".$pol."_start"} = $bucket_seconds_offset |
| 1084 | | if (!defined $data->{day}->[$day]->{"already_missing_".$pol."_start"}); |
| 1085 | | $data->{day}->[$day]->{"already_missing_".$pol."_stop"} = $bucket_seconds_offset + $policy{timeslot_size} - 1; |
| 1086 | | |
| 1087 | | $data->{day}->[$day]->{day_ok} = 0 if ($data->{day}->[$day]->{missing_peak} > $policy{peak_max_missing}); |
| 1088 | | $data->{day}->[$day]->{day_ok} = 0 if ($data->{day}->[$day]->{missing_nonpeak} > $policy{nonpeak_max_missing}); |
| 1089 | | $data->{day}->[$day]->{day_ok} = 0 if ($data->{day}->[$day]->{missing_other} > $policy{other_max_missing}); |
| 1090 | | $data->{data_ok} = 0 if ($data->{day}->[$day]->{day_ok} == 0); |
| 1091 | | $overall_data_ok = 0 if ($data->{data_ok} == 0); |
| 1092 | | } |
| 1093 | | |
| 1094 | | # finished all timeslots in this channel. |
| 1095 | | # if we have missing data queued up, push it now |
| 1096 | | &dump_already_missing($data); |
| 1097 | | |
| 1098 | | # fill in any last missing period data |
| 1099 | | foreach my $day (@{($data->{day})}) { |
| 1100 | | &dump_already_missing_period($day,"peak"); |
| 1101 | | &dump_already_missing_period($day,"nonpeak"); |
| 1102 | | &dump_already_missing_period($day,"other"); |
| 1103 | | } |
| 1104 | | |
| 1105 | | my $statusstring = sprintf " > ch %s: %s programming: %s\n", |
| 1106 | | $ch, pretty_duration($data->{have}), |
| 1107 | | $data->{data_ok} ? "PASS (within thresholds)" : "FAIL, missing data over policy threshold:"; |
| 1108 | | |
| 1109 | | # display per-day missing data statistics |
| 1110 | | foreach my $day (@{($data->{day})}) { |
| 1111 | | unless ($day->{day_ok}) { |
| 1112 | | $statusstring .= sprintf "\t".(strftime("%a %e %b",localtime($policy{starttime}+($day->{num}*86400)))).": "; |
| 1113 | | |
| 1114 | | # do we have any data for this day? |
| 1115 | | $statusstring .= "peak ".join(", ",(@{($day->{missing_peak_table})})) |
| 1116 | | if (($day->{missing_peak}) && ($day->{missing_peak} > $policy{peak_max_missing})); |
| 1117 | | |
| 1118 | | $statusstring .= sprintf "%snon-peak %s", |
| 1119 | | ($day->{missing_peak} ? " / " : ""), |
| 1120 | | join(", ",(@{($day->{missing_nonpeak_table})})) |
| 1121 | | if (($day->{missing_nonpeak}) && ($day->{missing_nonpeak} > $policy{nonpeak_max_missing})); |
| 1122 | | |
| 1123 | | $statusstring .= sprintf "%sother %s", |
| 1124 | | (($day->{missing_peak} + $day->{missing_nonpeak}) > 0 ? " / " : ""), |
| 1125 | | join(", ",(@{($day->{missing_other_table})})) |
| 1126 | | if (($day->{missing_other}) && ($day->{missing_other} > $policy{other_max_missing})); |
| 1127 | | |
| 1128 | | $statusstring .= "\n"; |
| 1129 | | } |
| 1130 | | } |
| 1131 | | &log($statusstring) unless $quiet; |
| 1132 | | |
| 1133 | | delete $channel_data->{$ch}->{analysis} if (defined $channel_data->{$ch}->{analysis}); |
| 1134 | | $channel_data->{$ch}->{analysis} = $data; |
| 1135 | | } |
| 1136 | | |
| 1137 | | &log((sprintf " > OVERALL: %s\n", ($overall_data_ok ? "PASS" : "FAIL"))) unless $quiet; |
| 1138 | | |
| 1139 | | return $overall_data_ok; # return 1 for good, 0 for need more |
| 1140 | | } |
| 1141 | | |
| 1142 | | # helper routine for filling in 'missing_all' array |
| 1143 | | sub dump_already_missing |
| 1144 | | { |
| 1145 | | my $d = shift; |
| 1146 | | if (defined $d->{already_missing}) { |
| 1147 | | $d->{already_missing} .= sprintf "-%02d:%02d", |
| 1148 | | int($d->{already_missing_last} / 3600), |
| 1149 | | int(($d->{already_missing_last} % 3600) / 60) |
| 1150 | | if (defined $d->{already_missing_last}); |
| 1151 | | push(@{($d->{missing_all})}, $d->{already_missing}); |
| 1152 | | delete $d->{already_missing}; |
| 1153 | | delete $d->{already_missing_last}; |
| 1154 | | } |
| 1155 | | } |
| 1156 | | |
| 1157 | | # helper routine for filling in per-day missing data |
| 1158 | | # specific to peak/nonpeak/other |
| 1159 | | sub dump_already_missing_period |
| 1160 | | { |
| 1161 | | my ($d,$p) = @_; |
| 1162 | | my $startvar = "already_missing_".$p."_start"; |
| 1163 | | my $stopvar = "already_missing_".$p."_stop"; |
| 1164 | | |
| 1165 | | if (defined $d->{$startvar}) { |
| 1166 | | push(@{($d->{"missing_".$p."_table"})}, |
| 1167 | | sprintf "%02d:%02d-%02d:%02d", |
| 1168 | | int($d->{$startvar} / 3600), |
| 1169 | | int(($d->{$startvar} % 3600) / 60), |
| 1170 | | int($d->{$stopvar} / 3600), |
| 1171 | | int(($d->{$stopvar} % 3600) / 60)); |
| 1172 | | delete $d->{$startvar}; |
| 1173 | | delete $d->{$stopvar}; |
| 1174 | | } |
| 1175 | | } |
| 1176 | | |
| 1177 | | # given a duration (seconds), return it in a pretty "{days}d{hr}h{min}m" string |
| 1178 | | # and indication of whether the duration is over its threshold or not |
| 1179 | | sub pretty_duration |
| 1180 | | { |
| 1181 | | my ($d,$crit) = @_; |
| 1182 | | my $s = ""; |
| 1183 | | $s .= sprintf "%dd",int($d / (60*60*24)) if ($d >= (60*60*24)); |
| 1184 | | $s .= sprintf "%dh",int(($d % (60*60*24)) / (60*60)) if (($d % (60*60*24)) >= (60*60)); |
| 1185 | | $s .= sprintf "%dm",int(($d % (60*60)) / 60) if (($d % (60*60)) >= 60); |
| 1186 | | $s .= "no" if ($s eq ""); |
| 1187 | | |
| 1188 | | if (defined $crit) { |
| 1189 | | $s .= "[!]" if ($d > $crit); |
| 1190 | | } |
| 1191 | | return $s; |
| 1192 | | } |
| 1193 | | |
| 1194 | | # work out date range we are expecting data to be in |
| 1195 | | sub calc_date_range |
| 1196 | | { |
| 1197 | | # work out GMT offset - we only do this once |
| 1198 | | if (!$gmt_offset) { |
| 1199 | | # work out our gmt offset |
| 1200 | | my @l = localtime(43200), my @g = gmtime(43200); |
| 1201 | | $gmt_offset = (($l[2] - $g[2])*(60*60)) + (($l[1] - $g[1])*60); |
| 1202 | | } |
| 1203 | | |
| 1204 | | $policy{starttime} = time; |
| 1205 | | |
| 1206 | | # set endtime as per $days less 1 day + hours left today |
| 1207 | | $policy{endtime} = $policy{starttime} + ((60*60*24)*($days-1)) + (86400 - (($policy{starttime} + $gmt_offset) % 86400)); |
| 1208 | | |
| 1209 | | # normalize starttime to beginning of next bucket |
| 1210 | | $policy{starttime} += ($policy{timeslot_size} - ($policy{starttime} % $policy{timeslot_size})); |
| 1211 | | |
| 1212 | | # work out how many seconds into a day our first bucket starts |
| 1213 | | $policy{first_bucket_offset} = ($policy{starttime} + $gmt_offset) % 86400; |
| 1214 | | |
| 1215 | | # normalize endtime to end of previous bucket |
| 1216 | | $policy{endtime} -= ($policy{endtime} % $policy{timeslot_size}); |
| 1217 | | |
| 1218 | | # if we are working with an --offset, apply it now. |
| 1219 | | $policy{starttime} += (86400 * $opt->{offset}) if ($opt->{offset}); |
| 1220 | | |
| 1221 | | # work out number of buckets |
| 1222 | | $policy{num_timeslots} = ($policy{endtime} - $policy{starttime}) / $policy{timeslot_size}; |
| 1223 | | } |
| 1224 | | |
| 1225 | | |
| 1226 | | # strptime type date parsing - BUT - if no timezone is present, treat time as being in localtime |
| 1227 | | # rather than the various other perl implementation which treat it as being in UTC/GMT |
| 1228 | | sub parse_xmltv_date |
| 1229 | | { |
| 1230 | | my $datestring = shift; |
| 1231 | | my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst |
| 1232 | | my $tz_offset = 0; |
| 1233 | | |
| 1234 | | if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) { |
| 1235 | | ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); |
| 1236 | | ($t[6],$t[7],$t[8]) = (-1,-1,-1); |
| 1237 | | |
| 1238 | | # if input data has a timezone offset, then offset by that |
| 1239 | | if ($datestring =~ /\+(\d{2})(\d{2})/) { |
| 1240 | | $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60)); |
| 1241 | | } elsif ($datestring =~ /\-(\d{2})(\d{2})/) { |
| 1242 | | $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60)); |
| 1243 | | } |
| 1244 | | |
| 1245 | | my $e = mktime(@t); |
| 1246 | | return ($e+$tz_offset) if ($e > 1); |
| 1247 | | } |
| 1248 | | return undef; |
| 1249 | | } |
| 1250 | | |
| 1251 | | # ----------------------------------------- |
| 1252 | | # Subs: Reconciling data |
| 1253 | | # ----------------------------------------- |
| 1254 | | |
| 1255 | | # for all the data we have, try to pick the best bits! |
| 1256 | | sub reconcile_data |
| 1257 | | { |
| 1258 | | &log("\nReconciling data:\n\n"); |
| 1259 | | |
| 1260 | | my $num_grabbers = 0; |
| 1261 | | my $input_files = ""; |
| 1262 | | my @input_file_list; |
| 1263 | | |
| 1264 | | &log("Preferred title preferences from '$pref_title_source'\n") |
| 1265 | | if ((defined $pref_title_source) && |
| 1266 | | ($plugin_data->{$pref_title_source}) && |
| 1267 | | ($plugin_data->{$pref_title_source}->{valid})); |
| 1268 | | |
| 1269 | | &log("Preference for whose data we prefer as follows:\n"); |
| 1270 | | foreach my $proggy (sort { $components->{$b}->{config}->{quality} <=> $components->{$a}->{config}->{quality} } query_grabbers()) { |
| 1271 | | if ((!$components->{$proggy}->{disabled}) && ($plugin_data->{$proggy}) && ($plugin_data->{$proggy}->{valid})) { |
| 1272 | | $num_grabbers++; |
| 1273 | | &log((sprintf " %d. %s (%s)\n", $num_grabbers, $proggy, $plugin_data->{$proggy}->{output_filename})); |
| 1274 | | |
| 1275 | | $input_files .= $plugin_data->{$proggy}->{output_filename}." "; |
| 1276 | | push(@input_file_list,$plugin_data->{$proggy}->{output_filename}); |
| 1277 | | } |
| 1278 | | } |
| 1279 | | |
| 1280 | | if ($num_grabbers == 0) { |
| 1281 | | die "Nothing to reconcile! There is no valid grabber data!\n"; |
| 1282 | | } |
| 1283 | | |
| 1284 | | foreach my $reconciler (sort { $components->{$a} <=> $components->{$b} } query_reconcilers()) { |
| 1285 | | next if ($components->{$reconciler}->{disabled}); |
| 1286 | | next if (!$components->{$reconciler}->{ready}); |
| 1287 | | |
| 1288 | | $reconciler_found_all_data = &call_data_processor("reconciler",$reconciler,$input_files); |
| 1289 | | |
| 1290 | | if ((!$reconciler_found_all_data) && ($grabber_found_all_data)) { |
| 1291 | | # urgh. this reconciler did a bad bad thing ... |
| 1292 | | &log("SHEPHERD: XML data from reconciler $reconciler appears bogus, will try to use another reconciler\n"); |
| 1293 | | } else { |
| 1294 | | &log("SHEPHERD: Data from reconciler $reconciler looks good\n"); |
| 1295 | | $input_postprocess_file = $plugin_data->{$reconciler}->{output_filename}; |
| 1296 | | } |
| 1297 | | |
| 1298 | | last if ($input_postprocess_file ne ""); |
| 1299 | | } |
| 1300 | | |
| 1301 | | if ($input_postprocess_file eq "") { |
| 1302 | | # no reconcilers worked!! |
| 1303 | | &log("SHEPHERD: WARNING: No reconcilers seemed to work! Falling back to concatenating the data together!\n"); |
| 1304 | | |
| 1305 | | my %w_args = (); |
| 1306 | | $input_postprocess_file = "$CWD/input_preprocess.xmltv"; |
| 1307 | | my $fh = new IO::File ">$input_postprocess_file" || die "could not open $input_postprocess_file for writing: $!\n"; |
| 1308 | | %w_args = (OUTPUT => $fh); |
| 1309 | | XMLTV::catfiles(\%w_args, @input_file_list); |
| 1310 | | } |
| 1311 | | } |
| 1312 | | |
| 1313 | | |
| 1314 | | # ----------------------------------------- |
| 1315 | | # Subs: Postprocessing |
| 1316 | | # ----------------------------------------- |
| 1317 | | |
| 1318 | | sub postprocess_data |
| 1319 | | { |
| 1320 | | # for our first postprocessor, we feed it ALL of the XMLTV files we have |
| 1321 | | # as each postprocessor runs, we feed in the output from the previous one |
| 1322 | | # Shepherd checks the "completeness" of the data that comes out of a postprocessor & automatically |
| 1323 | | # reverts back to the previous postprocessor if it was shown to be bad |
| 1324 | | |
| 1325 | | # first time around: feed in reconciled data ($input_postprocess_file) |
| 1326 | | |
| 1327 | | &log("\nPostprocessing stage:\n"); |
| 1328 | | |
| 1329 | | foreach my $postprocessor (sort { $components->{$a} <=> $components->{$b} } query_postprocessors()) { |
| 1330 | | next if ($components->{$postprocessor}->{disabled}); |
| 1331 | | next if (!$components->{$postprocessor}->{ready}); |
| 1332 | | |
| 1333 | | my $found_all_data = call_data_processor("postprocessor",$postprocessor,$input_postprocess_file); |
| 1334 | | |
| 1335 | | if ($found_all_data) { |
| 1336 | | # accept what this postprocessor did to our output ... |
| 1337 | | &log("SHEPHERD: accepting output from postprocessor $postprocessor, feeding it into next stage\n"); |
| 1338 | | $input_postprocess_file = $plugin_data->{$postprocessor}->{output_filename}; |
| 1339 | | delete $components->{$postprocessor}->{conescutive_failures} if (defined $components->{$postprocessor}->{conescutive_failures}); |
| 1340 | | next; |
| 1341 | | } |
| 1342 | | |
| 1343 | | # urgh. this postprocessor did a bad bad thing ... |
| 1344 | | &log("SHEPHERD: XML data from postprocessor $postprocessor rejected, using XML from previous stage\n"); |
| 1345 | | |
| 1346 | | if (defined $components->{$postprocessor}->{conescutive_failures}) { |
| 1347 | | $components->{$postprocessor}->{conescutive_failures}++; |
| 1348 | | } else { |
| 1349 | | $components->{$postprocessor}->{conescutive_failures} = 1; |
| 1350 | | } |
| 1351 | | &log((sprintf "SHEPHERD: Postprocessor \"%s\" has now failed %d times in a row. %d more and it will be automatically disabled.\n", |
| 1352 | | $postprocessor, |
| 1353 | | $components->{$postprocessor}->{conescutive_failures}, |
| 1354 | | ($policy{postprocessor_disable_failure_threshold} - $components->{$postprocessor}->{conescutive_failures}))); |
| 1355 | | |
| 1356 | | if ($components->{$postprocessor}->{conescutive_failures} >= $policy{postprocessor_disable_failure_threshold}) { |
| 1357 | | &log("SHEPHERD: Disabling Postprocessor \"$postprocessor\".\n"); |
| 1358 | | $components->{$postprocessor}->{disabled} = 1; |
| 1359 | | } |
| 1360 | | } |
| 1361 | | } |
| 1362 | | |
| 1363 | | |
| 1364 | | # ----------------------------------------- |
| 1365 | | # Subs: Postprocessing/Reconciler helpers |
| 1366 | | # ----------------------------------------- |
| 1367 | | |
| 1368 | | sub call_data_processor |
| 1369 | | { |
| 1370 | | my ($data_processor_type, $data_processor_name, $input_files) = @_; |
| 1371 | | |
| 1372 | | $components->{$data_processor_name}->{lastdata} = time; |
| 1373 | | $components->{$data_processor_name}->{laststatus} = "unknown"; |
| 1374 | | |
| 1375 | | &log((sprintf "\nSHEPHERD: Using %s: %s\n",$data_processor_type,$data_processor_name)); |
| 1376 | | |
| 1377 | | my $output = sprintf "%s/%ss/%s/output.xmltv",$CWD,$data_processor_type,$data_processor_name; |
| 1378 | | my $comm = sprintf "%s/%ss/%s/%s",$CWD,$data_processor_type,$data_processor_name,$data_processor_name; |
| 1379 | | $comm .= " --region $region" . |
| 1380 | | " --channels_file $channels_file" . |
| 1381 | | " --output $output"; |
| 1382 | | $comm .= " --days $days" if ($days); |
| 1383 | | $comm .= " --offset $opt->{offset}" if ($opt->{offset}); |
| 1384 | | $comm .= " --debug" if ($debug); |
| 1385 | | $comm .= " @ARGV" if (@ARGV); |
| 1386 | | |
| 1387 | | $comm .= " --preftitle ".$plugin_data->{$pref_title_source}->{output_filename} |
| 1388 | | if (($data_processor_type eq "reconciler") && |
| 1389 | | (defined $pref_title_source) && |
| 1390 | | ($plugin_data->{$pref_title_source}) && |
| 1391 | | ($plugin_data->{$pref_title_source}->{valid})); |
| 1392 | | |
| 1393 | | $comm .= " $input_files"; |
| 1394 | | &log("SHEPHERD: Excuting command: $comm\n"); |
| 1395 | | |
| 1396 | | my $dir = sprintf "%s/%ss/%s/",$CWD,$data_processor_type,$data_processor_name; |
| 1397 | | chdir $dir; |
| 1398 | | my $retval = call_prog($comm); |
| 1399 | | chdir $CWD; |
| 1400 | | |
| 1401 | | if ($retval != 0) { |
| 1402 | | &log("$data_processor_type returned with non-zero return code $retval: assuming it failed.\n"); |
| 1403 | | return 0; |
| 1404 | | } |
| 1405 | | |
| 1406 | | # |
| 1407 | | # soak up the data we just collected and check it |
| 1408 | | # YES - these are the SAME routines we used in the previous 'grabber' phase |
| 1409 | | # but the difference here is that we clear out our 'channel_data' beforehand |
| 1410 | | # so we can independently analyze the impact of this postprocessor. |
| 1411 | | # if it clearly returns bad data, don't use that data (go back one step) and |
| 1412 | | # flag the postprocessor as having failed. after 3 consecutive failures, disable it |
| 1413 | | # |
| 1414 | | |
| 1415 | | # clear out channel_data |
| 1416 | | foreach my $ch (keys %{$channels}) { |
| 1417 | | delete $channel_data->{$ch}; |
| 1418 | | } |
| 1419 | | |
| 1420 | | # process and analyze it! |
| 1421 | | &soak_up_data($data_processor_name, $output, $data_processor_type); |
| 1422 | | my $have_all_data = &analyze_plugin_data("$data_processor_type $data_processor_name"); |
| 1423 | | |
| 1424 | | $components->{$data_processor_name}->{laststatus} = $plugin_data->{$data_processor_name}->{laststatus}; |
| 1425 | | |
| 1426 | | return $have_all_data; |
| 1427 | | } |
| 1428 | | |
| 1429 | | |
| 1430 | | sub output_data |
| 1431 | | { |
| 1432 | | # $input_postprocess_file contains our final output |
| 1433 | | # send it to whereever --output told us to! |
| 1434 | | |
| 1435 | | my $output_filename = "$CWD/output.xmltv"; |
| 1436 | | $output_filename = $opt->{output} if ($opt->{output}); |
| 1437 | | |
| 1438 | | open(OUTFILE,">$output_filename") || die "could not open output file $output_filename for writing: $!\n"; |
| 1439 | | |
| 1440 | | if (!(open(INFILE,"<$input_postprocess_file"))) { |
| 1441 | | &log((sprintf "WARNING: could not open input file \"%s\": %s\n", $input_postprocess_file, $!)); |
| 1442 | | &log("Output XMLTV data may be damanged as a result!\n"); |
| 1443 | | } else { |
| 1444 | | while (<INFILE>) { |
| 1445 | | print OUTFILE $_; |
| 1446 | | } |
| 1447 | | close(INFILE); |
| 1448 | | close(OUTFILE); |
| 1449 | | } |
| 1450 | | |
| 1451 | | &log("Final output stored in $output_filename.\n"); |
| 1452 | | } |
| 1453 | | |
| 1454 | | # ----------------------------------------- |