#!/usr/local/bin/perl
#
# ListHTMLTitles
# ©2001 Joshua S. Jacob
#
# version: 1.0.1
#
# written by: Josh Jacob (josh.jacob@joshjacob.com)
#
# description: This Perl script takes as input a directory and will output
# to a file a list of all HTML files and the titles in the
# <title> tags.
#
# license: This software is provided "as is" and without any express or
# implied warranties, including, without limitation, the implied
# warranties of merchantability or fitness for a particular
# purpose. The copyright holder shall not be held liable for
# any damages whatsoever arising from the use of, misuse of, or
# inability to use this software.
#
#
# constants
#
# name of the log file that receives the listing
$gLogFileName = 'ListHTMLTitles.log';

# file name endings to look in (compared case-insensitively)
@gFileExtensions = qw(.html .htm .asp);

# directories to skip - each entry is a regular expression that is matched
# (case-insensitively) against the directory name
@gSkipDirectories = (
    '^_(.*)$',
    '\.(.*)$',
);

# root directory currently being listed; set by the main loop
$gRootDirectory = '';

# 1 = one line per file (path, name, title); anything else = indented tree
$gDisplayFlat = 0;
#
# function: checkExtension()
#
# Logs one file, together with the contents of its <title> tag, when the
# file name ends in one of the extensions listed in @gFileExtensions.
# Files with any other ending are ignored.
#
# inputs:  $inPath     - directory part of the path; it is concatenated
#                        directly with $inFileName, so it must already end
#                        with the path separator
#          $inFileName - name of the file inside $inPath
#          $inLevel    - nesting depth, used for tab indentation when
#                        $gDisplayFlat is not 1
# globals: reads @gFileExtensions, $gDisplayFlat and $gRootDirectory and
#          prints to the LOG filehandle, which the caller must have opened
# output:  tree mode - "<tabs>name\n<tabs + 1>title\n"
#          flat mode - "<path below root>name\ttitle\n"
#          A file that cannot be read, or that has no <title> tag, is logged
#          with "-- no title found --" in place of the title.
#
sub checkExtension
{
    my ($inPath, $inFileName, $inLevel) = @_;

    # look for an extension that the file name ends with; \Q...\E keeps the
    # "." (and anything else) in the extension literal instead of letting it
    # act as a regular expression metacharacter
    my $matched = 0;
    foreach my $extension (@gFileExtensions)
    {
        if ("$inPath$inFileName" =~ /\Q$extension\E$/i)
        {
            $matched = 1;
            last;
        }
    }
    return unless $matched;

    if ($gDisplayFlat == 1)
    {
        # flat mode: path with the root directory removed, name, then a tab
        my $relativePath = $inPath;
        # guard against an empty root: an empty pattern would make Perl
        # reuse the last successful pattern instead of matching nothing
        if ($gRootDirectory ne "")
        {
            $relativePath =~ s/\Q$gRootDirectory\E//ig;
        }
        print LOG "$relativePath$inFileName\t";
    }
    else
    {
        # tree mode: name indented by level, title one level deeper on the
        # following line
        my $nameIndent  = "\t" x $inLevel;
        my $titleIndent = "\t" x ($inLevel + 1);
        print LOG "$nameIndent$inFileName\n$titleIndent";
    }

    # get the whole file as a string; a file that cannot be opened is
    # treated like a file without a title
    my $fileString = "";
    if (open(my $htmlFile, '<', $inPath . $inFileName))
    {
        local $/;    # no record separator: read everything in one go
        $fileString = <$htmlFile>;
        $fileString = "" unless defined $fileString;
        close $htmlFile;
    }

    # take the first <title>...</title> pair; non-greedy so that a later
    # "</title>" in the document does not extend the match
    if ($fileString =~ /<title\b[^>]*>(.*?)<\/title>/is)
    {
        my $title = $1;
        # a title may span several lines in the source; keep it on one
        $title =~ s/[\n\r\f]+/ /g;
        print LOG "$title\n";
    }
    else
    {
        print LOG "-- no title found --\n";
    }
}
#
# function: printDir()
#
# Walks a directory tree: every sub-directory that is not excluded is
# logged (tree mode only) and then listed recursively, and every other
# entry is handed to checkExtension().
#
# inputs:  $inDir   - directory to list; entry names are appended to it
#                     directly, so it must already end with the path
#                     separator
#          $inLevel - nesting depth of $inDir, used for tab indentation
# globals: reads @gSkipDirectories and $gDisplayFlat and prints to the LOG
#          filehandle, which the caller must have opened
#
# Entries named "Icon\n" and entries starting with "." are ignored, as are
# directories whose name matches one of the @gSkipDirectories patterns.
# A directory that cannot be opened is skipped without any message.
#
# NOTE(review): sub-directory paths are built with a trailing ":", which
# looks like the classic Mac OS path separator - confirm the target
# platform before changing it.
#
sub printDir
{
    my ($inDir, $inLevel) = @_;

    # a lexical handle keeps each recursion level's directory separate
    opendir(my $dirHandle, $inDir) or return;
    # read everything up front so the handle is not held open while
    # recursing
    my @entries = readdir $dirHandle;
    closedir $dirHandle;

    foreach my $entry (@entries)
    {
        # not an icon and not a hidden file
        next if ($entry eq "Icon\n") || ($entry =~ /^\./);

        if (-d "$inDir$entry")
        {
            # does any skip pattern match the directory name?
            my $skipDir = 0;
            foreach my $pattern (@gSkipDirectories)
            {
                if ($entry =~ /$pattern/i)
                {
                    $skipDir = 1;
                    last;
                }
            }
            next if $skipDir;

            if ($gDisplayFlat != 1)
            {
                # tree mode: directory name, tabbed in based on level
                my $indent = "\t" x $inLevel;
                print LOG "${indent}${entry}:\n";
            }

            # recurse
            &printDir("${inDir}${entry}:", $inLevel + 1);
        }
        else
        {
            # called with "&" so that the call compiles whether or not
            # checkExtension is declared with an empty prototype
            &checkExtension($inDir, $entry, $inLevel);
        }
    }
}
#
# main
#
# open log file for writing; everything the script produces goes to LOG, so
# there is nothing useful left to do when it cannot be created
# NOTE(review): the leading ":" in the log path looks like a classic Mac OS
# relative path - confirm the target platform before changing it.
open(LOG, '>', ":$gLogFileName")
    or die "Can't write log file :$gLogFileName: $!\n";

# for each item passed in
foreach my $file (@ARGV)
{
    # ignore the icon file and hidden files
    next if ($file eq "Icon\n") || ($file =~ /^\./);

    # only directories are listed
    next unless -d $file;

    # log it as the root directory
    print LOG "Root directory -- $file\n\n";

    # store the root directory
    $gRootDirectory = $file;

    # pass the directory to printDir with the initial level of zero; called
    # with "&" so that the call compiles whether or not printDir is declared
    # with an empty prototype
    &printDir($file, 0);
}

# close log file; buffered write errors only show up here
close(LOG) or warn "Can't close log file :$gLogFileName: $!\n";