#!/usr/local/bin/perl
#
# ListHTMLTitles
# ©2001 Joshua S. Jacob
#
# version: 1.0.1
#
# written by: Josh Jacob (josh.jacob@joshjacob.com)
#
# description: This Perl script takes as input a directory and will output
#              to a file a list of all HTML files and the titles in the
#              <title> tags.
#
# license: This software is provided "as is" and without any express or
#          implied warranties, including, without limitation, the implied
#          warranties of merchantability or fitness for a particular
#          purpose. The copyright holder shall not be held liable for
#          any damages whatsoever arising from the use of, misuse of, or
#          inability to use this software.
#

use strict;
use warnings;

use File::Basename qw(basename);
use File::Spec;

#
# constants
#

# name of the log file; it is created in the current working directory
our $gLogFileName = "ListHTMLTitles.log";

# list of file extensions to look in (matched literally, ignoring case)
our @gFileExtensions = (".html", ".htm", ".asp");

# list of directories to skip - regular expressions matched against the
# directory's own name (not its full path), ignoring case
our @gSkipDirectories = ('^_(.*)$', '\.(.*)$');

# directory currently being listed; set by main() for every root it processes
# and used by checkExtension() to shorten paths in flat mode
our $gRootDirectory = "";

# 0 = indented tree (one tab per nesting level, title on its own line)
# 1 = flat list ("relative/path<TAB>title" on a single line)
our $gDisplayFlat = 0;

#
# function: isSkippedEntry()
#
# Returns 1 when a directory entry must be ignored altogether: names starting
# with a dot (which also covers "." and "..") and the "Icon" file whose name
# ends in a line terminator. The original compared against "Icon\n"; both CR
# and LF are accepted here because "\n" does not map to the same character on
# every platform Perl runs on.
#
sub isSkippedEntry {
    my ($name) = @_;

    return 1 if $name =~ /\AIcon[\r\n]\z/;
    return 1 if $name =~ /\A\./;
    return 0;
}

#
# function: logIndent()
#
# Writes $depth tab characters to LOG (nothing when $depth is zero or less).
#
sub logIndent {
    my ($depth) = @_;

    print LOG "\t" x $depth;
    return;
}

#
# function: readTitle()
#
# Returns the text between the first <title ...> tag and the closest following
# </title> in the file at $path, with every run of newline / carriage return /
# form feed characters replaced by one space. Returns undef when the file
# cannot be read (a warning goes to STDERR) or contains no title element.
#
sub readTitle {
    my ($path) = @_;

    my $fh;
    unless (open($fh, '<', $path)) {
        warn "Can't read $path: $!\n";
        return;
    }

    # slurp the whole file: a title may span several lines
    my $content = do { local $/; <$fh> };
    close $fh;

    return unless defined $content;

    # non-greedy, so only the first title element is captured and nothing
    # that precedes the opening tag leaks into the result
    return unless $content =~ /<title\b[^>]*>(.*?)<\/title>/is;

    my $title = $1;
    $title =~ s/[\n\r\f]+/ /g;
    return $title;
}

#
# function: checkExtension()
#
# inputs: $inPath     - directory that contains the file
#         $inFileName - name of the file inside $inPath
#         $inLevel    - nesting depth of $inPath below the root (0 = root)
#
# If the file name ends in one of @gFileExtensions, writes one entry for it
# to LOG: the file name (flat mode: its path relative to $gRootDirectory)
# followed by its title, or by "-- no title found --". Files with any other
# extension are ignored. Each file is logged at most once.
#
sub checkExtension {
    my ($inPath, $inFileName, $inLevel) = @_;

    # \Q...\E keeps the "." of an extension literal instead of "any character"
    return unless grep { $inFileName =~ /\Q$_\E\z/i } @gFileExtensions;

    if ($gDisplayFlat == 1) {
        # show the location relative to the root being listed
        my $relative = File::Spec->abs2rel($inPath, $gRootDirectory);
        my $shown    = $inFileName;
        if ($relative ne '' && $relative ne File::Spec->curdir) {
            $shown = File::Spec->catfile($relative, $inFileName);
        }
        print LOG "$shown\t";
    }
    else {
        # file name at the directory's depth, title one level deeper
        logIndent($inLevel);
        print LOG "$inFileName\n";
        logIndent($inLevel + 1);
    }

    my $title = readTitle(File::Spec->catfile($inPath, $inFileName));
    if (defined $title) {
        print LOG "<title>$title\n";
    }
    else {
        print LOG "-- no title found --\n";
    }
    return;
}

#
# function: printDir()
#
# inputs: $inDir   - directory to list
#         $inLevel - nesting depth of $inDir below the root (0 = root)
#
# Walks $inDir in sorted order. Plain files are handed to checkExtension();
# sub-directories whose name matches none of @gSkipDirectories are logged
# (tree mode only) and listed recursively one level deeper. A directory that
# cannot be opened is reported on STDERR and skipped.
#
sub printDir {
    my ($inDir, $inLevel) = @_;

    # lexical handle: every recursion level gets its own
    my $dh;
    unless (opendir($dh, $inDir)) {
        warn "Can't read directory $inDir: $!\n";
        return;
    }
    my @entries = readdir $dh;
    closedir $dh;

    # readdir order is unspecified; sort so the log is reproducible
    @entries = sort @entries;

    foreach my $entry (@entries) {
        next if isSkippedEntry($entry);

        my $childDir = File::Spec->catdir($inDir, $entry);
        if (-d $childDir) {
            next if grep { $entry =~ /$_/i } @gSkipDirectories;

            if ($gDisplayFlat != 1) {
                logIndent($inLevel);
                print LOG "$entry:\n";
            }
            printDir($childDir, $inLevel + 1);
        }
        else {
            checkExtension($inDir, $entry, $inLevel);
        }
    }
    return;
}

#
# function: main()
#
# inputs: the items passed on the command line
#
# Opens the log file for writing, then lists every argument that is a
# directory and is neither hidden nor an icon file. The hidden/icon test is
# applied to the last path component only, so "." and "./site" are accepted.
#
sub main {
    my @items = @_;

    open(LOG, '>', $gLogFileName)
        or die "Can't write log file '$gLogFileName': $!\n";

    foreach my $file (@items) {
        my $leaf     = basename($file);
        my $isDotDir = ($leaf eq File::Spec->curdir
                     || $leaf eq File::Spec->updir);
        next if !$isDotDir && isSkippedEntry($leaf);
        next unless -d $file;

        # log it as the root directory and remember it for flat mode
        print LOG "Root directory -- $file\n\n";
        $gRootDirectory = $file;

        # the root itself is nesting level zero
        printDir($file, 0);
    }

    # buffered write errors only surface when the handle is closed
    close(LOG) or die "Can't finish log file '$gLogFileName': $!\n";
    return;
}

# run only when executed as a script, not when loaded by another file
main(@ARGV) unless caller;