I found a pretty cool bash script that you can use to dump an entire repository file listing into a local text file, which you can then search through later when you are looking for a particular file. You can then use wget to fetch the file you need from that listing. The script was written by David C. Rankin, so thanks, David, for your bash script.
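To give you an idea of the workflow before you read the script itself, here is a rough sketch of how I would use it (the repository URL comes from the script's own help text, and the package name "xterm" is just a made-up example):

./lynxdump.sh -b http://download.opensuse.org/repositories/X11/i586 --rpm -o listing.txt
# ...later, search the saved listing for the file you are after
grep xterm listing.txt
# ...and hand the matching links straight to wget
grep xterm listing.txt | wget -i -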
Here it is unchanged as I found it.
#!/bin/bash --norc
##
## Title: LynxDump
## Name: lynxdump.sh
## Usage: lynxdump [-b|--base] url-with-links [-o|--outfile outfile] [-d|--dirs] [-r|--rpm]
## Version: 0.0.1
## Date: 06/06/2010, 05:15:34 PM
## Author: David C. Rankin, J.D.,P.E
## Summary: lynxdump takes a list of urls (or a baseURL and one or more directories using that base)
## queries the remote site with 'lynx -dump' and then parses the output to create a text file
## containing a list of links to the files on the remote site that can be used as an input
## file to wget ('wget -i outfile') to automate downloading of all files in each of the URLs
## or directories specified.
##
## Run as: user with sufficient permission to save to 'outfile'
## Requires: /usr/bin/lynx
##
#
## basic usage function displaying any information provided as an error, then
## displaying the script's proper usage and finally exiting.
usage() {
    [[ -n $1 ]] && echo -e "\n$1"
    echo -e "\n  Usage: ${0##*/} [-h|--help] [-v|--verbose] [-r|--rpm] [--nodebug] [-d|--dirs]\n\
           [-b|--base] url-with-links\n\
           [-o|--outfile outfile]\n"
    echo -e "  ${0##*/} uses 'lynx -dump' to capture all links from 'url-with-links' and"
    echo -e "  parses the output leaving only the direct URLs. The resulting links written"
    echo -e "  to 'outfile' (default: ./lynxdump.txt) can be used with 'wget -i outfile'"
    echo -e "  to retrieve all files from the remote host.\n"
    echo -e "  Options:"
    echo -e "    -h | --help     show this help and exit (must be only option given)."
    echo -e "    -b | --base     the next URL provides the baseURL information as well as a directory"
    echo -e "                    (i.e. -b http://download.lynx.org/docs) All other urls with the same"
    echo -e "                    baseURL need only provide the directory name (i.e. download, svn)."
    echo -e "    -d | --dirs     include sub-directories in the list of links."
    echo -e "    -o | --outfile  the following command line option provides the output file name."
    echo -e "    -r | --rpm      changes dump file parsing so that only rpm links are saved."
    echo -e "    --nodebug       excludes debuginfo and debugsource files. (use with -r | --rpm)"
    echo -e "    -v | --verbose  additional output of script operations.\n"
    echo -e "  Example:"
    echo -e "    lynxdump -b http://download.opensuse.org/repositories/X11/i586 src x86_64 --rpm --nodebug\n"
    echo -e "    creates an output file with the links to rpms in the ../X11/i586, ../X11/src and"
    echo -e "    ../X11/x86_64 directories without the debuginfo or debugsource files included.\n"
    exit 1
}
## fn stripSlash - parse input search dir and strip trailing / and set SEARCHDIR
## Usage: VAR=$(stripSlash VAR)
stripSlash() {
    [[ -z $1 ]] && { echo "WARNING: Nothing passed to fn 'stripSlash'" >&2; return 1; }
    testSTR="$1"
    lastCHAR=${1:$((${#testSTR}-1))}
    [[ $lastCHAR == / ]] && echo "${testSTR%/*}" || echo "$testSTR"
}
## function to get links from URL's and write to output-file
## Usage: getdump "URL-withlinks" "output-file"
getdump() {
    [[ -n "$1" ]] && [[ -n "$2" ]] || usage "Error: Invalid URL or output file passed to fn 'getdump' in ${0##*/}"
    url="$1"
    ofile="$2"
    ## Check flags to determine how links should be parsed and execute 'lynx -dump'
    if [[ $flagRPM -eq 1 ]]; then
        [[ $flagV -eq 1 ]] && {
            echo " getdump: $url, flagRPM = 1"
            echo -e "   --> using RPM parse"
            echo -e "   lynx -dump \"\$url\" | grep -e '^.*rpm$' | grep http | sed -e 's/^.*\shttp/http/'"
        }
        if [[ $noDebug -eq 1 ]]; then
            [[ $flagV -eq 1 ]] && echo -e "\n  Excluding ALL debuginfo and debugsource rpms.\n"
            if lynx -dump "$url" | grep -e '^.*rpm$' | grep -v '.delta.rpm' | grep -v 'debugsource' | grep -v 'debuginfo' | grep http | sed -e 's/^.*\shttp/http/' >> $ofile; then
                [[ $flagV -eq 1 ]] && echo -e "  lynxdump & parse -- OK\n" >&2
            else
                echo -e "  lynxdump & parse of $url -- FAILED\n" >&2
            fi
        else
            if lynx -dump "$url" | grep -e '^.*rpm$' | grep http | sed -e 's/^.*\shttp/http/' >> $ofile; then
                [[ $flagV -eq 1 ]] && echo -e "  lynxdump & parse -- OK\n" >&2
            else
                echo -e "  lynxdump & parse of $url -- FAILED\n" >&2
            fi
        fi
    elif [[ $flagDir -eq 1 ]]; then
        [[ $flagV -eq 1 ]] && {
            echo " getdump: $url, allow directories"
            echo -e "   --> using parse that allows subdirectories"
            echo -e "   lynx -dump \"\$url\" | grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/'"
        }
        if lynx -dump "$url" | grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/' >> $ofile; then
            [[ $flagV -eq 1 ]] && echo -e "  lynxdump & parse -- OK\n" >&2
        else
            echo -e "  lynxdump & parse of $url -- FAILED\n" >&2
        fi
    else
        [[ $flagV -eq 1 ]] && {
            echo " getdump: $url, default"
            echo -e "   --> using Default parse"
            echo -e "   lynx -dump \"\$url\" | grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/' -e '/\/$/d'"
        }
        if lynx -dump "$url" | grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/' -e '/\/$/d' >> $ofile; then
            [[ $flagV -eq 1 ]] && echo -e "  lynxdump & parse -- OK\n" >&2
        else
            echo -e "  lynxdump & parse of $url -- FAILED\n" >&2
        fi
    fi
    return 0
}
## Test for input and lynx or show help
[[ -z $1 ]] && usage "Error: No input URL provided, read below to resolve this problem..."
[[ $1 == -h ]] || [[ $1 == --help ]] && usage
if ! which lynx >/dev/null; then
    usage "Error: this script requires the program 'lynx'. Make sure it is installed\n        and make sure it is in your current path statement"
fi
## declare arrays and initialize variables
declare -a cliArray urlArray
cliArray=( "$@" )
baseSet=0
flagRPM=0
flagDir=0
noDebug=0
## Parse Command Line if arguments > 1; else set urlArray[0]="$1"
## Build urlArray to retrieve files from remote host
if [[ ${#cliArray[@]} -gt 1 ]]; then
    let index=0
    for ((i=0;i<${#cliArray[@]};i++)); do
        ## Test if skipidx set, if so, unset & continue
        [[ -n $skipidx ]] && [[ $i -eq $skipidx ]] && { unset skipidx; continue; }
        ## Check cli flags and add urls to urlArray
        case "${cliArray[$i]}" in
            -b | --base )
                [[ $baseSet -eq 1 ]] && {
                    [[ flagV -eq 1 ]] && echo -e "\n  Error: baseURL already set -- ignoring second '${cliArray[$i]}'\n"
                    continue
                }
                baseSet=1
                fullURL="${cliArray[$((i+1))]}"
                if [[ $fullURL =~ / ]]; then
                    fullURL=$(stripSlash $fullURL)
                    baseURL=${fullURL%/*}
                    urlArray[$index]=${fullURL}
                    [[ flagV -eq 1 ]] && printf "  Added \$urlArray[%3d] - %s\n" $i "${urlArray[${index}]}"
                    ((index++))
                    skipidx=$((i+1))
                else
                    usage "ERROR: Invalid URL following (-b|--base) option"
                fi;;
            -d | --dirs )
                flagDir=1;;
            -o | --outfile )
                ofile=${cliArray[$((i+1))]}
                skipidx=$((i+1));;
            -r | --rpm )
                flagRPM=1;;
            -v | --verbose )
                flagV=1;;
            --nodebug )
                noDebug=1;;
            * )
                testURL="${cliArray[$i]}"
                if [[ $testURL =~ / ]]; then
                    urlArray[$index]=${testURL}
                else
                    urlArray[$index]="$baseURL/$testURL"
                fi
                [[ flagV -eq 1 ]] && printf "  Added \$urlArray[%3d] - %s\n" $index "${urlArray[${index}]}"
                ((index++));;
        esac
    done
else
    ## Just set the first element of urlArray to the single URL provided
    urlArray[0]="${cliArray[0]}"
fi
## set and clear output file if not set above
[[ -z $ofile ]] && {
    echo -e "\n  Output file (using default) --> './lynxdump.txt'\n" >&2
    ofile=./lynxdump.txt
}
:>$ofile
[[ flagV -eq 1 ]] && echo -e "  ofile set: $ofile\n"
## Loop through urlArray calling lynx -dump and parse the output creating a list of links
## to be used with 'wget -i outfile' to retrieve the files from the remote host.
if [[ ${#urlArray[@]} -gt 1 ]]; then
    [[ flagV -eq 1 ]] && echo -e "\n  \$baseURL: $baseURL\n"
    for ((i=0;i<${#urlArray[@]};i++)); do
        url="${urlArray[$i]}"
        [[ flagV -eq 1 ]] && printf "  urlArray[%d]: %s\n" $i "$url"
        # printf "  urlArray[%d]: %s\n" $i "$url"
        getdump "$url" $ofile
    done
else
    url="${urlArray[0]}"
    [[ flagV -eq 1 ]] && printf "\n  Single URL: %s\n" "$url"
    getdump "$url" $ofile
fi
echo -e ' Done!'
echo -e "
N O T E: ALWAYS check and edit your output file before using it with wget to"
echo -e " make sure you eliminate any links pointing to directories or mirrors,"
echo -e " it will save you download time and diskspace ;-)
"
exit 0
###### Scraps ######
# cleanup () {
#
#   [[ -e $tmpfile ]] && rm $tmpfile
#
# }
## Set trap after all needed variables set
# trap cleanup SIGTERM EXIT
# tmpfile=/tmp/lynxdump.txt
# if [[ -n $skipidx ]] && [[ $i -eq $skipidx ]]; then
#     printf "  skipidx[%3d] in *; not adding %s\n" $skipidx "${cliArray[${i}]}"
# else
#     testURL="${cliArray[$i]}"
#     if [[ $testURL =~ / ]]; then
#         urlArray[$index]=${testURL}
#     else
#         urlArray[$index]="$baseURL/$testURL"
#     fi
#     printf "  Added \$urlArray[%3d] - %s\n" $index "${urlArray[${index}]}"
#     ((index++))
# fi
## Check flags to determine how links should be parsed and build dumpstring
# if [[ flagRPM -eq 1 ]]; then
#     [[ flagV -eq 1 ]] && echo " getdump: $url, flagRPM = 1"
#     # echo -e "   --> using RPM parse"
#     dumpSTR="grep -e '^.*rpm$' | grep http | sed -e 's/^.*\shttp/http/'"
# elif [[ flagDir -eq 1 ]]; then
#     [[ flagV -eq 1 ]] && echo " getdump: $url, allow directories"
#     # echo -e "   --> using parse that allows subdirectories"
#     dumpSTR="grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/'"
# else
#     [[ flagV -eq 1 ]] && echo " getdump: $url, default"
#     # echo -e "   --> using Default parse"
#     dumpSTR="grep http | grep -v '[=]' | sed -e 's/^.*\shttp/http/' -e '/\/$/d'"
# fi
#
# ## Perform retrieval of links with lynx -dump and parse with dumpSTR
# if lynx -dump "$url" | "$dumpSTR" >> $ofile; then
#     echo -e "  lynxdump & parse -- OK\n" >&2
# else
#     echo -e "  lynxdump & parse -- FAILED\n" >&2
# fi
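Once you have saved the script (I called mine lynxdump.sh in the examples above), remember to make it executable before running it, for example:

chmod +x lynxdump.sh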
To use this script you must install the terminal web browser called lynx. Open YaST / Software Management, search for lynx, and install it along with any packages it requires.
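If you prefer the command line, installing lynx on openSUSE should be as simple as:

sudo zypper install lynx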
Thank You,