#!/bin/bash
#
# pak-web (was txt2gzip) -- 2008-08-21, last edit 2008-09-13
#
# part of the pak-web-site project, this is a wrapper script for the
# pak-web-site gawk script to coordinate the compression of web and
# text files as well as provide a restore option
#
# Copyright (C) 2008 Grant Coady GPLv2
#
# Home site: http://bugsplatter.id.au/pakweb/
#
# call:
#   to shrink the web site and compress .txt files:
#	pak-web
#
#   to rebuild the current directory and below only:
#	pak-web dir
#
#   to shrink a single page:
#	pak-web usedir <directory>
#
#   to restore the web site and remove gzipped .txt files:
#	pak-web restore
#

# adjust to suit your site
web_doc_root="/home/web/bugsplatter"

PATH="/usr/local/bin:$PATH"	# for root cron job (junkshow)

# gzrate -- how to display shrink vs source filesize
#	1: saved:   (filesize - gzipsize) / filesize
#	0: by size: gzipsize / filesize
gzrate=1

# rename every *.html.src file at or below the current directory back to
# *.html, printing one "mv -f" line per file followed by a blank line
rename_src2html()	# -
{
	local name

	# NUL-delimited so that names containing spaces survive intact
	find . -name "*.html.src" -print0 | while IFS= read -r -d '' name
	do
		printf " mv -f %-40s %s\n" "$name" "${name%.*}"
		mv -f -- "$name" "${name%.*}"
	done
	echo ""
}

# restore *.html.src files below the current directory when $1 is empty,
# otherwise below the directory named by $1; exits 1 when a non-empty $1
# is not a directory
restore_html_files()	# "-" or pak-web caller's $PWD
{
	local dir="${1:-}"

	echo " restore *.html.src files"
	if [ -z "$dir" ]
	then
		rename_src2html
	else
		[ -d "$dir" ] || exit 1
		# subshell: the caller's working directory is never disturbed
		( cd "$dir" && rename_src2html )
	fi
}

# remove all gzipped files, restore site for random review and editing
restore_web_site()	# -
{
	echo " pak-web remove gzipped files"
	find . \( -name "*.html.gz" -o -name "*.txt.gz" \) -delete
	rm -f bugs.css.gz msie.css.gz
	restore_html_files "${1:-}"
}

# gzip the two style sheets (when readable) and every *.txt file at or
# below the working directory, skipping any path that contains
# "networkmonitor", "public" or "robots.txt"; each X is compressed to X.gz
# and the source file is kept
#
# note: with a non-empty $1 the shell is left inside that directory on
# return; exits 1 when $1 is not a directory or cannot be entered
gzip_text_files()	# "" or pak-web caller's $PWD
{
	local dir="${1:-}"
	local name

	echo "gzip text files"
	if [ -n "$dir" ]
	then
		[ -d "$dir" ] || exit 1
		cd "$dir" || exit 1
	fi
	[ -r bugs.css ] && gzip -c bugs.css > bugs.css.gz
	[ -r msie.css ] && gzip -c msie.css > msie.css.gz
	# the exclusions are literal substrings of the path, not regexes
	find . -name "*.txt" \
		! -path "*networkmonitor*" \
		! -path "*public*" \
		! -path "*robots.txt*" \
		-print0 |
	while IFS= read -r -d '' name
	do
		gzip -c "$name" > "$name.gz"
	done
}

# drive the pak-web-site script, needs pak-web-scan to make a list of ssi
# include files and the find command to provide a list of target files.
# may be called from below the $doc_root directory to refresh single web
# page, examples: junkshow for hourly page updates, netdraw for the after
# midnight summary image updates
#
# $1 empty: pak-web-scan runs in the current directory and every *.html
#	file found below it is handed to pak-web-site
# $1 a directory: pak-web-scan runs inside that directory and only *.html
#	paths containing its basename are handed to pak-web-site
# in both cases paths matching 'archive|public' are left out; exits 1 when
# a non-empty $1 is not a directory or cannot be entered
shrink_web_site()	# "" or pak-web caller's $PWD
{
	local dir="${1:-}"
	local ssi_list thisdir="" page
	local -a pages=()

	if [ -n "$dir" ]
	then
		[ -d "$dir" ] || exit 1
		cd "$dir" || exit 1
		ssi_list=$(pak-web-scan)
		cd - > /dev/null
		thisdir=$(basename -- "$dir")
	else
		ssi_list=$(pak-web-scan)
	fi

	# collect the targets in an array so that a path containing white
	# space stays one argument; an empty $thisdir makes the literal
	# grep match every line, i.e. no directory filter for the whole site
	while IFS= read -r page
	do
		pages+=("$page")
	done < <(find . -name "*.html" -type f |
			grep -F -- "$thisdir" | sort |
			grep -E -v 'archive|public')

	pak-web-site -v "gzrate=$gzrate" -v "ssi_list=$ssi_list" "${pages[@]}"
}

# script driver
callerpwd=$PWD
cd "$web_doc_root" || exit 1

case "${1:-}" in
-d|dir)
	# rebuild current directory and below only
	restore_html_files "$callerpwd"
	shrink_web_site "$callerpwd"
	gzip_text_files "$callerpwd"
	;;
-r|restore)
	# undo all merged web pages and remove gzipped text files
	restore_web_site
	;;
-u|usedir)
	# $2 is required; unquoted, a missing $2 used to pass the -d test
	if [ ! -d "${2:-}" ]
	then
		echo "usage: pak-web usedir <directory>" >&2
		exit 1
	fi
	restore_html_files "$2"
	shrink_web_site "$2"
	gzip_text_files "$2"
	;;
*)
	# default is to shrink entire web site and gzip text files
	shrink_web_site
	gzip_text_files
	;;
esac
# end