I have a bash script and Python script combination that I found in LinuxFormat for searching and replacing in an OpenOffice file. It fixes the quote formatting when you cut and paste text from documents not created in OpenOffice. Might as well post it:
Code:
#!/bin/bash
# Script to change quotes in Open Office
#
# Usage: SCRIPT OLDFILE NEWFILE
#
# Unpacks the zip archive OLDFILE into a scratch directory, runs its
# content.xml through ./fixit.py, and zips the same member list into NEWFILE.
# Exits 0 on success and 1 on any failure.  When fixit.py or zip fails, the
# scratch directory is deliberately left behind (its path is printed) so the
# intermediate files can be inspected.

# Directory that holds fixit.py.
script_dir=~/bin/odtscript

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line on stderr and abort.
usage() {
  die "Usage: $0 OLDFILE NEWFILE"
}

[[ $# -eq 2 ]] || usage
OLDFILE=$1
NEWFILE=$2

# fixit.py is invoked as ./fixit.py, so work from its directory.  Relative
# OLDFILE/NEWFILE paths are consequently resolved against this directory.
cd -- "$script_dir" || die "Can't cd to $script_dir; aborting"

# The writability probe below deletes NEWFILE, so it must never alias OLDFILE.
if [[ "$OLDFILE" -ef "$NEWFILE" ]]; then
  die "OLDFILE and NEWFILE must be different files; aborting"
fi

# Private, unpredictable scratch directory.  It is kept in its own variable
# so that TMPDIR, which mktemp and other tools consult, is not overwritten.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/ODFfixit.XXXXXX") ||
  die "Can't create temporary directory; aborting"

# touch + rm verifies up front that NEWFILE can be created, without leaving
# an empty file behind; then the archive is unpacked.
if touch -- "$NEWFILE" && rm -f -- "$NEWFILE" &&
  unzip -q "$OLDFILE" -d "$workdir"; then
  : # All good
else
  rm -rf -- "$workdir"
  usage
fi

# Archive member names in their original order, one array element per entry
# so that names containing spaces survive the later zip invocation.
files=()
while IFS= read -r entry; do
  files+=("$entry")
done < <(unzip -Z1 "$OLDFILE")

has_content=0
for entry in "${files[@]}"; do
  if [[ "$entry" == "content.xml" ]]; then
    has_content=1
    break
  fi
done
if ((!has_content)); then
  rm -rf -- "$workdir"
  die "content.xml not in $OLDFILE; aborting"
fi

# Keep the original under another name; it is not in the member list, so it
# will not end up in NEWFILE.
mv -- "$workdir/content.xml" "$workdir/OLDcontent.xml" ||
  die "Can't rename content.xml in $workdir; aborting"

./fixit.py "$workdir/OLDcontent.xml" > "$workdir/content.xml" ||
  die "fixit.py failed in $workdir; aborting"

# Redirect zip's output directly (no "| cat") so the status being tested is
# zip's own rather than that of the last command in a pipeline.
if (cd -- "$workdir" && zip -q - "${files[@]}") > "$NEWFILE"; then
  rm -rf -- "$workdir"
else
  rm -f -- "$NEWFILE" # do not leave a truncated archive behind
  die "zip failed in $workdir on ${files[*]}"
fi
Code:
#!/usr/bin/python -tt
import xml.dom.minidom
import sys
import re
DEBUG = 0  # set to a non-zero value to enable dprint() output


def dprint(what):
    """Write a debug message to stderr when DEBUG is non-zero.

    what -- text to print; characters outside ASCII are shown as '?'.

    Returns None. Does nothing while DEBUG == 0.
    """
    if DEBUG == 0:
        return
    # Encode with 'replace' to squash non-ASCII characters, then decode again
    # so the value is text rather than bytes: concatenating bytes with '\n'
    # raises TypeError on Python 3.
    printable = what.encode('ascii', 'replace').decode('ascii')
    sys.stderr.write(printable + '\n')
# Typographic replacement characters.
emDash = u'\u2014'   # EM DASH
enDash = u'\u2013'   # EN DASH
sDquote = u'\u201c'  # LEFT DOUBLE QUOTATION MARK
eDquote = u'\u201d'  # RIGHT DOUBLE QUOTATION MARK
sSquote = u'\u2018'  # LEFT SINGLE QUOTATION MARK
eSquote = u'\u2019'  # RIGHT SINGLE QUOTATION MARK

# Straight-quote matchers.
# Opening quote: at the start of the text or after whitespace, and followed
# by a non-whitespace character.
sDpat = re.compile(r'(\A|(?<=\s))"(?=\S)', re.UNICODE)
sSpat = re.compile(r"(\A|(?<=\s))'(?=\S)", re.UNICODE)
# Closing double quote: at the very end of the text or followed by whitespace.
eDpat = re.compile(r'("\Z)|("(?=\s))', re.UNICODE)
# Closing single quote / apostrophe: preceded by a non-whitespace character.
eSpat = re.compile(r"(?<=\S)'", re.UNICODE)
def fixdata(td, depth):
    """Rewrite dashes and straight quotes in one character-data node, in place.

    td    -- DOM node whose ``data`` attribute holds the text to rewrite
    depth -- nesting depth of the node; used only in the debug trace
    """
    dprint("depth=%d: childNode: %s" % (depth, td.data))

    text = td.data
    # "--" and en dashes both become em dashes.
    text = text.replace('--', emDash).replace(enDash, emDash)
    # Apply the quote patterns in the same order as before: opening double,
    # closing double, opening single, closing single.
    for pattern, curly in ((sDpat, sDquote), (eDpat, eDquote),
                           (sSpat, sSquote), (eSpat, eSquote)):
        text = pattern.sub(curly, text)
    td.data = text
def handle_xml_tree(aNode, depth):
    """Walk the DOM below aNode and run fixdata() on every leaf text node.

    aNode -- DOM node (document, element, text, ...) to start from
    depth -- nesting depth of aNode, passed through to fixdata()
    """
    if aNode.hasChildNodes():
        for kid in aNode.childNodes:
            handle_xml_tree(kid, depth + 1)
    elif aNode.nodeType in (aNode.TEXT_NODE, aNode.CDATA_SECTION_NODE):
        # Only document text is rewritten. Comments and processing
        # instructions also carry a ``data`` attribute, but curling the
        # quotes inside a processing instruction would corrupt it.
        fixdata(aNode, depth)
def doit(argv):
    """Parse the XML file named by argv[1], fix its text, and print the result.

    argv -- argument vector; argv[1] is the path of the XML file to read

    The rewritten document is written to standard output encoded as UTF-8.
    """
    doc = xml.dom.minidom.parse(argv[1])
    handle_xml_tree(doc, 0)
    # toxml('utf-8') returns encoded bytes. On Python 3 these must go to the
    # underlying binary stream; Python 2's sys.stdout has no ``buffer``
    # attribute and accepts byte strings directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(doc.toxml('utf-8'))
if __name__ == "__main__":
    # Run as a script: the first argument is the XML file to convert.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write("Usage: %s CONTENT_XML\n" % sys.argv[0])
        sys.exit(2)
    doit(sys.argv)
For an explanation of the code, you might search the archives of old LinuxFormat magazines.
Garvan