#!/usr/bin/python
#desp2.py, Dave Pawson; http://www.dpawson.co.uk
# Abstract parse a file by unit, and process
# according to keyword.
#
#
# Original: 2006-02-06T13:29:58.0Z
#          :Initial issued
#
#
# Copyright Dave Pawson, 2006
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# uses searchReplace.py
# Generates contacts.xml from Evolution (vsn) contacts file
#
prog="desp2.py"
version="1.0"
debug=6    # verbosity; the functions below print more as this rises

import sys
import re
import getopt
import os
from Ft.Xml import MarkupWriter


#
# Process file through to output, tokenising lines.
#
#   fh     - iterable yielding the input lines (an open file)
#   writer - output writer, handed on unchanged to the 'prX' handlers
#
# Each line is tested against every key of the module-level
# 'despatcher' table.  When a key matches at the start of the line, and
# the matched text is as long as the key itself, the key is removed and
# the rest of the line (without its newline) goes to the handler
# registered for that key.  A non-empty line that matches no key is
# reported on stdout and emitted through prCont.
#
def processFileLine(fh, writer):
    for line in fh:
        # Remove only the line terminator.  The last line of a file may
        # have none, so chopping the final character blindly would eat
        # real content.
        content = line.rstrip("\n")
        matched = False    # Processed flag
        for kwd, handler in despatcher.items():
            m = re.match(kwd, line)
            if not m:
                continue
            matched = True
            # Keys are applied as regular expressions; despatch only
            # when the match is exactly as long as the key.
            if len(m.group()) == len(kwd):
                if debug > 5:
                    print("match on [%s] " % (kwd))
                rest = unicode(content[m.end():])
                handler(rest, writer)
        if content and not matched:
            print("No match for line [%s] " % (content))
            prCont(content, writer)


#
# The 'prX' functions have a common model. Name, line of content
# and output file writer. This must not change or despatcher
# will fail.
#
#
# Process a file in chunks; that's between two 'gappy' pieces of text.
#
#   fh     - open input file; read with readline() until it returns ''
#   writer - output writer, handed on unchanged to prChunk
#
# A chunk is a run of consecutive lines none of which is exactly "\n".
# Each completed chunk goes to prChunk together with the number of
# blank lines counted since the previous chunk was handed over.
#
def processFileChunks(fh, writer):
    blankLines = 0    # blank lines seen since the last chunk was emitted
    linenum = 0       # keep track of lines if needed
    chunk = []
    for line in iter(fh.readline, ""):
        if line != "\n":
            chunk.append(line)
            if debug > 6:
                print("linenum: %d: %s" % (linenum, line))
        else:
            blankLines += 1
            if chunk:
                prChunk(chunk, writer, blankLines)
                blankLines = 0
                chunk = []
        linenum += 1
    # A file that does not end with a blank line leaves its final chunk
    # pending here; emit it rather than dropping it silently.
    if chunk:
        prChunk(chunk, writer, blankLines)


#
# Process a chunk; that's between two 'gappy' pieces of text.
#
#   chunk  - non-empty list of lines
#   writer - output writer, handed on unchanged to the 'prX' handlers
#   nbl    - the count of blank lines; accepted so that callers keep
#            working, but not used by the current implementation
#
# The first line of the chunk selects the handler in the same way as
# processFileLine.  On a match the key is stripped from the first line,
# the chunk is flattened by prInlines and the result goes to the
# handler.  When nothing matches, only the first line is reported and
# emitted through prCont.  An empty chunk terminates the program with
# exit status 2.
#
def prChunk (chunk, writer, nbl):
    if not chunk:
        print("Null chunk")
        sys.exit(2)
    line = topline(chunk)
    firstLine = line.rstrip("\n")
    # Must be set before the loop: it is read afterwards even when no
    # key matched at all.
    matched = False
    for kwd, handler in despatcher.items():
        m = re.match(kwd, line)
        if not m:
            continue
        matched = True
        if len(m.group()) == len(kwd):
            if debug > 6:
                print("match on [%s] " % (kwd))
            content = stripLeading(chunk, kwd)    #remove kwd fm 1st line
            rest = unicode(prInlines(content))
            handler(rest, writer)
    if firstLine and not matched:
        print("No match for line [%s] " % (firstLine))
        prCont(firstLine, writer)


#
# Strip leading (chars) + whitespace from the first line of the chunk,
# then return the remainder of the chunk.
# Typically used to remove leading keywords and ws.
#
#   chunk - non-empty list of lines; left unmodified
#   chars - text whose length is cut from the front of the first line
#
# Returns a new list: the trimmed first line followed by the remaining
# lines of the chunk.
#
def stripLeading(chunk, chars):
    remainder = chunk[0][len(chars):].lstrip()
    if debug > 6:
        print("stripLead: [%s] " % remainder)
    # Work on a copy so the caller's list is not altered.
    stripped = list(chunk)
    stripped[0] = remainder
    return stripped


#
# Return the top line of the chunk.
#
#   chunk - list of lines
#
# Anything that is not a list is printed and the program terminates
# with exit status 2.
#
def topline(chunk):
    if isinstance(chunk, list):
        return chunk[0]
    print("Chunk is not a list")
    print(chunk)
    sys.exit(2)


#
# Process inlines. Generally any input not taking up an entire line,
# @param chunk, array of lines of text
# Returns the modified, updated text as a single string.
# Does not output it to any file.
def prInlines(chunk):
    lines = dechunk(chunk)    # Convert to a single long line
    if debug > 6:
        print("prInlines: [%s]" % lines)
    escaped = escapeSpecials(lines)
    if debug > 6:
        print("prInlines: [" + escaped + "....]")
    return prText(escaped)    # replace escaped characters


#
# Replace special characters with entities.
#
#   str - text to escape (the name shadows the builtin; it is kept so
#         that existing callers are unaffected)
#
# Returns the text with every '&' written as '&amp;' and every '<'
# written as '&lt;'.  '&' is handled first so the ampersands that
# introduce the entities are not escaped a second time.
#
# NOTE(review): the previous body replaced each character with itself
# and relied on a 'string' module that was never imported; the entity
# names here follow the stated purpose above.  If the writer escapes
# text as well, the output will be double-escaped - confirm.
#
def escapeSpecials(str):
    text = str
    return text.replace("&", "&amp;").replace("<", "&lt;")


#
# prText. Process plain text for escaped characters.
#
#   lines - text in which a backslash marks the following character
#
# Returns the text with each backslash-plus-character pair reduced to
# the character alone.
#
def prText(lines):
    retval = re.sub(r"\\(.)", r"\1", lines)
    if debug > 6:
        print("prText: %s ..." % retval[0:20])
    return retval


#
# Convert a chunk to a single line, joined with ' '.
#
#   chunk - list of lines
#
# Returns one string in which every newline has been turned into a
# space.  A result that is not a plain string is reported on stdout,
# and is still returned.
#
# The star import is retained from the original file; nothing in this
# section depends on it any longer.
from types import *
def dechunk(chunk):
    lines = ' '.join(chunk)
    lines = lines.replace('\n', ' ')
    if debug == 6:
        print("dechunk [" + lines + "]")
    if not isinstance(lines, str):
        print("Dechunk: wrong type: Quitting %s" % type(lines))
    return lines


#
# Write one element holding a single text node.
#
#   writer  - output writer
#   name    - element name
#   content - text placed inside the element
#
def _writeElement(writer, name, content):
    writer.startElement(name)
    writer.text(unicode(content))
    writer.endElement(name)


#
#
# Process keyword1: a 'k1' element holding the stripped line.
#
def prkwd1 (line, writer):
    _writeElement(writer, u'k1', line.strip())


#
# Process the version info: a 'ver' element holding the line as given.
#
def prVersion (line, writer):
    _writeElement(writer, u'ver', line)


#
# Process the keyword2: a 'k2' element holding the stripped line.
#
def prkwd2 (line, writer):
    _writeElement(writer, u'k2', line.strip())


#
# Process the keyword3: a 'k3' element holding the line as given.
#
def prkwd3 (line, writer):
    _writeElement(writer, u'k3', line)


#
# Process as a continuation line: a 'cont' element holding the line.
#
def prCont(line, writer):
    _writeElement(writer, u'cont', line)


#
#Specials I don't want.
# Processed to output though
# with 'special' element
#
def prSpecial(line,writer):
    writer.startElement(u"special")
    writer.text(unicode(line))
    writer.endElement(u"special")


#
# Process a comment line: the content is written as a comment
# through the writer.
#
def prComment (line, writer):
    writer.comment(unicode(line))


#
#Items which are definitely unwanted.  Nothing is written.
#
def prDump(line,writer):
    pass


#
# Basic replace. Process input file through to output file.
# Adds the document element.
#
#   infilename - path of the input file
#   out        - open, writable output file; closed before returning
#
# The input is read twice: once line by line and once in chunks.  Each
# pass writes its own 'doc' element carrying the file name in 'src'.
#
# NOTE(review): this yields two sibling 'doc' elements, and the
# document opened with startDocument() is never ended explicitly -
# confirm that both are intended.
#
def replaceit(infilename,out):
    try:
        fh = open(infilename)
        try:
            writer = MarkupWriter(out, indent=u"yes")
            writer.startDocument()
            writer.startElement(u'doc', attributes={u'src':unicode(infilename)})
            processFileLine(fh, writer)
            writer.endElement(u'doc')
        finally:
            fh.close()
        # Now process using chunks instead of just lines
        fh = open(infilename)
        try:
            writer.startElement(u'doc', attributes={u'src':unicode(infilename)})
            processFileChunks(fh, writer)
            writer.endElement(u'doc')
        finally:
            fh.close()
    finally:
        # Release the output even when processing fails part way.
        out.close()
    return None


#
# Print usage instructions
#
def usage():
    # 'prog' already carries its '.py' suffix.
    print("%s Version %s" % (prog, version))
    print("Usage: \n python %s -i <infile> -o <outfile> " % prog)
    print("\t\t Produces an XML file in the output, based on input keywords ")


#
# Main program entry
#
# Options:  -i/--input FILE, -o/--output FILE, -h/--help.
# Exits with status 2 on any usage or file error.
#
def main():
    if len(sys.argv) < 2:
        usage()
        sys.exit(2)
    print("%s Version %s, " % (prog, version))
    outfile = sys.stdout
    infilename = None
    try:
        # -h takes no argument, matching the long form --help.
        opts, args = getopt.getopt(sys.argv[1:], "hi:o:",
                                   ["help", "input=", "output="])
    except getopt.GetoptError:
        usage()    # print help information and exit:
        sys.exit(2)
    # Honour a help request before complaining about missing options.
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
    if len(opts) < 2:
        usage()
        sys.exit(2)
    for o, a in opts:
        if o in ("-i", "--input"):
            infilename = os.path.realpath(a)
            print("input is  %s" % infilename)
        if o in ("-o", "--output"):
            outfile = a
    # infilename is still None when -i was never given; os.path.isfile
    # must not be handed None.
    if infilename is None or not os.path.isfile(infilename):
        sys.stderr.write ("\t\t Error, '%s' is not a file" % infilename)
        sys.exit(2)
    if outfile is sys.stdout:
        out = sys.stdout
    else:
        if os.path.isfile(outfile):
            sys.stderr.write ("\t\t Warning: %s will be overwritten \n" % outfile)
        if os.path.isdir(outfile):
            sys.stderr.write("Output file must be a writable file\n\n")
            sys.exit(2)
        try:
            out = open(outfile, 'w')
        except EnvironmentError:
            sys.stderr.write("%s not writable, Quitting" % outfile)
            sys.exit(2)
    if debug > 6:
        print("main: %s Version %s, Processing %s to %s"
              % (prog, version, infilename, outfile))
    replaceit(infilename, out)


#
#Despatcher. Despatch on keyword through to
# common function with same parameters.
# Every handler takes (line, writer).
#
despatcher = {
    'keyword1': prkwd1,
    'keyword2': prkwd2,
    'keyword3': prkwd3,
    'VERSION': prVersion,
    'unknown': prSpecial,
    '#': prComment,
}

#
#list holding keywords.
#Must match the despatcher, so it is taken from the despatcher's keys
# instead of being maintained by hand.
#
#Keyword definitions.
#
kwds = list(despatcher)
kwdsRegex = '(?:' + '|'.join(kwds) + ')'
kwdsPatt = re.compile(kwdsRegex)


if __name__ == "__main__":
    main()