File Comparison With Some DB Bells and Whistles
Python November Post-A-Day | 2009-11-04 |
Right. So yesterday's post was mind-numbingly simple. But that's the just the file comparison. The script that I actually use also hits the db and generates a slew of additional updates for me, based on the results of that diff.
#!/usr/bin/python
# usage: ./compare.py pwdata102409.txt pwdata103109.txt used_codes_wk6
import sys, datetime, MySQLdb as Database
todaysdate = datetime.datetime.now().strftime("%Y%m%d")
file1 = sys.argv[1]
file2 = sys.argv[2]
# for archiving purposes, we keep each week's changes in a separate table
# - it's a little weird, I know, but we have our reasons - don't ask
thisweekstable = sys.argv[3]
# a list to hold all the updated codes
diffcodes = []
def writeChanges():
f1 = open(file1, "r")
f2 = open(file2, "r")
fileOne = f1.readlines()
fileTwo = f2.readlines()
f1.close()
f2.close()
outFile1 = open(todaysdate + "_results.txt", "w")
outFile2 = open(todaysdate + "_updates.txt", "w")
x = 0
for i in fileOne:
if i != fileTwo[x]:
outFile1.write(i.rstrip() + " <> " + fileTwo[x].rstrip() + "\n")
code = fileTwo[x].split("|")[0]
codes_used = fileTwo[x].split("|")[3]
codes_limit = fileTwo[x].split("|")[4].rstrip()
outFile2.write("UPDATE used_codes SET codes_used='" + codes_used + "', code_limit='" + code_limit + "' WHERE passcode = '" + code + "';" + "\n")
# take the code that's been split out from the record in fileTwo and add it to the list
diffcodes.append(code)
x += 1
outFile1.close()
outFile2.close()
print "number of records changed: " + str(x)
return diffcodes
def writeUpdates():
outFile3 = open(todaysdate + "_doubleused.txt", "w")
outFile4 = open(todaysdate + "_singleused.txt", "w")
db = Database.connect("server", "username", "password", "dbname")
cursor = db.cursor()
# convert diffcodes to a simple comma-delimited sequence before passing them to the select
codes = "', '".join(map(str, diffcodes))
# for codes that come back with a count greater than one,
# update the main coupons table to set the used value to 8
sqlOne = """SELECT passcode FROM %s WHERE passcode IN ('%s')
GROUP BY passcode HAVING COUNT(passcode) > '1'""" %(thisweekstable, codes)
cursor.execute(sqlOne)
double = cursor.fetchall()
doubles = []
for code in double:
doubles.append(code[0])
doubleused = "', '".join(map(str, doubles))
# again, yeah, I'm writing the updates to a file before running them against production
# - it's just a quirk, I like to eyeball everything first, then run updates from the command line
outFile3.write("UPDATE coupons SET used = '8' WHERE code IN (" + doubleused + ");")
# for the remaining codes, those with a count equal to 1
sqlTwo = """SELECT passcode FROM %s WHERE passcode IN ('%s')
GROUP BY passcode HAVING COUNT(passcode) = '1'""" %(thisweekstable, codes)
cursor.execute(sqlTwo)
single = cursor.fetchall()
singles = []
for code in single:
singles.append(code[0])
singlecodes = "', '".join(map(str, singles))
# do another select to identify a subset that meets one of two conditions
# my 'end_date' column is a date type, so I'm passing in a YYYY-MM-DD date to do the comparison,
# where I might otherwise just use UNIX_TIMESTAMP()
currentdate = datetime.date.today()
sqlThree = """SELECT passcode FROM %s WHERE ((code_limit = '1' AND end_date < '%s')
OR (codes_used = '4' AND code_limit != '1'))
AND passcode IN ('%s')""" %(thisweekstable, currentdate, singlecodes)
cursor.execute(sqlThree)
sused = cursor.fetchall()
ssused = []
for code in sused:
ssused.append(code[0])
singleused = "', '".join(map(str, ssused))
outFile4.write("UPDATE coupons SET used = '4' WHERE code IN ('" + singleused + "');")
cursor.close()
db.close()
outFile3.close()
outFile4.close()
writeChanges()
writeUpdates()