#!/usr/bin/env python
import random, string, time
def rand(start=None, end=None):
    """Return a random instant between *start* and *end*.

    start -- lower bound; defaults to the module-level ``start_date``.
    end   -- upper bound; defaults to the module-level ``end_date``.

    The result is ``start + r * (end - start)`` where ``r`` comes from
    ``random.random()``.  Calling ``rand()`` with no arguments keeps the
    original behaviour of sampling between the two module globals.
    """
    # Resolve the globals at call time so they may be assigned after this
    # function has been defined.
    if start is None:
        start = start_date
    if end is None:
        end = end_date
    return start + random.random() * (end - start)
def fmt(seconds):
    """Return *seconds* as a bracketed, human-readable timestamp.

    seconds -- a value accepted by ``time.ctime`` (seconds since the epoch).

    The text inside the brackets is exactly what ``time.ctime`` produces,
    so it is expressed in the local time zone of the running process.
    """
    return '[%s]' % time.ctime(seconds)
def find(seconds, blocks=None):
    """Find the entries of *blocks* that bracket the instant *seconds*.

    seconds -- the instant to look up.
    blocks  -- sequence of timestamps to scan in order; defaults to the
               module-level ``times`` list.  The scan assumes newest-first
               (descending) order; this is not checked here.

    Returns a pair ``(before, after)``:

    * ``before`` is the first entry strictly smaller than *seconds*, or
      ``None`` when no entry is smaller.
    * ``after`` is the entry visited immediately before ``before``, or
      ``None`` when the very first entry is already smaller.

    A pair is always returned, so callers can unpack the result
    unconditionally and test either side for ``None``.
    """
    if blocks is None:
        blocks = times
    last = None
    for sec in blocks:
        if sec < seconds:
            return sec, last
        last = sec
    # Nothing older than `seconds` was found: return the same pair shape
    # instead of falling off the end with a bare None.
    return None, last
# --- Configuration and input loading ---------------------------------------
datfile = "clamblocks.dat"  # whitespace-separated records; field 5 is an integer timestamp
count = 0
lines = 100000              # read at most this many records from datfile
samples = 100000            # NOTE(review): presumably the number of random dates to draw
times = []

# `with` closes the file even if a record fails to parse.
with open(datfile, "r") as fp:
    for line in fp:
        # split() already drops the trailing newline; slicing the last
        # character off would damage a final line that has no newline.
        fields = line.split()
        times.append(int(fields[5]))
        count += 1
        if count == lines:
            break

# Sampling between two dates needs at least two distinct records.
if len(times) < 2:
    raise SystemExit("need at least two block timestamps in %s, found %d"
                     % (datfile, len(times)))

# The last record read is used as the start of the range and the first as the
# end, i.e. the file is assumed to list records newest first.
start_date = times[-1]
end_date = times[0]
print("picking random dates between %s and %s" % (fmt(start_date), fmt(end_date)))
# --- Sampling loop ----------------------------------------------------------
# Draw random instants, look up the neighbouring entries on either side, and
# keep running totals of the gap to each neighbour so the averages can be
# reported as the run progresses.
before_sum = 0
after_sum = 0
count = 0
# Stop after `samples` draws; the limit was declared but never applied, so the
# loop used to run until interrupted.
while count < samples:
    t = rand()
    before, after = find(t)
    before_sum += t - before
    after_sum += after - t
    count += 1
    if count % 1000 == 0:
        # Progress line every 1000 samples: the current gaps, with the running
        # average of each gap in parentheses.  `t` lies after `before` and
        # before `after`, so the second gap is labelled "before".
        print("(%6d) %s is %6.2f seconds after %s (%6.2f) and %6.2f seconds before %s (%6.2f)" %
              (count,
               fmt(t),
               t - before, fmt(before), before_sum / count,
               after - t, fmt(after), after_sum / count))
I'm not a Python programmer, but in the find routine it looks like you're returning the newer block and then the older block, whereas in the main routine you're looking for the older block and then the newer block.
Have you tried running the code with a small subset, just so you can manually verify the output?
I would expect the averages to be 1/2 the block time.
Maybe I don't know what I'm talking about... (it wouldn't be the first time).