-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgenDist2.py
More file actions
executable file
·216 lines (175 loc) · 7.41 KB
/
genDist2.py
File metadata and controls
executable file
·216 lines (175 loc) · 7.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#! /usr/bin/python
"""
Generates a PDF/CDF from the output of the parser. Stdout shows a summary of
the data that can be --graphable, latex --tabularSummary, or human readable and
the CDF/PDF statistics are output to OUTPUT_FILE.
Usage:
genDist2.py [-h] [--graphable] FILE [OUTPUT_FILE]
genDist2.py [-h] --tabularSummary [FILES...]
Options:
-h --help Show this help messages
--graphable Prints the average log message statistics (i.e. average
static characters, int specifiers, etc) to stdout that
can be directly used in gnuplot, split by number of
dynamic variables
Without this parameter, it is human readable only
--tabularSummary Prints the log message statistics aggregated by
system in a format that can be directly imported
into a latex file.
FILE File to process
FILES Files to process
OUTPUT_FILE Output file to store CDF-PDF statistics to
"""
import re, os
from docopt import docopt
def _mean(total, count):
    """Return ``total / count`` as a float, or 0.0 when ``count`` is zero."""
    if not count:
        return 0.0
    return total * 1.0 / count


def processFile(filename, outputFile=None, options=None):
    """Summarize one parser output file and print its statistics to stdout.

    Every line of the input that does not start with '#' is matched against
    six whitespace-separated integers followed by the remaining text:

        staticChars dynaVars numInts numFloats numStrings numSpecial text

    Lines that do not match are ignored.

    Args:
        filename: Path of the file to read. Its base name without extension
            is used as the system name in the tabular and graphable output.
        outputFile: Optional path. When given, a table with one row per
            distinct static-character count is written to it (overwriting
            any existing file). The "PDF" column holds the number of records
            with that count and the "CDF" column the running total.
        options: Optional mapping with the keys '--tabularSummary' and
            '--graphable' selecting the stdout format. When omitted, the
            module-level ``arguments`` mapping created by the command-line
            entry point is used if it exists; otherwise the human-readable
            format is printed.

    Averages are reported as 0 when the input holds no matching records
    instead of raising ZeroDivisionError. Per-group rows are printed in
    ascending order of the dynamic-variable count.
    """
    if options is None:
        # Fall back to the parsed command line published by the script's
        # entry point, so existing two-argument callers keep working.
        options = globals().get('arguments') or {}

    systemName = os.path.splitext(os.path.basename(filename))[0]
    pattern = re.compile(r"(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.+)")

    numLogs = 0
    totalInts = 0
    totalFloats = 0
    totalStrings = 0
    totalSpecials = 0
    totalStaticChars = 0

    # staticChars -> number of records with that many static characters
    staticChars2Count = {}
    # dynaVars -> [records, staticChars, ints, floats, strings, specials]
    perDynaVars = {}

    with open(filename) as iFile:
        for line in iFile:
            if line.startswith('#'):
                continue
            match = pattern.match(line)
            if not match:
                continue

            (staticChars, dynaVars, numInts,
             numFloats, numStrings, numSpecial) = (
                int(field) for field in match.groups()[:6])

            staticChars2Count[staticChars] = \
                staticChars2Count.get(staticChars, 0) + 1

            group = perDynaVars.setdefault(dynaVars, [0] * 6)
            group[0] += 1
            group[1] += staticChars
            group[2] += numInts
            group[3] += numFloats
            group[4] += numStrings
            group[5] += numSpecial

            numLogs += 1
            totalInts += numInts
            totalFloats += numFloats
            totalStrings += numStrings
            totalSpecials += numSpecial
            totalStaticChars += staticChars

    # Build the per-static-length table: key, count, running count.
    cdfPdfLines = ["# %-8s%-10s%-10s\r\n" % ("Key", "PDF", "CDF")]
    totalCount = 0
    totalBytes = 0
    for staticChars in sorted(staticChars2Count):
        count = staticChars2Count[staticChars]
        totalCount += count
        totalBytes += staticChars * count
        cdfPdfLines.append(
            "%-10s%-10s%-10d\r\n" % (staticChars, count, totalCount))

    if outputFile:
        with open(outputFile, 'w') as oFile:
            oFile.write("".join(cdfPdfLines))

    if options.get('--tabularSummary'):
        # One LaTeX table row per input file; the first line is a LaTeX
        # comment naming the columns.
        print("% System Name & Average Static Chars & Avg ints & Avg floats & Avg strings & Avg Specials & Num Logs")
        print("%s & %5.2lf & %5.2lf & %5.2lf & %5.2lf & %5.2lf & %d \\\\" % (
            systemName,
            _mean(totalStaticChars, numLogs),
            _mean(totalInts, numLogs),
            _mean(totalFloats, numLogs),
            _mean(totalStrings, numLogs),
            _mean(totalSpecials, numLogs),
            numLogs
        ))
    elif options.get('--graphable'):
        print("# Input file: " + filename)
        print("# %-20s %15s %15s %15s %15s %15s %15s %15s" % (
            "System", "Dynamic Vars", "Num Logs", "Avg Static", "Avg Ints",
            "Avg Floats", "Avg Strings", "Avg Others"))
        for dynaVars in sorted(perDynaVars):
            count, totalStatic, ints, floats, strings, specials = \
                perDynaVars[dynaVars]
            print("%-20s %15d %15d %15.2lf %15.2lf %15.2lf %15.2lf %15.2lf" % (
                systemName,
                dynaVars, count, _mean(totalStatic, count),
                _mean(ints, count),
                _mean(floats, count),
                _mean(strings, count),
                _mean(specials, count),
            ))

        # Two new lines for gnuplot to separate this as an index
        print("")
        print("")
    else:
        print("%-30s: Average log length was %.2lf characters per message" % (
            filename, _mean(totalBytes, totalCount)))

        for dynaVars in sorted(perDynaVars):
            count, totalStatic, ints, floats, strings, specials = \
                perDynaVars[dynaVars]

            # One percent of the dynamic arguments counted in this group, so
            # that "value / onePercent" below yields a percentage.
            onePercent = 0.01 * (ints + floats + strings + specials)
            if onePercent == 0:
                onePercent = 0.01  # Avoids division by 0

            print("\t For %-2d dynamic variables, average static was %5.1lf (%4d/%4d) "
                  "=> %3.0lf%% %3d Ints %3.0lf%% %3d Floats %3.0lf%% %3d Strings %3.0lf%% %3d Others" % (
                      dynaVars, _mean(totalStatic, count), totalStatic, count,
                      ints / onePercent,
                      ints,
                      floats / onePercent,
                      floats,
                      strings / onePercent,
                      strings,
                      specials / onePercent,
                      specials))

        print("# Total number of log statements processed is %u" % numLogs)
        print("# Average log message has %.2lf static characters/message, %5.2lf ints %5.2lf floats %5.2lf strings and %5.2lf specials\r\n" % (
            _mean(totalStaticChars, numLogs),
            _mean(totalInts, numLogs),
            _mean(totalFloats, numLogs),
            _mean(totalStrings, numLogs),
            _mean(totalSpecials, numLogs),
        ))
if __name__ == "__main__":
    # Parse the command line against the usage text in the module docstring.
    # The result is kept in the module-level name `arguments`, which
    # processFile consults to choose its output format.
    arguments = docopt(__doc__, version='PDF/CDF Generator v1')
    filename = arguments['FILE']
    outputFile = arguments['OUTPUT_FILE']

    if not arguments['--tabularSummary']:
        # Single-file mode: human-readable or gnuplot-ready output.
        processFile(filename, outputFile)
    else:
        # Multi-file mode: wrap one table row per input file in a LaTeX
        # tabular environment.
        latexPreamble = """
\\resizebox{\\columnwidth}{!}{%
\\centering
\\setlength\\tabcolsep{4pt}
\\begin{tabular}{ l | r | r | r | r | r | r }
\\textbf{System Name} & \\textbf{\\shortstack[c]{Static\\\\Chars}} & \\textbf{Integers} & \\textbf{Floats} & \\textbf{Strings} & \\textbf{Others} & \\textbf{Logs} \\\\
\\midrule
"""
        print(latexPreamble)

        for inputPath in arguments['FILES']:
            processFile(inputPath, outputFile)

        print("\\end{tabular}}")