histogram.go (9425B)
1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package trace 6 7 // This file implements histogramming for RPC statistics collection. 8 9 import ( 10 "bytes" 11 "fmt" 12 "html/template" 13 "log" 14 "math" 15 "sync" 16 17 "golang.org/x/net/internal/timeseries" 18 ) 19 20 const ( 21 bucketCount = 38 22 ) 23 24 // histogram keeps counts of values in buckets that are spaced 25 // out in powers of 2: 0-1, 2-3, 4-7... 26 // histogram implements timeseries.Observable 27 type histogram struct { 28 sum int64 // running total of measurements 29 sumOfSquares float64 // square of running total 30 buckets []int64 // bucketed values for histogram 31 value int // holds a single value as an optimization 32 valueCount int64 // number of values recorded for single value 33 } 34 35 // addMeasurement records a value measurement observation to the histogram. 36 func (h *histogram) addMeasurement(value int64) { 37 // TODO: assert invariant 38 h.sum += value 39 h.sumOfSquares += float64(value) * float64(value) 40 41 bucketIndex := getBucket(value) 42 43 if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) { 44 h.value = bucketIndex 45 h.valueCount++ 46 } else { 47 h.allocateBuckets() 48 h.buckets[bucketIndex]++ 49 } 50 } 51 52 func (h *histogram) allocateBuckets() { 53 if h.buckets == nil { 54 h.buckets = make([]int64, bucketCount) 55 h.buckets[h.value] = h.valueCount 56 h.value = 0 57 h.valueCount = -1 58 } 59 } 60 61 func log2(i int64) int { 62 n := 0 63 for ; i >= 0x100; i >>= 8 { 64 n += 8 65 } 66 for ; i > 0; i >>= 1 { 67 n += 1 68 } 69 return n 70 } 71 72 func getBucket(i int64) (index int) { 73 index = log2(i) - 1 74 if index < 0 { 75 index = 0 76 } 77 if index >= bucketCount { 78 index = bucketCount - 1 79 } 80 return 81 } 82 83 // Total returns the number of recorded observations. 84 func (h *histogram) total() (total int64) { 85 if h.valueCount >= 0 { 86 total = h.valueCount 87 } 88 for _, val := range h.buckets { 89 total += int64(val) 90 } 91 return 92 } 93 94 // Average returns the average value of recorded observations. 95 func (h *histogram) average() float64 { 96 t := h.total() 97 if t == 0 { 98 return 0 99 } 100 return float64(h.sum) / float64(t) 101 } 102 103 // Variance returns the variance of recorded observations. 104 func (h *histogram) variance() float64 { 105 t := float64(h.total()) 106 if t == 0 { 107 return 0 108 } 109 s := float64(h.sum) / t 110 return h.sumOfSquares/t - s*s 111 } 112 113 // StandardDeviation returns the standard deviation of recorded observations. 114 func (h *histogram) standardDeviation() float64 { 115 return math.Sqrt(h.variance()) 116 } 117 118 // PercentileBoundary estimates the value that the given fraction of recorded 119 // observations are less than. 120 func (h *histogram) percentileBoundary(percentile float64) int64 { 121 total := h.total() 122 123 // Corner cases (make sure result is strictly less than Total()) 124 if total == 0 { 125 return 0 126 } else if total == 1 { 127 return int64(h.average()) 128 } 129 130 percentOfTotal := round(float64(total) * percentile) 131 var runningTotal int64 132 133 for i := range h.buckets { 134 value := h.buckets[i] 135 runningTotal += value 136 if runningTotal == percentOfTotal { 137 // We hit an exact bucket boundary. If the next bucket has data, it is a 138 // good estimate of the value. If the bucket is empty, we interpolate the 139 // midpoint between the next bucket's boundary and the next non-zero 140 // bucket. If the remaining buckets are all empty, then we use the 141 // boundary for the next bucket as the estimate. 142 j := uint8(i + 1) 143 min := bucketBoundary(j) 144 if runningTotal < total { 145 for h.buckets[j] == 0 { 146 j++ 147 } 148 } 149 max := bucketBoundary(j) 150 return min + round(float64(max-min)/2) 151 } else if runningTotal > percentOfTotal { 152 // The value is in this bucket. Interpolate the value. 153 delta := runningTotal - percentOfTotal 154 percentBucket := float64(value-delta) / float64(value) 155 bucketMin := bucketBoundary(uint8(i)) 156 nextBucketMin := bucketBoundary(uint8(i + 1)) 157 bucketSize := nextBucketMin - bucketMin 158 return bucketMin + round(percentBucket*float64(bucketSize)) 159 } 160 } 161 return bucketBoundary(bucketCount - 1) 162 } 163 164 // Median returns the estimated median of the observed values. 165 func (h *histogram) median() int64 { 166 return h.percentileBoundary(0.5) 167 } 168 169 // Add adds other to h. 170 func (h *histogram) Add(other timeseries.Observable) { 171 o := other.(*histogram) 172 if o.valueCount == 0 { 173 // Other histogram is empty 174 } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value { 175 // Both have a single bucketed value, aggregate them 176 h.valueCount += o.valueCount 177 } else { 178 // Two different values necessitate buckets in this histogram 179 h.allocateBuckets() 180 if o.valueCount >= 0 { 181 h.buckets[o.value] += o.valueCount 182 } else { 183 for i := range h.buckets { 184 h.buckets[i] += o.buckets[i] 185 } 186 } 187 } 188 h.sumOfSquares += o.sumOfSquares 189 h.sum += o.sum 190 } 191 192 // Clear resets the histogram to an empty state, removing all observed values. 193 func (h *histogram) Clear() { 194 h.buckets = nil 195 h.value = 0 196 h.valueCount = 0 197 h.sum = 0 198 h.sumOfSquares = 0 199 } 200 201 // CopyFrom copies from other, which must be a *histogram, into h. 202 func (h *histogram) CopyFrom(other timeseries.Observable) { 203 o := other.(*histogram) 204 if o.valueCount == -1 { 205 h.allocateBuckets() 206 copy(h.buckets, o.buckets) 207 } 208 h.sum = o.sum 209 h.sumOfSquares = o.sumOfSquares 210 h.value = o.value 211 h.valueCount = o.valueCount 212 } 213 214 // Multiply scales the histogram by the specified ratio. 215 func (h *histogram) Multiply(ratio float64) { 216 if h.valueCount == -1 { 217 for i := range h.buckets { 218 h.buckets[i] = int64(float64(h.buckets[i]) * ratio) 219 } 220 } else { 221 h.valueCount = int64(float64(h.valueCount) * ratio) 222 } 223 h.sum = int64(float64(h.sum) * ratio) 224 h.sumOfSquares = h.sumOfSquares * ratio 225 } 226 227 // New creates a new histogram. 228 func (h *histogram) New() timeseries.Observable { 229 r := new(histogram) 230 r.Clear() 231 return r 232 } 233 234 func (h *histogram) String() string { 235 return fmt.Sprintf("%d, %f, %d, %d, %v", 236 h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets) 237 } 238 239 // round returns the closest int64 to the argument 240 func round(in float64) int64 { 241 return int64(math.Floor(in + 0.5)) 242 } 243 244 // bucketBoundary returns the first value in the bucket. 245 func bucketBoundary(bucket uint8) int64 { 246 if bucket == 0 { 247 return 0 248 } 249 return 1 << bucket 250 } 251 252 // bucketData holds data about a specific bucket for use in distTmpl. 253 type bucketData struct { 254 Lower, Upper int64 255 N int64 256 Pct, CumulativePct float64 257 GraphWidth int 258 } 259 260 // data holds data about a Distribution for use in distTmpl. 261 type data struct { 262 Buckets []*bucketData 263 Count, Median int64 264 Mean, StandardDeviation float64 265 } 266 267 // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets. 268 const maxHTMLBarWidth = 350.0 269 270 // newData returns data representing h for use in distTmpl. 271 func (h *histogram) newData() *data { 272 // Force the allocation of buckets to simplify the rendering implementation 273 h.allocateBuckets() 274 // We scale the bars on the right so that the largest bar is 275 // maxHTMLBarWidth pixels in width. 276 maxBucket := int64(0) 277 for _, n := range h.buckets { 278 if n > maxBucket { 279 maxBucket = n 280 } 281 } 282 total := h.total() 283 barsizeMult := maxHTMLBarWidth / float64(maxBucket) 284 var pctMult float64 285 if total == 0 { 286 pctMult = 1.0 287 } else { 288 pctMult = 100.0 / float64(total) 289 } 290 291 buckets := make([]*bucketData, len(h.buckets)) 292 runningTotal := int64(0) 293 for i, n := range h.buckets { 294 if n == 0 { 295 continue 296 } 297 runningTotal += n 298 var upperBound int64 299 if i < bucketCount-1 { 300 upperBound = bucketBoundary(uint8(i + 1)) 301 } else { 302 upperBound = math.MaxInt64 303 } 304 buckets[i] = &bucketData{ 305 Lower: bucketBoundary(uint8(i)), 306 Upper: upperBound, 307 N: n, 308 Pct: float64(n) * pctMult, 309 CumulativePct: float64(runningTotal) * pctMult, 310 GraphWidth: int(float64(n) * barsizeMult), 311 } 312 } 313 return &data{ 314 Buckets: buckets, 315 Count: total, 316 Median: h.median(), 317 Mean: h.average(), 318 StandardDeviation: h.standardDeviation(), 319 } 320 } 321 322 func (h *histogram) html() template.HTML { 323 buf := new(bytes.Buffer) 324 if err := distTmpl().Execute(buf, h.newData()); err != nil { 325 buf.Reset() 326 log.Printf("net/trace: couldn't execute template: %v", err) 327 } 328 return template.HTML(buf.String()) 329 } 330 331 var distTmplCache *template.Template 332 var distTmplOnce sync.Once 333 334 func distTmpl() *template.Template { 335 distTmplOnce.Do(func() { 336 // Input: data 337 distTmplCache = template.Must(template.New("distTmpl").Parse(` 338 <table> 339 <tr> 340 <td style="padding:0.25em">Count: {{.Count}}</td> 341 <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td> 342 <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td> 343 <td style="padding:0.25em">Median: {{.Median}}</td> 344 </tr> 345 </table> 346 <hr> 347 <table> 348 {{range $b := .Buckets}} 349 {{if $b}} 350 <tr> 351 <td style="padding:0 0 0 0.25em">[</td> 352 <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td> 353 <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td> 354 <td style="text-align:right;padding:0 0.25em">{{.N}}</td> 355 <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td> 356 <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td> 357 <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td> 358 </tr> 359 {{end}} 360 {{end}} 361 </table> 362 `)) 363 }) 364 return distTmplCache 365 }