trace.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package trace
  14. import (
  15. "bytes"
  16. "context"
  17. "fmt"
  18. "math/rand"
  19. "sync"
  20. "time"
  21. "k8s.io/klog/v2"
  22. )
  23. var klogV = func(lvl klog.Level) bool {
  24. return klog.V(lvl).Enabled()
  25. }
  26. // Field is a key value pair that provides additional details about the trace.
  27. type Field struct {
  28. Key string
  29. Value interface{}
  30. }
  31. func (f Field) format() string {
  32. return fmt.Sprintf("%s:%v", f.Key, f.Value)
  33. }
  34. func writeFields(b *bytes.Buffer, l []Field) {
  35. for i, f := range l {
  36. b.WriteString(f.format())
  37. if i < len(l)-1 {
  38. b.WriteString(",")
  39. }
  40. }
  41. }
  42. func writeTraceItemSummary(b *bytes.Buffer, msg string, totalTime time.Duration, startTime time.Time, fields []Field) {
  43. b.WriteString(fmt.Sprintf("%q ", msg))
  44. if len(fields) > 0 {
  45. writeFields(b, fields)
  46. b.WriteString(" ")
  47. }
  48. b.WriteString(fmt.Sprintf("%vms (%v)", durationToMilliseconds(totalTime), startTime.Format("15:04:05.000")))
  49. }
  50. func durationToMilliseconds(timeDuration time.Duration) int64 {
  51. return timeDuration.Nanoseconds() / 1e6
  52. }
  53. type traceItem interface {
  54. // rLock must be called before invoking time or writeItem.
  55. rLock()
  56. // rUnlock must be called after processing the item is complete.
  57. rUnlock()
  58. // time returns when the trace was recorded as completed.
  59. time() time.Time
  60. // writeItem outputs the traceItem to the buffer. If stepThreshold is non-nil, only output the
  61. // traceItem if its the duration exceeds the stepThreshold.
  62. // Each line of output is prefixed by formatter to visually indent nested items.
  63. writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration)
  64. }
  65. type traceStep struct {
  66. stepTime time.Time
  67. msg string
  68. fields []Field
  69. }
  70. // rLock doesn't need to do anything because traceStep instances are immutable.
  71. func (s traceStep) rLock() {}
  72. func (s traceStep) rUnlock() {}
  73. func (s traceStep) time() time.Time {
  74. return s.stepTime
  75. }
  76. func (s traceStep) writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration) {
  77. stepDuration := s.stepTime.Sub(startTime)
  78. if stepThreshold == nil || *stepThreshold == 0 || stepDuration >= *stepThreshold || klogV(4) {
  79. b.WriteString(fmt.Sprintf("%s---", formatter))
  80. writeTraceItemSummary(b, s.msg, stepDuration, s.stepTime, s.fields)
  81. }
  82. }
  83. // Trace keeps track of a set of "steps" and allows us to log a specific
  84. // step if it took longer than its share of the total allowed time
  85. type Trace struct {
  86. // constant fields
  87. name string
  88. fields []Field
  89. startTime time.Time
  90. parentTrace *Trace
  91. // fields guarded by a lock
  92. lock sync.RWMutex
  93. threshold *time.Duration
  94. endTime *time.Time
  95. traceItems []traceItem
  96. }
  97. func (t *Trace) rLock() {
  98. t.lock.RLock()
  99. }
  100. func (t *Trace) rUnlock() {
  101. t.lock.RUnlock()
  102. }
  103. func (t *Trace) time() time.Time {
  104. if t.endTime != nil {
  105. return *t.endTime
  106. }
  107. return t.startTime // if the trace is incomplete, don't assume an end time
  108. }
  109. func (t *Trace) writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration) {
  110. if t.durationIsWithinThreshold() || klogV(4) {
  111. b.WriteString(fmt.Sprintf("%v[", formatter))
  112. writeTraceItemSummary(b, t.name, t.TotalTime(), t.startTime, t.fields)
  113. if st := t.calculateStepThreshold(); st != nil {
  114. stepThreshold = st
  115. }
  116. t.writeTraceSteps(b, formatter+" ", stepThreshold)
  117. b.WriteString("]")
  118. return
  119. }
  120. // If the trace should not be written, still check for nested traces that should be written
  121. for _, s := range t.traceItems {
  122. if nestedTrace, ok := s.(*Trace); ok {
  123. nestedTrace.writeItem(b, formatter, startTime, stepThreshold)
  124. }
  125. }
  126. }
  127. // New creates a Trace with the specified name. The name identifies the operation to be traced. The
  128. // Fields add key value pairs to provide additional details about the trace, such as operation inputs.
  129. func New(name string, fields ...Field) *Trace {
  130. return &Trace{name: name, startTime: time.Now(), fields: fields}
  131. }
  132. // Step adds a new step with a specific message. Call this at the end of an execution step to record
  133. // how long it took. The Fields add key value pairs to provide additional details about the trace
  134. // step.
  135. func (t *Trace) Step(msg string, fields ...Field) {
  136. t.lock.Lock()
  137. defer t.lock.Unlock()
  138. if t.traceItems == nil {
  139. // traces almost always have less than 6 steps, do this to avoid more than a single allocation
  140. t.traceItems = make([]traceItem, 0, 6)
  141. }
  142. t.traceItems = append(t.traceItems, traceStep{stepTime: time.Now(), msg: msg, fields: fields})
  143. }
  144. // Nest adds a nested trace with the given message and fields and returns it.
  145. // As a convenience, if the receiver is nil, returns a top level trace. This allows
  146. // one to call FromContext(ctx).Nest without having to check if the trace
  147. // in the context is nil.
  148. func (t *Trace) Nest(msg string, fields ...Field) *Trace {
  149. newTrace := New(msg, fields...)
  150. if t != nil {
  151. newTrace.parentTrace = t
  152. t.lock.Lock()
  153. t.traceItems = append(t.traceItems, newTrace)
  154. t.lock.Unlock()
  155. }
  156. return newTrace
  157. }
  158. // Log is used to dump all the steps in the Trace. It also logs the nested trace messages using indentation.
  159. // If the Trace is nested it is not immediately logged. Instead, it is logged when the trace it is nested within
  160. // is logged.
  161. func (t *Trace) Log() {
  162. endTime := time.Now()
  163. t.lock.Lock()
  164. t.endTime = &endTime
  165. t.lock.Unlock()
  166. // an explicit logging request should dump all the steps out at the higher level
  167. if t.parentTrace == nil { // We don't start logging until Log or LogIfLong is called on the root trace
  168. t.logTrace()
  169. }
  170. }
  171. // LogIfLong only logs the trace if the duration of the trace exceeds the threshold.
  172. // Only steps that took longer than their share or the given threshold are logged.
  173. // If klog is at verbosity level 4 or higher and the trace took longer than the threshold,
  174. // all substeps and subtraces are logged. Otherwise, only those which took longer than
  175. // their own threshold.
  176. // If the Trace is nested it is not immediately logged. Instead, it is logged when the trace it
  177. // is nested within is logged.
  178. func (t *Trace) LogIfLong(threshold time.Duration) {
  179. t.lock.Lock()
  180. t.threshold = &threshold
  181. t.lock.Unlock()
  182. t.Log()
  183. }
  184. // logTopLevelTraces finds all traces in a hierarchy of nested traces that should be logged but do not have any
  185. // parents that will be logged, due to threshold limits, and logs them as top level traces.
  186. func (t *Trace) logTrace() {
  187. t.lock.RLock()
  188. defer t.lock.RUnlock()
  189. if t.durationIsWithinThreshold() {
  190. var buffer bytes.Buffer
  191. traceNum := rand.Int31()
  192. totalTime := t.endTime.Sub(t.startTime)
  193. buffer.WriteString(fmt.Sprintf("Trace[%d]: %q ", traceNum, t.name))
  194. if len(t.fields) > 0 {
  195. writeFields(&buffer, t.fields)
  196. buffer.WriteString(" ")
  197. }
  198. // if any step took more than it's share of the total allowed time, it deserves a higher log level
  199. buffer.WriteString(fmt.Sprintf("(%v) (total time: %vms):", t.startTime.Format("02-Jan-2006 15:04:05.000"), totalTime.Milliseconds()))
  200. stepThreshold := t.calculateStepThreshold()
  201. t.writeTraceSteps(&buffer, fmt.Sprintf("\nTrace[%d]: ", traceNum), stepThreshold)
  202. buffer.WriteString(fmt.Sprintf("\nTrace[%d]: [%v] [%v] END\n", traceNum, t.endTime.Sub(t.startTime), totalTime))
  203. klog.Info(buffer.String())
  204. return
  205. }
  206. // If the trace should not be logged, still check if nested traces should be logged
  207. for _, s := range t.traceItems {
  208. if nestedTrace, ok := s.(*Trace); ok {
  209. nestedTrace.logTrace()
  210. }
  211. }
  212. }
  213. func (t *Trace) writeTraceSteps(b *bytes.Buffer, formatter string, stepThreshold *time.Duration) {
  214. lastStepTime := t.startTime
  215. for _, stepOrTrace := range t.traceItems {
  216. stepOrTrace.rLock()
  217. stepOrTrace.writeItem(b, formatter, lastStepTime, stepThreshold)
  218. lastStepTime = stepOrTrace.time()
  219. stepOrTrace.rUnlock()
  220. }
  221. }
  222. func (t *Trace) durationIsWithinThreshold() bool {
  223. if t.endTime == nil { // we don't assume incomplete traces meet the threshold
  224. return false
  225. }
  226. return t.threshold == nil || *t.threshold == 0 || t.endTime.Sub(t.startTime) >= *t.threshold
  227. }
  228. // TotalTime can be used to figure out how long it took since the Trace was created
  229. func (t *Trace) TotalTime() time.Duration {
  230. return time.Since(t.startTime)
  231. }
  232. // calculateStepThreshold returns a threshold for the individual steps of a trace, or nil if there is no threshold and
  233. // all steps should be written.
  234. func (t *Trace) calculateStepThreshold() *time.Duration {
  235. if t.threshold == nil {
  236. return nil
  237. }
  238. lenTrace := len(t.traceItems) + 1
  239. traceThreshold := *t.threshold
  240. for _, s := range t.traceItems {
  241. nestedTrace, ok := s.(*Trace)
  242. if ok {
  243. nestedTrace.lock.RLock()
  244. if nestedTrace.threshold != nil {
  245. traceThreshold = traceThreshold - *nestedTrace.threshold
  246. lenTrace--
  247. }
  248. nestedTrace.lock.RUnlock()
  249. }
  250. }
  251. // the limit threshold is used when the threshold(
  252. //remaining after subtracting that of the child trace) is getting very close to zero to prevent unnecessary logging
  253. limitThreshold := *t.threshold / 4
  254. if traceThreshold < limitThreshold {
  255. traceThreshold = limitThreshold
  256. lenTrace = len(t.traceItems) + 1
  257. }
  258. stepThreshold := traceThreshold / time.Duration(lenTrace)
  259. return &stepThreshold
  260. }
  261. // ContextTraceKey provides a common key for traces in context.Context values.
  262. type ContextTraceKey struct{}
  263. // FromContext returns the trace keyed by ContextTraceKey in the context values, if one
  264. // is present, or nil If there is no trace in the Context.
  265. // It is safe to call Nest() on the returned value even if it is nil because ((*Trace)nil).Nest returns a top level
  266. // trace.
  267. func FromContext(ctx context.Context) *Trace {
  268. if v, ok := ctx.Value(ContextTraceKey{}).(*Trace); ok {
  269. return v
  270. }
  271. return nil
  272. }
  273. // ContextWithTrace returns a context with trace included in the context values, keyed by ContextTraceKey.
  274. func ContextWithTrace(ctx context.Context, trace *Trace) context.Context {
  275. return context.WithValue(ctx, ContextTraceKey{}, trace)
  276. }