Source file src/cmd/compile/internal/base/startheap.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package base
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"runtime"
    11  	"runtime/debug"
    12  	"runtime/metrics"
    13  	"sync"
    14  )
    15  
    16  // forEachGC calls fn each GC cycle until it returns false.
    17  func forEachGC(fn func() bool) {
    18  	type T [32]byte // large enough to avoid runtime's tiny object allocator
    19  	var finalizer func(*T)
    20  	finalizer = func(p *T) {
    21  
    22  		if fn() {
    23  			runtime.SetFinalizer(p, finalizer)
    24  		}
    25  	}
    26  
    27  	finalizer(new(T))
    28  }
    29  
    30  // AdjustStartingHeap modifies GOGC so that GC should not occur until the heap
    31  // grows to the requested size.  This is intended but not promised, though it
    32  // is true-mostly, depending on when the adjustment occurs and on the
    33  // compiler's input and behavior.  Once the live heap is approximately half
    34  // this size, GOGC is reset to its value when AdjustStartingHeap was called;
    35  // subsequent GCs may reduce the heap below the requested size, but this
    36  // function does not affect that.
    37  //
    38  // logHeapTweaks (-d=gcadjust=1) enables logging of GOGC adjustment events.
    39  //
    40  // The temporarily requested GOGC is derated from what would be the "obvious"
    41  // value necessary to hit the starting heap goal because the obvious
    42  // (goal/live-1)*100 value seems to grow RSS a little more than it "should"
    43  // (compared to GOMEMLIMIT, e.g.) and the assumption is that the GC's control
    44  // algorithms are tuned for GOGC near 100, and not tuned for huge values of
    45  // GOGC.  Different derating factors apply for "lo" and "hi" values of GOGC;
    46  // lo is below derateBreak, hi is above derateBreak.  The derating factors,
    47  // expressed as integer percentages, are derateLoPct and derateHiPct.
    48  // 60-75 is an okay value for derateLoPct, 30-65 seems like a good value for
    49  // derateHiPct, and 600 seems like a good value for derateBreak.  If these
    50  // are zero, defaults are used instead.
    51  //
    52  // NOTE: If you think this code would help startup time in your own
    53  // application and you decide to use it, please benchmark first to see if it
    54  // actually works for you (it may not: the Go compiler is not typical), and
    55  // whatever the outcome, please leave a comment on bug #56546.  This code
    56  // uses supported interfaces, but depends more than we like on
    57  // current+observed behavior of the garbage collector, so if many people need
    58  // this feature, we should consider/propose a better way to accomplish it.
    59  func AdjustStartingHeap(requestedHeapGoal, derateBreak, derateLoPct, derateHiPct uint64, logHeapTweaks bool) {
    60  	mp := runtime.GOMAXPROCS(0)
    61  
    62  	const (
    63  		SHgoal   = "/gc/heap/goal:bytes"
    64  		SHcount  = "/gc/cycles/total:gc-cycles"
    65  		SHallocs = "/gc/heap/allocs:bytes"
    66  		SHfrees  = "/gc/heap/frees:bytes"
    67  	)
    68  
    69  	var sample = []metrics.Sample{{Name: SHgoal}, {Name: SHcount}, {Name: SHallocs}, {Name: SHfrees}}
    70  
    71  	const (
    72  		SH_GOAL   = 0
    73  		SH_COUNT  = 1
    74  		SH_ALLOCS = 2
    75  		SH_FREES  = 3
    76  
    77  		MB = 1_000_000
    78  	)
    79  
    80  	// These particular magic numbers are designed to make the RSS footprint of -d=-gcstart=2000
    81  	// resemble that of GOMEMLIMIT=2000MiB GOGC=10000 when building large projects
    82  	// (e.g. the Go compiler itself, and the microsoft's typescript AST package),
    83  	// with the further restriction that these magic numbers did a good job of reducing user-cpu
    84  	// for builds at either gcstart=2000 or gcstart=128.
    85  	//
    86  	// The benchmarking to obtain this was (a version of):
    87  	//
    88  	// for i in {1..50} ; do
    89  	//     for what in std cmd/compile cmd/fix cmd/go github.com/microsoft/typescript-go/internal/ast ; do
    90  	//       whatbase=`basename ${what}`
    91  	//       for sh in 128 2000 ; do
    92  	//         for br in 500 600 ; do
    93  	//           for shlo in 65 70; do
    94  	//             for shhi in 55 60 ; do
    95  	//               benchcmd -n=2 ${whatbase} go build -a \
    96  	//               -gcflags=all=-d=gcstart=${sh},gcstartloderate=${shlo},gcstarthiderate=${shhi},gcstartbreak=${br} \
    97  	//               ${what} | tee -a startheap${sh}_${br}_${shhi}_${shlo}.bench
    98  	//             done
    99  	//           done
   100  	//         done
   101  	//       done
   102  	//     done
   103  	// done
   104  	//
   105  	// benchcmd is "go install github.com/aclements/go-misc/benchcmd@latest"
   106  
   107  	if derateBreak == 0 {
   108  		derateBreak = 600
   109  	}
   110  	if derateLoPct == 0 {
   111  		derateLoPct = 70
   112  	}
   113  	if derateHiPct == 0 {
   114  		derateHiPct = 55
   115  	}
   116  
   117  	gogcDerate := func(myGogc uint64) uint64 {
   118  		if myGogc < derateBreak {
   119  			return (myGogc * derateLoPct) / 100
   120  		}
   121  		return (myGogc * derateHiPct) / 100
   122  	}
   123  
   124  	// Assumptions and observations of Go's garbage collector, as of Go 1.17-1.20:
   125  
   126  	// - the initial heap goal is 4MiB, by fiat.  It is possible for Go to start
   127  	//   with a heap as small as 512k, so this may change in the future.
   128  
   129  	// - except for the first heap goal, heap goal is a function of
   130  	//   observed-live at the previous GC and current GOGC.  After the first
   131  	//   GC, adjusting GOGC immediately updates GOGC; before the first GC,
   132  	//   adjusting GOGC does not modify goal (but the change takes effect after
   133  	//   the first GC).
   134  
   135  	// - the before/after first GC behavior is not guaranteed anywhere, it's
   136  	//   just behavior, and it's a bad idea to rely on it.
   137  
   138  	// - we don't know exactly when GC will run, even after we adjust GOGC; the
   139  	//   first GC may not have happened yet, may have already happened, or may
   140  	//   be currently in progress, and GCs can start for several reasons.
   141  
   142  	// - forEachGC above will run the provided function at some delay after each
   143  	//   GC's mark phase terminates; finalizers are run after marking as the
   144  	//   spans containing finalizable objects are swept, driven by GC
   145  	//   background activity and allocation demand.
   146  
   147  	// - "live at last GC" is not available through the current metrics
   148  	//    interface. Instead, live is estimated by knowing the adjusted value of
   149  	//    GOGC and the new heap goal following a GC (this requires knowing that
   150  	//    at least one GC has occurred):
   151  	//		  estLive = 100 * newGoal / (100 + currentGogc)
   152  	//    this new value of GOGC
   153  	//		  newGogc = 100*requestedHeapGoal/estLive - 100
   154  	//    will result in the desired goal. The logging code checks that the
   155  	//    resulting goal is correct.
   156  
   157  	// There's a small risk that the finalizer will be slow to run after a GC
   158  	// that expands the goal to a huge value, and that this will lead to
   159  	// out-of-memory.  This doesn't seem to happen; in experiments on a variety
   160  	// of machines with a variety of extra loads to disrupt scheduling, the
   161  	// worst overshoot observed was 50% past requestedHeapGoal.
   162  
   163  	metrics.Read(sample)
   164  	for _, s := range sample {
   165  		if s.Value.Kind() == metrics.KindBad {
   166  			// Just return, a slightly slower compilation is a tolerable outcome.
   167  			if logHeapTweaks {
   168  				fmt.Fprintf(os.Stderr, "GCAdjust: Regret unexpected KindBad for metric %s\n", s.Name)
   169  			}
   170  			return
   171  		}
   172  	}
   173  
   174  	// Tinker with GOGC to make the heap grow rapidly at first.
   175  	currentGoal := sample[SH_GOAL].Value.Uint64() // Believe this will be 4MByte or less, perhaps 512k
   176  	myGogc := 100 * requestedHeapGoal / currentGoal
   177  	myGogc = gogcDerate(myGogc)
   178  	if myGogc <= 125 {
   179  		return
   180  	}
   181  
   182  	if logHeapTweaks {
   183  		sample := append([]metrics.Sample(nil), sample...) // avoid races with GC callback
   184  		AtExit(func() {
   185  			metrics.Read(sample)
   186  			goal := sample[SH_GOAL].Value.Uint64()
   187  			count := sample[SH_COUNT].Value.Uint64()
   188  			oldGogc := debug.SetGCPercent(100)
   189  			if oldGogc == 100 {
   190  				fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d\n",
   191  					goal/MB, oldGogc, count, mp)
   192  			} else {
   193  				inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64()
   194  				overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal)
   195  				fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d overPct %d\n",
   196  					goal/MB, oldGogc, count, mp, overPct)
   197  
   198  			}
   199  		})
   200  	}
   201  
   202  	originalGOGC := debug.SetGCPercent(int(myGogc))
   203  
   204  	// forEachGC finalizers ought not overlap, but they could run in separate threads.
   205  	// This ought not matter, but just in case it bothers the/a race detector,
   206  	// use this mutex.
   207  	var forEachGCLock sync.Mutex
   208  
   209  	adjustFunc := func() bool {
   210  
   211  		forEachGCLock.Lock()
   212  		defer forEachGCLock.Unlock()
   213  
   214  		metrics.Read(sample)
   215  		goal := sample[SH_GOAL].Value.Uint64()
   216  		count := sample[SH_COUNT].Value.Uint64()
   217  
   218  		if goal <= requestedHeapGoal { // Stay the course
   219  			if logHeapTweaks {
   220  				fmt.Fprintf(os.Stderr, "GCAdjust: Reuse GOGC adjust, current goal %dMB, count is %d, current gogc %d\n",
   221  					goal/MB, count, myGogc)
   222  			}
   223  			return true
   224  		}
   225  
   226  		// Believe goal has been adjusted upwards, else it would be less-than-or-equal to requestedHeapGoal
   227  		calcLive := 100 * goal / (100 + myGogc)
   228  
   229  		if 2*calcLive < requestedHeapGoal { // calcLive can exceed requestedHeapGoal!
   230  			myGogc = 100*requestedHeapGoal/calcLive - 100
   231  			myGogc = gogcDerate(myGogc)
   232  
   233  			if myGogc > 125 {
   234  				// Not done growing the heap.
   235  				oldGogc := debug.SetGCPercent(int(myGogc))
   236  
   237  				if logHeapTweaks {
   238  					// Check that the new goal looks right
   239  					inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64()
   240  					metrics.Read(sample)
   241  					newGoal := sample[SH_GOAL].Value.Uint64()
   242  					pctOff := 100 * (int64(newGoal) - int64(requestedHeapGoal)) / int64(requestedHeapGoal)
   243  					// Check that the new goal is close to requested.  3% of make.bash fails this test.  Why, TBD.
   244  					if pctOff < 2 {
   245  						fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d\n",
   246  							goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff)
   247  					} else {
   248  						// The GC is being annoying and not giving us the goal that we requested, say more to help understand when/why.
   249  						fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d inUse %dMB\n",
   250  							goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff, inUse/MB)
   251  					}
   252  				}
   253  				return true
   254  			}
   255  		}
   256  
   257  		// In this case we're done boosting GOGC, set it to its original value and don't set a new finalizer.
   258  		oldGogc := debug.SetGCPercent(originalGOGC)
   259  		// inUse helps estimate how late the finalizer ran; at the instant the previous GC ended,
   260  		// it was (in theory) equal to the previous GC's heap goal.  In a growing heap it is
   261  		// expected to grow to the new heap goal.
   262  		if logHeapTweaks {
   263  			inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64()
   264  			overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal)
   265  			fmt.Fprintf(os.Stderr, "GCAdjust: Reset GOGC adjust, old goal %dMB, count is %d, gogc was %d, gogc is now %d, calcLive %dMB inUse %dMB overPct %d\n",
   266  				goal/MB, count, oldGogc, originalGOGC, calcLive/MB, inUse/MB, overPct)
   267  		}
   268  		return false
   269  	}
   270  
   271  	forEachGC(adjustFunc)
   272  }
   273  

View as plain text