From 66086e3333066852f10f8628b69d8fb7c0a36834 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 13:55:23 +0200 Subject: [PATCH 01/21] Introduced CacheLock.forPath() class method This centralises the code for creating a system-wide lock given some path. The function also respects the lock timeout environment variable. I factor this code out into a separate method since I plan to introduce more, finer-grained locks, which will very much use the same logic. --- clcache.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/clcache.py b/clcache.py index 70ed2f6b..6c871987 100644 --- a/clcache.py +++ b/clcache.py @@ -270,6 +270,12 @@ def acquire(self): def release(self): windll.kernel32.ReleaseMutex(self._mutex) + @staticmethod + def forPath(path): + timeoutMs = int(os.environ.get('CLCACHE_OBJECT_CACHE_TIMEOUT_MS', 10 * 1000)) + lockName = path.replace(':', '-').replace('\\', '-') + return CacheLock(lockName, timeoutMs) + class CompilerArtifactsSection(object): def __init__(self, compilerArtifactsSectionDir): @@ -424,9 +430,7 @@ def __init__(self, cacheDirectory=None): ensureDirectoryExists(compilerArtifactsRootDir) self.compilerArtifactsRepository = CompilerArtifactsRepository(compilerArtifactsRootDir) - lockName = self.cacheDirectory().replace(':', '-').replace('\\', '-') - timeoutMs = int(os.environ.get('CLCACHE_OBJECT_CACHE_TIMEOUT_MS', 10 * 1000)) - self.lock = CacheLock(lockName, timeoutMs) + self.lock = CacheLock.forPath(self.cacheDirectory()) self.configuration = Configuration(os.path.join(self.dir, "config.txt")) self.statistics = Statistics(os.path.join(self.dir, "stats.txt")) From 1bc537eb2d6c81e7d076babaf750bc2e1b47bae0 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 13:57:26 +0200 Subject: [PATCH 02/21] Introduced (yet unused) locks for artifact/manifest sections The plan is that these locks synchronise access to an individual section of the cache. --- clcache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clcache.py b/clcache.py index 6c871987..1f9823d3 100644 --- a/clcache.py +++ b/clcache.py @@ -127,6 +127,7 @@ def __str__(self): class ManifestSection(object): def __init__(self, manifestSectionDir): self.manifestSectionDir = manifestSectionDir + self.lock = CacheLock.forPath(self.manifestSectionDir) def manifestPath(self, manifestHash): return os.path.join(self.manifestSectionDir, manifestHash + ".json") @@ -280,6 +281,7 @@ def forPath(path): class CompilerArtifactsSection(object): def __init__(self, compilerArtifactsSectionDir): self.compilerArtifactsSectionDir = compilerArtifactsSectionDir + self.lock = CacheLock.forPath(self.compilerArtifactsSectionDir) def cacheEntryDir(self, key): return os.path.join(self.compilerArtifactsSectionDir, key) From 8b51fdd5f3c39ed832605267c37d82ef6ce102ba Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 14:23:38 +0200 Subject: [PATCH 03/21] Introduced Statistics.lock field This is a system-wide lock to be used for synchronising accesses to the statistics of a cache. --- clcache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clcache.py b/clcache.py index 1f9823d3..9536c452 100644 --- a/clcache.py +++ b/clcache.py @@ -557,6 +557,7 @@ class Statistics(object): def __init__(self, statsFile): self._statsFile = statsFile self._stats = None + self.lock = CacheLock.forPath(self._statsFile) def __enter__(self): self._stats = PersistentJSONDict(self._statsFile) From 797d8d4e0c14f0f22eaf93ff74756b689c490c57 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 14:25:25 +0200 Subject: [PATCH 04/21] Redefined Cache.lock field in terms of section & statistics locks This reimplements the global 'Cache.lock' lock such that it's defined in terms of the individual section and the statistics locks. This means that acquiring and releasing Cache.lock acquires and releases locks for all sections and for the statistics. This is slower than before (because it requires acquiring and releasing up to 513 locks) but it should be only rarely needed - mostly, when cleaning the cache. --- clcache.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/clcache.py b/clcache.py index 9536c452..8f7859e9 100644 --- a/clcache.py +++ b/clcache.py @@ -10,6 +10,7 @@ import cProfile import codecs from collections import defaultdict, namedtuple +import contextlib import errno import hashlib import json @@ -153,6 +154,18 @@ def getManifest(self, manifestHash): return None +@contextlib.contextmanager +def allSectionsLocked(repository): + sections = list(repository.sections()) + for section in sections: + section.lock.acquire() + try: + yield + finally: + for section in sections: + section.lock.release() + + class ManifestRepository(object): # Bump this counter whenever the current manifest file format changes. # E.g. changing the file format from {'oldkey': ...} to {'newkey': ...} requires @@ -432,11 +445,17 @@ def __init__(self, cacheDirectory=None): ensureDirectoryExists(compilerArtifactsRootDir) self.compilerArtifactsRepository = CompilerArtifactsRepository(compilerArtifactsRootDir) - self.lock = CacheLock.forPath(self.cacheDirectory()) - self.configuration = Configuration(os.path.join(self.dir, "config.txt")) self.statistics = Statistics(os.path.join(self.dir, "stats.txt")) + @property + @contextlib.contextmanager + def lock(self): + with allSectionsLocked(self.manifestRepository), \ + allSectionsLocked(self.compilerArtifactsRepository), \ + self.statistics.lock: + yield + def cacheDirectory(self): return self.dir From ce6ee2d2fcb587101070a137b24e401e3fe0df94 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 15:32:03 +0200 Subject: [PATCH 05/21] Use section-specific locks in favor of global Cache.lock This (big) patch attempts to improve concurrency by avoiding the global cache lock. Instead, we will lock only those manifest and compiler artifact sections which we deal with. The only cases where we need to synchronize all concurrenct processes is when updating the statistics file, because there's only a single statistics file. At least for cache hits this could be avoided by tracking the number of cache hits per section, too. To avoid deadlocks, the locks have to acquired in the same order for all execution paths (the order is defined in Cache.lock, i.e. first manifests, then artifacts, then statistics). Hence, locking of the artifact section had to be pulled out of the addObjectToCache() function since that function was called with the stats locked already - a violation of the locking order. Furthermore, we can no longer perform cache.clean() in addObjectToCache() because cache.clean() acquires the global lock, so e.g. this sequence of steps was possible in non-direct mode: 1. 'A' locks cache section 'x' 2. 'A' locks global statistics lock 3. 'B' locks cache section 'y' 4. 'A' needs to lock all sections to do a cleanup At this point, 'B' cannot proceed because 'A' still holds the statistics lock, but 'A' cannot proceed because 'B' still holds the lock on section 'y'. This issue is caused by -- from B's vie -- the statistics lock being locked before a section lock. This must never happen. At the point addObjectToCache() is called, we already have the statistics locked and we know that it may be that the cache size limit was just exceeded, so it's a good moment to determine that a cleanup is needed. It's not a good moment to *perform* the cleanup though. Instead, let the function return a flag which is propagated all the way back to processCompileRequest(). The flag indicates whether cleanup is needed, and if so, processCompileRequest() will acquire Cache.lock (which acquires all sections and statistics in the correct order) to do the cleanup. --- clcache.py | 141 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 57 deletions(-) diff --git a/clcache.py b/clcache.py index 8f7859e9..a377acae 100644 --- a/clcache.py +++ b/clcache.py @@ -1342,37 +1342,49 @@ def parseIncludesSet(compilerOutput, sourceFile, strip): return includesSet, compilerOutput -def addObjectToCache(stats, cache, cachekey, artifacts): +def addObjectToCache(stats, cache, section, cachekey, artifacts): + # This function asserts that the caller locked 'section' and 'stats' + # already and also saves them printTraceStatement("Adding file {} to cache using key {}".format(artifacts.objectFilePath, cachekey)) - cache.compilerArtifactsRepository.section(cachekey).setEntry(cachekey, artifacts) + + section.setEntry(cachekey, artifacts) stats.registerCacheEntry(os.path.getsize(artifacts.objectFilePath)) + with cache.configuration as cfg: - cache.clean(stats, cfg.maximumCacheSize()) + return stats.currentCacheSize() >= cfg.maximumCacheSize() def processCacheHit(cache, objectFile, cachekey): - with cache.statistics as stats: - stats.registerCacheHit() printTraceStatement("Reusing cached object for key {} for object file {}".format(cachekey, objectFile)) - if os.path.exists(objectFile): - os.remove(objectFile) + section = cache.compilerArtifactsRepository.section(cachekey) - cachedArtifacts = section.getEntry(cachekey) - copyOrLink(cachedArtifacts.objectFilePath, objectFile) - printTraceStatement("Finished. Exit code 0") - return 0, cachedArtifacts.stdout, cachedArtifacts.stderr + with section.lock: + with cache.statistics.lock, cache.statistics as stats: + stats.registerCacheHit() + + if os.path.exists(objectFile): + os.remove(objectFile) + + cachedArtifacts = section.getEntry(cachekey) + copyOrLink(cachedArtifacts.objectFilePath, objectFile) + printTraceStatement("Finished. Exit code 0") + return 0, cachedArtifacts.stdout, cachedArtifacts.stderr, False def postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult): printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) returnCode, compilerOutput, compilerStderr = compilerResult - with cache.lock, cache.statistics as stats: + cleanupRequired = False + + section = cache.compilerArtifactsRepository.section(cachekey) + with section.lock, cache.statistics.lock, cache.statistics as stats: stats.registerEvictedMiss() if returnCode == 0 and os.path.exists(objectFile): - addObjectToCache(stats, cache, cachekey, CompilerArtifacts(objectFile, compilerOutput, compilerStderr)) + artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) + cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) - return compilerResult + return compilerResult + (cleanupRequired,) def createManifest(manifestHash, includePaths): @@ -1403,13 +1415,16 @@ def postprocessHeaderChangedMiss( if returnCode == 0 and os.path.exists(objectFile): manifest, cachekey = createManifest(manifestHash, includePaths) - with cache.lock, cache.statistics as stats: + cleanupRequired = False + section = cache.compilerArtifactsRepository.section(cachekey) + with manifestSection.lock, section.lock, cache.statistics.lock, cache.statistics as stats: stats.registerHeaderChangedMiss() if returnCode == 0 and os.path.exists(objectFile): - addObjectToCache(stats, cache, cachekey, CompilerArtifacts(objectFile, compilerOutput, compilerStderr)) + artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) + cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) manifestSection.setManifest(manifestHash, manifest) - return returnCode, compilerOutput, compilerStderr + return returnCode, compilerOutput, compilerStderr, cleanupRequired def postprocessNoManifestMiss( @@ -1423,14 +1438,17 @@ def postprocessNoManifestMiss( if returnCode == 0 and os.path.exists(objectFile): manifest, cachekey = createManifest(manifestHash, includePaths) - with cache.lock, cache.statistics as stats: + cleanupRequired = False + section = cache.compilerArtifactsRepository.section(cachekey) + with manifestSection.lock, section.lock, cache.statistics.lock, cache.statistics as stats: stats.registerSourceChangedMiss() if returnCode == 0 and os.path.exists(objectFile): + artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) # Store compile output and manifest - addObjectToCache(stats, cache, cachekey, CompilerArtifacts(objectFile, compilerOutput, compilerStderr)) + cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) manifestSection.setManifest(manifestHash, manifest) - return returnCode, compilerOutput, compilerStderr + return returnCode, compilerOutput, compilerStderr, cleanupRequired def main(): @@ -1507,7 +1525,7 @@ def main(): def updateCacheStatistics(cache, method): - with cache.lock, cache.statistics as stats: + with cache.statistics.lock, cache.statistics as stats: method(stats) @@ -1526,11 +1544,18 @@ def processCompileRequest(cache, compiler, args): else: assert objectFile is not None if 'CLCACHE_NODIRECT' in os.environ: - compilerResult = processNoDirect(cache, objectFile, compiler, cmdLine, environment) + returnCode, compilerOutput, compilerStderr, cleanupRequired = \ + processNoDirect(cache, objectFile, compiler, cmdLine, environment) else: - compilerResult = processDirect(cache, objectFile, compiler, cmdLine, sourceFiles[0]) - printTraceStatement("Finished. Exit code {0:d}".format(compilerResult[0])) - return compilerResult + returnCode, compilerOutput, compilerStderr, cleanupRequired = \ + processDirect(cache, objectFile, compiler, cmdLine, sourceFiles[0]) + printTraceStatement("Finished. Exit code {0:d}".format(returnCode)) + + if cleanupRequired: + with cache.lock: + cleanCache(cache) + + return returnCode, compilerOutput, compilerStderr except InvalidArgumentError: printTraceStatement("Cannot cache invocation as {}: invalid argument".format(cmdLine)) updateCacheStatistics(cache, Statistics.registerCallWithInvalidArgument) @@ -1562,8 +1587,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) manifestHash = ManifestRepository.getManifestHash(compiler, cmdLine, sourceFile) manifestSection = cache.manifestRepository.section(manifestHash) - with cache.lock: - createNewManifest = False + with manifestSection.lock: manifest = manifestSection.getManifest(manifestHash) if manifest is not None: # NOTE: command line options already included in hash for manifest name @@ -1575,49 +1599,52 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): cachekey = manifest.includesContentToObjectMap.get(includesContentHash) assert cachekey is not None - if cache.compilerArtifactsRepository.section(cachekey).hasEntry(cachekey): - return processCacheHit(cache, objectFile, cachekey) - else: - postProcessing = lambda compilerResult: postprocessObjectEvicted( - cache, objectFile, cachekey, compilerResult) + + artifactSection = cache.compilerArtifactsRepository.section(cachekey) + with artifactSection.lock: + if artifactSection.hasEntry(cachekey): + return processCacheHit(cache, objectFile, cachekey) + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) + except IncludeChangedException: - createNewManifest = True - postProcessing = lambda compilerResult: postprocessHeaderChangedMiss( + stripIncludes = False + if '/showIncludes' not in cmdLine: + cmdLine.insert(0, '/showIncludes') + stripIncludes = True + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessHeaderChangedMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) except IncludeNotFoundException: # register nothing. This is probably just a compile error - postProcessing = None + return invokeRealCompiler(compiler, cmdLine, captureOutput=True) else: - createNewManifest = True - postProcessing = lambda compilerResult: postprocessNoManifestMiss( + stripIncludes = False + if '/showIncludes' not in cmdLine: + cmdLine.insert(0, '/showIncludes') + stripIncludes = True + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessNoManifestMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) - if createNewManifest: - stripIncludes = False - if '/showIncludes' not in cmdLine: - cmdLine.insert(0, '/showIncludes') - stripIncludes = True - - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - if postProcessing: - compilerResult = postProcessing(compilerResult) - return compilerResult - def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) - with cache.lock: - if cache.compilerArtifactsRepository.section(cachekey).hasEntry(cachekey): + section = cache.compilerArtifactsRepository.section(cachekey) + cleanupRequired = False + with section.lock: + if section.hasEntry(cachekey): return processCacheHit(cache, objectFile, cachekey) - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True, environment=environment) - returnCode, compilerStdout, compilerStderr = compilerResult - with cache.lock, cache.statistics as stats: - stats.registerCacheMiss() - if returnCode == 0 and os.path.exists(objectFile): - addObjectToCache(stats, cache, cachekey, CompilerArtifacts(objectFile, compilerStdout, compilerStderr)) + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True, environment=environment) + returnCode, compilerStdout, compilerStderr = compilerResult + with cache.statistics.lock, cache.statistics as stats: + stats.registerCacheMiss() + if returnCode == 0 and os.path.exists(objectFile): + artifacts = CompilerArtifacts(objectFile, compilerStdout, compilerStderr) + cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) - return returnCode, compilerStdout, compilerStderr + return returnCode, compilerStdout, compilerStderr, cleanupRequired if __name__ == '__main__': if 'CLCACHE_PROFILE' in os.environ: From 56b6349a18c4fbaf8b94548c65d5bf57b06513ed Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 16:01:11 +0200 Subject: [PATCH 06/21] Reorder branches in processDirect() By checking for 'manifest == None' early and returning early, we can reduce the indentation depth for the larger 'manifest != None' branch. --- clcache.py | 60 +++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/clcache.py b/clcache.py index a377acae..6e68e1da 100644 --- a/clcache.py +++ b/clcache.py @@ -1589,36 +1589,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): manifestSection = cache.manifestRepository.section(manifestHash) with manifestSection.lock: manifest = manifestSection.getManifest(manifestHash) - if manifest is not None: - # NOTE: command line options already included in hash for manifest name - try: - includesContentHash = ManifestRepository.getIncludesContentHashForFiles({ - expandBasedirPlaceholder(path, baseDir):contentHash - for path, contentHash in manifest.includeFiles.items() - }) - - cachekey = manifest.includesContentToObjectMap.get(includesContentHash) - assert cachekey is not None - - artifactSection = cache.compilerArtifactsRepository.section(cachekey) - with artifactSection.lock: - if artifactSection.hasEntry(cachekey): - return processCacheHit(cache, objectFile, cachekey) - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) - - except IncludeChangedException: - stripIncludes = False - if '/showIncludes' not in cmdLine: - cmdLine.insert(0, '/showIncludes') - stripIncludes = True - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - return postprocessHeaderChangedMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) - except IncludeNotFoundException: - # register nothing. This is probably just a compile error - return invokeRealCompiler(compiler, cmdLine, captureOutput=True) - else: + if manifest is None: stripIncludes = False if '/showIncludes' not in cmdLine: cmdLine.insert(0, '/showIncludes') @@ -1627,6 +1598,35 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): return postprocessNoManifestMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) + # NOTE: command line options already included in hash for manifest name + try: + includesContentHash = ManifestRepository.getIncludesContentHashForFiles({ + expandBasedirPlaceholder(path, baseDir):contentHash + for path, contentHash in manifest.includeFiles.items() + }) + + cachekey = manifest.includesContentToObjectMap.get(includesContentHash) + assert cachekey is not None + + artifactSection = cache.compilerArtifactsRepository.section(cachekey) + with artifactSection.lock: + if artifactSection.hasEntry(cachekey): + return processCacheHit(cache, objectFile, cachekey) + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) + + except IncludeChangedException: + stripIncludes = False + if '/showIncludes' not in cmdLine: + cmdLine.insert(0, '/showIncludes') + stripIncludes = True + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessHeaderChangedMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) + except IncludeNotFoundException: + # register nothing. This is probably just a compile error + return invokeRealCompiler(compiler, cmdLine, captureOutput=True) + def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) From ab0dde3694094da6d3932c926d23d8d7de327877 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 16:02:46 +0200 Subject: [PATCH 07/21] Don't handle IncludeNotFoundException in processDirect() It's really an exceptional issue, let's handle that on the caller side so that we get the 'forward to real compiler' logic for free. --- clcache.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clcache.py b/clcache.py index 6e68e1da..b9bc28ca 100644 --- a/clcache.py +++ b/clcache.py @@ -1579,6 +1579,8 @@ def processCompileRequest(cache, compiler, args): except CalledForPreprocessingError: printTraceStatement("Cannot cache invocation as {}: called for preprocessing".format(cmdLine)) updateCacheStatistics(cache, Statistics.registerCallForPreprocessing) + except IncludeNotFoundException: + pass return invokeRealCompiler(compiler, args[1:]) @@ -1623,10 +1625,6 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) return postprocessHeaderChangedMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) - except IncludeNotFoundException: - # register nothing. This is probably just a compile error - return invokeRealCompiler(compiler, cmdLine, captureOutput=True) - def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) From abab8ceed28e20bf9663903cae5eb10ad6f06ebb Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Wed, 24 Aug 2016 16:04:01 +0200 Subject: [PATCH 08/21] Reduced indentation depth in processDirect() It's just ManifestRepository.getIncludesContentHashForFiles() which can raise IncludeChangedException, so only that needs to be covered by try/except; doing so, moving code out of the 'try', allows reducing the indentation depth. --- clcache.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/clcache.py b/clcache.py index b9bc28ca..dfb1411b 100644 --- a/clcache.py +++ b/clcache.py @@ -1606,17 +1606,6 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): expandBasedirPlaceholder(path, baseDir):contentHash for path, contentHash in manifest.includeFiles.items() }) - - cachekey = manifest.includesContentToObjectMap.get(includesContentHash) - assert cachekey is not None - - artifactSection = cache.compilerArtifactsRepository.section(cachekey) - with artifactSection.lock: - if artifactSection.hasEntry(cachekey): - return processCacheHit(cache, objectFile, cachekey) - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) - except IncludeChangedException: stripIncludes = False if '/showIncludes' not in cmdLine: @@ -1626,6 +1615,17 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): return postprocessHeaderChangedMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) + cachekey = manifest.includesContentToObjectMap.get(includesContentHash) + assert cachekey is not None + + artifactSection = cache.compilerArtifactsRepository.section(cachekey) + with artifactSection.lock: + if artifactSection.hasEntry(cachekey): + return processCacheHit(cache, objectFile, cachekey) + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) + return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) + + def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) section = cache.compilerArtifactsRepository.section(cachekey) From fbc943547e65e3d7cdf80f41f294a0df99120e24 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 26 Aug 2016 20:39:26 +0200 Subject: [PATCH 09/21] Avoid unneeded locking in postprocess{NoManifest|HeaderChanged}Miss These functions, just like postprocessObjectEvicted(), don't need to lock the cache section: they are already called while the cache section is locked (in processDirect()). --- clcache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clcache.py b/clcache.py index dfb1411b..2904ad62 100644 --- a/clcache.py +++ b/clcache.py @@ -1417,7 +1417,7 @@ def postprocessHeaderChangedMiss( cleanupRequired = False section = cache.compilerArtifactsRepository.section(cachekey) - with manifestSection.lock, section.lock, cache.statistics.lock, cache.statistics as stats: + with section.lock, cache.statistics.lock, cache.statistics as stats: stats.registerHeaderChangedMiss() if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) @@ -1440,7 +1440,7 @@ def postprocessNoManifestMiss( cleanupRequired = False section = cache.compilerArtifactsRepository.section(cachekey) - with manifestSection.lock, section.lock, cache.statistics.lock, cache.statistics as stats: + with section.lock, cache.statistics.lock, cache.statistics as stats: stats.registerSourceChangedMiss() if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) From 0954a471440ad023e804e602adfa29aa88401463 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Mon, 29 Aug 2016 12:48:35 +0200 Subject: [PATCH 10/21] Document section-based locking improvements --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71f7ff4c..e31ec4cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ clcache changelog * Improvement: Timeout errors when accessing the cache now generate friendlier error messages mentioning the possibility to work around the issue using the `CLCACHE_OBJECT_CACHE_TIMEOUT_MS` environment variable. + * Improvement: Greatly improved concurrency of clcache such that concurrent + invocations of the tool no longer block each other. ## clcache 3.2.0 (2016-07-28) From 9e131b90ae94fc45de66a959ba9d4f7097263128 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:22:49 +0200 Subject: [PATCH 11/21] Factored most code out of postprocessHeaderChangedMiss() The majority of this code appears to be repeated in postprocessNoManifestMiss(). The only difference is the statistics field which is updated. So in order to consolidate this code, let's introduce a new postprocessHeaderChangedMiss() function which is parametrised on the statistics field to update. --- clcache.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/clcache.py b/clcache.py index 2904ad62..54c78a77 100644 --- a/clcache.py +++ b/clcache.py @@ -1407,8 +1407,8 @@ def createManifest(manifestHash, includePaths): return manifest, cachekey -def postprocessHeaderChangedMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): +def postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, reason): returnCode, compilerOutput, compilerStderr = compilerResult includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes) @@ -1418,7 +1418,7 @@ def postprocessHeaderChangedMiss( cleanupRequired = False section = cache.compilerArtifactsRepository.section(cachekey) with section.lock, cache.statistics.lock, cache.statistics as stats: - stats.registerHeaderChangedMiss() + reason(stats) if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) @@ -1427,6 +1427,12 @@ def postprocessHeaderChangedMiss( return returnCode, compilerOutput, compilerStderr, cleanupRequired +def postprocessHeaderChangedMiss + cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): + return postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, \ + Statistics.registerHeaderChangedMiss) + def postprocessNoManifestMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): returnCode, compilerOutput, compilerStderr = compilerResult From 087f4a3c59519316e02c9ffd7196b22d84f4e790 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:24:43 +0200 Subject: [PATCH 12/21] Define postprocessNoManifestMiss() in terms of postprocessUnusuableManifestMiss() Fixes some code duplication. --- clcache.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/clcache.py b/clcache.py index 54c78a77..d889dee1 100644 --- a/clcache.py +++ b/clcache.py @@ -1435,26 +1435,9 @@ def postprocessHeaderChangedMiss def postprocessNoManifestMiss( cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): - returnCode, compilerOutput, compilerStderr = compilerResult - includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes) - - manifest = None - cachekey = None - - if returnCode == 0 and os.path.exists(objectFile): - manifest, cachekey = createManifest(manifestHash, includePaths) - - cleanupRequired = False - section = cache.compilerArtifactsRepository.section(cachekey) - with section.lock, cache.statistics.lock, cache.statistics as stats: - stats.registerSourceChangedMiss() - if returnCode == 0 and os.path.exists(objectFile): - artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) - # Store compile output and manifest - cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) - manifestSection.setManifest(manifestHash, manifest) - - return returnCode, compilerOutput, compilerStderr, cleanupRequired + return postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, \ + Statistics.registerSourceChangedMiss) def main(): From a0da532b3f80d02e705cfa114f3c4741b463a3ae Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:31:44 +0200 Subject: [PATCH 13/21] Resolve code duplication in processDirect() The logic for getting the includes used by a source file and invoking the compiler in case a manifest is not usable was duplicated in processDirect(). Centralize this in postprocessUnusableManifestMiss(). --- clcache.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/clcache.py b/clcache.py index d889dee1..5edff85b 100644 --- a/clcache.py +++ b/clcache.py @@ -1408,8 +1408,13 @@ def createManifest(manifestHash, includePaths): def postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, reason): - returnCode, compilerOutput, compilerStderr = compilerResult + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, reason): + stripIncludes = False + if '/showIncludes' not in cmdLine: + cmdLine = list(cmdLine) + cmdLine.insert(0, '/showIncludes') + stripIncludes = True + returnCode, compilerOutput, compilerStderr = invokeRealCompiler(compiler, cmdLine, captureOutput=True) includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes) if returnCode == 0 and os.path.exists(objectFile): @@ -1428,15 +1433,15 @@ def postprocessUnusableManifestMiss( def postprocessHeaderChangedMiss - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine): return postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, \ + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, \ Statistics.registerHeaderChangedMiss) def postprocessNoManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes): + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine): return postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes, \ + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, \ Statistics.registerSourceChangedMiss) @@ -1581,13 +1586,8 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): with manifestSection.lock: manifest = manifestSection.getManifest(manifestHash) if manifest is None: - stripIncludes = False - if '/showIncludes' not in cmdLine: - cmdLine.insert(0, '/showIncludes') - stripIncludes = True - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) return postprocessNoManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine) # NOTE: command line options already included in hash for manifest name try: @@ -1596,13 +1596,8 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): for path, contentHash in manifest.includeFiles.items() }) except IncludeChangedException: - stripIncludes = False - if '/showIncludes' not in cmdLine: - cmdLine.insert(0, '/showIncludes') - stripIncludes = True - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) return postprocessHeaderChangedMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compilerResult, stripIncludes) + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine) cachekey = manifest.includesContentToObjectMap.get(includesContentHash) assert cachekey is not None From c6385b3ce7e11caf3487b0c8a35e0dd58a2d22ee Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:34:32 +0200 Subject: [PATCH 14/21] Inline postprocessNoManifestMiss() postprocessHeaderChangedMiss() These two were really just forwarding to postprocessUnusableManifestMiss(), they didn't pull they own weight. So let's remove one level of indirection by inlining these functions. --- clcache.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/clcache.py b/clcache.py index 5edff85b..c65ac54e 100644 --- a/clcache.py +++ b/clcache.py @@ -1432,19 +1432,6 @@ def postprocessUnusableManifestMiss( return returnCode, compilerOutput, compilerStderr, cleanupRequired -def postprocessHeaderChangedMiss - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine): - return postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, \ - Statistics.registerHeaderChangedMiss) - -def postprocessNoManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine): - return postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, \ - Statistics.registerSourceChangedMiss) - - def main(): if len(sys.argv) == 2 and sys.argv[1] == "--help": print(""" @@ -1586,8 +1573,9 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): with manifestSection.lock: manifest = manifestSection.getManifest(manifestHash) if manifest is None: - return postprocessNoManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine) + return postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, + Statistics.registerSourceChangedMiss) # NOTE: command line options already included in hash for manifest name try: @@ -1596,8 +1584,9 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): for path, contentHash in manifest.includeFiles.items() }) except IncludeChangedException: - return postprocessHeaderChangedMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine) + return postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, + Statistics.registerHeaderChangedMiss) cachekey = manifest.includesContentToObjectMap.get(includesContentHash) assert cachekey is not None From ca05cc5ec88b3e3282f9a98d6e163f430994cb20 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:42:56 +0200 Subject: [PATCH 15/21] Inline postprocessObjectEvicted() Inlining this function shows that there is a strong similarity between processDirect() and processNoDirect(), a duplication I'd like to capture in a shared function in a subsequent commit. --- clcache.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/clcache.py b/clcache.py index c65ac54e..77ec1b0c 100644 --- a/clcache.py +++ b/clcache.py @@ -1371,22 +1371,6 @@ def processCacheHit(cache, objectFile, cachekey): return 0, cachedArtifacts.stdout, cachedArtifacts.stderr, False -def postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult): - printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) - returnCode, compilerOutput, compilerStderr = compilerResult - - cleanupRequired = False - - section = cache.compilerArtifactsRepository.section(cachekey) - with section.lock, cache.statistics.lock, cache.statistics as stats: - stats.registerEvictedMiss() - if returnCode == 0 and os.path.exists(objectFile): - artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) - cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) - - return compilerResult + (cleanupRequired,) - - def createManifest(manifestHash, includePaths): baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) @@ -1596,7 +1580,19 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): if artifactSection.hasEntry(cachekey): return processCacheHit(cache, objectFile, cachekey) compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - return postprocessObjectEvicted(cache, objectFile, cachekey, compilerResult) + printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) + returnCode, compilerOutput, compilerStderr = compilerResult + + cleanupRequired = False + + section = cache.compilerArtifactsRepository.section(cachekey) + with section.lock, cache.statistics.lock, cache.statistics as stats: + stats.registerEvictedMiss() + if returnCode == 0 and os.path.exists(objectFile): + artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) + cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) + + return compilerResult + (cleanupRequired,) def processNoDirect(cache, objectFile, compiler, cmdLine, environment): From d084aba608fa0911a7f3bb5399434e0c6c893b44 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:47:47 +0200 Subject: [PATCH 16/21] Avoid unneeded double-locking in processDirect() As inlining processObjectEvictedMiss() shows, we already locked the artifact section here. We don't need to do it again. --- clcache.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clcache.py b/clcache.py index 77ec1b0c..251f5cce 100644 --- a/clcache.py +++ b/clcache.py @@ -1585,12 +1585,11 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): cleanupRequired = False - section = cache.compilerArtifactsRepository.section(cachekey) - with section.lock, cache.statistics.lock, cache.statistics as stats: + with cache.statistics.lock, cache.statistics as stats: stats.registerEvictedMiss() if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) - cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) + cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) return compilerResult + (cleanupRequired,) From 8a0c056edac74fca79fd10946557f22cfc674459 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:52:42 +0200 Subject: [PATCH 17/21] Make processDirect() and processNoDirect() more similar ...by enlarging the scope of the cleanupRequired variable in processDirect() and by renaming 'section' to 'artifactSection' in processNoDirect(). By making the code more similar, fixing the code duplication between these two should be safer to do, i.e. mistakes are hopefully easier to spot. --- clcache.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clcache.py b/clcache.py index 251f5cce..13368b69 100644 --- a/clcache.py +++ b/clcache.py @@ -1576,6 +1576,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): assert cachekey is not None artifactSection = cache.compilerArtifactsRepository.section(cachekey) + cleanupRequired = False with artifactSection.lock: if artifactSection.hasEntry(cachekey): return processCacheHit(cache, objectFile, cachekey) @@ -1583,8 +1584,6 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) returnCode, compilerOutput, compilerStderr = compilerResult - cleanupRequired = False - with cache.statistics.lock, cache.statistics as stats: stats.registerEvictedMiss() if returnCode == 0 and os.path.exists(objectFile): @@ -1596,10 +1595,10 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) - section = cache.compilerArtifactsRepository.section(cachekey) + artifactSection = cache.compilerArtifactsRepository.section(cachekey) cleanupRequired = False - with section.lock: - if section.hasEntry(cachekey): + with artifactSection.lock: + if artifactSection.hasEntry(cachekey): return processCacheHit(cache, objectFile, cachekey) compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True, environment=environment) @@ -1608,7 +1607,7 @@ def processNoDirect(cache, objectFile, compiler, cmdLine, environment): stats.registerCacheMiss() if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerStdout, compilerStderr) - cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) + cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) return returnCode, compilerStdout, compilerStderr, cleanupRequired From b15677d627a9fd3ea57e956b972a72a4e582173c Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 12:59:49 +0200 Subject: [PATCH 18/21] Minor whitespace fix Not so minor actually, it makes processDirect() and processNoDirect() more similar. --- clcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clcache.py b/clcache.py index 13368b69..35182399 100644 --- a/clcache.py +++ b/clcache.py @@ -1580,10 +1580,10 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): with artifactSection.lock: if artifactSection.hasEntry(cachekey): return processCacheHit(cache, objectFile, cachekey) + compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) returnCode, compilerOutput, compilerStderr = compilerResult - with cache.statistics.lock, cache.statistics as stats: stats.registerEvictedMiss() if returnCode == 0 and os.path.exists(objectFile): From 357bdb572be42d8d3e98b22ce9b1d4e2aace57dc Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 13:02:23 +0200 Subject: [PATCH 19/21] Aligned final 'return' in processDirect() and processNoDirect() The version in processNoDirect() could actually be simplified by adding two tuples. De-denting the return statement in processDirect() makes the code more similar to what processNoDirect() now does. --- clcache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clcache.py b/clcache.py index 35182399..e2a8e299 100644 --- a/clcache.py +++ b/clcache.py @@ -1590,7 +1590,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) - return compilerResult + (cleanupRequired,) + return compilerResult + (cleanupRequired,) def processNoDirect(cache, objectFile, compiler, cmdLine, environment): @@ -1609,7 +1609,7 @@ def processNoDirect(cache, objectFile, compiler, cmdLine, environment): artifacts = CompilerArtifacts(objectFile, compilerStdout, compilerStderr) cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) - return returnCode, compilerStdout, compilerStderr, cleanupRequired + return compilerResult + (cleanupRequired,) if __name__ == '__main__': if 'CLCACHE_PROFILE' in os.environ: From a23eb2724a4c28302e221d95e0b46b665f168d68 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 13:37:14 +0200 Subject: [PATCH 20/21] Made most of processNoDirect() reusable ...by factoring it out into a getOrSetArtifacts() function (I'm not very happy with that name...). The function is much like processNoDirect() except that it expects to be given an artifact section cache key and a function which bumps the statistics in case of a miss. I suspect that this function can also be reused in processDirect(). --- clcache.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/clcache.py b/clcache.py index e2a8e299..3e43c2d6 100644 --- a/clcache.py +++ b/clcache.py @@ -1595,6 +1595,10 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): def processNoDirect(cache, objectFile, compiler, cmdLine, environment): cachekey = CompilerArtifactsRepository.computeKeyNodirect(compiler, cmdLine, environment) + return getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerCacheMiss, environment) + + +def getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, statsField, environment=None): artifactSection = cache.compilerArtifactsRepository.section(cachekey) cleanupRequired = False with artifactSection.lock: @@ -1604,13 +1608,14 @@ def processNoDirect(cache, objectFile, compiler, cmdLine, environment): compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True, environment=environment) returnCode, compilerStdout, compilerStderr = compilerResult with cache.statistics.lock, cache.statistics as stats: - stats.registerCacheMiss() + statsField(stats) if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerStdout, compilerStderr) cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) return compilerResult + (cleanupRequired,) + if __name__ == '__main__': if 'CLCACHE_PROFILE' in os.environ: INVOCATION_HASH = getStringHash(','.join(sys.argv)) From 077b3572eabe60c511f32cd71b35fe72853221d7 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 2 Sep 2016 13:42:48 +0200 Subject: [PATCH 21/21] Reuse getOrSetArtifats() in processDirect() Saves some code at the expense of a trace statement which is now lost. A small price to pay for the code savings, I think. --- clcache.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/clcache.py b/clcache.py index 3e43c2d6..e2dc30bd 100644 --- a/clcache.py +++ b/clcache.py @@ -1575,22 +1575,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): cachekey = manifest.includesContentToObjectMap.get(includesContentHash) assert cachekey is not None - artifactSection = cache.compilerArtifactsRepository.section(cachekey) - cleanupRequired = False - with artifactSection.lock: - if artifactSection.hasEntry(cachekey): - return processCacheHit(cache, objectFile, cachekey) - - compilerResult = invokeRealCompiler(compiler, cmdLine, captureOutput=True) - printTraceStatement("Cached object already evicted for key {} for object {}".format(cachekey, objectFile)) - returnCode, compilerOutput, compilerStderr = compilerResult - with cache.statistics.lock, cache.statistics as stats: - stats.registerEvictedMiss() - if returnCode == 0 and os.path.exists(objectFile): - artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) - cleanupRequired = addObjectToCache(stats, cache, artifactSection, cachekey, artifacts) - - return compilerResult + (cleanupRequired,) + return getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss) def processNoDirect(cache, objectFile, compiler, cmdLine, environment):