-
Notifications
You must be signed in to change notification settings - Fork 2
fix(lock): implement Redlock single-instance pattern in LockManagerService #537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,14 +22,18 @@ | |
| */ | ||
| final class LockManagerService implements ILockManagerService { | ||
|
|
||
| const MaxRetries = 3; | ||
| const MaxRetries = 3; | ||
| const BackOffMultiplier = 2.0; | ||
| const BackOffBaseInterval = 100000; // 1 ms | ||
| const BackOffBaseInterval = 100000; // microseconds | ||
|
|
||
| /** | ||
| * @var ICacheService | ||
| */ | ||
| private $cache_service; | ||
|
|
||
| /** @var array<string,string> lock-name → per-call ownership token */ | ||
| private array $tokens = []; | ||
|
|
||
| /** | ||
| * LockManagerService constructor. | ||
| * @param ICacheService $cache_service | ||
|
|
@@ -46,22 +50,24 @@ public function __construct(ICacheService $cache_service){ | |
| */ | ||
| public function acquireLock(string $name, int $lifetime = 3600):LockManagerService | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Default TTL of 3600 s creates hour-long stuck locks in queue workers This service is resolved inside long-lived queue workers ( The PR description already flags this: "Consider a tighter explicit lifetime (e.g. 30 s) that matches the realistic worst-case DB write time." Suggested fix — pass an explicit // SummitOrderService.php:951, 968, 1539
$this->lock_service->lock(
'ticket_type.' . $ticket_type->getId() . '.sell.lock',
function () use (...) { ... },
30 // worst-case DB write time in seconds
); |
||
| { | ||
| Log::debug(sprintf("LockManagerService::acquireLock name %s lifetime %s",$name, $lifetime)); | ||
| $attempt = 0 ; | ||
| Log::debug(sprintf("LockManagerService::acquireLock name %s lifetime %s", $name, $lifetime)); | ||
| $token = bin2hex(random_bytes(16)); | ||
| $attempt = 0; | ||
| do { | ||
| $time = time() + $lifetime + 1; | ||
| $success = $this->cache_service->addSingleValue($name, $time, $time); | ||
| if($success) return $this; | ||
| $wait_interval = self::BackOffBaseInterval * ( self::BackOffMultiplier ^ $attempt ); | ||
| Log::debug(sprintf("LockManagerService::acquireLock name %s retrying in %s microseconds (%s).", $name, $wait_interval, $attempt)); | ||
| $success = $this->cache_service->addSingleValue($name, $token, $lifetime); | ||
| if ($success) { | ||
| $this->tokens[$name] = $token; | ||
| return $this; | ||
| } | ||
| $wait_interval = (int)(self::BackOffBaseInterval * (self::BackOffMultiplier ** $attempt)); | ||
| Log::debug(sprintf("LockManagerService::acquireLock name %s retrying in %s µs (attempt %s)", $name, $wait_interval, $attempt)); | ||
| usleep($wait_interval); | ||
| if($attempt >= (self::MaxRetries - 1 )) { | ||
| // only one time we could use this handle | ||
| if ($attempt >= (self::MaxRetries - 1)) { | ||
| Log::error(sprintf("LockManagerService::acquireLock name %s lifetime %s ERROR MAX RETRIES attempt %s", $name, $lifetime, $attempt)); | ||
| throw new UnacquiredLockException(sprintf("lock name %s", $name)); | ||
| } | ||
| ++$attempt; | ||
| } while(1); | ||
| } while (1); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -70,8 +76,12 @@ public function acquireLock(string $name, int $lifetime = 3600):LockManagerServi | |
| */ | ||
| public function releaseLock(string $name):LockManagerService | ||
| { | ||
| Log::debug(sprintf("LockManagerService::releaseLock name %s",$name)); | ||
| $this->cache_service->delete($name); | ||
| Log::debug(sprintf("LockManagerService::releaseLock name %s", $name)); | ||
| if (!isset($this->tokens[$name])) { | ||
| return $this; | ||
| } | ||
| $this->cache_service->deleteIfValueMatches($name, $this->tokens[$name]); | ||
| unset($this->tokens[$name]); | ||
| return $this; | ||
| } | ||
|
|
||
|
|
@@ -85,27 +95,28 @@ public function releaseLock(string $name):LockManagerService | |
| */ | ||
| public function lock(string $name, Closure $callback, int $lifetime = 3600) | ||
| { | ||
| $result = null; | ||
| $result = null; | ||
| $acquired = false; | ||
| Log::debug(sprintf("LockManagerService::lock name %s lifetime %s", $name, $lifetime)); | ||
|
|
||
| try | ||
| { | ||
| try { | ||
| $this->acquireLock($name, $lifetime); | ||
| $acquired = true; | ||
| Log::debug(sprintf("LockManagerService::lock name %s calling callback", $name)); | ||
| $result = $callback($this); | ||
| } | ||
| catch(UnacquiredLockException $ex) | ||
| { | ||
| catch(UnacquiredLockException $ex) { | ||
| Log::warning($ex); | ||
| throw $ex; | ||
| } | ||
| catch(Exception $ex) | ||
| { | ||
| catch(Exception $ex) { | ||
| Log::error($ex); | ||
| throw $ex; | ||
| } | ||
| finally { | ||
| $this->releaseLock($name); | ||
| if ($acquired) { | ||
| $this->releaseLock($name); | ||
| } | ||
| } | ||
| return $result; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -239,7 +239,7 @@ public function storeHash($name, array $values, $ttl = 0) | |
| public function incCounter($counter_name, $ttl = 0) | ||
| { | ||
| return $this->retryOnConnectionError(function ($conn) use ($counter_name, $ttl) { | ||
| if ($conn->setnx($counter_name, 1)) { | ||
| if ($conn->set($counter_name, 1, ['NX' => true]) !== null) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested fix — collapse into a single atomic command, consistent with the approach used in if ($conn->set($counter_name, 1, ['EX' => (int)$ttl, 'NX' => true]) !== null) {
return 1;
} |
||
| if ($ttl > 0) $conn->expire($counter_name, (int)$ttl); | ||
| return 1; | ||
| } | ||
|
|
@@ -306,12 +306,11 @@ public function setSingleValue($key, $value, $ttl = 0) | |
| public function addSingleValue($key, $value, $ttl = 0) | ||
| { | ||
| return $this->retryOnConnectionError(function ($conn) use ($key, $value, $ttl) { | ||
| $res = $conn->setnx($key, $value); | ||
| if ($res && $ttl > 0) { | ||
| $conn->expire($key, $ttl); | ||
| if ($ttl > 0) { | ||
| return $conn->set($key, $value, 'EX', (int)$ttl, 'NX') !== null; | ||
| } | ||
| return $res; | ||
| }); | ||
| return $conn->set($key, $value, 'NX') !== null; | ||
| }, false); | ||
| } | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing integration test: atomic All tests for this path mock
Recommend a
|
||
| public function setKeyExpiration($key, $ttl) | ||
|
|
@@ -331,7 +330,21 @@ public function ttl($key) | |
| return (int)$conn->ttl($key); | ||
| }, 0); | ||
| } | ||
|
|
||
|
|
||
| public function deleteIfValueMatches(string $key, string $expectedValue): bool | ||
| { | ||
| $lua = <<<'LUA' | ||
| if redis.call('get', KEYS[1]) == ARGV[1] then | ||
| return redis.call('del', KEYS[1]) | ||
| else | ||
| return 0 | ||
| end | ||
| LUA; | ||
| return $this->retryOnConnectionError(function ($conn) use ($lua, $key, $expectedValue) { | ||
| return (int)$conn->eval($lua, 1, $key, $expectedValue) === 1; | ||
| }, false); | ||
| } | ||
|
|
||
| /** | ||
| * @param string $cache_region_key | ||
| * @return void | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| <?php namespace Tests\Unit\Services; | ||
| /** | ||
| * Copyright 2026 OpenStack Foundation | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| **/ | ||
|
|
||
| use App\Services\Utils\Exceptions\UnacquiredLockException; | ||
| use App\Services\Utils\LockManagerService; | ||
| use libs\utils\ICacheService; | ||
| use Mockery; | ||
| use PHPUnit\Framework\TestCase; | ||
| use ReflectionClass; | ||
|
|
||
| /** | ||
| * Regression tests for the four bugs fixed in LockManagerService: | ||
| * | ||
| * 1. Non-atomic acquisition (setnx + expire → SET NX EX) | ||
| * 2. Missing ownership token (timestamp value → random token) | ||
| * 3. Unconditional release in finally (guarded by $acquired flag) | ||
| * 4. Broken exponential backoff (^ XOR → ** power) | ||
| * | ||
| * These tests use a mock ICacheService so they run without Redis. | ||
| * The Alice/Bob scenario demonstrates that a failed acquirer never | ||
| * deletes another process's lock key. | ||
| */ | ||
| class LockManagerServiceOwnershipTest extends TestCase | ||
| { | ||
| protected function setUp(): void | ||
| { | ||
| parent::setUp(); | ||
| \Illuminate\Support\Facades\Facade::clearResolvedInstances(); | ||
| $container = new \Illuminate\Container\Container(); | ||
| $container->instance('app', $container); | ||
| $container->instance('log', new class { | ||
| public function __call($name, $args) {} | ||
| }); | ||
| \Illuminate\Support\Facades\Facade::setFacadeApplication($container); | ||
| } | ||
|
|
||
| protected function tearDown(): void | ||
| { | ||
| \Illuminate\Support\Facades\Facade::clearResolvedInstances(); | ||
| \Illuminate\Support\Facades\Facade::setFacadeApplication(null); | ||
| Mockery::close(); | ||
| parent::tearDown(); | ||
| } | ||
|
|
||
| /** | ||
| * Alice holds the lock. Bob's acquire exhausts retries and throws | ||
| * UnacquiredLockException. Bob's lock() finally block must NOT call | ||
| * deleteIfValueMatches — Bob never owned the key and must not delete it. | ||
| * | ||
| * On main (before fix) this test fails because releaseLock was called | ||
| * unconditionally from the finally block. | ||
| */ | ||
| public function testBobsFailedAcquireNeverDeletesAlicesKey(): void | ||
| { | ||
| // Alice: acquires once, releases once via deleteIfValueMatches. | ||
| $aliceCache = Mockery::mock(ICacheService::class); | ||
| $aliceCache->shouldReceive('addSingleValue')->once()->andReturn(true); | ||
| $aliceCache->shouldReceive('deleteIfValueMatches')->once()->andReturn(true); | ||
|
|
||
| // Bob: fails to acquire on every retry; must never touch Redis for release. | ||
| $bobCache = Mockery::mock(ICacheService::class); | ||
| $bobCache->shouldReceive('addSingleValue') | ||
| ->times(LockManagerService::MaxRetries) | ||
| ->andReturn(false); | ||
| $bobCache->shouldReceive('deleteIfValueMatches')->never(); | ||
|
|
||
| $alice = new LockManagerService($aliceCache); | ||
| $bob = new LockManagerService($bobCache); | ||
|
|
||
| $alice->lock('resource.lock', function () { | ||
| // Alice's critical section. | ||
| }); | ||
|
|
||
| $this->expectException(UnacquiredLockException::class); | ||
| $bob->lock('resource.lock', function () { | ||
| $this->fail('Bob must not enter the critical section.'); | ||
| }); | ||
| // Mockery tearDown asserts deleteIfValueMatches was never called on $bobCache. | ||
| } | ||
|
|
||
| /** | ||
| * Calling releaseLock on a name that was never acquired must be a | ||
| * complete no-op — no Redis command issued, no exception thrown. | ||
| */ | ||
| public function testReleaseLockWithoutAcquireIsNoOp(): void | ||
| { | ||
| $cache = Mockery::mock(ICacheService::class); | ||
| $cache->shouldReceive('deleteIfValueMatches')->never(); | ||
| $cache->shouldReceive('delete')->never(); | ||
|
|
||
| $service = new LockManagerService($cache); | ||
| $service->releaseLock('never.acquired.lock'); | ||
|
|
||
| // Tokens map must still be empty — the no-op must not corrupt state. | ||
| $ref = new ReflectionClass($service); | ||
| $prop = $ref->getProperty('tokens'); | ||
| $prop->setAccessible(true); | ||
| $this->assertEmpty($prop->getValue($service)); | ||
| } | ||
|
|
||
| /** | ||
| * After a full acquire → callback → release cycle the internal tokens | ||
| * map must be empty — no token leak that could cause a future | ||
| * releaseLock call to issue a stale deleteIfValueMatches. | ||
| */ | ||
| public function testTokensClearedAfterSuccessfulLockCycle(): void | ||
| { | ||
| $cache = Mockery::mock(ICacheService::class); | ||
| $cache->shouldReceive('addSingleValue')->once()->andReturn(true); | ||
| $cache->shouldReceive('deleteIfValueMatches')->once()->andReturn(true); | ||
|
|
||
| $service = new LockManagerService($cache); | ||
| $service->lock('test.lock', function () {}, 3600); | ||
|
|
||
| $ref = new ReflectionClass($service); | ||
| $prop = $ref->getProperty('tokens'); | ||
| $prop->setAccessible(true); | ||
| $this->assertEmpty($prop->getValue($service), 'tokens map must be empty after release'); | ||
| } | ||
|
|
||
| /** | ||
| * Structural assertion: addSingleValue is called exactly once per | ||
| * acquisition attempt (not two separate calls for setnx + expire). | ||
| * The call must carry the lock name, a string token, and the lifetime. | ||
| */ | ||
| public function testAddSingleValueCalledOnceWithTokenAndLifetime(): void | ||
| { | ||
| $cache = Mockery::mock(ICacheService::class); | ||
| $cache->shouldReceive('addSingleValue') | ||
| ->once() | ||
| ->with('test.lock', Mockery::type('string'), 3600) | ||
| ->andReturn(true); | ||
| $cache->shouldReceive('deleteIfValueMatches')->once()->andReturn(true); | ||
|
|
||
| $service = new LockManagerService($cache); | ||
| $service->lock('test.lock', function () {}, 3600); | ||
|
|
||
| // Tokens cleared — confirms the single addSingleValue call was paired | ||
| // with exactly one deleteIfValueMatches (not a separate expire call). | ||
| $ref = new ReflectionClass($service); | ||
| $prop = $ref->getProperty('tokens'); | ||
| $prop->setAccessible(true); | ||
| $this->assertEmpty($prop->getValue($service)); | ||
| } | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing test: Redis unavailable during There is no test for the case where Suggested outline: public function testReleaseLockWhenRedisDownLeavesKeyUntilTtl(): void
{
$cache = Mockery::mock(ICacheService::class);
$cache->shouldReceive('addSingleValue')->once()->andReturn(true);
// simulate Redis unavailable — deleteIfValueMatches returns false
$cache->shouldReceive('deleteIfValueMatches')->once()->andReturn(false);
$lock = new LockManagerService($cache);
$lock->lock('resource.lock', fn() => null);
// Observable effect: a subsequent acquireLock on the same key will fail
// because the key was NOT deleted — only TTL expiry can free it.
}Documenting this as a test makes the failure mode explicit for future readers. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mutable per-acquisition state on a singleton service
LockManagerServiceis bound as a singleton (BaseServicesProvider.php:143). Before this PR the service was stateless; this line introduces the first mutable instance state. In the current stack (PHP-FPM, synchronous queue workers) this is safe because only one coroutine runs at a time per process. However, if Laravel Octane / Swoole is ever introduced, two concurrent coroutines racing on the same key name would overwrite each other's token here — coroutine A'sreleaseLockwould present coroutine B's token todeleteIfValueMatches, releasing B's live lock.Suggested mitigation: either change the binding to
scoped(Octane-safe per-request singleton), or remove instance state entirely by returning the token fromacquireLockand accepting it as a parameter inreleaseLock.