Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions posthog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@ def get_tags() -> Dict[str, Any]:
attributed to the person normally.
feature_flags_request_timeout_seconds: Timeout in seconds for feature flag
and remote config requests.
feature_flags_request_max_retries: Number of retries for feature flag
requests after network, transport, or timeout failures. Defaults to 1.
Set to 0 to disable retries.
super_properties: Properties merged into every captured event.
enable_exception_autocapture: Automatically capture uncaught exceptions.
log_captured_exceptions: Also log exceptions captured by error tracking.
Expand Down Expand Up @@ -365,6 +368,7 @@ def get_tags() -> Dict[str, Any]:
disable_geoip = True # type: bool
is_server = True # type: bool
feature_flags_request_timeout_seconds = 3 # type: int
feature_flags_request_max_retries = 1 # type: int
super_properties = None # type: Optional[Dict]
enable_exception_autocapture = False # type: bool
log_captured_exceptions = False # type: bool
Expand Down Expand Up @@ -1156,6 +1160,7 @@ def setup() -> Client:
disable_geoip=disable_geoip,
is_server=is_server,
feature_flags_request_timeout_seconds=feature_flags_request_timeout_seconds,
feature_flags_request_max_retries=feature_flags_request_max_retries,
super_properties=super_properties,
# TODO: Currently this monitoring begins only when the Client is initialised (which happens when you do something with the SDK)
# This kind of initialisation is very annoying for exception capture. We need to figure out a way around this,
Expand Down
8 changes: 8 additions & 0 deletions posthog/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def __init__(
is_server=True,
historical_migration=False,
feature_flags_request_timeout_seconds=3,
feature_flags_request_max_retries=1,
super_properties=None,
enable_exception_autocapture=False,
log_captured_exceptions=False,
Expand Down Expand Up @@ -296,6 +297,9 @@ def __init__(
historical_migration: Mark events as historical migration imports.
feature_flags_request_timeout_seconds: Timeout in seconds for feature
flag and remote config requests.
feature_flags_request_max_retries: Number of retries for feature flag
requests after network, transport, or timeout failures. Defaults
to 1. Set to 0 to disable retries.
super_properties: Properties merged into every captured event.
enable_exception_autocapture: Automatically capture uncaught
exceptions.
Expand Down Expand Up @@ -376,6 +380,9 @@ def __init__(
self.feature_flags_request_timeout_seconds = (
feature_flags_request_timeout_seconds
)
self.feature_flags_request_max_retries = max(
0, feature_flags_request_max_retries
)
self.poller: Optional[Poller] = None
self.distinct_ids_feature_flags_reported = SizeLimitedDict(MAX_DICT_SIZE, set)
self.flag_fallback_cache_url = flag_fallback_cache_url
Expand Down Expand Up @@ -899,6 +906,7 @@ def _get_flags_decision(
self.api_key,
self.host,
timeout=self.feature_flags_request_timeout_seconds,
max_retries=self.feature_flags_request_max_retries,
**request_data,
)

Expand Down
64 changes: 35 additions & 29 deletions posthog/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import re
import socket
import time
from dataclasses import dataclass
from datetime import date, datetime, timezone
from gzip import GzipFile
Expand Down Expand Up @@ -41,8 +42,7 @@
if hasattr(socket, attr):
KEEP_ALIVE_SOCKET_OPTIONS.append((socket.SOL_TCP, getattr(socket, attr), value))

# Status codes that indicate transient server errors worth retrying
RETRY_STATUS_FORCELIST = [408, 500, 502, 503, 504]
Comment on lines -44 to -45

@marandaneto marandaneto Jun 28, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idk if that was intentional and why not all the other sdks do that, so i am assuming we should remove it and just retry for transient errors which is what all sdks should do
if all sdks should retry for those http errors as well, then i'd port this to all the other sdks

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PostHog/team-feature-flags ?

_FEATURE_FLAGS_RETRY_BACKOFF_SECONDS = 0.3


def _mask_tokens_in_url(url: str) -> str:
Expand Down Expand Up @@ -90,23 +90,14 @@ def _build_session(socket_options: Optional[SocketOptions] = None) -> requests.S
def _build_flags_session(
socket_options: Optional[SocketOptions] = None,
) -> requests.Session:
"""
Build a session for feature flag requests with POST retries.
"""Build a session for feature flag requests.

Feature flag requests are idempotent (read-only), so retrying POST
requests is safe. This session retries on transient server errors
(408, 5xx) and network failures with exponential backoff
(0.5s, 1s delays between retries).
/flags retries are handled explicitly in ``flags()`` so that only
transport failures are retried. HTTP status responses must surface as API
errors without retrying.
"""
adapter = HTTPAdapterWithSocketOptions(
max_retries=Retry(
total=2,
connect=2,
read=2,
backoff_factor=0.5,
status_forcelist=RETRY_STATUS_FORCELIST,
allowed_methods=["POST"],
),
max_retries=Retry(total=0, connect=0, read=0, status=0),
socket_options=socket_options,
)
session = requests.Session()
Expand Down Expand Up @@ -306,26 +297,41 @@ def _process_response(
raise APIError(res.status_code, res.text, retry_after=retry_after)


def _feature_flags_retry_delay(failed_attempt: int) -> float:
return _FEATURE_FLAGS_RETRY_BACKOFF_SECONDS * (2**failed_attempt)


def flags(
api_key: str,
host: Optional[str] = None,
gzip: bool = False,
timeout: int = 15,
max_retries: int = 1,
**kwargs,
) -> Any:
"""Post the kwargs to the flags API endpoint with automatic retries."""
res = post(
api_key,
host,
"/flags/?v=2",
gzip,
timeout,
session=_get_flags_session(),
**kwargs,
)
return _process_response(
res, success_message="Feature flags evaluated successfully"
)
"""Post the kwargs to the flags API endpoint with transport retries."""
retries = max(0, max_retries)
failed_attempt = 0

while True:
try:
res = post(
api_key,
host,
"/flags/?v=2",
gzip,
timeout,
session=_get_flags_session(),
**kwargs,
)
return _process_response(
res, success_message="Feature flags evaluated successfully"
)
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
if failed_attempt >= retries:
raise
time.sleep(_feature_flags_retry_delay(failed_attempt))
failed_attempt += 1


def remote_config(
Expand Down
41 changes: 40 additions & 1 deletion posthog/test/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ def test_basic_capture_with_feature_flags_returns_active_only(self, patch_flags)
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="distinct_id",
groups={},
person_properties={},
Expand All @@ -984,6 +985,30 @@ def test_basic_capture_with_feature_flags_returns_active_only(self, patch_flags)
device_id=None,
)

@parameterized.expand(
[
("default", 1),
("disabled", 0),
("two_retries", 2),
]
)
def test_feature_flags_request_max_retries_is_forwarded(
self, _name, expected_max_retries
):
with mock.patch("posthog.client.flags") as patch_flags:
patch_flags.return_value = {"featureFlags": {}, "featureFlagPayloads": {}}
client = Client(
FAKE_TEST_API_KEY,
feature_flags_request_max_retries=expected_max_retries,
personal_api_key=FAKE_TEST_API_KEY,
)

client.get_all_flags("distinct_id")

self.assertEqual(
patch_flags.call_args.kwargs["max_retries"], expected_max_retries
)

@mock.patch("posthog.client.flags")
def test_basic_capture_with_feature_flags_and_disable_geoip_returns_correctly(
self, patch_flags
Expand Down Expand Up @@ -1041,6 +1066,7 @@ def test_basic_capture_with_feature_flags_and_disable_geoip_returns_correctly(
"random_key",
"https://us.i.posthog.com",
timeout=12,
max_retries=1,
distinct_id="distinct_id",
groups={},
person_properties={},
Expand Down Expand Up @@ -2261,6 +2287,7 @@ def test_disable_geoip_default_on_decide(self, patch_flags):
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={},
person_properties={"distinct_id": "some_id"},
Expand All @@ -2277,6 +2304,7 @@ def test_disable_geoip_default_on_decide(self, patch_flags):
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="feature_enabled_distinct_id",
groups={},
person_properties={"distinct_id": "feature_enabled_distinct_id"},
Expand All @@ -2291,6 +2319,7 @@ def test_disable_geoip_default_on_decide(self, patch_flags):
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="all_flags_payloads_id",
groups={},
person_properties={"distinct_id": "all_flags_payloads_id"},
Expand Down Expand Up @@ -2337,6 +2366,7 @@ def test_default_properties_get_added_properly(self, patch_flags):
"random_key",
"http://app2.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={"company": "id:5", "instance": "app.posthog.com"},
person_properties={"distinct_id": "some_id", "x1": "y1"},
Expand Down Expand Up @@ -2365,6 +2395,7 @@ def test_default_properties_get_added_properly(self, patch_flags):
"random_key",
"http://app2.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={"company": "id:5", "instance": "app.posthog.com"},
person_properties={"distinct_id": "override"},
Expand All @@ -2386,6 +2417,7 @@ def test_default_properties_get_added_properly(self, patch_flags):
"random_key",
"http://app2.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={},
person_properties={"distinct_id": "some_id"},
Expand Down Expand Up @@ -2446,7 +2478,11 @@ def test_device_id_is_passed_to_flags_request(
expected_call["flag_keys_to_evaluate"] = expected_flag_keys

patch_flags.assert_called_with(
"random_key", "https://us.i.posthog.com", timeout=3, **expected_call
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
**expected_call,
)

@mock.patch("posthog.client.flags")
Expand All @@ -2472,6 +2508,7 @@ def test_device_id_from_context_is_used_in_flags_request(self, patch_flags):
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={},
person_properties={"distinct_id": "some_id"},
Expand All @@ -2492,6 +2529,7 @@ def test_device_id_from_context_is_used_in_flags_request(self, patch_flags):
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={},
person_properties={"distinct_id": "some_id"},
Expand Down Expand Up @@ -2521,6 +2559,7 @@ def test_client_set_context_device_id_is_used_in_flags_request(self, patch_flags
"random_key",
"https://us.i.posthog.com",
timeout=3,
max_retries=1,
distinct_id="some_id",
groups={},
person_properties={"distinct_id": "some_id"},
Expand Down
Loading
Loading