Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
operating_systems: '["ubuntu-latest", "windows-latest"]'
python_version_for_codecov: "3.14"
operating_system_for_codecov: ubuntu-latest
tests_concurrency: "1"
tests_concurrency: "16"

integration_tests:
name: Integration tests
Expand All @@ -36,4 +36,4 @@ jobs:
operating_systems: '["ubuntu-latest"]'
python_version_for_codecov: "3.14"
operating_system_for_codecov: ubuntu-latest
tests_concurrency: "1"
tests_concurrency: "16"
3 changes: 3 additions & 0 deletions datamodel_codegen_aliases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"gitHubGistUrl": "github_gist_url"
}
2 changes: 1 addition & 1 deletion docs/01_overview/code/01_usage_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ async def main() -> None:
return

# Fetch results from the Actor run's default dataset.
dataset_client = apify_client.dataset(call_result['defaultDatasetId'])
dataset_client = apify_client.dataset(call_result.default_dataset_id)
list_items_result = await dataset_client.list_items()
print(f'Dataset: {list_items_result}')
2 changes: 1 addition & 1 deletion docs/01_overview/code/01_usage_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ def main() -> None:
return

# Fetch results from the Actor run's default dataset.
dataset_client = apify_client.dataset(call_result['defaultDatasetId'])
dataset_client = apify_client.dataset(call_result.default_dataset_id)
list_items_result = dataset_client.list_items()
print(f'Dataset: {list_items_result}')
2 changes: 1 addition & 1 deletion docs/02_concepts/code/01_async_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def main() -> None:

# Start the Actor and get the run ID
run_result = await actor_client.start()
run_client = apify_client.run(run_result['id'])
run_client = apify_client.run(run_result.id)
log_client = run_client.log()

# Stream the logs
Expand Down
6 changes: 4 additions & 2 deletions docs/02_concepts/code/05_retries_async.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import timedelta

from apify_client import ApifyClientAsync

TOKEN = 'MY-APIFY-TOKEN'
Expand All @@ -7,6 +9,6 @@ async def main() -> None:
apify_client = ApifyClientAsync(
token=TOKEN,
max_retries=8,
min_delay_between_retries_millis=500, # 0.5s
timeout_secs=360, # 6 mins
min_delay_between_retries=timedelta(milliseconds=500),
timeout=timedelta(seconds=360),
)
8 changes: 5 additions & 3 deletions docs/02_concepts/code/05_retries_sync.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from datetime import timedelta

from apify_client import ApifyClient

TOKEN = 'MY-APIFY-TOKEN'


async def main() -> None:
def main() -> None:
apify_client = ApifyClient(
token=TOKEN,
max_retries=8,
min_delay_between_retries_millis=500, # 0.5s
timeout_secs=360, # 6 mins
min_delay_between_retries=timedelta(milliseconds=500),
timeout=timedelta(seconds=360),
)
5 changes: 4 additions & 1 deletion docs/03_examples/code/01_input_async.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
from datetime import timedelta

from apify_client import ApifyClientAsync

Expand All @@ -16,7 +17,9 @@ async def main() -> None:

# Run the Actor and wait for it to finish up to 60 seconds.
# Input is not persisted for next runs.
run_result = await actor_client.call(run_input=input_data, timeout_secs=60)
run_result = await actor_client.call(
run_input=input_data, timeout=timedelta(seconds=60)
)


if __name__ == '__main__':
Expand Down
4 changes: 3 additions & 1 deletion docs/03_examples/code/01_input_sync.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import timedelta

from apify_client import ApifyClient

TOKEN = 'MY-APIFY-TOKEN'
Expand All @@ -14,7 +16,7 @@ def main() -> None:

# Run the Actor and wait for it to finish up to 60 seconds.
# Input is not persisted for next runs.
run_result = actor_client.call(run_input=input_data, timeout_secs=60)
run_result = actor_client.call(run_input=input_data, timeout=timedelta(seconds=60))


if __name__ == '__main__':
Expand Down
25 changes: 9 additions & 16 deletions docs/03_examples/code/02_tasks_async.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
import asyncio

from apify_client import ApifyClientAsync
from apify_client.clients.resource_clients import TaskClientAsync

TOKEN = 'MY-APIFY-TOKEN'
HASHTAGS = ['zebra', 'lion', 'hippo']


async def run_apify_task(client: TaskClientAsync) -> dict:
result = await client.call()
return result or {}


async def main() -> None:
apify_client = ApifyClientAsync(token=TOKEN)

# Create Apify tasks
apify_tasks = list[dict]()
apify_tasks = []
apify_tasks_client = apify_client.tasks()

for hashtag in HASHTAGS:
Expand All @@ -31,20 +25,19 @@ async def main() -> None:
print('Tasks created:', apify_tasks)

# Create Apify task clients
apify_task_clients = list[TaskClientAsync]()

for apify_task in apify_tasks:
task_id = apify_task['id']
apify_task_client = apify_client.task(task_id)
apify_task_clients.append(apify_task_client)
apify_task_clients = [apify_client.task(task.id) for task in apify_tasks]

print('Task clients created:', apify_task_clients)

# Execute Apify tasks
run_apify_tasks = [run_apify_task(client) for client in apify_task_clients]
task_run_results = await asyncio.gather(*run_apify_tasks)
task_run_results = await asyncio.gather(
*[client.call() for client in apify_task_clients]
)

# Filter out None results (tasks that failed to return a run)
successful_runs = [run for run in task_run_results if run is not None]

print('Task results:', task_run_results)
print('Task results:', successful_runs)


if __name__ == '__main__':
Expand Down
24 changes: 6 additions & 18 deletions docs/03_examples/code/02_tasks_sync.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
from apify_client import ApifyClient
from apify_client.clients.resource_clients import TaskClient

TOKEN = 'MY-APIFY-TOKEN'
HASHTAGS = ['zebra', 'lion', 'hippo']


def run_apify_task(client: TaskClient) -> dict:
result = client.call()
return result or {}


def main() -> None:
apify_client = ApifyClient(token=TOKEN)

# Create Apify tasks
apify_tasks = list[dict]()
apify_tasks = []
apify_tasks_client = apify_client.tasks()

for hashtag in HASHTAGS:
Expand All @@ -29,23 +23,17 @@ def main() -> None:
print('Tasks created:', apify_tasks)

# Create Apify task clients
apify_task_clients = list[TaskClient]()

for apify_task in apify_tasks:
task_id = apify_task['id']
apify_task_client = apify_client.task(task_id)
apify_task_clients.append(apify_task_client)
apify_task_clients = [apify_client.task(task.id) for task in apify_tasks]

print('Task clients created:', apify_task_clients)

# Execute Apify tasks
task_run_results = list[dict]()
task_run_results = [client.call() for client in apify_task_clients]

for client in apify_task_clients:
result = run_apify_task(client)
task_run_results.append(result)
# Filter out None results (tasks that failed to return a run)
successful_runs = [run for run in task_run_results if run is not None]

print('Task results:', task_run_results)
print('Task results:', successful_runs)


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions docs/03_examples/code/03_retrieve_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ async def main() -> None:

for dataset_item in actor_datasets.items:
# Dataset items can be handled here. Dataset items can be paginated
dataset_client = apify_client.dataset(dataset_item['id'])
dataset_client = apify_client.dataset(dataset_item.id)
dataset_items = await dataset_client.list_items(limit=1000)

# Items can be pushed to a single dataset
merging_dataset_client = apify_client.dataset(merging_dataset['id'])
merging_dataset_client = apify_client.dataset(merging_dataset.id)
await merging_dataset_client.push_items(dataset_items.items)

# ...
Expand Down
4 changes: 2 additions & 2 deletions docs/03_examples/code/03_retrieve_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ def main() -> None:

for dataset_item in actor_datasets.items:
# Dataset items can be handled here. Dataset items can be paginated
dataset_client = apify_client.dataset(dataset_item['id'])
dataset_client = apify_client.dataset(dataset_item.id)
dataset_items = dataset_client.list_items(limit=1000)

# Items can be pushed to a single dataset
merging_dataset_client = apify_client.dataset(merging_dataset['id'])
merging_dataset_client = apify_client.dataset(merging_dataset.id)
merging_dataset_client.push_items(dataset_items.items)

# ...
Expand Down
42 changes: 35 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ classifiers = [
]
keywords = ["apify", "api", "client", "automation", "crawling", "scraping"]
dependencies = [
"apify-shared>=2.1.0,<3.0.0",
"colorama>=0.4.0",
"impit>=0.9.2",
"more_itertools>=10.0.0",
"pydantic[email]>=2.11.0",
]

[project.urls]
Expand All @@ -47,22 +47,23 @@ dev = [
# See https://github.com/apify/apify-client-python/pull/582/ for more details.
# We explicitly constrain black>=24.3.0 to override the transitive dependency.
"black>=24.3.0",
"datamodel-code-generator[http,ruff]<1.0.0",
"dycw-pytest-only<3.0.0",
"griffe",
"poethepoet<1.0.0",
"pre-commit<5.0.0",
"pydoc-markdown<5.0.0",
"pytest-asyncio<2.0.0",
"pytest-cov<8.0.0",
"pytest-httpserver<2.0.0",
"pytest-timeout<3.0.0",
"pytest-xdist<4.0.0",
"pytest<9.0.0",
"pytest-httpserver<2.0.0",
"redbaron<1.0.0",
"ruff~=0.15.0",
"setuptools", # setuptools are used by pytest but not explicitly required
"ty~=0.0.0",
"types-colorama<0.5.0",
"ty~=0.0.0",
"werkzeug<4.0.0", # Werkzeug is used by pytest-httpserver
]

Expand Down Expand Up @@ -144,6 +145,12 @@ indent-style = "space"
"N999", # Invalid module name
"T201", # print found
]
"src/apify_client/_models.py" = [
"D", # Everything from the pydocstyle
"E501", # Line too long
"ERA001", # Commented-out code
"TC003", # Move standard library import into a type-checking block
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
Expand Down Expand Up @@ -171,10 +178,7 @@ python-version = "3.10"
include = ["src", "tests", "scripts", "docs", "website"]

[[tool.ty.overrides]]
include = [
"docs/**/*.py",
"website/**/*.py",
]
include = ["docs/**/*.py", "website/**/*.py"]

[tool.ty.overrides.rules]
unresolved-import = "ignore"
Expand All @@ -185,6 +189,27 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:", "assert_never()"]
[tool.ipdb]
context = 7

# https://koxudaxi.github.io/datamodel-code-generator/
[tool.datamodel-codegen]
url = "https://docs.apify.com/api/openapi.json"
input_file_type = "openapi"
output = "src/apify_client/_models.py"
target_python_version = "3.10"
output_model_type = "pydantic_v2.BaseModel"
use_schema_description = true
use_field_description = true
use_union_operator = true
capitalise_enum_members = true
collapse_root_models = true
set_default_enum_member = true
use_annotated = true
wrap_string_literal = true
snake_case_field = true
use_subclass_enum = true
extra_fields = "allow"
aliases = "datamodel_codegen_aliases.json"
formatters = ["ruff-check", "ruff-format"]

# Run tasks with: uv run poe <task>
[tool.poe.tasks]
clean = "rm -rf .coverage .pytest_cache .ruff_cache .ty_cache build dist htmlcov"
Expand Down Expand Up @@ -220,3 +245,6 @@ cwd = "website"
[tool.poe.tasks.run-docs]
shell = "./build_api_reference.sh && corepack enable && yarn && uv run yarn start"
cwd = "website"

[tool.poe.tasks.generate-models]
shell = "uv run datamodel-codegen"
8 changes: 7 additions & 1 deletion scripts/fix_async_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

# Find all classes which end with "ClientAsync" (there should be at most 1 per file)
async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
if not async_class:

if async_class is None:
# No async client class in this file, nothing to fix
continue

# Find the corresponding sync classes (same name, but without -Async)
Expand All @@ -32,6 +34,10 @@
if len(async_method.decorators) and str(async_method.decorators[0].value) == 'ignore_docs':
continue

# Skip methods that don't exist in the sync class
if sync_method is None:
continue

# If the sync method has a docstring, copy it to the async method (with adjustments)
if isinstance(sync_method.value[0].value, str):
sync_docstring = sync_method.value[0].value
Expand Down
16 changes: 12 additions & 4 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_current_package_version() -> str:
# It replaces the version number on the line with the format `version = "1.2.3"`
def set_current_package_version(version: str) -> None:
with open(PYPROJECT_TOML_FILE_PATH, 'r+', encoding='utf-8') as pyproject_toml_file:
updated_pyproject_toml_file_lines = []
updated_pyproject_toml_file_lines = list[str]()
version_string_found = False
for line in pyproject_toml_file:
line_processed = line
Expand All @@ -45,7 +45,15 @@ def set_current_package_version(version: str) -> None:
# Convert a docstring from a sync resource client method
# into a docstring for its async resource client analogue
def sync_to_async_docstring(docstring: str) -> str:
substitutions = [(r'Client', r'ClientAsync')]
substitutions = [
(r'Client', r'ClientAsync'),
(r'\bsynchronously\b', r'asynchronously'),
(r'\bSynchronously\b', r'Asynchronously'),
(r'\bsynchronous\b', r'asynchronous'),
(r'\bSynchronous\b', r'Asynchronous'),
(r'Retry a function', r'Retry an async function'),
(r'Function to retry', r'Async function to retry'),
]
res = docstring
for pattern, replacement in substitutions:
res = re.sub(pattern, replacement, res, flags=re.MULTILINE)
Expand All @@ -59,8 +67,8 @@ def get_published_package_versions() -> list:
package_data = json.load(urlopen(package_info_url)) # noqa: S310
published_versions = list(package_data['releases'].keys())
# If the URL returns 404, it means the package has no releases yet (which is okay in our case)
except HTTPError as e:
if e.code != 404:
except HTTPError as exc:
if exc.code != 404:
raise
published_versions = []
return published_versions
Loading