CA
Crawlee & Apify•16mo ago
flat-fuchsia

Run instance of Apify client not returning, can't access data scraped from my Apify agent

Hey guys, I'm getting an issue where the run instance from the client.actor call in the Python SDK is not returning. It just says the call is 'running', so I can't access the items for the agent: def apify_reddit_agent(json_input) :
def apify_reddit_agent(json_input):
    """Run the ``trudax/reddit-scraper-lite`` actor and return its dataset items.

    Args:
        json_input: Actor input, passed unchanged as ``run_input``.

    Returns:
        A list containing every item of the run's default dataset.

    Raises:
        ValueError: If the actor call returns no run object.
    """
    # Changed the API key here to the samuel account instead.
    client = ApifyClient(APIFY_API_KEY)

    print("calling API endpoint")
    run = client.actor("trudax/reddit-scraper-lite").call(run_input=json_input)
    if not run:
        # Without a run object there is no dataset id to read items from.
        raise ValueError("Actor call returned no run")

    print("looping through items...")
    return list(client.dataset(run["defaultDatasetId"]).iterate_items())
def apify_reddit_agent(json_input):
    """Run the ``trudax/reddit-scraper-lite`` actor and return its dataset items.

    Args:
        json_input: Actor input, passed unchanged as ``run_input``.

    Returns:
        A list containing every item of the run's default dataset.

    Raises:
        ValueError: If the actor call returns no run object.
    """
    # Changed the API key here to the samuel account instead.
    client = ApifyClient(APIFY_API_KEY)

    print("calling API endpoint")
    run = client.actor("trudax/reddit-scraper-lite").call(run_input=json_input)
    if not run:
        # Without a run object there is no dataset id to read items from.
        raise ValueError("Actor call returned no run")

    print("looping through items...")
    return list(client.dataset(run["defaultDatasetId"]).iterate_items())
Is there any way I can fix it? I'm thinking of just creating my own library for calling the HTTP endpoints because of reliability concerns, as sometimes the SDK works and sometimes it doesn't.
5 Replies
mute-gold
mute-gold•16mo ago
Hi, could you provide the json_input as well so we can reproduce it? Thanks 🙂
flat-fuchsia
flat-fuchsiaOP•16mo ago
whatever JSON input I give it does not work, but here is an example one (also thanks for responding!) : subreddit_search_json = { "debugMode": False, "includeNSFW": True, "maxComments": 5, "maxCommunitiesCount": 2, "maxItems": 2, "maxPostCount": 5, "maxUserCount": 5, "proxy": { "useApifyProxy": True, "apifyProxyGroups": [ "RESIDENTIAL" ] }, "scrollTimeout": 40, "searchComments": False, "searchCommunities": True, "searchPosts": False, "searchUsers": False, "searches": [ "vietnam", "war", "Art" ], "skipComments": False }
MEE6
MEE6•16mo ago
@waltuh just advanced to level 1! Thanks for your contributions! 🎉
flat-fuchsia
flat-fuchsiaOP•16mo ago
I'm currently on the highest paid tier as well, but I'm considering making my own crawler if reliability is a concern. Are there usually problems with data retrieval from an agent after it has succeeded at a crawl?
mute-gold
mute-gold•16mo ago
Hi @waltuh , I used the following script, filling in my API key ...
# apify_client_bug.py
from apify_client import ApifyClient


def apify_reddit_agent(apify_api_key: str, actor_run_input: dict) -> list:
    """Run the ``trudax/reddit-scraper-lite`` actor and return its dataset items.

    Args:
        apify_api_key: Token used to construct the ``ApifyClient``.
        actor_run_input: Actor input, passed unchanged as ``run_input``.

    Returns:
        A list containing every item of the run's default dataset.

    Raises:
        ValueError: If the actor call returns no run object.
        TypeError: If the run's ``defaultDatasetId`` is not a string.
    """
    client = ApifyClient(apify_api_key)

    print('Calling API endpoint...')
    run = client.actor('trudax/reddit-scraper-lite').call(run_input=actor_run_input)

    if not run:
        # It is the run itself that is missing here, not (yet) the dataset.
        raise ValueError('No actor run returned')

    print('Extracting dataset id...')
    dataset_id = run['defaultDatasetId']

    if not isinstance(dataset_id, str):
        raise TypeError('No dataset id found')

    print('Convert dataset to list and return it...')
    return list(client.dataset(dataset_id).iterate_items())


if __name__ == '__main__':
    # Placeholder value: fill in a real API key before running the script.
    APIFY_API_KEY = '...'

    # Example actor input taken from the original report.
    subreddit_search_input = {
        'debugMode': False,
        'includeNSFW': True,
        'maxComments': 5,
        'maxCommunitiesCount': 2,
        'maxItems': 2,
        'maxPostCount': 5,
        'maxUserCount': 5,
        'proxy': {'useApifyProxy': True, 'apifyProxyGroups': ['RESIDENTIAL']},
        'scrollTimeout': 40,
        'searchComments': False,
        'searchCommunities': True,
        'searchPosts': False,
        'searchUsers': False,
        'searches': ['vietnam', 'war', 'Art'],
        'skipComments': False,
    }

    result = apify_reddit_agent(APIFY_API_KEY, subreddit_search_input)
    print(f'Result: {result}')
# apify_client_bug.py
from apify_client import ApifyClient


def apify_reddit_agent(apify_api_key: str, actor_run_input: dict) -> list:
    """Run the ``trudax/reddit-scraper-lite`` actor and return its dataset items.

    Args:
        apify_api_key: Token used to construct the ``ApifyClient``.
        actor_run_input: Actor input, passed unchanged as ``run_input``.

    Returns:
        A list containing every item of the run's default dataset.

    Raises:
        ValueError: If the actor call returns no run object.
        TypeError: If the run's ``defaultDatasetId`` is not a string.
    """
    client = ApifyClient(apify_api_key)

    print('Calling API endpoint...')
    run = client.actor('trudax/reddit-scraper-lite').call(run_input=actor_run_input)

    if not run:
        # It is the run itself that is missing here, not (yet) the dataset.
        raise ValueError('No actor run returned')

    print('Extracting dataset id...')
    dataset_id = run['defaultDatasetId']

    if not isinstance(dataset_id, str):
        raise TypeError('No dataset id found')

    print('Convert dataset to list and return it...')
    return list(client.dataset(dataset_id).iterate_items())


if __name__ == '__main__':
    # Placeholder value: fill in a real API key before running the script.
    APIFY_API_KEY = '...'

    # Example actor input taken from the original report.
    subreddit_search_input = {
        'debugMode': False,
        'includeNSFW': True,
        'maxComments': 5,
        'maxCommunitiesCount': 2,
        'maxItems': 2,
        'maxPostCount': 5,
        'maxUserCount': 5,
        'proxy': {'useApifyProxy': True, 'apifyProxyGroups': ['RESIDENTIAL']},
        'scrollTimeout': 40,
        'searchComments': False,
        'searchCommunities': True,
        'searchPosts': False,
        'searchUsers': False,
        'searches': ['vietnam', 'war', 'Art'],
        'skipComments': False,
    }

    result = apify_reddit_agent(APIFY_API_KEY, subreddit_search_input)
    print(f'Result: {result}')
... and it works, here is the output:
$ python apify_client_bug.py
Calling API endpoint...
Extracting dataset id...
Convert dataset to list and return it...
Result: [{'id': '2qkcr', 'name': 't5_2qkcr', 'title': 'Vietnam', 'headerImage': 'https://b.thumbs.redditmedia.com/oWDjFomnZ6sXt5Tn4epD-j-lbEF97L6N87b1lUbsfRI.png', 'description': 'Hello! This is the global dual-language Reddit home of the country Vietnam. Chào mừng bạn đến với ngôi nhà trên Reddit của Việt Nam. r/Vietnam sử dụng cả hai ngôn ngữ tiếng Việt và tiếng Anh.', 'over18': False, 'createdAt': '2008-07-22T04:39:53.000Z', 'scrapedAt': '2024-02-26T10:22:50.648Z', 'numberOfMembers': 753744, 'url': 'https://www.reddit.com/r/VietNam/', 'dataType': 'community'}, {'...': '...'}]
$ python apify_client_bug.py
Calling API endpoint...
Extracting dataset id...
Convert dataset to list and return it...
Result: [{'id': '2qkcr', 'name': 't5_2qkcr', 'title': 'Vietnam', 'headerImage': 'https://b.thumbs.redditmedia.com/oWDjFomnZ6sXt5Tn4epD-j-lbEF97L6N87b1lUbsfRI.png', 'description': 'Hello! This is the global dual-language Reddit home of the country Vietnam. Chào mừng bạn đến với ngôi nhà trên Reddit của Việt Nam. r/Vietnam sử dụng cả hai ngôn ngữ tiếng Việt và tiếng Anh.', 'over18': False, 'createdAt': '2008-07-22T04:39:53.000Z', 'scrapedAt': '2024-02-26T10:22:50.648Z', 'numberOfMembers': 753744, 'url': 'https://www.reddit.com/r/VietNam/', 'dataType': 'community'}, {'...': '...'}]

Did you find this page helpful?