from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from joblib import Memory
from tqdm.auto import tqdm
import time
import re

# Set up on-disk caching for API responses
memory = Memory(".cache", verbose=0)
cache = memory.cache
wait_time = 1  # seconds to sleep between uncached API calls

load_dotenv()
transport = RequestsHTTPTransport(
    url="https://api.opencollective.com/graphql/v2",
    headers={"Personal-Token": os.getenv("OPENCOLLECTIVE_API_TOKEN")},
)
client = Client(transport=transport, fetch_schema_from_transport=True)


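# joblib memoizes fetch() to disk keyed on its arguments; "wait_time" is in
# the ignore list below, so changing it does not invalidate cached responses.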
@cache(ignore=["wait_time"])
def fetch(query, variable_values, wait_time=0.1, **kwargs):
    time.sleep(wait_time)  # throttle uncached requests to respect rate limits
    return client.execute(query, variable_values=variable_values, **kwargs)


def fetch_climate_orgs(limit=1000):
    # Define search terms
    search_terms = [
        "climate",
        "for future",
        "extinction rebellion",
        "xr",
        "fossil",
        "oil",
    ]

    query = gql("""
        query GetAccounts($limit: Int, $offset: Int, $searchTerm: String) {
          accounts(
            limit: $limit
            offset: $offset
            isActive: true
            searchTerm: $searchTerm
            type: COLLECTIVE
          ) {
            nodes {
              slug
              name
              legalName
              description
              longDescription
              tags
              location {
                name
                address
                country
              }
              stats {
                totalAmountReceived {
                  value
                  currency
                  valueInCents
                }
                totalAmountReceivedTimeSeries {
                  dateFrom
                  dateTo
                  timeUnit
                  nodes {
                    date
                    amount {
                      value
                      currency
                      valueInCents
                    }
                    label
                  }
                }
              }
            }
          }
        }
    """)

    all_orgs = []
    seen_slugs = set()  # To prevent duplicates

    # Fetch orgs for each search term
    for term in search_terms:
        response = fetch(
            query, variable_values={"limit": limit, "offset": 0, "searchTerm": term}
        )

        # Add only unique organizations
        for org in response["accounts"]["nodes"]:
            if org["slug"] not in seen_slugs:
                all_orgs.append(org)
                seen_slugs.add(org["slug"])

    print(f"Found {len(all_orgs)} unique organizations")
    return all_orgs


# Fetch transactions for an organization with pagination
@cache
def fetch_transactions(org_slug, total_limit=100_000, page_size=1000):
    query = gql("""
        query GetAccountTransactions(
          $account: [AccountReferenceInput!]
          $limit: Int!
          $offset: Int!
          $orderBy: ChronologicalOrderInput!
        ) {
          transactions(
            account: $account
            limit: $limit
            offset: $offset
            orderBy: $orderBy
          ) {
            nodes {
              id
              createdAt
              type
              amount {
                value
                currency
              }
            }
            totalCount
          }
        }
    """)

    all_transactions = []
    offset = 0
    while offset < total_limit:
        variables = {
            "account": [{"slug": org_slug}],
            "limit": min(page_size, total_limit - offset),
            "offset": offset,
            "orderBy": {"field": "CREATED_AT", "direction": "DESC"},
        }

        response = fetch(query, variables, wait_time)
        transactions = response["transactions"]["nodes"]
        total_count = response["transactions"]["totalCount"]

        all_transactions.extend(transactions)

        # Break if we've fetched all available transactions
        if len(transactions) < page_size or offset + page_size >= total_count:
            break

        offset += page_size

    print(f"Fetched {len(all_transactions)} transactions for {org_slug}")
    return all_transactions
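# Transactions come back newest-first (CREATED_AT DESC), so if total_limit
# truncates a very large account, it is the oldest entries that get dropped.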


def get_transactions_df(orgs):
    all_transactions = []
    for org in tqdm(orgs):
        transactions = fetch_transactions(org["slug"])
        if transactions:
            # Convert to a DataFrame with just date and amount
            df = pd.DataFrame(
                [
                    {
                        "date": pd.to_datetime(t["createdAt"]).floor("D"),  # floor to day
                        "amount": float(t["amount"]["value"]) if t.get("amount") else 0.0,
                    }
                    for t in transactions
                ]
            )
            if not df.empty:
                df["organization"] = org["name"]
                all_transactions.append(df)
    if not all_transactions:
        return None
    return pd.concat(all_transactions)


def generalize_group_name(name):
    # Collapse related chapters into one umbrella movement name
    lowered = name.lower()
    if re.search(r"xr|extinction.?rebellion|scientist.?rebellion", lowered):
        return "Extinction Rebellion"
    elif re.search(r"(4|for).?future|fff|klima.?streik", lowered):
        return "Fridays For Future"
    elif re.search(r"fossil.?free", lowered):
        return "Fossil Free"
    else:
        return name
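# e.g. generalize_group_name("XR Belgium") -> "Extinction Rebellion";
# see the __main__ sketch at the bottom for where this is meant to be applied.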


def group_by_wealth(df, top_n=10):
    # Calculate total donations per organization
    total_by_org = (
        df.groupby("organization")["amount"].sum().sort_values(ascending=False)
    )
    # Get top N organizations
    top_orgs = set(total_by_org.head(top_n).index)

    # Map everything outside the top N to "Other"
    def map_org(org):
        return org if org in top_orgs else "Other"

    return df.assign(organization=df["organization"].apply(map_org))


def get_monthly_dfs(df, pivot=False):
    # Note: despite the function name, amounts are bucketed by week (freq="W")
    monthly = (
        df.set_index("date")
        .groupby(["organization", pd.Grouper(freq="W")])["amount"]
        .sum()
        .reset_index()
    )

    # Create separate positive and negative DataFrames
    positive_df = monthly[monthly["amount"] > 0].copy()
    negative_df = monthly[monthly["amount"] < 0].copy()

    if pivot:
        # Pivot to get organizations as columns
        positive_pivot = positive_df.pivot(
            index="date", columns="organization", values="amount"
        ).fillna(0)
        negative_pivot = negative_df.pivot(
            index="date", columns="organization", values="amount"
        ).fillna(0)
        return positive_pivot, negative_pivot
    else:
        return positive_df, negative_df
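

# A minimal end-to-end sketch of how these pieces presumably fit together
# (one plausible use of the matplotlib import above; note that amounts are
# summed across currencies as-is):
if __name__ == "__main__":
    orgs = fetch_climate_orgs()
    df = get_transactions_df(orgs)
    if df is not None:
        # Merge related chapters, then lump small collectives into "Other"
        df["organization"] = df["organization"].apply(generalize_group_name)
        df = group_by_wealth(df, top_n=10)

        positive_pivot, _ = get_monthly_dfs(df, pivot=True)
        positive_pivot.plot.area(figsize=(12, 6))
        plt.title("Weekly donations to climate collectives on Open Collective")
        plt.ylabel("Amount received")
        plt.tight_layout()
        plt.show()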