z3c0
diff --git a/‎MANIFEST.in‎
Lines changed: 2 additions & 1 deletion b/‎MANIFEST.in‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 3 additions & 20 deletions b/‎README.md‎
Lines changed: 3 additions & 20 deletions
diff --git a/‎tests/test_integration.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_integration.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎vistos/VERSION‎
Lines changed: 1 addition & 1 deletion b/‎vistos/VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎vistos/src/duo.py‎
Lines changed: 2 additions & 2 deletions b/‎vistos/src/duo.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎vistos/src/gpo/bioguideretro.py‎
Lines changed: 24 additions & 3 deletions b/‎vistos/src/gpo/bioguideretro.py‎
Lines changed: 24 additions & 3 deletions
@@ -1,2 +1,3 @@
 include vistos/VERSION
-include vistos/src/gpo/index/congress/*.bgmap
+include vistos/src/gpo/index/congress/*.bgmap
+include vistos/src/gpo/index/bills/*.bgmap
@@ -227,7 +227,7 @@ The `terms` property returns a `list` of `BioguideTermRecord` objects describing
 
 `Congress` is used to query a single congress, and takes either a year or number to determine which congress to return.
 
-For example, the following Congress objects all return the 116<sup>th</sup> U. S. Congress:
+For example, the following `Congress` objects all return the 116<sup>th</sup> U. S. Congress:
 
 ``` python
 a = v.Congress(116)
@@ -260,7 +260,7 @@ Calling `get_member_bioguide()` returns a `BioguideMemberRecord` corresponding t
 
 #### `.get_member_govinfo(bioguide_id: str)` <a name="get_member_govinfo"></a>
 
-Calling `get_member_bioguide()` returns a `dict` containing the GovInfo data corresponding to the given Bioguide ID.
+Calling `get_member_govinfo()` returns a `dict` containing the GovInfo data corresponding to the given Bioguide ID.
 
 #### `.number` <a name="congress_number"></a>
 
@@ -289,7 +289,7 @@ print(c.bioguide)
 
 #### `.govinfo` <a name="congress_govinfo"></a>
 
-The `govinfo` property returns GovInfo data as `GovInfoCongressRecord` .
+The `govinfo` property returns GovInfo data as a `GovInfoCongressRecord` .
 
 #### `.members` <a name="congress_members"></a>
 
@@ -521,21 +521,4 @@ If you'd like to contribute to the project, or know of a useful data source, fee
 1. GovInfo data only goes as far back as the 105<sup>th</sup> Congress
 
     The GovInfo API makes congress persons' data available via "Congressional Directories", which are only provided starting with the 105<sup>th</sup> Congress. If data for an earlier congress is needed, use Bioguide data instead.
-
-1. Downloading bills is very slow
-
-    The GovInfo API is geared towards bulk data and does not function efficiently for low-granularity queries. To download the bills for a single congress, V may have to send requests to as many as twenty-thousand different endpoints, taking as long as an hour to download the full dataset. To understand why this is, a deeper explanation of the GovInfo API is needed.
-
-    Firstly, GovInfo datasets are organized by collections, which contain packages. A package is a snapshotted version of a given dataset. For example, in the Congressional Directory collection (denoted as CDIR), each package represents a unique version of a directory. Each time a new directory is created or an existing one is updated, it is made available in the CDIR collection under a new modified date. To get the most recent Congressional Directory for a given congress, you would need to look for the package with the most recent modified date.
-
-    Bills are a unique collection, which are queryable by four parameters: a start date, an end date, the congress number, and the class of the documents you're looking for (in the case of bills, this could be Senate Bills, House Joint Resolutions, Senate Concurrent Resolutions, etc.) Filtering down by any combination of the latter two (congress number and document class) can still result in thousands of records. For example, the 115<sup>th</sup> Congress had 10,740 House bills.
-
-    The maximum dataset size that can be downloaded from a single endpoint is 10,000 records, so in order to download all of the House bills for the 115<sup>th</sup> congress, the start date and end date parameters would have to be used to limit the size of the dataset. However, these date parameters do not use the date that the bills were issued, as one might expect. Instead, they use the last modified date of the packages. This is made even more difficult by the fact that a bill package can be modified outside of the term that it was issued during, so incrementally searching the dates between the beginning and end of the congress you're querying does not work. 
-    
-    If that didn't make matters difficult enough, a large number of records have modified dates occurring on the same day, meaning that once you've found the right date to query, you'd have to segment your time window even further to accommodate the 10,000-record limit.
-
-    To work around these limitations, V begins searching for bills by doing an "open query" for one record and checking the header information for the total number of expected records. Using that total, V then begins to work its way backwards over each year until it finds records. If the number of records encountered is larger than the record limit, V begins searching the months of the year to segment the data further. If the dataset for a month is larger than the record limit, V searches the days. It repeats this pattern until it finds a unit of time small enough to segment the data to a size below the record limit, all the way down to seconds. V continues this recursive search until it downloads the total expected records. V might end up sending hundreds of requests before even being able to download data, and if 10,000 records were ever modified in a single second, V would break, as seconds are the maximum depth by which V searches. Obviously, this is not the ideal approach, but it's an approach that works (mostly).
-
-    It may seem abhorrent - in this era of "big data" and numerous tools capable of acting on hundreds of millions of records in a few seconds - that a dataset in the tens of thousands could take so long to download. However, this approach is a necessary evil until the design of the GovInfo API is improved.
-
 
@@ -101,7 +101,7 @@ def test_parameterless_congress_query(self):
         self.assertEqual(congress_a.number, congress_b.number)
         self.assertEqual(congress_a.start_year, congress_b.start_year)
         self.assertEqual(congress_a.end_year, congress_b.end_year)
-        self.assertEqual(congress_a.bioguide, congress_b.bioguide)
+        self.assertEqual(len(congress_a.bioguide), len(congress_b.bioguide))
 
     def test_govinfo_congress_query(self):
         """Validate requesting govinfo data with a Congress object"""
 
@@ -1 +1 @@
-0.9.65
+0.9.72
@@ -42,7 +42,7 @@ def search_govinfo_members(govinfo_api_key, first_name=None, last_name=None,
 class CongressBills(list):
     """An object for downloading bills for a single Congress"""
 
-    def __init__(self, congress_number, govinfo_api_key, bill_type=None,
+    def __init__(self, congress_number, govinfo_api_key,
                  load_immediately=True):
         self._bills = None
 
@@ -293,7 +293,7 @@ def __init__(self, number_or_year=None, govinfo_api_key=None,
 
             if govinfo_bills_data_exists:
                 self._bills = \
-                    CongressBills(self._number, govinfo_api_key, None, False)
+                    CongressBills(self._number, govinfo_api_key, False)
             else:
                 include_bioguide = True
 
 
@@ -3,9 +3,12 @@
 import json as _json
 import time as _time
 import re as _re
+import sys as _sys
 from typing import List, Optional, Callable
 # from xml.etree import ElementTree as XML
 from defusedxml import ElementTree as _XML
+from queue import PriorityQueue
+from threading import Thread
 
 import requests as _requests
 from bs4 import BeautifulSoup as _BeautifulSoup
@@ -507,9 +510,27 @@ def _query_members_by_id(bioguide_ids: list) -> BioguideMemberList:
     """Gets a BioguideMemberList object corresponding
     to the given list of bioguide IDs"""
     member_records = list()
-    for bioguide_id in bioguide_ids:
-        member_record = _query_member_by_id(bioguide_id)
-        member_records.append(member_record)
+
+    def _get_members_concurrently():
+        while True:
+            bioguide_id = q.get()
+            member_record = _query_member_by_id(bioguide_id)
+            member_records.append(member_record)
+            q.task_done()
+
+    q = PriorityQueue(_util.NUMBER_OF_THREADS * 2)
+    for _ in range(_util.NUMBER_OF_THREADS):
+        t = Thread(target=_get_members_concurrently)
+        t.daemon = True
+        t.start()
+
+    try:
+        for bioguide_id in bioguide_ids:
+            q.put(bioguide_id)
+
+        q.join()
+    except KeyboardInterrupt:
+        _sys.exit(1)
 
     return BioguideMemberList(member_records)