1+ import json
2+ import os .path
3+
4+ from cbsurge .exposure .builtenv .buildings .fgbgdal import get_countries_for_bbox_osm , GMOSM_BUILDINGS_ROOT
5+ from pyogrio .raw import open_arrow , write_arrow
6+ import logging
7+ import time
8+ from tqdm import tqdm
9+ from pyogrio .core import read_info
10+ from osgeo import ogr , osr , gdal
11+
12+
13+
14+
logger = logging.getLogger(__name__)

# Maps the textual name of a pyarrow data type to the OGR field type used when
# attribute fields are created on the destination layer.
ARROWTYPE2OGRTYPE = {
    'string': ogr.OFTString,
    'double': ogr.OFTReal,
    'int64': ogr.OFTInteger64,
    # 'int' is not the name of any pyarrow type; it is kept only so existing
    # lookups with that key keep working.
    'int': ogr.OFTInteger,
    # pyarrow names its 32-bit integer type 'int32'.
    'int32': ogr.OFTInteger,
}
18+
19+
def add_fields_to_layer(layer=None, template_layer_info=None):
    """
    Create on *layer* one attribute field per field described in the first
    layer of *template_layer_info*.

    :param layer: object exposing ``CreateField``, the layer receiving the fields
    :param template_layer_info: dict, read as
           ``template_layer_info['layers'][0]['fields']``; every field entry must
           provide a ``'name'`` and a ``'type'`` key
    :return: None
    """
    prefix = 'OFT'
    # every ogr.OFT* constant, keyed by its name without the 'OFT' prefix
    ogr_type_by_name = {
        attr[len(prefix):]: getattr(ogr, attr)
        for attr in dir(ogr)
        if attr.startswith(prefix)
    }
    for field_spec in template_layer_info['layers'][0]['fields']:
        field_defn = ogr.FieldDefn(field_spec['name'], ogr_type_by_name[field_spec['type']])
        layer.CreateField(field_defn)
25+
26+
def _srs_from_crs(crs):
    """Build an osr.SpatialReference from an 'AUTHORITY:CODE' string (e.g. 'EPSG:4326')."""
    epsg_code = int(crs.split(':')[-1])
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg_code)
    return srs


def download_pyogrio(bbox=None, out_path=None, batch_size:[int,None]=1000):
    """
    Download/stream buildings from VIDA buildings using pyogrio/pyarrow API

    :param bbox: iterable of floats, xmin, ymin, xmax, ymax
    :param out_path: str, full path where the buildings layer will be written
    :param batch_size: int, default=1000, the max number of buildings to download in one batch
           If supplied, the buildings are downloaded in batches otherwise they are
           streamed through pyarrow library
    :return: None, the buildings are written to the 'buildings' layer of out_path
    :raises AssertionError: if the bounding box does not intersect any country
    """
    countries = get_countries_for_bbox_osm(bbox=bbox)
    if len(countries) == 0:
        # Raised explicitly instead of through `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(
            f'The bounding box {bbox} does not intersect any country. Please make sure it makes sense!'
        )

    with ogr.GetDriverByName('FlatGeobuf').CreateDataSource(out_path) as dst_ds:
        for country in countries:
            remote_country_fgb_url = f'/vsicurl/{GMOSM_BUILDINGS_ROOT}/country_iso={country}/{country}.fgb'
            if batch_size is not None:
                with open_arrow(remote_country_fgb_url, bbox=bbox, use_pyarrow=True,
                                batch_size=batch_size) as source:
                    meta, reader = source
                    schema = reader.schema

                    if dst_ds.GetLayerCount() == 0:
                        # The destination layer is created once, from the first
                        # country's metadata and schema, and reused afterwards.
                        dst_lyr = dst_ds.CreateLayer(
                            'buildings', geom_type=ogr.wkbPolygon, srs=_srs_from_crs(meta['crs'])
                        )
                        for name in schema.names:
                            # geometry columns are not attribute fields
                            if 'wkb' in name or 'geometry' in name:
                                continue
                            # ARROWTYPE2OGRTYPE is keyed by type-name strings, so
                            # index it with the textual form of the pyarrow type.
                            field_type = ARROWTYPE2OGRTYPE[str(schema.field(name).type)]
                            dst_lyr.CreateField(ogr.FieldDefn(name, field_type))
                    logger.info(f'Downloading buildings in batches from {remote_country_fgb_url} ')
                    for batch in reader:
                        logger.debug(f'Writing {batch.num_rows} records')
                        dst_lyr.WritePyArrow(batch)
            else:
                with open_arrow(remote_country_fgb_url, bbox=bbox, use_pyarrow=False) as source:
                    meta, reader = source
                    src_srs = _srs_from_crs(meta['crs'])
                    logger.info(f'Streaming buildings from {remote_country_fgb_url} ')
                    # NOTE(review): write_arrow opens out_path by itself while dst_ds
                    # (created on the same path above) is still open, and appends once
                    # per country - confirm the FlatGeobuf driver supports both.
                    write_arrow(reader, out_path, layer='buildings', driver='FlatGeobuf', append=True,
                                geometry_name='wkb_geometry', geometry_type='Polygon',
                                crs=src_srs.ExportToWkt())
    # the destination dataset is closed at this point, so the layer can be inspected
    info = read_info(out_path, layer='buildings')
    logger.info(f'{info["features"]} buildings were downloaded from {",".join(countries)} country datasets')
76+
77+
78+
def download_gdal(bbox=None, out_path=None, batch_size:[int, None]=1000):
    """
    Download/stream buildings from VIDA buildings using gdal/pyarrow API

    :param bbox: iterable of floats, xmin, ymin, xmax, ymax
    :param out_path: str, full path where the buildings layer will be written
    :param batch_size: int, default=1000, the max number of buildings to be downloaded in one batch
           If supplied, the buildings are downloaded in batches otherwise they are
           streamed through pyarrow library.
           Batch downloading should be preferred in case of large bounding boxes/area
    :return: None, the buildings are written to the 'buildings' layer of out_path
    :raises AssertionError: if the bounding box does not intersect any country or
            a batch could not be written to the destination layer
    """
    countries = get_countries_for_bbox_osm(bbox=bbox)
    if len(countries) == 0:
        # Raised explicitly instead of through `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(
            f'The bounding box {bbox} does not intersect any country. Please make sure it makes sense!'
        )
    buildings = 0
    with ogr.GetDriverByName('FlatGeobuf').CreateDataSource(out_path) as dst_ds:
        # reuse the list fetched above instead of querying the countries again
        for country in countries:
            remote_country_fgb_url = f'/vsicurl/{GMOSM_BUILDINGS_ROOT}/country_iso={country}/{country}.fgb'
            with ogr.Open(remote_country_fgb_url, gdal.OF_READONLY) as src_ds:
                src_lyr = src_ds.GetLayer(0)
                src_lyr.SetSpatialFilterRect(*bbox)
                if batch_size is not None:
                    stream = src_lyr.GetArrowStream([f'MAX_FEATURES_IN_BATCH={batch_size}'])
                else:
                    stream = src_lyr.GetArrowStream()
                schema = stream.GetSchema()
                if dst_ds.GetLayerCount() == 0:
                    # The destination layer is created once, from the first
                    # country's SRS and schema, and reused afterwards.
                    src_srs = src_lyr.GetSpatialRef()
                    dst_lyr = dst_ds.CreateLayer('buildings', geom_type=ogr.wkbPolygon, srs=src_srs)
                    for i in range(schema.GetChildrenCount()):
                        child = schema.GetChild(i)
                        child_name = child.GetName()
                        # geometry columns are not attribute fields
                        if 'wkb' in child_name or 'geometry' in child_name:
                            continue
                        dst_lyr.CreateFieldFromArrowSchema(child)
                if batch_size is not None:
                    logger.info(f'Downloading buildings in batches from {remote_country_fgb_url} ')
                else:
                    logger.info(f'Streaming buildings from {remote_country_fgb_url} ')
                while True:
                    array = stream.GetNextRecordBatch()
                    if array is None:
                        break
                    # The write must not live inside an `assert`: under `python -O`
                    # the whole statement, call included, would be removed.
                    status = dst_lyr.WriteArrowBatch(schema, array)
                    if status != ogr.OGRERR_NONE:
                        raise AssertionError(
                            f'Failed to write a batch of {array.GetLength()} buildings '
                            f'from {remote_country_fgb_url}'
                        )
                    buildings += array.GetLength()
    logger.info(f'{buildings} buildings were downloaded from {",".join(countries)} country datasets')
124+
125+
126+
127+
128+
129+
if __name__ == '__main__':
    # Ad-hoc manual run: download the buildings inside a sample bounding box
    # with download_gdal and print the elapsed time in seconds.
    httpx_logger = logging.getLogger('httpx')
    httpx_logger.setLevel(100)  # higher than logging.CRITICAL (50), i.e. httpx is silenced
    logging.basicConfig()
    logger.setLevel(logging.INFO)

    # Other bounding boxes (xmin, ymin, xmax, ymax) used for manual runs:
    #   33.681335, -0.131836, 35.966492, 1.158979                 KEN/UGA
    #   19.5128619671, 40.9857135911, 19.5464217663, 41.0120783699  ALB, Divjake
    #   31.442871, 18.062312, 42.714844, 24.196869                EGY/SDN
    #   15.034157, 49.282809, 16.02842, 49.66207                  CZE
    #   19.350384, 41.206737, 20.059003, 41.571459                ALB, TIRANA
    bbox = 19.726666, 39.312705, 20.627545, 39.869353  # ALB/GRC
    out_path = '/tmp/bldgs1.fgb'

    # perf_counter is monotonic, which makes it the right clock for durations
    start = time.perf_counter()
    download_gdal(bbox=bbox, out_path=out_path, batch_size=3000)
    print(time.perf_counter() - start)
0 commit comments