# This script is used to download and process images from public services to create # TopoFeatures training data. # IMPORTANT: Assumes run configuration set to /TopoFeatures/Scripts/ directory. from copy import deepcopy import requests import logging from osgeo import ogr import urllib3 import time import os import json from TF_geom_utils import contained_within, check_if_any_segments_are_contained_within from TF_label_maker import get_COCO_formatted_label, run_plot_from_segmentation, coordconverter, \ convert_segment_list_to_point_list, convert_segment_list_to_image_coords from TF_coord_modify import get_scaled_bbox from TF_image_to_geo_converter import ImgToGeoHelper # TODO: Save output to log files urllib3.disable_warnings() log = logging.getLogger() logging.basicConfig(level=logging.INFO) # Save destination for the image sources # TODO blend folders? # What to name each image/label file file_name = "GNIS_ID_" # Limit which features are processed min_FID = 0 max_FID = 99999 # For the initial training runs I only create 100 image/annotation pairs for troubleshooting. # exclude = [i for i in range(11, max_FID)] # If you want to create all data in the shapefile, just uncomment the line below. exclude = [] FID_list = [] # HTMC service URL HTMC_url = "https://services.arcgisonline.com/arcgis/rest/services/USA_Topo_Maps/MapServer/export?" # 3DEP Elevation service URL DEM_url = "https://elevation.nationalmap.gov/arcgis/rest/services/3DEPElevation/ImageServer/exportImage?" # NAIP service URL from Sam NAIP_url = "https://imagery.nationalmap.gov/arcgis/rest/services/USGSNAIPImagery/ImageServer/exportImage?" # "https://gis.apfo.usda.gov/arcgis/rest/services/NAIP/USDA_CONUS_PRIME/ImageServer/exportImage?" # "https://imagery.nationalmap.gov/arcgis/rest/services/USGSNAIPImagery/ImageServer/exportImage?" # "https://services.nationalmap.gov/arcgis/rest/services/USGSNAIPImagery/MapServer/export?" 
# broken link (og) -- note on the apfo.usda.gov NAIP endpoint above.

# Rendering rule for the "hill" (elevation stretch) layer: stretches elevation
# to 8-bit grayscale with dynamic range adjustment.
# NOTE(review): the original comments on these two rules were swapped --
# this rule performs a Stretch; the next one computes Slope.
hill_render_rule = {
    "rasterFunction": "Stretch",
    "rasterFunctionArguments": {
        "StretchType": "5",
        "Min": "0",
        "Max": "255",
        "DRA": "true"
    },
    "outputPixelType": "U8",
    "variableName": "DEM"
}

# Rendering rule for the slope layer (Slope raster function, 32-bit float output).
slope_render_rule = {
    "rasterFunction": "Slope",
    "rasterFunctionArguments": {
        "ZFactor": 1,
        "RemoveEdgeEffect": "true",
        "SlopeType": 3
    },
    "outputPixelType": "F32",
    "variableName": "DEM"
}
# TODO: add other render rules

# Input feature class should be a shapefile with polygons
all_features_shapefile = '../Shapefiles/TopoFeatures_Shape/TopoFeatures_Shape.shp'  # Contains all features we have access to
summits_only_shapefile = '../Shapefiles/summit_features_shape/summit_features.shp'  # Contains only summit features

# This is the shapefile in use
shapefile = summits_only_shapefile  # Change to summits_only if desired

# Spatial reference is specific to the input shapefile (4326 = WGS 84 lat/long)
spatial_ref = 4326

# Dictionary that maps fcode (shapefile FCODE field) to COCO category id
category_map = {
    15048: 24,  # Summit
}

# HTTP headers used for every service request (ask for JSON, bypass caches)
headers = {'Accept': 'application/json', 'cache-control': 'max-age=0, no-cache'}

# NOTE(review): written nowhere in this file -- appears to be unused module state.
dict_of_all_segments_encountered = {}


def download(flags: dict, scale_val, env: list, img_w: int, img_h: int, data_dir: str):
    """Request an export/download descriptor for one feature from each enabled service.

    For every service key in ``flags`` whose ``flags[item][0][0]`` entry is truthy
    ("htmc", "naip", "slope", anything else falls through to the hillshade rule),
    build the export-request URL, query the service until it returns JSON containing
    an "extent" key, and collect the resulting descriptor plus its destination folder.

    Parameters:
        flags: maps service name -> nested flag structure; ``flags[item][0][0]``
            truthy means the layer is enabled (structure assumed from usage --
            TODO confirm against the caller).
        scale_val: scale factor; halved and passed to get_scaled_bbox to enlarge
            the bounding box around the feature.
        env: OGR envelope ordered (min_long, max_long, min_lat, max_lat).
        img_w, img_h: requested image size in pixels.
        data_dir: root directory under which per-service folders live.

    Returns a 6-tuple:
        (any_download_url_for_extent, list_of_download_urls, list_of_download_folders,
         features_downloaded_correctly, bbox, new_bbox)
        where ``bbox``/``new_bbox`` are "minlong,minlat,maxlong,maxlat" strings
        (original and scaled respectively) and ``features_downloaded_correctly``
        is False if any service request failed with a non-ValueError exception.
    """
    # The spatial envelope is the minimum bounding rectangle that surrounds the
    # segmentation outline. OGR envelopes are (minX, maxX, minY, maxY), hence
    # the 0/2/1/3 index order below.
    env_min_long = env[0]
    env_min_lat = env[2]
    env_max_long = env[1]
    env_max_lat = env[3]
    # Create bbox parameter for URL arguments
    bbox = f"{env_min_long},{env_min_lat},{env_max_long},{env_max_lat}"
    # Modify the bounding box to better show the feature's context
    env_min_lat, env_max_lat, env_min_long, env_max_long = get_scaled_bbox(env_min_lat,
                                                                           env_max_lat,
                                                                           env_min_long,
                                                                           env_max_long,
                                                                           .5 * scale_val)
    new_bbox = f"{env_min_long},{env_min_lat},{env_max_long},{env_max_lat}"
    # All the extents should be the same, so we just want to grab one of the download urls
    # from any of the services that we have selected to download images from.
    any_download_url_for_extent = {}
    # We also want to track which ones we are going to download for the service call later
    list_of_download_urls = []
    # Lastly we need to keep the corresponding destination folders in a list
    list_of_download_folders = []
    features_downloaded_correctly = True
    for item in flags:
        if flags[item][0][0]:
            # Add all parameters to respective request URLs
            if item == "htmc":
                request_url = HTMC_url + f'bbox={new_bbox}&bboxSR={spatial_ref}&imageSR={spatial_ref}&format=jpg&f=pjson&size={img_w},{img_h}'
                # Use the one below if you want to include the scale parameter
                # HTMC_request_url = HTMC_url + f'bbox={bbox}&bboxSR={spatial_ref}&imageSR={spatial_ref}&format=jpg&f=pjson&size={image_w},{image_h}'
            elif item == "naip":
                # NOTE(review): NAIP uses the unscaled bbox, unlike the other layers --
                # confirm this asymmetry is intentional.
                request_url = NAIP_url + f'bbox={bbox}&bboxSR={spatial_ref}&imageSR={spatial_ref}&format=jpg&f=pjson&size={img_w},{img_h}'
            elif item == "slope":
                # NOTE(review): this interpolates the Python dict repr (single quotes)
                # into the URL, not JSON -- json.dumps(slope_render_rule) would be the
                # documented form for renderingRule; confirm the server tolerates repr.
                request_url = DEM_url + f'bbox={new_bbox}&bboxSR={spatial_ref}&imageSR={spatial_ref}&format=jpg&f=pjson&size={img_w},{img_h}&renderingRule={slope_render_rule}'
            else:
                # Any other enabled key (expected: "hill") gets the elevation-stretch rule.
                request_url = DEM_url + f'bbox={new_bbox}&bboxSR={spatial_ref}&imageSR={spatial_ref}&format=jpg&f=pjson&size={img_w},{img_h}&renderingRule={hill_render_rule}'
            # Send request to the image server and get download link
            # NOTE: This automatically calculates the frame extent based on the given image
            # dimensions (default 400x400 px)
            # Effectively keeps on trying to get a download URL for a feature until it successfully
            # downloads it, otherwise it keeps trying.
            # NOTE(review): the ValueError branch retries forever -- if the service keeps
            # returning JSON without an "extent" key this loop never terminates.
            while True:
                try:
                    download_url = requests.get(request_url, verify=False, headers=headers).json()
                    print(f"Prelim {item} Download URL: {download_url}")
                    if "extent" not in download_url:
                        raise ValueError
                except ValueError:
                    # Missing "extent" (or non-JSON body) -> retry the request.
                    print(f"JSON decoding error ({item})")
                    # features_downloaded_correctly = False
                except:
                    # NOTE(review): bare except -- swallows everything including
                    # KeyboardInterrupt; narrow to requests.RequestException if possible.
                    print(f"Bad exception ({item})")
                    features_downloaded_correctly = False
                    break
                else:
                    # Success: remember one representative descriptor for the extent,
                    # plus this service's descriptor and destination folder.
                    any_download_url_for_extent = download_url
                    # features_downloaded_correctly = True
                    list_of_download_urls.append(download_url)
                    list_of_download_folders.append(f"{data_dir}{item}/")
                    break
    return any_download_url_for_extent, list_of_download_urls, list_of_download_folders, features_downloaded_correctly, bbox, new_bbox


def get_features(max_features_to_get: int, starting_num: int, data_dir: str, img_coord_file: str, flags: dict):
    """Download imagery and build COCO labels for features in the configured shapefile.

    For every scale in {0.25, 0.5, 0.75, 1} and every FCODE in ``category_map``,
    iterates the shapefile's features, downloads each enabled service layer via
    download(), writes the images and per-feature label files under ``data_dir``,
    and finally dumps the accumulated COCO dataset to ``<data_dir>/lab/dataset.json``
    and the image-to-geo reference table to ``img_coord_file``.

    Parameters:
        max_features_to_get: stop after this many image/annotation pairs per fcode.
        starting_num: number of leading features to skip (offset into the layer).
        data_dir: root output directory; per-layer subfolders are created under it.
        img_coord_file: path passed to ImgToGeoHelper.write_img_to_geo_to_file.
        flags: service-enable flags, same structure as in download().
    """
    img_to_geo = ImgToGeoHelper()
    # Per-service image folders
    slope_folder = data_dir + "slope/"
    hill_folder = data_dir + "hill/"
    htmc_folder = data_dir + "htmc/"
    naip_folder = data_dir + "naip/"
    # Final data folders
    label_folder = data_dir + "lab/"
    plotted_folder = data_dir + "plot/"
    # Initialize the dictionary for the dataset labels
    COCO_dataset_dict = {
        # TODO: Use GNIS_ID as image id
        "images": [],
        "annotations": [],
        # Set categories and ids (only summits for now)
        # "categories": [{"id": 1, "name": "Summit"}]
        # Below is for all feature classes
        "categories": [
            {"id": 1, "name": "Arch"},
            {"id": 2, "name": "Area"},
            {"id": 3, "name": "Arroyo"},
            {"id": 4, "name": "Bar"},
            {"id": 5, "name": "Basin"},
            {"id": 6, "name": "Beach"},
            {"id": 7, "name": "Bench"},
            {"id": 8, "name": "Bend"},
            {"id": 9, "name": "Cape"},
            {"id": 10, "name": "Cave"},
            {"id": 11, "name": "Cliff"},
            {"id": 12, "name": "Crater"},
            {"id": 13, "name": "Flat"},
            {"id": 14, "name": "Gap"},
            {"id": 15, "name": "Island"},
            {"id": 16, "name": "Isthmus"},
            {"id": 17, "name": "Lava"},
            {"id": 18, "name": "Levee"},
            {"id": 19, "name": "Pillar"},
            {"id": 20, "name": "Plain"},
            {"id": 21, "name": "Range"},
            {"id": 22, "name": "Ridge"},
            {"id": 23, "name": "Slope"},
            {"id": 24, "name": "Summit"},
            {"id": 25, "name": "Valley"},
            {"id": 26, "name": "Woods"}
        ]
        # TODO: The id numbers below have to be in range(1, num_categories) for detectron to work.
        # Maybe there is a workaround to this?
        # Set categories to correspond to FCODE values in the shapefile
        # "categories": [
        #     {"id": 15002, "name": "Arch"},
        #     {"id": 15004, "name": "Area"},
        #     {"id": 15006, "name": "Arroyo"},
        #     {"id": 15008, "name": "Bar"},
        #     {"id": 15010, "name": "Basin"},
        #     {"id": 15012, "name": "Beach"},
        #     {"id": 15014, "name": "Bench"},
        #     {"id": 15016, "name": "Bend"},
        #     {"id": 15018, "name": "Cape"},
        #     {"id": 15020, "name": "Cave"},
        #     {"id": 15022, "name": "Cliff"},
        #     {"id": 15024, "name": "Crater"},
        #     {"id": 15028, "name": "Flat"},
        #     {"id": 15030, "name": "Gap"},
        #     {"id": 15032, "name": "Island"},
        #     {"id": 15034, "name": "Isthmus"},
        #     {"id": 15036, "name": "Lava"},
        #     {"id": 15037, "name": "Levee"},
        #     {"id": 15038, "name": "Pillar"},
        #     {"id": 15040, "name": "Plain"},
        #     {"id": 15042, "name": "Range"},
        #     {"id": 15044, "name": "Ridge"},
        #     {"id": 15046, "name": "Slope"},
        #     {"id": 15048, "name": "Summit"},
        #     {"id": 15050, "name": "Valley"},
        #     {"id": 15052, "name": "Woods"}
        # ]
    }
    # NOTE(review): ``failed`` records FID -> GNIS_ID for failed downloads but is
    # never written out or returned -- consider logging/persisting it.
    failed = {}
    # ID_counter = 1
    annotation_ID = 1
    # Make sure all folders to store data are created.
    # (These are conditional-expression statements used purely for the append
    # side effect: append only if the corresponding service flag is enabled.)
    all_folders = [data_dir, label_folder, plotted_folder]
    all_folders.append(htmc_folder) if flags["htmc"][0][0] else None
    all_folders.append(naip_folder) if flags["naip"][0][0] else None
    all_folders.append(slope_folder) if flags["slope"][0][0] else None
    all_folders.append(hill_folder) if flags["hill"][0][0] else None
    for folder in all_folders:
        if not os.path.exists(folder):
            os.mkdir(folder)
    # Get OSGeo driver for the shapefile
    driver = ogr.GetDriverByName("ESRI Shapefile")
    if driver is None:
        log.info('Driver not found')
    else:
        log.info('The driver has been located')
    # Use the driver to open and interpret the shapefile data (read-only)
    dataSource = driver.Open(shapefile, 0)
    if dataSource is None:
        # NOTE(review): execution continues after this log line; the loop below
        # would then fail on dataSource.GetLayer() -- confirm intended handling.
        log.info('Could not open %s ' % shapefile)
    else:
        log.info('Opened %s ' % shapefile)
    for scale_val in [0.25, 0.5, 0.75, 1]:  # , 1.25, 1.5, 1.75]:
        print("Scale: ", scale_val)
        # Iterate over all the feature IDs
        for fcode in category_map.keys():
            # Get the layer data (there's only 1 layer)
            layer = dataSource.GetLayer()
            # Get the layer's spatial reference
            spatialRef = layer.GetSpatialRef()
            log.info(f'SpatialReference{spatialRef.GetAuthorityCode(None)}')
            # Brute force get the category list with FCODEs (how I got the table earlier)
            # category_list = []
            # i = -1
            # for feature in layer:
            #     i += 1
            #     if i not in exclude:
            #         fcode = feature.GetFieldAsInteger("FCODE")
            #         if fcode not in category_list:
            #             category_list.append(fcode)
            # # print(category_list)
            # print("Num features:", i + 1)
            image_count = 1  # Used for determining if we have gotten enough images
            test_string = "fcode = " + str(fcode)
            layer.SetAttributeFilter(test_string)  # Get the features with the given fcode
            counter_for_feature_start = 0  # Used for offsetting the current feature we start at
            # Loop through all the features in the shapefile
            for feature in layer:
                # Get feature ID
                FID = feature.GetFID()
                # print("FID is:", FID)
                GNIS_ID = feature.GNIS_ID
                # NOTE(review): immediately overwritten by download()'s return below.
                features_downloaded_correctly = True
                # Used for modifying the segmentations for a bounding box after we scale it
                x_inc = 0
                y_inc = 0
                # If we are not at our starting offset yet just continue
                if counter_for_feature_start < starting_num:
                    counter_for_feature_start += 1
                    continue
                # Used for limiting any features that are not particularly insightful
                # (print only -- the actual skip happens in the combined condition below)
                if FID in exclude:
                    print("FID in excluded")
                if image_count == max_features_to_get:
                    break
                if min_FID <= FID <= max_FID and FID not in exclude and image_count <= max_features_to_get:
                    log.info(f'Starting FID {FID} Image: {image_count} Ann ID: {annotation_ID}')
                    # Get feature FCODE from osgeo.ogr.Feature object
                    # This corresponds to the feature's category id
                    fcode = feature.GetFieldAsInteger("FCODE")
                    # print("Fcode is:", fcode)
                    # Map the id if you're using all the feature classes
                    category_id = category_map[fcode]
                    # Use id = 1 for summits only
                    # category_id = 1
                    # print("Category id is:", category_id)
                    # Get the polygon geometry of the feature outline
                    # geom = ((long_0, lat_0, long_1, lat_1, ..., long_n, lat_n, long_0, lat_0))
                    geom = feature.GetGeometryRef()
                    # print("Geometry : ", geom)
                    # Get the geometry's minimum bounding envelope (perfect rectangle)
                    # env = (min_long, max_long, min_lat, max_lat)
                    env = geom.GetEnvelope()
                    # print("Env:", env)
                    # Variable parameters
                    image_w = 400
                    image_h = 400
                    # Below are for augmentations -- currently unused placeholders.
                    # We should mess with variable scale to make all features more visible
                    scale = 32000
                    x_offset = 0
                    y_offset = 0
                    rot_angle = 0
                    image_size = (image_w, image_h)
                    # print("Image Dimensions:", image_size)
                    any_download_url_for_extent, list_of_download_urls, list_of_download_folders, features_downloaded_correctly, bbox, new_bbox = download(
                        flags, scale_val, env, image_w, image_h, data_dir)
                    # Check to make sure that all the features downloaded correctly, otherwise this feature is
                    # ignored because we don't have proper information for it.
                    if features_downloaded_correctly:
                        print("features downloaded correctly loop")
                        # Get the calculated frame extent, which corresponds to the image's window size,
                        # from the download URL attribute.
                        print("Any_download_url_for_extent: ", any_download_url_for_extent)
                        ext_dict = any_download_url_for_extent['extent']
                        ext_arr = []
                        for key in ext_dict.keys():
                            ext_arr.append(ext_dict[key])
                        # Frame extent now in array [min_long, min_lat, max_long, max_lat].
                        # NOTE(review): relies on the service returning the extent keys in
                        # that insertion order -- confirm against a live response.
                        frame_extent = ext_arr[0:4]
                        # print("Final Frame Extent:", frame_extent)
                        # print("Bounding box: ", new_bbox)
                        # TODO:
                        # Include segmentations for all features within the current image boundary
                        # (might have different classes)
                        # Close polygon at edge of image if it goes out of the frame
                        file_name_for_coco = file_name + GNIS_ID + "Scale" + str(scale_val)
                        print("Filename for coco: " + file_name_for_coco)
                        # Generate the label for this image's segmentation and add it to the complete dataset.
                        # print(image_size)
                        image_and_annotation = get_COCO_formatted_label(
                            FID, file_name_for_coco, image_size, category_id, geom, bbox,
                            frame_extent, x_inc, y_inc, annotation_ID)
                        # print("Image name: ", image_and_annotation[0])
                        # Strip the "POLYGON ((" prefix / "))" suffix from the WKT string.
                        point_list = (str(geom)[10:-2]).split(',')
                        # image_id = image_and_annotation[1]['image_id']
                        print("Image id: ", image_count)
                        print("Point List: ", point_list)
                        filename_for_geography = file_name + GNIS_ID + "Scale" + str(scale_val) + '.jpg'
                        print("File name for geography: ", filename_for_geography)
                        # Repeat request until the image is properly downloaded for each of the selected service
                        # calls
                        number_of_bad_responses = 0
                        print("Length of download urls: ", len(list_of_download_urls))
                        saved = True
                        for download_link, download_folder in zip(list_of_download_urls, list_of_download_folders):
                            number_of_bad_responses = 0
                            while True:
                                try:
                                    # Attempt to download image from service
                                    response = requests.get(download_link['href'], stream=True, verify=False)
                                    # Get server response
                                    r = response.status_code
                                    # print("Response:", r)
                                    # Fail: count bad responses and give up after 20.
                                    if not response.ok:
                                        number_of_bad_responses += 1
                                        if number_of_bad_responses > 20:
                                            print("Number of bad responses: ", number_of_bad_responses)
                                            print("Breaking out because bad responses exceeded")
                                            saved = False
                                            break
                                        print("Number of bad responses: ", number_of_bad_responses)
                                        log.info('Bad response from server')
                                    # Success
                                    if r == 200:
                                        # NOTE(review): 3173 is presumably the byte size of the
                                        # service's blank/no-data JPEG -- confirm; brittle check.
                                        if len(response.content) == 3173:
                                            saved = False
                                            print("Image blank")
                                        else:
                                            # Open image save location file
                                            with open(download_folder + file_name + GNIS_ID + "Scale" + str(
                                                    scale_val) + '.jpg', 'wb') as handle:
                                                print("Image output: ", handle)
                                                for block in response.iter_content(1024):
                                                    while True:
                                                        try:
                                                            if not block:
                                                                log.info(
                                                                    'Failed to download FID {0}'.format(FID))
                                                                saved = False
                                                                failed.update({str(FID): str(GNIS_ID)})
                                                                break
                                                            # Write image to save location
                                                            handle.write(block)
                                                        except (ConnectionError, KeyError):
                                                            time.sleep(0.5)
                                                            continue
                                                        break
                                                # Redundant: the with-statement already closes the file.
                                                handle.close()
                                            log.info('Successfully saved FID {0} to {1}'.format(FID, download_folder))
                                except:
                                    # NOTE(review): bare except -- back off and retry on any error.
                                    time.sleep(1)
                                else:
                                    # No exception: retry on HTTP 400, otherwise move to next layer.
                                    if r == 400:
                                        time.sleep(0.5)
                                        continue
                                    break
                        if saved:
                            # First part of label is the image part, second is the annotation
                            COCO_dataset_dict["images"].append(image_and_annotation[0])
                            COCO_dataset_dict["annotations"].append(image_and_annotation[1])
                            img_to_geo.set_geo_img_reference(filename_for_geography, new_bbox)
                            # Open label save location and write content to file
                            with open(label_folder + file_name + GNIS_ID + "Scale" + str(scale_val) + '.txt',
                                      'w') as label_file:
                                # print("Image we are creating: ", image_and_annotation[1])
                                label_file.write(str(image_and_annotation[1]))
                                # Redundant: the with-statement already closes the file.
                                label_file.close()
                            image_count += 1
                            annotation_ID += 1
                        # Plot the feature's outline (only for HTMC now, but just copy/paste if you need other layers)
                        # Assuming that 1 is the standard scale and that's the one we want to plot on.
                        # Could also plot for each scale of image
                        image_path = htmc_folder + file_name + str(GNIS_ID) + ".jpg"  # "Scale1.jpg"
                        plot_path = plotted_folder + file_name + str(GNIS_ID) + "_htmc.jpg"
                        # Params: image_path, label_path, save_path="./plotted", show_enabled=False
                        seg = image_and_annotation[1]["segmentation"]
                        bb = image_and_annotation[1]["bbox"]
                        try:
                            run_plot_from_segmentation(image_path, seg, bb, plot_path)
                        except FileNotFoundError:
                            print("Image doesn't exist yet -- ignoring plot for this image")
                        # Output completed status
                        log.info("Completed FID {}".format(FID))
                        # Add FID to list
                        FID_list.append(FID)
    print("Outside of main loop")
    # TODO: Check labels (probably a good idea but not strictly necessary)
    # label_checker(label_folder)
    # After all the images have been downloaded and labels created, we can finally make and save the dataset
    # information file that the machine learning model needs to run.
    with open(label_folder + 'dataset.json', 'w') as label_file:
        print("Made dataset file")
        json.dump(COCO_dataset_dict, label_file, indent=4)
        # Redundant: the with-statement already closes the file.
        label_file.close()
    img_to_geo.write_img_to_geo_to_file(img_coord_file)
    # print("All segments: ", list_of_all_segments_encountered)