"""Module containing a data wrangler base class for resolving taxon names."""
import csv
import json
import time
import urllib
import urllib.parse

import requests

from lmpy.data_wrangling.base import _DataWrangler
# .....................................................................................
def resolve_names_gbif(names, wait_time=.5):
    """Resolve names using GBIF's taxonomic name resolution service.

    One request is sent to GBIF's species match endpoint for each name.

    Args:
        names (list of str): A list of name strings to resolve.
        wait_time (number or None): A number of seconds to wait after each request
            to avoid server ire.  If None, no waiting is done.

    Returns:
        dict: Input names are keys and resolved name or None are values.
    """
    resolved_names = {}
    for name_str in names:
        # Build the query for this name (whitespace around the name is ignored)
        query = urllib.parse.urlencode({'name': name_str.strip(), 'verbose': 'true'})
        url = 'http://api.gbif.org/v1/species/match?{}'.format(query)
        response = requests.get(url).json()
        # A response for a name that could not be matched may not include the
        #    'status' (or 'canonicalName') key, so look them up defensively and
        #    treat a missing status as "not resolved" instead of raising KeyError.
        status = response.get('status') or ''
        if status.lower() in ('accepted', 'synonym'):
            resolved_names[name_str] = response.get('canonicalName')
        else:
            resolved_names[name_str] = None
        if wait_time is not None:
            time.sleep(wait_time)
    return resolved_names
# .....................................................................................
class _AcceptedNameWrangler(_DataWrangler):
    """Base class for accepted taxon name wranglers.

    Keeps a mapping of input names to accepted names, optionally extends it with a
    resolver function, and can read / write that mapping as JSON or CSV.
    """
    # Header row written at the top of CSV name map files
    _CSV_HEADER = ['Name', 'Accepted']

    # .......................
    def __init__(self, name_map=None, name_resolver=None):
        """Constructor for the base accepted name wrangler.

        Args:
            name_map (dict or str or None): An existing name mapping, either as a
                dictionary or as the filename of a JSON or CSV name map file.
            name_resolver (Method or None): If provided, this should be a function that
                takes a list of names as input and returns a dictionary of name
                mappings.  If omitted, resolving of new names will be skipped.
        """
        if name_map is not None:
            self._load_name_map(name_map)
        else:
            # Mapping of input name -> accepted name (None if it has no match)
            self.name_map = {}
        self._name_resolver = name_resolver

    # .......................
    def _load_name_map(self, name_map):
        """Attempt to load names from the name_map provided.

        A dictionary is used as-is (not copied).  Otherwise, name_map is treated as
        a filename that is read as JSON first and, if it is not valid JSON, as a two
        column CSV file.

        Args:
            name_map (dict or str): A mapping dictionary or a filename with names.

        Raises:
            ValueError: Raised if a CSV row does not have exactly two columns.
        """
        if isinstance(name_map, dict):
            self.name_map = name_map
            return

        self.name_map = {}
        try:
            # Try to load JSON names
            with open(name_map, mode='rt') as in_json:
                self.name_map = json.load(in_json)
        except json.JSONDecodeError:  # Not a valid json file, try csv
            with open(name_map, mode='rt', newline='') as in_csv:
                reader = csv.reader(in_csv)
                for row in reader:
                    fields = [field.strip() for field in row]
                    # Skip blank lines and the header row that write_map_to_file
                    #    adds so that it is not loaded as a name mapping
                    if not any(fields) or fields == self._CSV_HEADER:
                        continue
                    if len(fields) != 2:
                        raise ValueError(
                            'Expected 2 columns on line {} of {}, found {}'.format(
                                reader.line_num, name_map, len(fields)))
                    in_name, out_name = fields
                    # Unresolved names are written as empty strings (older files
                    #    have the text 'None'), restore them as None
                    if out_name in ('', 'None'):
                        out_name = None
                    self.name_map[in_name] = out_name

    # .......................
    def resolve_names(self, names):
        """Attempts to resolve a list of names.

        Names already in the name map are answered from it.  The remaining names
        are passed to the name resolver, if there is one, and the results are added
        to the name map.  Names that cannot be resolved map to None.

        Args:
            names (list or str): A list of names to resolve.

        Returns:
            dict: A dictionary of input name keys and resolved name values.
        """
        if isinstance(names, str):
            names = [names]
        resolved_names = {}
        unmatched_names = []
        for name in names:
            if name in self.name_map:
                resolved_names[name] = self.name_map[name]
            elif name not in resolved_names:
                # First time this unknown name is seen, repeated names are only
                #    sent to the resolver once
                unmatched_names.append(name)
                resolved_names[name] = None
        # If we have a name resolver and names to resolve, do it
        if self._name_resolver is not None and unmatched_names:
            new_names = self._name_resolver(unmatched_names)
            # Update name map and return dictionary
            self.name_map.update(new_names)
            resolved_names.update(new_names)
        return resolved_names

    # .......................
    def write_map_to_file(self, filename, output_format, mode='wt'):
        """Write the name map to a file so it can be reused.

        Args:
            filename (str): A file location where the map should be written.
            output_format (str): The format to write the map, either 'csv' or 'json'.
                Any value other than 'json' (case-insensitive) is written as CSV.
            mode (str): How the file should be opened.
        """
        if output_format.lower() == 'json':
            with open(filename, mode=mode) as out_json:
                json.dump(self.name_map, out_json)
        else:
            with open(filename, mode=mode, newline='') as out_csv:
                # The csv writer quotes names that contain commas so that the file
                #    can be read back by _load_name_map
                writer = csv.writer(out_csv, lineterminator='\n')
                writer.writerow(self._CSV_HEADER)
                for in_name, out_name in self.name_map.items():
                    # Write unresolved names as an empty value, not the text 'None'
                    writer.writerow([in_name, '' if out_name is None else out_name])