Database#

Classes and functions to interact with databases of publications.

Models#

Abstract description of GISMAP's DB interface.

class gismap.sources.models.Author(name: str)[source]#
class gismap.sources.models.DB[source]#
class gismap.sources.models.Publication(title: str, authors: list, venue: str, type: str, year: int)[source]#

DBLP (online)#

Interface for the dblp computer science bibliography (https://dblp.org/).

class gismap.sources.dblp.DBLP[source]#
classmethod from_author(a, wait=True)[source]#
Parameters:
  • a (DBLPAuthor) – DBLP researcher.

  • wait (bool) – Wait a bit to avoid HTTP 429 (Too Many Requests) errors.

Returns:

Papers available in DBLP.

Return type:

list

Examples

>>> fabien = DBLPAuthor('Fabien Mathieu', key='66/2077')
>>> publications = sorted(DBLP.from_author(fabien),
...                 key=lambda p: p.title)
>>> publications[0] 
DBLPPublication(title='Achievable catalog size in peer-to-peer video-on-demand systems.',
authors=[DBLPAuthor(name='Yacine Boufkhad', key='75/5742'), DBLPAuthor(name='Fabien Mathieu', key='66/2077'),
DBLPAuthor(name='Fabien de Montgolfier', key='57/6313'), DBLPAuthor(name='Diego Perino', key='03/3645'),
DBLPAuthor(name='Laurent Viennot', key='v/LaurentViennot')],
venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08',
url='https://dblp.org/rec/conf/iptps/BoufkhadMMPV08.html', pages=4)
>>> publications[-1] 
DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
key='conf/sss/Mathieu07', url='https://dblp.org/rec/conf/sss/Mathieu07.html', pages='372-382')
classmethod search_author(name, wait=True)[source]#
Parameters:
  • name (str) – Name of the person to find.

  • wait (bool) – Wait a bit to avoid HTTP 429 (Too Many Requests) errors.

Returns:

Potential matches.

Return type:

list

Examples

>>> fabien = DBLP.search_author("Fabien Mathieu")
>>> fabien
[DBLPAuthor(name='Fabien Mathieu', key='66/2077')]
>>> fabien[0].url
'https://dblp.org/pid/66/2077.html'
>>> manu = DBLP.search_author("Manuel Barragan")
>>> manu 
[DBLPAuthor(name='Manuel Barragan', key='07/10587'),
DBLPAuthor(name='Manuel Barragan', key='83/3865'),
DBLPAuthor(name='Manuel Barragan', key='188/0198')]
>>> DBLP.search_author("NotaSearcherName", wait=False)
[]
class gismap.sources.dblp.DBLPAuthor(name: str, key: str, aliases: list = <factory>)[source]#
class gismap.sources.dblp.DBLPPublication(title: str, authors: list, venue: str, type: str, year: int, key: str, url: str = None, pages: str = None, volume: int = None, number: int = None)[source]#

HAL#

Interface for HyperArticles en Ligne (https://hal.science/).

class gismap.sources.hal.HAL[source]#
classmethod from_author(a)[source]#
Parameters:

a (HALAuthor) – HAL researcher.

Returns:

Papers available in HAL.

Return type:

list

Examples

>>> fabien = HAL.search_author("Fabien Mathieu")[0]
>>> publications = sorted(fabien.get_publications(), key=lambda p: p.title)
>>> publications[2] 
HALPublication(title='Achievable Catalog Size in Peer-to-Peer Video-on-Demand Systems',
authors=[HALAuthor(name='Yacine Boufkhad', key='yacine-boufkhad'),
HALAuthor(name='Fabien Mathieu', key='fabien-mathieu'),
HALAuthor(name='Fabien de Montgolfier', key='949013', key_type='pid'),
HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname'),
HALAuthor(name='Laurent Viennot', key='laurentviennot')],
venue='Proceedings of the 7th Internnational Workshop on Peer-to-Peer Systems (IPTPS)', type='conference',
year=2008, key='471724', url='https://inria.hal.science/inria-00471724v1')
>>> diego = publications[2].authors[3]
>>> diego
HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname')
>>> len(diego.get_publications())
28
>>> publications[-7] 
HALPublication(title='Upper bounds for stabilization in acyclic preference-based systems',
authors=[HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')],
venue="SSS'07 - 9th international conference on Stabilization, Safety, and Security of Distributed Systems",
type='conference', year=2007, key='668356', url='https://inria.hal.science/hal-00668356v1')

Case of someone with multiple ids that one wants to cumulate:

>>> maria = HAL.search_author('Maria Potop-Butucaru')
>>> maria  
[HALAuthor(name='Maria Potop-Butucaru', key='858256', key_type='pid'),
HALAuthor(name='Maria Potop-Butucaru', key='841868', key_type='pid')]
>>> len(HAL.from_author(maria[0]))
26
>>> len(maria[1].get_publications())
123

Note: an error is raised if not enough data is provided:

>>> HAL.from_author(HALAuthor('Fabien Mathieu'))
Traceback (most recent call last):
...
ValueError: HALAuthor(name='Fabien Mathieu') must have a key for publications to be fetched.
classmethod search_author(name)[source]#
Parameters:

name (str) – Name of the person to find.

Returns:

Potential matches.

Return type:

list

Examples

>>> fabien = HAL.search_author("Fabien Mathieu")
>>> fabien
[HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')]
>>> fabien = fabien[0]
>>> fabien.url
'https://hal.science/search/index/?q=*&authIdHal_s=fabien-mathieu'
>>> HAL.search_author("Laurent Viennot")[0]
HALAuthor(name='Laurent Viennot', key='laurentviennot')
>>> HAL.search_author("NotaSearcherName")
[]
>>> HAL.search_author("Ana Busic")
[HALAuthor(name='Ana Busic', key='anabusic')]
>>> HAL.search_author("Potop-Butucaru Maria")  
[HALAuthor(name='Potop-Butucaru Maria', key='858256', key_type='pid'),
HALAuthor(name='Potop-Butucaru Maria', key='841868', key_type='pid')]
>>> diego = HAL.search_author("Diego Perino")
>>> diego  
[HALAuthor(name='Diego Perino', key='847558', key_type='pid'),
HALAuthor(name='Diego Perino', key='978810', key_type='pid')]
>>> diego[1].url
'https://hal.science/search/index/?q=*&authIdPerson_i=978810'
class gismap.sources.hal.HALAuthor(name: str, key: str | int = None, key_type: str = None, aliases: list = <factory>)[source]#
class gismap.sources.hal.HALPublication(title: str, authors: list, venue: str, type: str, year: int, key: str, abstract: str = None, url: str = None)[source]#
classmethod from_json(r)[source]#
Parameters:

r (dict) – De-serialized JSON.

Return type:

HALPublication

gismap.sources.hal.parse_facet_author(a)[source]#
Parameters:

a (str) – HAL facet of an author.

Return type:

HALAuthor

Multi-source#

Interface for handling multiple sources at once.

class gismap.sources.multi.SourcedAuthor(name: str, sources: list = <factory>)[source]#
class gismap.sources.multi.SourcedPublication(title: str, authors: list, venue: str, type: str, year: int, key: str, sources: list = <factory>)[source]#
gismap.sources.multi.regroup_authors(auth_dict, pub_dict)[source]#

Replace authors of publications with matching authors. Typical use: upgrade DB-specific authors to multisource authors.

Replacement is in place.

Parameters:
  • auth_dict (dict) – Authors to unify.

  • pub_dict (dict) – Publications to unify.

Return type:

None

gismap.sources.multi.regroup_publications(pub_dict, threshold=90, length_impact=0.08)[source]#

Puts together copies of the same publication.

Parameters:
  • pub_dict (dict) – Publications to unify.

  • threshold (float) – Similarity parameter.

  • length_impact (float) – Length impact parameter.

Returns:

Unified publications.

Return type:

dict