Publications (new)
2024
Sánchez-Olivares, E.; Boekhout, H. D.; Saxena, A.; Takes, F. W.
A Framework for Empirically Evaluating Pretrained Link Prediction Models (Proceedings Article)
In: Cherifi, H.; Rocha, L. M.; Cherifi, C.; Donduran, M. (Eds.): Complex Networks & Their Applications XII. Proceedings of the 12th International Conference on Complex Networks (Complex Networks 2023), pp. 150–161, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-53468-3.
Abstract | Links | BibTeX | Tags: Link Prediction, Pretrained Models, Transfer Learning
@inproceedings{sanchez_olivares_framework_2024,
title = {A Framework for Empirically Evaluating Pretrained Link Prediction Models},
author = {E. Sánchez-Olivares and H. D. Boekhout and A. Saxena and F. W. Takes},
editor = {H. Cherifi and L. M. Rocha and C. Cherifi and M. Donduran},
doi = {10.1007/978-3-031-53468-3_13},
isbn = {978-3-031-53468-3},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
booktitle = {Complex Networks & Their Applications XII. Proceedings of the 12th International Conference on Complex Networks (Complex Networks 2023)},
pages = {150–161},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {This paper proposes a novel framework for empirically assessing the effect of network characteristics on the performance of pretrained link prediction models. In link prediction, the task is to predict missing or future links in a given network dataset. We focus on the pretrained setting, in which such a predictive model is trained on one dataset, and employed on another dataset. The framework allows one to overcome a number of nontrivial challenges in adequately testing the performance of such a pretrained model in a proper cross-validated setting. Experiments are performed on a corpus of 49 structurally diverse real-world complex network datasets from various domains with up to hundreds of thousands of nodes and edges. Overall results indicate that the extent to which a network is clustered is strongly related to whether this network is a suitable candidate to create a pretrained model on. Moreover, we systematically assessed the relationship between topological similarity and performance difference of pretrained models and a model trained on the same data. We find that similar network pairs in terms of clustering coefficient, and to a lesser extent degree assortativity and gini coefficient, yield minimal performance difference. The findings presented in this work pave the way for automated model selection based on topological similarity of the networks, as well as larger-scale deployment of pretrained link prediction models for transfer learning.},
keywords = {Link Prediction, Pretrained Models, Transfer Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
This paper proposes a novel framework for empirically assessing the effect of network characteristics on the performance of pretrained link prediction models. In link prediction, the task is to predict missing or future links in a given network dataset. We focus on the pretrained setting, in which such a predictive model is trained on one dataset, and employed on another dataset. The framework allows one to overcome a number of nontrivial challenges in adequately testing the performance of such a pretrained model in a proper cross-validated setting. Experiments are performed on a corpus of 49 structurally diverse real-world complex network datasets from various domains with up to hundreds of thousands of nodes and edges. Overall results indicate that the extent to which a network is clustered is strongly related to whether this network is a suitable candidate to create a pretrained model on. Moreover, we systematically assessed the relationship between topological similarity and performance difference of pretrained models and a model trained on the same data. We find that similar network pairs in terms of clustering coefficient, and to a lesser extent degree assortativity and gini coefficient, yield minimal performance difference. The findings presented in this work pave the way for automated model selection based on topological similarity of the networks, as well as larger-scale deployment of pretrained link prediction models for transfer learning.